/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.36 - (show annotations) (download)
Sun Feb 10 07:35:23 2008 UTC (16 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.35: +4 -3 lines
++ ChangeLog	10 Feb 2008 07:35:19 -0000
	* cc.cgi: In CSS mode, add 'u' error for 'structure' category (until
	it is actually implemented).  Support for '-moz-pre-wrap'.
	Typo in 'collapse' value fixed.

2008-02-10  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89 ];
90
91 $input->{id_prefix} = '';
92 #$input->{nested} = 0;
93 my $result = {conforming_min => 1, conforming_max => 1};
94 check_and_print ($input => $result);
95 print_result_section ($result);
96 } else {
97 print STDOUT qq[</dl></div>];
98 print_result_input_error_section ($input);
99 }
100
101 print STDOUT qq[
102 <ul class="navigation" id="nav-items">
103 ];
104 for (@nav) {
105 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
106 }
107 print STDOUT qq[
108 </ul>
109 </body>
110 </html>
111 ];
112
113 for (qw/decode parse parse_html parse_xml parse_manifest
114 check check_manifest/) {
115 next unless defined $time{$_};
116 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
117 print $file $char_length, "\t", $time{$_}, "\n";
118 }
119
120 exit;
121 }
122
123 sub add_error ($$$) {
124 my ($layer, $err, $result) = @_;
125 if (defined $err->{level}) {
126 if ($err->{level} eq 's') {
127 $result->{$layer}->{should}++;
128 $result->{$layer}->{score_min} -= 2;
129 $result->{conforming_min} = 0;
130 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
131 $result->{$layer}->{warning}++;
132 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
133 $result->{$layer}->{unsupported}++;
134 $result->{unsupported} = 1;
135 } else {
136 $result->{$layer}->{must}++;
137 $result->{$layer}->{score_max} -= 2;
138 $result->{$layer}->{score_min} -= 2;
139 $result->{conforming_min} = 0;
140 $result->{conforming_max} = 0;
141 }
142 } else {
143 $result->{$layer}->{must}++;
144 $result->{$layer}->{score_max} -= 2;
145 $result->{$layer}->{score_min} -= 2;
146 $result->{conforming_min} = 0;
147 $result->{conforming_max} = 0;
148 }
149 } # add_error
150
151 sub check_and_print ($$) {
152 my ($input, $result) = @_;
153
154 print_http_header_section ($input, $result);
155
156 my $doc;
157 my $el;
158 my $cssom;
159 my $manifest;
160 my @subdoc;
161
162 if ($input->{media_type} eq 'text/html') {
163 ($doc, $el) = print_syntax_error_html_section ($input, $result);
164 print_source_string_section
165 ($input,
166 \($input->{s}),
167 $input->{charset} || $doc->input_encoding);
168 } elsif ({
169 'text/xml' => 1,
170 'application/atom+xml' => 1,
171 'application/rss+xml' => 1,
172 'application/svg+xml' => 1,
173 'application/xhtml+xml' => 1,
174 'application/xml' => 1,
175 }->{$input->{media_type}}) {
176 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
177 print_source_string_section ($input,
178 \($input->{s}),
179 $doc->input_encoding);
180 } elsif ($input->{media_type} eq 'text/css') {
181 $cssom = print_syntax_error_css_section ($input, $result);
182 print_source_string_section
183 ($input, \($input->{s}),
184 $cssom->manakai_input_encoding);
185 } elsif ($input->{media_type} eq 'text/cache-manifest') {
186 ## TODO: MUST be text/cache-manifest
187 $manifest = print_syntax_error_manifest_section ($input, $result);
188 print_source_string_section ($input, \($input->{s}),
189 'utf-8');
190 } else {
191 ## TODO: Change HTTP status code??
192 print_result_unknown_type_section ($input, $result);
193 }
194
195 if (defined $doc or defined $el) {
196 $doc->document_uri ($input->{uri});
197 $doc->manakai_entity_base_uri ($input->{base_uri});
198 print_structure_dump_dom_section ($input, $doc, $el);
199 my $elements = print_structure_error_dom_section
200 ($input, $doc, $el, $result, sub {
201 push @subdoc, shift;
202 });
203 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
204 print_listing_section ({
205 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
206 }, $input, $elements->{id}) if keys %{$elements->{id}};
207 print_listing_section ({
208 id => 'terms', label => 'Terms', heading => 'Terms',
209 }, $input, $elements->{term}) if keys %{$elements->{term}};
210 print_listing_section ({
211 id => 'classes', label => 'Classes', heading => 'Classes',
212 }, $input, $elements->{class}) if keys %{$elements->{class}};
213 } elsif (defined $cssom) {
214 print_structure_dump_cssom_section ($input, $cssom);
215 ## TODO: CSSOM validation
216 add_error ('structure', {level => 'u'} => $result);
217 } elsif (defined $manifest) {
218 print_structure_dump_manifest_section ($input, $manifest);
219 print_structure_error_manifest_section ($input, $manifest, $result);
220 }
221
222 my $id_prefix = 0;
223 for my $subinput (@subdoc) {
224 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
225 $subinput->{nested} = 1;
226 $subinput->{base_uri} = $subinput->{container_node}->base_uri
227 unless defined $subinput->{base_uri};
228 my $ebaseuri = htescape ($subinput->{base_uri});
229 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
230 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
231 <h2>Subdocument #$id_prefix</h2>
232
233 <dl>
234 <dt>Internet Media Type</dt>
235 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
236 <dt>Container Node</dt>
237 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
238 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
239 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
240 </dl>];
241
242 $subinput->{id_prefix} .= '-';
243 check_and_print ($subinput => $result);
244
245 print STDOUT qq[</div>];
246 }
247 } # check_and_print
248
249 sub print_http_header_section ($$) {
250 my ($input, $result) = @_;
251 return unless defined $input->{header_status_code} or
252 defined $input->{header_status_text} or
253 @{$input->{header_field} or []};
254
255 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
256 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
257 <h2>HTTP Header</h2>
258
259 <p><strong>Note</strong>: Due to the limitation of the
260 network library in use, the content of this section might
261 not be the real header.</p>
262
263 <table><tbody>
264 ];
265
266 if (defined $input->{header_status_code}) {
267 print STDOUT qq[<tr><th scope="row">Status code</th>];
268 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
269 }
270 if (defined $input->{header_status_text}) {
271 print STDOUT qq[<tr><th scope="row">Status text</th>];
272 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
273 }
274
275 for (@{$input->{header_field}}) {
276 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
277 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
278 }
279
280 print STDOUT qq[</tbody></table></div>];
281 } # print_http_header_section
282
283 sub print_syntax_error_html_section ($$) {
284 my ($input, $result) = @_;
285
286 require Encode;
287 require Whatpm::HTML;
288
289 print STDOUT qq[
290 <div id="$input->{id_prefix}parse-errors" class="section">
291 <h2>Parse Errors</h2>
292
293 <dl>];
294 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
295
296 my $onerror = sub {
297 my (%opt) = @_;
298 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
299 if ($opt{column} > 0) {
300 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
301 } else {
302 $opt{line} = $opt{line} - 1 || 1;
303 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{line}">Line $opt{line}</a></dt>\n];
304 }
305 $type =~ tr/ /-/;
306 $type =~ s/\|/%7C/g;
307 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
308 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
309 print STDOUT qq[$msg</dd>\n];
310
311 add_error ('syntax', \%opt => $result);
312 };
313
314 my $doc = $dom->create_document;
315 my $el;
316 my $inner_html_element = $input->{inner_html_element};
317 if (defined $inner_html_element and length $inner_html_element) {
318 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
319 my $time1 = time;
320 my $t = Encode::decode ($input->{charset}, $input->{s});
321 $time{decode} = time - $time1;
322
323 $el = $doc->create_element_ns
324 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
325 $time1 = time;
326 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
327 $time{parse} = time - $time1;
328 } else {
329 my $time1 = time;
330 Whatpm::HTML->parse_byte_string
331 ($input->{charset}, $input->{s} => $doc, $onerror);
332 $time{parse_html} = time - $time1;
333 }
334 $doc->manakai_charset ($input->{official_charset})
335 if defined $input->{official_charset};
336
337 print STDOUT qq[</dl></div>];
338
339 return ($doc, $el);
340 } # print_syntax_error_html_section
341
342 sub print_syntax_error_xml_section ($$) {
343 my ($input, $result) = @_;
344
345 require Message::DOM::XMLParserTemp;
346
347 print STDOUT qq[
348 <div id="$input->{id_prefix}parse-errors" class="section">
349 <h2>Parse Errors</h2>
350
351 <dl>];
352 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
353
354 my $onerror = sub {
355 my $err = shift;
356 my $line = $err->location->line_number;
357 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
358 print STDOUT $err->location->column_number, "</dt><dd>";
359 print STDOUT htescape $err->text, "</dd>\n";
360
361 add_error ('syntax', {type => $err->text,
362 level => [
363 $err->SEVERITY_FATAL_ERROR => 'm',
364 $err->SEVERITY_ERROR => 'm',
365 $err->SEVERITY_WARNING => 's',
366 ]->[$err->severity]} => $result);
367
368 return 1;
369 };
370
371 my $time1 = time;
372 open my $fh, '<', \($input->{s});
373 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
374 ($fh => $dom, $onerror, charset => $input->{charset});
375 $time{parse_xml} = time - $time1;
376 $doc->manakai_charset ($input->{official_charset})
377 if defined $input->{official_charset};
378
379 print STDOUT qq[</dl></div>];
380
381 return ($doc, undef);
382 } # print_syntax_error_xml_section
383
384 sub get_css_parser () {
385 our $CSSParser;
386 return $CSSParser if $CSSParser;
387
388 require Whatpm::CSS::Parser;
389 my $p = Whatpm::CSS::Parser->new;
390
391 # if ($parse_mode eq 'q') {
392 # $p->{unitless_px} = 1;
393 # $p->{hashless_color} = 1;
394 # }
395
396 $p->{prop}->{$_} = 1 for qw/
397 background background-attachment background-color background-image
398 background-position background-position-x background-position-y
399 background-repeat border border-bottom border-bottom-color
400 border-bottom-style border-bottom-width border-collapse border-color
401 border-left border-left-color
402 border-left-style border-left-width border-right border-right-color
403 border-right-style border-right-width
404 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
405 border-style border-top border-top-color border-top-style border-top-width
406 border-width bottom
407 caption-side clear clip color content counter-increment counter-reset
408 cursor direction display empty-cells float font
409 font-family font-size font-size-adjust font-stretch
410 font-style font-variant font-weight height left
411 letter-spacing line-height
412 list-style list-style-image list-style-position list-style-type
413 margin margin-bottom margin-left margin-right margin-top marker-offset
414 marks max-height max-width min-height min-width opacity -moz-opacity
415 orphans outline outline-color outline-style outline-width overflow
416 overflow-x overflow-y
417 padding padding-bottom padding-left padding-right padding-top
418 page page-break-after page-break-before page-break-inside
419 position quotes right size table-layout
420 text-align text-decoration text-indent text-transform
421 top unicode-bidi vertical-align visibility white-space width widows
422 word-spacing z-index
423 /;
424 $p->{prop_value}->{display}->{$_} = 1 for qw/
425 block clip inline inline-block inline-table list-item none
426 table table-caption table-cell table-column table-column-group
427 table-header-group table-footer-group table-row table-row-group
428 compact marker
429 /;
430 $p->{prop_value}->{position}->{$_} = 1 for qw/
431 absolute fixed relative static
432 /;
433 $p->{prop_value}->{float}->{$_} = 1 for qw/
434 left right none
435 /;
436 $p->{prop_value}->{clear}->{$_} = 1 for qw/
437 left right none both
438 /;
439 $p->{prop_value}->{direction}->{ltr} = 1;
440 $p->{prop_value}->{direction}->{rtl} = 1;
441 $p->{prop_value}->{marks}->{crop} = 1;
442 $p->{prop_value}->{marks}->{cross} = 1;
443 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
444 normal bidi-override embed
445 /;
446 for my $prop_name (qw/overflow overflow-x overflow-y/) {
447 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
448 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
449 /;
450 }
451 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
452 visible hidden collapse
453 /;
454 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
455 disc circle square decimal decimal-leading-zero
456 lower-roman upper-roman lower-greek lower-latin
457 upper-latin armenian georgian lower-alpha upper-alpha none
458 hebrew cjk-ideographic hiragana katakana hiragana-iroha
459 katakana-iroha
460 /;
461 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
462 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
463 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
464 auto always avoid left right
465 /;
466 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
467 auto always avoid left right
468 /;
469 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
470 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
471 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
472 repeat repeat-x repeat-y no-repeat
473 /;
474 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
475 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
476 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
477 xx-small x-small small medium large x-large xx-large
478 -manakai-xxx-large -webkit-xxx-large
479 larger smaller
480 /;
481 $p->{prop_value}->{'font-style'}->{normal} = 1;
482 $p->{prop_value}->{'font-style'}->{italic} = 1;
483 $p->{prop_value}->{'font-style'}->{oblique} = 1;
484 $p->{prop_value}->{'font-variant'}->{normal} = 1;
485 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
486 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
487 qw/normal wider narrower ultra-condensed extra-condensed
488 condensed semi-condensed semi-expanded expanded
489 extra-expanded ultra-expanded/;
490 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
491 left right center justify begin end
492 /;
493 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
494 capitalize uppercase lowercase none
495 /;
496 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
497 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
498 /;
499 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
500 none blink underline overline line-through
501 /;
502 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
503 top bottom left right
504 /;
505 $p->{prop_value}->{'table-layout'}->{auto} = 1;
506 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
507 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
508 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
509 $p->{prop_value}->{'empty-cells'}->{show} = 1;
510 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
511 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
512 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
513 se-resize sw-resize s-resize w-resize text wait help progress
514 /;
515 for my $prop (qw/border-top-style border-left-style
516 border-bottom-style border-right-style outline-style/) {
517 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
518 none hidden dotted dashed solid double groove ridge inset outset
519 /;
520 }
521 for my $prop (qw/color background-color
522 border-bottom-color border-left-color border-right-color
523 border-top-color border-color/) {
524 $p->{prop_value}->{$prop}->{transparent} = 1;
525 $p->{prop_value}->{$prop}->{flavor} = 1;
526 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
527 }
528 $p->{prop_value}->{'outline-color'}->{invert} = 1;
529 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
530 $p->{pseudo_class}->{$_} = 1 for qw/
531 active checked disabled empty enabled first-child first-of-type
532 focus hover indeterminate last-child last-of-type link only-child
533 only-of-type root target visited
534 lang nth-child nth-last-child nth-of-type nth-last-of-type not
535 -manakai-contains -manakai-current
536 /;
537 $p->{pseudo_element}->{$_} = 1 for qw/
538 after before first-letter first-line
539 /;
540
541 return $CSSParser = $p;
542 } # get_css_parser
543
544 sub print_syntax_error_css_section ($$) {
545 my ($input, $result) = @_;
546
547 print STDOUT qq[
548 <div id="$input->{id_prefix}parse-errors" class="section">
549 <h2>Parse Errors</h2>
550
551 <dl>];
552 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
553
554 my $p = get_css_parser ();
555 $p->{onerror} = sub {
556 my (%opt) = @_;
557 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
558 if ($opt{token}) {
559 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
560 } else {
561 print STDOUT qq[<dt class="$cls">Unknown location];
562 }
563 if (defined $opt{value}) {
564 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
565 } elsif (defined $opt{token}) {
566 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
567 }
568 $type =~ tr/ /-/;
569 $type =~ s/\|/%7C/g;
570 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
571 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
572 print STDOUT qq[$msg</dd>\n];
573
574 add_error ('syntax', \%opt => $result);
575 };
576 $p->{href} = $input->{uri};
577 $p->{base_uri} = $input->{base_uri};
578
579 my $s = \$input->{s};
580 my $charset;
581 unless ($input->{is_char_string}) {
582 require Encode;
583 if (defined $input->{charset}) {## TODO: IANA->Perl
584 $charset = $input->{charset};
585 $s = \(Encode::decode ($input->{charset}, $$s));
586 } else {
587 ## TODO: charset detection
588 $s = \(Encode::decode ($charset = 'utf-8', $$s));
589 }
590 }
591
592 my $cssom = $p->parse_char_string ($$s);
593 $cssom->manakai_input_encoding ($charset) if defined $charset;
594
595 print STDOUT qq[</dl></div>];
596
597 return $cssom;
598 } # print_syntax_error_css_section
599
600 sub print_syntax_error_manifest_section ($$) {
601 my ($input, $result) = @_;
602
603 require Whatpm::CacheManifest;
604
605 print STDOUT qq[
606 <div id="$input->{id_prefix}parse-errors" class="section">
607 <h2>Parse Errors</h2>
608
609 <dl>];
610 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
611
612 my $onerror = sub {
613 my (%opt) = @_;
614 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
615 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
616 qq[</dt>];
617 $type =~ tr/ /-/;
618 $type =~ s/\|/%7C/g;
619 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
620 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
621 print STDOUT qq[$msg</dd>\n];
622
623 add_error ('syntax', \%opt => $result);
624 };
625
626 my $time1 = time;
627 my $manifest = Whatpm::CacheManifest->parse_byte_string
628 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
629 $time{parse_manifest} = time - $time1;
630
631 print STDOUT qq[</dl></div>];
632
633 return $manifest;
634 } # print_syntax_error_manifest_section
635
636 sub print_source_string_section ($$$) {
637 my $input = shift;
638 my $s;
639 unless ($input->{is_char_string}) {
640 require Encode;
641 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
642 return unless $enc;
643
644 $s = \($enc->decode (${$_[0]}));
645 } else {
646 $s = $_[0];
647 }
648
649 my $i = 1;
650 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
651 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
652 <h2>Document Source</h2>
653 <ol lang="">\n];
654 if (length $$s) {
655 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
656 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
657 "</li>\n";
658 $i++;
659 }
660 if ($$s =~ /\G([^\x0A]+)/gc) {
661 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
662 "</li>\n";
663 }
664 } else {
665 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
666 }
667 print STDOUT "</ol></div>";
668 } # print_input_string_section
669
670 sub print_document_tree ($$) {
671 my ($input, $node) = @_;
672
673 my $r = '<ol class="xoxo">';
674
675 my @node = ($node);
676 while (@node) {
677 my $child = shift @node;
678 unless (ref $child) {
679 $r .= $child;
680 next;
681 }
682
683 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
684 my $nt = $child->node_type;
685 if ($nt == $child->ELEMENT_NODE) {
686 my $child_nsuri = $child->namespace_uri;
687 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
688 '</code>'; ## ISSUE: case
689
690 if ($child->has_attributes) {
691 $r .= '<ul class="attributes">';
692 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
693 @{$child->attributes}) {
694 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
695 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
696 }
697 $r .= '</ul>';
698 }
699
700 if ($child->has_child_nodes) {
701 $r .= '<ol class="children">';
702 unshift @node, @{$child->child_nodes}, '</ol></li>';
703 } else {
704 $r .= '</li>';
705 }
706 } elsif ($nt == $child->TEXT_NODE) {
707 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
708 } elsif ($nt == $child->CDATA_SECTION_NODE) {
709 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
710 } elsif ($nt == $child->COMMENT_NODE) {
711 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
712 } elsif ($nt == $child->DOCUMENT_NODE) {
713 $r .= qq'<li id="$node_id" class="tree-document">Document';
714 $r .= qq[<ul class="attributes">];
715 my $cp = $child->manakai_charset;
716 if (defined $cp) {
717 $r .= qq[<li><code>charset</code> parameter = <code>];
718 $r .= htescape ($cp) . qq[</code></li>];
719 }
720 $r .= qq[<li><code>inputEncoding</code> = ];
721 my $ie = $child->input_encoding;
722 if (defined $ie) {
723 $r .= qq[<code>@{[htescape ($ie)]}</code>];
724 if ($child->manakai_has_bom) {
725 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
726 }
727 } else {
728 $r .= qq[(<code>null</code>)];
729 }
730 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
731 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
732 unless ($child->manakai_is_html) {
733 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
734 if (defined $child->xml_encoding) {
735 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
736 } else {
737 $r .= qq[<li>XML encoding = (null)</li>];
738 }
739 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
740 }
741 $r .= qq[</ul>];
742 if ($child->has_child_nodes) {
743 $r .= '<ol class="children">';
744 unshift @node, @{$child->child_nodes}, '</ol></li>';
745 }
746 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
747 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
748 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
749 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
750 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
751 $r .= '</ul></li>';
752 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
753 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
754 } else {
755 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
756 }
757 }
758
759 $r .= '</ol>';
760 print STDOUT $r;
761 } # print_document_tree
762
763 sub print_structure_dump_dom_section ($$$) {
764 my ($input, $doc, $el) = @_;
765
766 print STDOUT qq[
767 <div id="$input->{id_prefix}document-tree" class="section">
768 <h2>Document Tree</h2>
769 ];
770 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
771 unless $input->{nested};
772
773 print_document_tree ($input, $el || $doc);
774
775 print STDOUT qq[</div>];
776 } # print_structure_dump_dom_section
777
778 sub print_structure_dump_cssom_section ($$) {
779 my ($input, $cssom) = @_;
780
781 print STDOUT qq[
782 <div id="$input->{id_prefix}document-tree" class="section">
783 <h2>Document Tree</h2>
784 ];
785 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
786 unless $input->{nested};
787
788 ## TODO:
789 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
790
791 print STDOUT qq[</div>];
792 } # print_structure_dump_cssom_section
793
794 sub print_structure_dump_manifest_section ($$) {
795 my ($input, $manifest) = @_;
796
797 print STDOUT qq[
798 <div id="$input->{id_prefix}dump-manifest" class="section">
799 <h2>Cache Manifest</h2>
800 ];
801 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
802 unless $input->{nested};
803
804 print STDOUT qq[<dl><dt>Explicit entries</dt>];
805 for my $uri (@{$manifest->[0]}) {
806 my $euri = htescape ($uri);
807 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
808 }
809
810 print STDOUT qq[<dt>Fallback entries</dt><dd>
811 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
812 <th scope=row>Fallback Entry</tr><tbody>];
813 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
814 my $euri = htescape ($uri);
815 my $euri2 = htescape ($manifest->[1]->{$uri});
816 print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
817 <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
818 }
819
820 print STDOUT qq[</table><dt>Online whitelist</dt>];
821 for my $uri (@{$manifest->[2]}) {
822 my $euri = htescape ($uri);
823 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
824 }
825
826 print STDOUT qq[</dl></div>];
827 } # print_structure_dump_manifest_section
828
829 sub print_structure_error_dom_section ($$$$$) {
830 my ($input, $doc, $el, $result, $onsubdoc) = @_;
831
832 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
833 <h2>Document Errors</h2>
834
835 <dl>];
836 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
837 unless $input->{nested};
838
839 require Whatpm::ContentChecker;
840 my $onerror = sub {
841 my %opt = @_;
842 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
843 $type =~ tr/ /-/;
844 $type =~ s/\|/%7C/g;
845 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
846 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
847 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
848 print STDOUT $msg, "</dd>\n";
849 add_error ('structure', \%opt => $result);
850 };
851
852 my $elements;
853 my $time1 = time;
854 if ($el) {
855 $elements = Whatpm::ContentChecker->check_element
856 ($el, $onerror, $onsubdoc);
857 } else {
858 $elements = Whatpm::ContentChecker->check_document
859 ($doc, $onerror, $onsubdoc);
860 }
861 $time{check} = time - $time1;
862
863 print STDOUT qq[</dl></div>];
864
865 return $elements;
866 } # print_structure_error_dom_section
867
868 sub print_structure_error_manifest_section ($$$) {
869 my ($input, $manifest, $result) = @_;
870
871 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
872 <h2>Document Errors</h2>
873
874 <dl>];
875 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
876 unless $input->{nested};
877
878 require Whatpm::CacheManifest;
879 Whatpm::CacheManifest->check_manifest ($manifest, sub {
880 my %opt = @_;
881 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
882 $type =~ tr/ /-/;
883 $type =~ s/\|/%7C/g;
884 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
885 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
886 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
887 add_error ('structure', \%opt => $result);
888 });
889
890 print STDOUT qq[</div>];
891 } # print_structure_error_manifest_section
892
893 sub print_table_section ($$) {
894 my ($input, $tables) = @_;
895
896 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
897 unless $input->{nested};
898 print STDOUT qq[
899 <div id="$input->{id_prefix}tables" class="section">
900 <h2>Tables</h2>
901
902 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
903 <script src="../table-script.js" type="text/javascript"></script>
904 <noscript>
905 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
906 </noscript>
907 ];
908
909 require JSON;
910
911 my $i = 0;
912 for my $table_el (@$tables) {
913 $i++;
914 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
915 get_node_link ($input, $table_el) . q[</h3>];
916
917 ## TODO: Make |ContentChecker| return |form_table| result
918 ## so that this script don't have to run the algorithm twice.
919 my $table = Whatpm::HTMLTable->form_table ($table_el);
920
921 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
922 next unless $_;
923 delete $_->{element};
924 }
925
926 for (@{$table->{row_group}}) {
927 next unless $_;
928 next unless $_->{element};
929 $_->{type} = $_->{element}->manakai_local_name;
930 delete $_->{element};
931 }
932
933 for (@{$table->{cell}}) {
934 next unless $_;
935 for (@{$_}) {
936 next unless $_;
937 for (@$_) {
938 $_->{id} = refaddr $_->{element} if defined $_->{element};
939 delete $_->{element};
940 $_->{is_header} = $_->{is_header} ? 1 : 0;
941 }
942 }
943 }
944
945 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
946 print STDOUT JSON::objToJson ($table);
947 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
948 print STDOUT qq[, '$input->{id_prefix}');</script>];
949 }
950
951 print STDOUT qq[</div>];
952 } # print_table_section
953
954 sub print_listing_section ($$$) {
955 my ($opt, $input, $ids) = @_;
956
957 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
958 unless $input->{nested};
959 print STDOUT qq[
960 <div id="$input->{id_prefix}$opt->{id}" class="section">
961 <h2>$opt->{heading}</h2>
962
963 <dl>
964 ];
965 for my $id (sort {$a cmp $b} keys %$ids) {
966 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
967 for (@{$ids->{$id}}) {
968 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
969 }
970 }
971 print STDOUT qq[</dl></div>];
972 } # print_listing_section
973
974 sub print_result_section ($) {
975 my $result = shift;
976
977 print STDOUT qq[
978 <div id="result-summary" class="section">
979 <h2>Result</h2>];
980
981 if ($result->{unsupported} and $result->{conforming_max}) {
982 print STDOUT qq[<p class=uncertain id=result-para>The conformance
983 checker cannot decide whether the document is conforming or
984 not, since the document contains one or more unsupported
985 features. The document might or might not be conforming.</p>];
986 } elsif ($result->{conforming_min}) {
987 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
988 found in this document.</p>];
989 } elsif ($result->{conforming_max}) {
990 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
991 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
992 it might be conforming.</p>];
993 } else {
994 print STDOUT qq[<p class=FAIL id=result-para>This document is
995 <strong><em>non</em>-conforming</strong>.</p>];
996 }
997
998 print STDOUT qq[<table>
999 <colgroup><col><colgroup><col><col><col><colgroup><col>
1000 <thead>
1001 <tr><th scope=col></th>
1002 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1003 Errors</a></th>
1004 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1005 Errors</a></th>
1006 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1007 <th scope=col>Score</th></tr></thead><tbody>];
1008
1009 my $must_error = 0;
1010 my $should_error = 0;
1011 my $warning = 0;
1012 my $score_min = 0;
1013 my $score_max = 0;
1014 my $score_base = 20;
1015 my $score_unit = $score_base / 100;
1016 for (
1017 [Transfer => 'transfer', ''],
1018 [Character => 'char', ''],
1019 [Syntax => 'syntax', '#parse-errors'],
1020 [Structure => 'structure', '#document-errors'],
1021 ) {
1022 $must_error += ($result->{$_->[1]}->{must} += 0);
1023 $should_error += ($result->{$_->[1]}->{should} += 0);
1024 $warning += ($result->{$_->[1]}->{warning} += 0);
1025 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1026 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1027
1028 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1029 my $label = $_->[0];
1030 if ($result->{$_->[1]}->{must} or
1031 $result->{$_->[1]}->{should} or
1032 $result->{$_->[1]}->{warning} or
1033 $result->{$_->[1]}->{unsupported}) {
1034 $label = qq[<a href="$_->[2]">$label</a>];
1035 }
1036
1037 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1038 if ($uncertain) {
1039 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1040 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1041 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1042 } else {
1043 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1044 }
1045 }
1046
1047 $score_max += $score_base;
1048
1049 print STDOUT qq[
1050 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1051 </tbody>
1052 <tfoot><tr class=uncertain><th scope=row>Total</th>
1053 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1054 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1055 <td>$warning?</td>
1056 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1057 </table>
1058
1059 <p><strong>Important</strong>: This conformance checking service
1060 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1061 </div>];
1062 push @nav, ['#result-summary' => 'Result'];
1063 } # print_result_section
1064
1065 sub print_result_unknown_type_section ($$) {
1066 my ($input, $result) = @_;
1067
1068 my $euri = htescape ($input->{uri});
1069 print STDOUT qq[
1070 <div id="$input->{id_prefix}parse-errors" class="section">
1071 <h2>Errors</h2>
1072
1073 <dl>
1074 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1075 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1076 supported</a></strong>:
1077 Media type
1078 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1079 is not supported.</dd>
1080 </dl>
1081 </div>
1082 ];
1083 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1084 unless $input->{nested};
1085 add_error (char => {level => 'u'} => $result);
1086 add_error (syntax => {level => 'u'} => $result);
1087 add_error (structure => {level => 'u'} => $result);
1088 } # print_result_unknown_type_section
1089
1090 sub print_result_input_error_section ($) {
1091 my $input = shift;
1092 print STDOUT qq[<div class="section" id="result-summary">
1093 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1094 </div>];
1095 push @nav, ['#result-summary' => 'Result'];
1096 } # print_result_input_error_section
1097
1098 sub get_error_label ($$) {
1099 my ($input, $err) = @_;
1100
1101 my $r = '';
1102
1103 if (defined $err->{line}) {
1104 if ($err->{column} > 0) {
1105 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a> column $err->{column}];
1106 } else {
1107 $err->{line} = $err->{line} - 1 || 1;
1108 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a>];
1109 }
1110 }
1111
1112 if (defined $err->{node}) {
1113 $r .= ' ' if length $r;
1114 $r = get_node_link ($input, $err->{node});
1115 }
1116
1117 if (defined $err->{index}) {
1118 $r .= ' ' if length $r;
1119 $r .= 'Index ' . (0+$err->{index});
1120 }
1121
1122 if (defined $err->{value}) {
1123 $r .= ' ' if length $r;
1124 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1125 }
1126
1127 return $r;
1128 } # get_error_label
1129
1130 sub get_error_level_label ($) {
1131 my $err = shift;
1132
1133 my $r = '';
1134
1135 if (not defined $err->{level} or $err->{level} eq 'm') {
1136 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1137 error</a></strong>: ];
1138 } elsif ($err->{level} eq 's') {
1139 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1140 error</a></strong>: ];
1141 } elsif ($err->{level} eq 'w') {
1142 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1143 ];
1144 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1145 $r = qq[<strong><a href="../error-description#level-u">Not
1146 supported</a></strong>: ];
1147 } else {
1148 my $elevel = htescape ($err->{level});
1149 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1150 ];
1151 }
1152
1153 return $r;
1154 } # get_error_level_label
1155
1156 sub get_node_path ($) {
1157 my $node = shift;
1158 my @r;
1159 while (defined $node) {
1160 my $rs;
1161 if ($node->node_type == 1) {
1162 $rs = $node->manakai_local_name;
1163 $node = $node->parent_node;
1164 } elsif ($node->node_type == 2) {
1165 $rs = '@' . $node->manakai_local_name;
1166 $node = $node->owner_element;
1167 } elsif ($node->node_type == 3) {
1168 $rs = '"' . $node->data . '"';
1169 $node = $node->parent_node;
1170 } elsif ($node->node_type == 9) {
1171 @r = ('') unless @r;
1172 $rs = '';
1173 $node = $node->parent_node;
1174 } else {
1175 $rs = '#' . $node->node_type;
1176 $node = $node->parent_node;
1177 }
1178 unshift @r, $rs;
1179 }
1180 return join '/', @r;
1181 } # get_node_path
1182
1183 sub get_node_link ($$) {
1184 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1185 htescape (get_node_path ($_[1])) . qq[</a>];
1186 } # get_node_link
1187
1188 {
1189 my $Msg = {};
1190
1191 sub load_text_catalog ($) {
1192 my $lang = shift; # MUST be a canonical lang name
1193 open my $file, '<:utf8', "cc-msg.$lang.txt"
1194 or die "$0: cc-msg.$lang.txt: $!";
1195 while (<$file>) {
1196 if (s/^([^;]+);([^;]*);//) {
1197 my ($type, $cls, $msg) = ($1, $2, $_);
1198 $msg =~ tr/\x0D\x0A//d;
1199 $Msg->{$type} = [$cls, $msg];
1200 }
1201 }
1202 } # load_text_catalog
1203
1204 sub get_text ($) {
1205 my ($type, $level, $node) = @_;
1206 $type = $level . ':' . $type if defined $level;
1207 $level = 'm' unless defined $level;
1208 my @arg;
1209 {
1210 if (defined $Msg->{$type}) {
1211 my $msg = $Msg->{$type}->[1];
1212 $msg =~ s{<var>\$([0-9]+)</var>}{
1213 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1214 }ge;
1215 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1216 UNIVERSAL::can ($node, 'get_attribute_ns')
1217 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1218 }ge;
1219 $msg =~ s{<var>{\@}</var>}{
1220 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1221 }ge;
1222 $msg =~ s{<var>{local-name}</var>}{
1223 UNIVERSAL::can ($node, 'manakai_local_name')
1224 ? htescape ($node->manakai_local_name) : ''
1225 }ge;
1226 $msg =~ s{<var>{element-local-name}</var>}{
1227 (UNIVERSAL::can ($node, 'owner_element') and
1228 $node->owner_element)
1229 ? htescape ($node->owner_element->manakai_local_name)
1230 : ''
1231 }ge;
1232 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1233 } elsif ($type =~ s/:([^:]*)$//) {
1234 unshift @arg, $1;
1235 redo;
1236 }
1237 }
1238 return ($type, 'level-'.$level, htescape ($_[0]));
1239 } # get_text
1240
1241 }
1242
1243 sub get_input_document ($$) {
1244 my ($http, $dom) = @_;
1245
1246 my $request_uri = $http->get_parameter ('uri');
1247 my $r = {};
1248 if (defined $request_uri and length $request_uri) {
1249 my $uri = $dom->create_uri_reference ($request_uri);
1250 unless ({
1251 http => 1,
1252 }->{lc $uri->uri_scheme}) {
1253 return {uri => $request_uri, request_uri => $request_uri,
1254 error_status_text => 'URI scheme not allowed'};
1255 }
1256
1257 require Message::Util::HostPermit;
1258 my $host_permit = new Message::Util::HostPermit;
1259 $host_permit->add_rule (<<EOH);
1260 Allow host=suika port=80
1261 Deny host=suika
1262 Allow host=suika.fam.cx port=80
1263 Deny host=suika.fam.cx
1264 Deny host=localhost
1265 Deny host=*.localdomain
1266 Deny ipv4=0.0.0.0/8
1267 Deny ipv4=10.0.0.0/8
1268 Deny ipv4=127.0.0.0/8
1269 Deny ipv4=169.254.0.0/16
1270 Deny ipv4=172.0.0.0/11
1271 Deny ipv4=192.0.2.0/24
1272 Deny ipv4=192.88.99.0/24
1273 Deny ipv4=192.168.0.0/16
1274 Deny ipv4=198.18.0.0/15
1275 Deny ipv4=224.0.0.0/4
1276 Deny ipv4=255.255.255.255/32
1277 Deny ipv6=0::0/0
1278 Allow host=*
1279 EOH
1280 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1281 return {uri => $request_uri, request_uri => $request_uri,
1282 error_status_text => 'Connection to the host is forbidden'};
1283 }
1284
1285 require LWP::UserAgent;
1286 my $ua = WDCC::LWPUA->new;
1287 $ua->{wdcc_dom} = $dom;
1288 $ua->{wdcc_host_permit} = $host_permit;
1289 $ua->agent ('Mozilla'); ## TODO: for now.
1290 $ua->parse_head (0);
1291 $ua->protocols_allowed ([qw/http/]);
1292 $ua->max_size (1000_000);
1293 my $req = HTTP::Request->new (GET => $request_uri);
1294 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1295 my $res = $ua->request ($req);
1296 ## TODO: 401 sets |is_success| true.
1297 if ($res->is_success or $http->get_parameter ('error-page')) {
1298 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1299 $r->{uri} = $res->request->uri;
1300 $r->{request_uri} = $request_uri;
1301
1302 ## TODO: More strict parsing...
1303 my $ct = $res->header ('Content-Type');
1304 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1305 $r->{charset} = lc $1;
1306 $r->{charset} =~ tr/\\//d;
1307 $r->{official_charset} = $r->{charset};
1308 }
1309
1310 my $input_charset = $http->get_parameter ('charset');
1311 if (defined $input_charset and length $input_charset) {
1312 $r->{charset_overridden}
1313 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1314 $r->{charset} = $input_charset;
1315 }
1316
1317 ## TODO: Support for HTTP Content-Encoding
1318
1319 $r->{s} = ''.$res->content;
1320
1321 require Whatpm::ContentType;
1322 ($r->{official_type}, $r->{media_type})
1323 = Whatpm::ContentType->get_sniffed_type
1324 (get_file_head => sub {
1325 return substr $r->{s}, 0, shift;
1326 },
1327 http_content_type_byte => $ct,
1328 has_http_content_encoding =>
1329 defined $res->header ('Content-Encoding'),
1330 supported_image_types => {});
1331 } else {
1332 $r->{uri} = $res->request->uri;
1333 $r->{request_uri} = $request_uri;
1334 $r->{error_status_text} = $res->status_line;
1335 }
1336
1337 $r->{header_field} = [];
1338 $res->scan (sub {
1339 push @{$r->{header_field}}, [$_[0], $_[1]];
1340 });
1341 $r->{header_status_code} = $res->code;
1342 $r->{header_status_text} = $res->message;
1343 } else {
1344 $r->{s} = ''.$http->get_parameter ('s');
1345 $r->{uri} = q<thismessage:/>;
1346 $r->{request_uri} = q<thismessage:/>;
1347 $r->{base_uri} = q<thismessage:/>;
1348 $r->{charset} = ''.$http->get_parameter ('_charset_');
1349 $r->{charset} =~ s/\s+//g;
1350 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1351 $r->{official_charset} = $r->{charset};
1352 $r->{header_field} = [];
1353
1354 require Whatpm::ContentType;
1355 ($r->{official_type}, $r->{media_type})
1356 = Whatpm::ContentType->get_sniffed_type
1357 (get_file_head => sub {
1358 return substr $r->{s}, 0, shift;
1359 },
1360 http_content_type_byte => undef,
1361 has_http_content_encoding => 0,
1362 supported_image_types => {});
1363 }
1364
1365 my $input_format = $http->get_parameter ('i');
1366 if (defined $input_format and length $input_format) {
1367 $r->{media_type_overridden}
1368 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1369 $r->{media_type} = $input_format;
1370 }
1371 if (defined $r->{s} and not defined $r->{media_type}) {
1372 $r->{media_type} = 'text/html';
1373 $r->{media_type_overridden} = 1;
1374 }
1375
1376 if ($r->{media_type} eq 'text/xml') {
1377 unless (defined $r->{charset}) {
1378 $r->{charset} = 'us-ascii';
1379 $r->{official_charset} = $r->{charset};
1380 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1381 $r->{charset_overridden} = 0;
1382 }
1383 }
1384
1385 if (length $r->{s} > 1000_000) {
1386 $r->{error_status_text} = 'Entity-body too large';
1387 delete $r->{s};
1388 return $r;
1389 }
1390
1391 $r->{inner_html_element} = $http->get_parameter ('e');
1392
1393 return $r;
1394 } # get_input_document
1395
1396 package WDCC::LWPUA;
1397 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1398
1399 sub redirect_ok {
1400 my $ua = shift;
1401 unless ($ua->SUPER::redirect_ok (@_)) {
1402 return 0;
1403 }
1404
1405 my $uris = $_[1]->header ('Location');
1406 return 0 unless $uris;
1407 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1408 unless ({
1409 http => 1,
1410 }->{lc $uri->uri_scheme}) {
1411 return 0;
1412 }
1413 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1414 return 0;
1415 }
1416 return 1;
1417 } # redirect_ok
1418
1419 =head1 AUTHOR
1420
1421 Wakaba <w@suika.fam.cx>.
1422
1423 =head1 LICENSE
1424
1425 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1426
1427 This library is free software; you can redistribute it
1428 and/or modify it under the same terms as Perl itself.
1429
1430 =cut
1431
1432 ## $Date: 2008/02/10 04:08:04 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24