/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.41 - (show annotations) (download)
Sun Mar 16 11:38:47 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.40: +3 -3 lines
++ ChangeLog	16 Mar 2008 11:38:43 -0000
	* cc.cgi: Line break treatment was different from
	that of HTML and CSS parsers.

2008-03-16  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89
90 <script src="../cc-script.js"></script>
91 ];
92
93 $input->{id_prefix} = '';
94 #$input->{nested} = 0;
95 my $result = {conforming_min => 1, conforming_max => 1};
96 check_and_print ($input => $result);
97 print_result_section ($result);
98 } else {
99 print STDOUT qq[</dl></div>];
100 print_result_input_error_section ($input);
101 }
102
103 print STDOUT qq[
104 <ul class="navigation" id="nav-items">
105 ];
106 for (@nav) {
107 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108 }
109 print STDOUT qq[
110 </ul>
111 </body>
112 </html>
113 ];
114
115 for (qw/decode parse parse_html parse_xml parse_manifest
116 check check_manifest/) {
117 next unless defined $time{$_};
118 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119 print $file $char_length, "\t", $time{$_}, "\n";
120 }
121
122 exit;
123 }
124
125 sub add_error ($$$) {
126 my ($layer, $err, $result) = @_;
127 if (defined $err->{level}) {
128 if ($err->{level} eq 's') {
129 $result->{$layer}->{should}++;
130 $result->{$layer}->{score_min} -= 2;
131 $result->{conforming_min} = 0;
132 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133 $result->{$layer}->{warning}++;
134 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 $result->{$layer}->{unsupported}++;
136 $result->{unsupported} = 1;
137 } elsif ($err->{level} eq 'i') {
138 #
139 } else {
140 $result->{$layer}->{must}++;
141 $result->{$layer}->{score_max} -= 2;
142 $result->{$layer}->{score_min} -= 2;
143 $result->{conforming_min} = 0;
144 $result->{conforming_max} = 0;
145 }
146 } else {
147 $result->{$layer}->{must}++;
148 $result->{$layer}->{score_max} -= 2;
149 $result->{$layer}->{score_min} -= 2;
150 $result->{conforming_min} = 0;
151 $result->{conforming_max} = 0;
152 }
153 } # add_error
154
155 sub check_and_print ($$) {
156 my ($input, $result) = @_;
157
158 print_http_header_section ($input, $result);
159
160 my $doc;
161 my $el;
162 my $cssom;
163 my $manifest;
164 my @subdoc;
165
166 if ($input->{media_type} eq 'text/html') {
167 ($doc, $el) = print_syntax_error_html_section ($input, $result);
168 print_source_string_section
169 ($input,
170 \($input->{s}),
171 $input->{charset} || $doc->input_encoding);
172 } elsif ({
173 'text/xml' => 1,
174 'application/atom+xml' => 1,
175 'application/rss+xml' => 1,
176 'application/svg+xml' => 1,
177 'application/xhtml+xml' => 1,
178 'application/xml' => 1,
179 }->{$input->{media_type}}) {
180 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
181 print_source_string_section ($input,
182 \($input->{s}),
183 $doc->input_encoding);
184 } elsif ($input->{media_type} eq 'text/css') {
185 $cssom = print_syntax_error_css_section ($input, $result);
186 print_source_string_section
187 ($input, \($input->{s}),
188 $cssom->manakai_input_encoding);
189 } elsif ($input->{media_type} eq 'text/cache-manifest') {
190 ## TODO: MUST be text/cache-manifest
191 $manifest = print_syntax_error_manifest_section ($input, $result);
192 print_source_string_section ($input, \($input->{s}),
193 'utf-8');
194 } else {
195 ## TODO: Change HTTP status code??
196 print_result_unknown_type_section ($input, $result);
197 }
198
199 if (defined $doc or defined $el) {
200 $doc->document_uri ($input->{uri});
201 $doc->manakai_entity_base_uri ($input->{base_uri});
202 print_structure_dump_dom_section ($input, $doc, $el);
203 my $elements = print_structure_error_dom_section
204 ($input, $doc, $el, $result, sub {
205 push @subdoc, shift;
206 });
207 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
208 print_listing_section ({
209 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
210 }, $input, $elements->{id}) if keys %{$elements->{id}};
211 print_listing_section ({
212 id => 'terms', label => 'Terms', heading => 'Terms',
213 }, $input, $elements->{term}) if keys %{$elements->{term}};
214 print_listing_section ({
215 id => 'classes', label => 'Classes', heading => 'Classes',
216 }, $input, $elements->{class}) if keys %{$elements->{class}};
217 } elsif (defined $cssom) {
218 print_structure_dump_cssom_section ($input, $cssom);
219 ## TODO: CSSOM validation
220 add_error ('structure', {level => 'u'} => $result);
221 } elsif (defined $manifest) {
222 print_structure_dump_manifest_section ($input, $manifest);
223 print_structure_error_manifest_section ($input, $manifest, $result);
224 }
225
226 my $id_prefix = 0;
227 for my $subinput (@subdoc) {
228 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
229 $subinput->{nested} = 1;
230 $subinput->{base_uri} = $subinput->{container_node}->base_uri
231 unless defined $subinput->{base_uri};
232 my $ebaseuri = htescape ($subinput->{base_uri});
233 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
234 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
235 <h2>Subdocument #$id_prefix</h2>
236
237 <dl>
238 <dt>Internet Media Type</dt>
239 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
240 <dt>Container Node</dt>
241 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
242 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
243 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
244 </dl>];
245
246 $subinput->{id_prefix} .= '-';
247 check_and_print ($subinput => $result);
248
249 print STDOUT qq[</div>];
250 }
251 } # check_and_print
252
253 sub print_http_header_section ($$) {
254 my ($input, $result) = @_;
255 return unless defined $input->{header_status_code} or
256 defined $input->{header_status_text} or
257 @{$input->{header_field} or []};
258
259 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
260 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
261 <h2>HTTP Header</h2>
262
263 <p><strong>Note</strong>: Due to the limitation of the
264 network library in use, the content of this section might
265 not be the real header.</p>
266
267 <table><tbody>
268 ];
269
270 if (defined $input->{header_status_code}) {
271 print STDOUT qq[<tr><th scope="row">Status code</th>];
272 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
273 }
274 if (defined $input->{header_status_text}) {
275 print STDOUT qq[<tr><th scope="row">Status text</th>];
276 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
277 }
278
279 for (@{$input->{header_field}}) {
280 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
281 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
282 }
283
284 print STDOUT qq[</tbody></table></div>];
285 } # print_http_header_section
286
287 sub print_syntax_error_html_section ($$) {
288 my ($input, $result) = @_;
289
290 require Encode;
291 require Whatpm::HTML;
292
293 print STDOUT qq[
294 <div id="$input->{id_prefix}parse-errors" class="section">
295 <h2>Parse Errors</h2>
296
297 <dl id="$input->{id_prefix}parse-errors-list">];
298 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
299
300 my $onerror = sub {
301 my (%opt) = @_;
302 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
303 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
304 qq[</dt>];
305 $type =~ tr/ /-/;
306 $type =~ s/\|/%7C/g;
307 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
308 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
309 print STDOUT qq[$msg</dd>\n];
310
311 add_error ('syntax', \%opt => $result);
312 };
313
314 my $doc = $dom->create_document;
315 my $el;
316 my $inner_html_element = $input->{inner_html_element};
317 if (defined $inner_html_element and length $inner_html_element) {
318 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
319 my $time1 = time;
320 my $t = Encode::decode ($input->{charset}, $input->{s});
321 $time{decode} = time - $time1;
322
323 $el = $doc->create_element_ns
324 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
325 $time1 = time;
326 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
327 $time{parse} = time - $time1;
328 } else {
329 my $time1 = time;
330 Whatpm::HTML->parse_byte_string
331 ($input->{charset}, $input->{s} => $doc, $onerror);
332 $time{parse_html} = time - $time1;
333 }
334 $doc->manakai_charset ($input->{official_charset})
335 if defined $input->{official_charset};
336
337 print STDOUT qq[</dl></div>];
338
339 return ($doc, $el);
340 } # print_syntax_error_html_section
341
342 sub print_syntax_error_xml_section ($$) {
343 my ($input, $result) = @_;
344
345 require Message::DOM::XMLParserTemp;
346
347 print STDOUT qq[
348 <div id="$input->{id_prefix}parse-errors" class="section">
349 <h2>Parse Errors</h2>
350
351 <dl id="$input->{id_prefix}parse-errors-list">];
352 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
353
354 my $onerror = sub {
355 my $err = shift;
356 my $line = $err->location->line_number;
357 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
358 print STDOUT $err->location->column_number, "</dt><dd>";
359 print STDOUT htescape $err->text, "</dd>\n";
360
361 add_error ('syntax', {type => $err->text,
362 level => [
363 $err->SEVERITY_FATAL_ERROR => 'm',
364 $err->SEVERITY_ERROR => 'm',
365 $err->SEVERITY_WARNING => 's',
366 ]->[$err->severity]} => $result);
367
368 return 1;
369 };
370
371 my $time1 = time;
372 open my $fh, '<', \($input->{s});
373 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
374 ($fh => $dom, $onerror, charset => $input->{charset});
375 $time{parse_xml} = time - $time1;
376 $doc->manakai_charset ($input->{official_charset})
377 if defined $input->{official_charset};
378
379 print STDOUT qq[</dl></div>];
380
381 return ($doc, undef);
382 } # print_syntax_error_xml_section
383
384 sub get_css_parser () {
385 our $CSSParser;
386 return $CSSParser if $CSSParser;
387
388 require Whatpm::CSS::Parser;
389 my $p = Whatpm::CSS::Parser->new;
390
391 $p->{prop}->{$_} = 1 for qw/
392 alignment-baseline
393 background background-attachment background-color background-image
394 background-position background-position-x background-position-y
395 background-repeat border border-bottom border-bottom-color
396 border-bottom-style border-bottom-width border-collapse border-color
397 border-left border-left-color
398 border-left-style border-left-width border-right border-right-color
399 border-right-style border-right-width
400 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
401 border-style border-top border-top-color border-top-style border-top-width
402 border-width bottom
403 caption-side clear clip color content counter-increment counter-reset
404 cursor direction display dominant-baseline empty-cells float font
405 font-family font-size font-size-adjust font-stretch
406 font-style font-variant font-weight height left
407 letter-spacing line-height
408 list-style list-style-image list-style-position list-style-type
409 margin margin-bottom margin-left margin-right margin-top marker-offset
410 marks max-height max-width min-height min-width opacity -moz-opacity
411 orphans outline outline-color outline-style outline-width overflow
412 overflow-x overflow-y
413 padding padding-bottom padding-left padding-right padding-top
414 page page-break-after page-break-before page-break-inside
415 position quotes right size table-layout
416 text-align text-anchor text-decoration text-indent text-transform
417 top unicode-bidi vertical-align visibility white-space width widows
418 word-spacing writing-mode z-index
419 /;
420 $p->{prop_value}->{display}->{$_} = 1 for qw/
421 block clip inline inline-block inline-table list-item none
422 table table-caption table-cell table-column table-column-group
423 table-header-group table-footer-group table-row table-row-group
424 compact marker
425 /;
426 $p->{prop_value}->{position}->{$_} = 1 for qw/
427 absolute fixed relative static
428 /;
429 $p->{prop_value}->{float}->{$_} = 1 for qw/
430 left right none
431 /;
432 $p->{prop_value}->{clear}->{$_} = 1 for qw/
433 left right none both
434 /;
435 $p->{prop_value}->{direction}->{ltr} = 1;
436 $p->{prop_value}->{direction}->{rtl} = 1;
437 $p->{prop_value}->{marks}->{crop} = 1;
438 $p->{prop_value}->{marks}->{cross} = 1;
439 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
440 normal bidi-override embed
441 /;
442 for my $prop_name (qw/overflow overflow-x overflow-y/) {
443 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
444 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
445 /;
446 }
447 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
448 visible hidden collapse
449 /;
450 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
451 disc circle square decimal decimal-leading-zero
452 lower-roman upper-roman lower-greek lower-latin
453 upper-latin armenian georgian lower-alpha upper-alpha none
454 hebrew cjk-ideographic hiragana katakana hiragana-iroha
455 katakana-iroha
456 /;
457 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
458 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
459 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
460 auto always avoid left right
461 /;
462 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
463 auto always avoid left right
464 /;
465 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
466 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
467 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
468 repeat repeat-x repeat-y no-repeat
469 /;
470 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
471 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
472 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
473 xx-small x-small small medium large x-large xx-large
474 -manakai-xxx-large -webkit-xxx-large
475 larger smaller
476 /;
477 $p->{prop_value}->{'font-style'}->{normal} = 1;
478 $p->{prop_value}->{'font-style'}->{italic} = 1;
479 $p->{prop_value}->{'font-style'}->{oblique} = 1;
480 $p->{prop_value}->{'font-variant'}->{normal} = 1;
481 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
482 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
483 qw/normal wider narrower ultra-condensed extra-condensed
484 condensed semi-condensed semi-expanded expanded
485 extra-expanded ultra-expanded/;
486 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
487 left right center justify begin end
488 /;
489 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
490 capitalize uppercase lowercase none
491 /;
492 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
493 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
494 /;
495 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
496 lr rl tb lr-tb rl-tb tb-rl
497 /;
498 $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
499 start middle end
500 /;
501 $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
502 auto use-script no-change reset-size ideographic alphabetic
503 hanging mathematical central middle text-after-edge text-before-edge
504 /;
505 $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
506 auto baseline before-edge text-before-edge middle central
507 after-edge text-after-edge ideographic alphabetic hanging
508 mathematical
509 /;
510 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
511 none blink underline overline line-through
512 /;
513 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
514 top bottom left right
515 /;
516 $p->{prop_value}->{'table-layout'}->{auto} = 1;
517 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
518 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
519 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
520 $p->{prop_value}->{'empty-cells'}->{show} = 1;
521 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
522 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
523 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
524 se-resize sw-resize s-resize w-resize text wait help progress
525 /;
526 for my $prop (qw/border-top-style border-left-style
527 border-bottom-style border-right-style outline-style/) {
528 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
529 none hidden dotted dashed solid double groove ridge inset outset
530 /;
531 }
532 for my $prop (qw/color background-color
533 border-bottom-color border-left-color border-right-color
534 border-top-color border-color/) {
535 $p->{prop_value}->{$prop}->{transparent} = 1;
536 $p->{prop_value}->{$prop}->{flavor} = 1;
537 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
538 }
539 $p->{prop_value}->{'outline-color'}->{invert} = 1;
540 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
541 $p->{pseudo_class}->{$_} = 1 for qw/
542 active checked disabled empty enabled first-child first-of-type
543 focus hover indeterminate last-child last-of-type link only-child
544 only-of-type root target visited
545 lang nth-child nth-last-child nth-of-type nth-last-of-type not
546 -manakai-contains -manakai-current
547 /;
548 $p->{pseudo_element}->{$_} = 1 for qw/
549 after before first-letter first-line
550 /;
551
552 return $CSSParser = $p;
553 } # get_css_parser
554
555 sub print_syntax_error_css_section ($$) {
556 my ($input, $result) = @_;
557
558 print STDOUT qq[
559 <div id="$input->{id_prefix}parse-errors" class="section">
560 <h2>Parse Errors</h2>
561
562 <dl id="$input->{id_prefix}parse-errors-list">];
563 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
564
565 my $p = get_css_parser ();
566 $p->init;
567 $p->{onerror} = sub {
568 my (%opt) = @_;
569 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
570 if ($opt{token}) {
571 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
572 } else {
573 print STDOUT qq[<dt class="$cls">Unknown location];
574 }
575 if (defined $opt{value}) {
576 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
577 } elsif (defined $opt{token}) {
578 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
579 }
580 $type =~ tr/ /-/;
581 $type =~ s/\|/%7C/g;
582 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
583 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
584 print STDOUT qq[$msg</dd>\n];
585
586 add_error ('syntax', \%opt => $result);
587 };
588 $p->{href} = $input->{uri};
589 $p->{base_uri} = $input->{base_uri};
590
591 # if ($parse_mode eq 'q') {
592 # $p->{unitless_px} = 1;
593 # $p->{hashless_color} = 1;
594 # }
595
596 ## TODO: Make $input->{s} a ref.
597
598 my $s = \$input->{s};
599 my $charset;
600 unless ($input->{is_char_string}) {
601 require Encode;
602 if (defined $input->{charset}) {## TODO: IANA->Perl
603 $charset = $input->{charset};
604 $s = \(Encode::decode ($input->{charset}, $$s));
605 } else {
606 ## TODO: charset detection
607 $s = \(Encode::decode ($charset = 'utf-8', $$s));
608 }
609 }
610
611 my $cssom = $p->parse_char_string ($$s);
612 $cssom->manakai_input_encoding ($charset) if defined $charset;
613
614 print STDOUT qq[</dl></div>];
615
616 return $cssom;
617 } # print_syntax_error_css_section
618
619 sub print_syntax_error_manifest_section ($$) {
620 my ($input, $result) = @_;
621
622 require Whatpm::CacheManifest;
623
624 print STDOUT qq[
625 <div id="$input->{id_prefix}parse-errors" class="section">
626 <h2>Parse Errors</h2>
627
628 <dl id="$input->{id_prefix}parse-errors-list">];
629 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
630
631 my $onerror = sub {
632 my (%opt) = @_;
633 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
634 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
635 qq[</dt>];
636 $type =~ tr/ /-/;
637 $type =~ s/\|/%7C/g;
638 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
639 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
640 print STDOUT qq[$msg</dd>\n];
641
642 add_error ('syntax', \%opt => $result);
643 };
644
645 my $time1 = time;
646 my $manifest = Whatpm::CacheManifest->parse_byte_string
647 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
648 $time{parse_manifest} = time - $time1;
649
650 print STDOUT qq[</dl></div>];
651
652 return $manifest;
653 } # print_syntax_error_manifest_section
654
655 sub print_source_string_section ($$$) {
656 my $input = shift;
657 my $s;
658 unless ($input->{is_char_string}) {
659 require Encode;
660 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
661 return unless $enc;
662
663 $s = \($enc->decode (${$_[0]}));
664 } else {
665 $s = $_[0];
666 }
667
668 my $i = 1;
669 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
670 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
671 <h2>Document Source</h2>
672 <ol lang="">\n];
673 if (length $$s) {
674 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
675 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
676 "</li>\n";
677 $i++;
678 }
679 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
680 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
681 "</li>\n";
682 }
683 } else {
684 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
685 }
686 print STDOUT "</ol></div>
687 <script> addSourceToParseErrorList ('$input->{id_prefix}'); </script>";
688 } # print_input_string_section
689
690 sub print_document_tree ($$) {
691 my ($input, $node) = @_;
692
693 my $r = '<ol class="xoxo">';
694
695 my @node = ($node);
696 while (@node) {
697 my $child = shift @node;
698 unless (ref $child) {
699 $r .= $child;
700 next;
701 }
702
703 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
704 my $nt = $child->node_type;
705 if ($nt == $child->ELEMENT_NODE) {
706 my $child_nsuri = $child->namespace_uri;
707 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
708 '</code>'; ## ISSUE: case
709
710 if ($child->has_attributes) {
711 $r .= '<ul class="attributes">';
712 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
713 @{$child->attributes}) {
714 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
715 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
716 }
717 $r .= '</ul>';
718 }
719
720 if ($child->has_child_nodes) {
721 $r .= '<ol class="children">';
722 unshift @node, @{$child->child_nodes}, '</ol></li>';
723 } else {
724 $r .= '</li>';
725 }
726 } elsif ($nt == $child->TEXT_NODE) {
727 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
728 } elsif ($nt == $child->CDATA_SECTION_NODE) {
729 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
730 } elsif ($nt == $child->COMMENT_NODE) {
731 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
732 } elsif ($nt == $child->DOCUMENT_NODE) {
733 $r .= qq'<li id="$node_id" class="tree-document">Document';
734 $r .= qq[<ul class="attributes">];
735 my $cp = $child->manakai_charset;
736 if (defined $cp) {
737 $r .= qq[<li><code>charset</code> parameter = <code>];
738 $r .= htescape ($cp) . qq[</code></li>];
739 }
740 $r .= qq[<li><code>inputEncoding</code> = ];
741 my $ie = $child->input_encoding;
742 if (defined $ie) {
743 $r .= qq[<code>@{[htescape ($ie)]}</code>];
744 if ($child->manakai_has_bom) {
745 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
746 }
747 } else {
748 $r .= qq[(<code>null</code>)];
749 }
750 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
751 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
752 unless ($child->manakai_is_html) {
753 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
754 if (defined $child->xml_encoding) {
755 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
756 } else {
757 $r .= qq[<li>XML encoding = (null)</li>];
758 }
759 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
760 }
761 $r .= qq[</ul>];
762 if ($child->has_child_nodes) {
763 $r .= '<ol class="children">';
764 unshift @node, @{$child->child_nodes}, '</ol></li>';
765 }
766 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
767 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
768 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
769 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
770 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
771 $r .= '</ul></li>';
772 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
773 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
774 } else {
775 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
776 }
777 }
778
779 $r .= '</ol>';
780 print STDOUT $r;
781 } # print_document_tree
782
783 sub print_structure_dump_dom_section ($$$) {
784 my ($input, $doc, $el) = @_;
785
786 print STDOUT qq[
787 <div id="$input->{id_prefix}document-tree" class="section">
788 <h2>Document Tree</h2>
789 ];
790 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
791 unless $input->{nested};
792
793 print_document_tree ($input, $el || $doc);
794
795 print STDOUT qq[</div>];
796 } # print_structure_dump_dom_section
797
798 sub print_structure_dump_cssom_section ($$) {
799 my ($input, $cssom) = @_;
800
801 print STDOUT qq[
802 <div id="$input->{id_prefix}document-tree" class="section">
803 <h2>Document Tree</h2>
804 ];
805 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
806 unless $input->{nested};
807
808 ## TODO:
809 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
810
811 print STDOUT qq[</div>];
812 } # print_structure_dump_cssom_section
813
814 sub print_structure_dump_manifest_section ($$) {
815 my ($input, $manifest) = @_;
816
817 print STDOUT qq[
818 <div id="$input->{id_prefix}dump-manifest" class="section">
819 <h2>Cache Manifest</h2>
820 ];
821 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
822 unless $input->{nested};
823
824 print STDOUT qq[<dl><dt>Explicit entries</dt>];
825 my $i = 0;
826 for my $uri (@{$manifest->[0]}) {
827 my $euri = htescape ($uri);
828 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
829 }
830
831 print STDOUT qq[<dt>Fallback entries</dt><dd>
832 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
833 <th scope=row>Fallback Entry</tr><tbody>];
834 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
835 my $euri = htescape ($uri);
836 my $euri2 = htescape ($manifest->[1]->{$uri});
837 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
838 <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
839 }
840
841 print STDOUT qq[</table><dt>Online whitelist</dt>];
842 for my $uri (@{$manifest->[2]}) {
843 my $euri = htescape ($uri);
844 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
845 }
846
847 print STDOUT qq[</dl></div>];
848 } # print_structure_dump_manifest_section
849
850 sub print_structure_error_dom_section ($$$$$) {
851 my ($input, $doc, $el, $result, $onsubdoc) = @_;
852
853 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
854 <h2>Document Errors</h2>
855
856 <dl>];
857 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
858 unless $input->{nested};
859
860 require Whatpm::ContentChecker;
861 my $onerror = sub {
862 my %opt = @_;
863 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
864 $type =~ tr/ /-/;
865 $type =~ s/\|/%7C/g;
866 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
867 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
868 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
869 print STDOUT $msg, "</dd>\n";
870 add_error ('structure', \%opt => $result);
871 };
872
873 my $elements;
874 my $time1 = time;
875 if ($el) {
876 $elements = Whatpm::ContentChecker->check_element
877 ($el, $onerror, $onsubdoc);
878 } else {
879 $elements = Whatpm::ContentChecker->check_document
880 ($doc, $onerror, $onsubdoc);
881 }
882 $time{check} = time - $time1;
883
884 print STDOUT qq[</dl></div>];
885
886 return $elements;
887 } # print_structure_error_dom_section
888
889 sub print_structure_error_manifest_section ($$$) {
890 my ($input, $manifest, $result) = @_;
891
892 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
893 <h2>Document Errors</h2>
894
895 <dl>];
896 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
897 unless $input->{nested};
898
899 require Whatpm::CacheManifest;
900 Whatpm::CacheManifest->check_manifest ($manifest, sub {
901 my %opt = @_;
902 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
903 $type =~ tr/ /-/;
904 $type =~ s/\|/%7C/g;
905 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
906 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
907 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
908 add_error ('structure', \%opt => $result);
909 });
910
911 print STDOUT qq[</div>];
912 } # print_structure_error_manifest_section
913
914 sub print_table_section ($$) {
915 my ($input, $tables) = @_;
916
917 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
918 unless $input->{nested};
919 print STDOUT qq[
920 <div id="$input->{id_prefix}tables" class="section">
921 <h2>Tables</h2>
922
923 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
924 <script src="../table-script.js" type="text/javascript"></script>
925 <noscript>
926 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
927 </noscript>
928 ];
929
930 require JSON;
931
932 my $i = 0;
933 for my $table_el (@$tables) {
934 $i++;
935 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
936 get_node_link ($input, $table_el) . q[</h3>];
937
938 ## TODO: Make |ContentChecker| return |form_table| result
939 ## so that this script don't have to run the algorithm twice.
940 my $table = Whatpm::HTMLTable->form_table ($table_el);
941
942 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
943 next unless $_;
944 delete $_->{element};
945 }
946
947 for (@{$table->{row_group}}) {
948 next unless $_;
949 next unless $_->{element};
950 $_->{type} = $_->{element}->manakai_local_name;
951 delete $_->{element};
952 }
953
954 for (@{$table->{cell}}) {
955 next unless $_;
956 for (@{$_}) {
957 next unless $_;
958 for (@$_) {
959 $_->{id} = refaddr $_->{element} if defined $_->{element};
960 delete $_->{element};
961 $_->{is_header} = $_->{is_header} ? 1 : 0;
962 }
963 }
964 }
965
966 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
967 print STDOUT JSON::objToJson ($table);
968 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
969 print STDOUT qq[, '$input->{id_prefix}');</script>];
970 }
971
972 print STDOUT qq[</div>];
973 } # print_table_section
974
975 sub print_listing_section ($$$) {
976 my ($opt, $input, $ids) = @_;
977
978 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
979 unless $input->{nested};
980 print STDOUT qq[
981 <div id="$input->{id_prefix}$opt->{id}" class="section">
982 <h2>$opt->{heading}</h2>
983
984 <dl>
985 ];
986 for my $id (sort {$a cmp $b} keys %$ids) {
987 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
988 for (@{$ids->{$id}}) {
989 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
990 }
991 }
992 print STDOUT qq[</dl></div>];
993 } # print_listing_section
994
995 sub print_result_section ($) {
996 my $result = shift;
997
998 print STDOUT qq[
999 <div id="result-summary" class="section">
1000 <h2>Result</h2>];
1001
1002 if ($result->{unsupported} and $result->{conforming_max}) {
1003 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1004 checker cannot decide whether the document is conforming or
1005 not, since the document contains one or more unsupported
1006 features. The document might or might not be conforming.</p>];
1007 } elsif ($result->{conforming_min}) {
1008 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1009 found in this document.</p>];
1010 } elsif ($result->{conforming_max}) {
1011 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1012 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1013 it might be conforming.</p>];
1014 } else {
1015 print STDOUT qq[<p class=FAIL id=result-para>This document is
1016 <strong><em>non</em>-conforming</strong>.</p>];
1017 }
1018
1019 print STDOUT qq[<table>
1020 <colgroup><col><colgroup><col><col><col><colgroup><col>
1021 <thead>
1022 <tr><th scope=col></th>
1023 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1024 Errors</a></th>
1025 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1026 Errors</a></th>
1027 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1028 <th scope=col>Score</th></tr></thead><tbody>];
1029
1030 my $must_error = 0;
1031 my $should_error = 0;
1032 my $warning = 0;
1033 my $score_min = 0;
1034 my $score_max = 0;
1035 my $score_base = 20;
1036 my $score_unit = $score_base / 100;
1037 for (
1038 [Transfer => 'transfer', ''],
1039 [Character => 'char', ''],
1040 [Syntax => 'syntax', '#parse-errors'],
1041 [Structure => 'structure', '#document-errors'],
1042 ) {
1043 $must_error += ($result->{$_->[1]}->{must} += 0);
1044 $should_error += ($result->{$_->[1]}->{should} += 0);
1045 $warning += ($result->{$_->[1]}->{warning} += 0);
1046 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1047 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1048
1049 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1050 my $label = $_->[0];
1051 if ($result->{$_->[1]}->{must} or
1052 $result->{$_->[1]}->{should} or
1053 $result->{$_->[1]}->{warning} or
1054 $result->{$_->[1]}->{unsupported}) {
1055 $label = qq[<a href="$_->[2]">$label</a>];
1056 }
1057
1058 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1059 if ($uncertain) {
1060 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1061 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1062 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1063 } else {
1064 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1065 }
1066 }
1067
1068 $score_max += $score_base;
1069
1070 print STDOUT qq[
1071 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1072 </tbody>
1073 <tfoot><tr class=uncertain><th scope=row>Total</th>
1074 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1075 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1076 <td>$warning?</td>
1077 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1078 </table>
1079
1080 <p><strong>Important</strong>: This conformance checking service
1081 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1082 </div>];
1083 push @nav, ['#result-summary' => 'Result'];
1084 } # print_result_section
1085
1086 sub print_result_unknown_type_section ($$) {
1087 my ($input, $result) = @_;
1088
1089 my $euri = htescape ($input->{uri});
1090 print STDOUT qq[
1091 <div id="$input->{id_prefix}parse-errors" class="section">
1092 <h2>Errors</h2>
1093
1094 <dl>
1095 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1096 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1097 supported</a></strong>:
1098 Media type
1099 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1100 is not supported.</dd>
1101 </dl>
1102 </div>
1103 ];
1104 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1105 unless $input->{nested};
1106 add_error (char => {level => 'u'} => $result);
1107 add_error (syntax => {level => 'u'} => $result);
1108 add_error (structure => {level => 'u'} => $result);
1109 } # print_result_unknown_type_section
1110
1111 sub print_result_input_error_section ($) {
1112 my $input = shift;
1113 print STDOUT qq[<div class="section" id="result-summary">
1114 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1115 </div>];
1116 push @nav, ['#result-summary' => 'Result'];
1117 } # print_result_input_error_section
1118
1119 sub get_error_label ($$) {
1120 my ($input, $err) = @_;
1121
1122 my $r = '';
1123
1124 if (defined $err->{token} and defined $err->{token}->{line}) {
1125 if ($err->{token}->{column} > 0) {
1126 $r = qq[<a href="#$input->{id_prefix}line-$err->{token}->{line}">Line $err->{token}->{line}</a> column $err->{token}->{column}];
1127 } else {
1128 $err->{token}->{line} = $err->{token}->{line} - 1 || 1;
1129 $r = qq[<a href="#$input->{id_prefix}line-$err->{token}->{line}">Line $err->{token}->{line}</a>];
1130 }
1131 } elsif (defined $err->{line}) {
1132 if ($err->{column} > 0) {
1133 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a> column $err->{column}];
1134 } else {
1135 $err->{line} = $err->{line} - 1 || 1;
1136 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a>];
1137 }
1138 }
1139
1140 if (defined $err->{node}) {
1141 $r .= ' ' if length $r;
1142 $r = get_node_link ($input, $err->{node});
1143 }
1144
1145 if (defined $err->{index}) {
1146 if (length $r) {
1147 $r .= ', Index ' . (0+$err->{index});
1148 } else {
1149 $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1150 . (0+$err->{index}) . '</a>';
1151 }
1152 }
1153
1154 if (defined $err->{value}) {
1155 $r .= ' ' if length $r;
1156 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1157 }
1158
1159 return $r;
1160 } # get_error_label
1161
1162 sub get_error_level_label ($) {
1163 my $err = shift;
1164
1165 my $r = '';
1166
1167 if (not defined $err->{level} or $err->{level} eq 'm') {
1168 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1169 error</a></strong>: ];
1170 } elsif ($err->{level} eq 's') {
1171 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1172 error</a></strong>: ];
1173 } elsif ($err->{level} eq 'w') {
1174 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1175 ];
1176 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1177 $r = qq[<strong><a href="../error-description#level-u">Not
1178 supported</a></strong>: ];
1179 } elsif ($err->{level} eq 'i') {
1180 $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1181 } else {
1182 my $elevel = htescape ($err->{level});
1183 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1184 ];
1185 }
1186
1187 return $r;
1188 } # get_error_level_label
1189
1190 sub get_node_path ($) {
1191 my $node = shift;
1192 my @r;
1193 while (defined $node) {
1194 my $rs;
1195 if ($node->node_type == 1) {
1196 $rs = $node->manakai_local_name;
1197 $node = $node->parent_node;
1198 } elsif ($node->node_type == 2) {
1199 $rs = '@' . $node->manakai_local_name;
1200 $node = $node->owner_element;
1201 } elsif ($node->node_type == 3) {
1202 $rs = '"' . $node->data . '"';
1203 $node = $node->parent_node;
1204 } elsif ($node->node_type == 9) {
1205 @r = ('') unless @r;
1206 $rs = '';
1207 $node = $node->parent_node;
1208 } else {
1209 $rs = '#' . $node->node_type;
1210 $node = $node->parent_node;
1211 }
1212 unshift @r, $rs;
1213 }
1214 return join '/', @r;
1215 } # get_node_path
1216
1217 sub get_node_link ($$) {
1218 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1219 htescape (get_node_path ($_[1])) . qq[</a>];
1220 } # get_node_link
1221
1222 {
1223 my $Msg = {};
1224
1225 sub load_text_catalog ($) {
1226 my $lang = shift; # MUST be a canonical lang name
1227 open my $file, '<:utf8', "cc-msg.$lang.txt"
1228 or die "$0: cc-msg.$lang.txt: $!";
1229 while (<$file>) {
1230 if (s/^([^;]+);([^;]*);//) {
1231 my ($type, $cls, $msg) = ($1, $2, $_);
1232 $msg =~ tr/\x0D\x0A//d;
1233 $Msg->{$type} = [$cls, $msg];
1234 }
1235 }
1236 } # load_text_catalog
1237
1238 sub get_text ($) {
1239 my ($type, $level, $node) = @_;
1240 $type = $level . ':' . $type if defined $level;
1241 $level = 'm' unless defined $level;
1242 my @arg;
1243 {
1244 if (defined $Msg->{$type}) {
1245 my $msg = $Msg->{$type}->[1];
1246 $msg =~ s{<var>\$([0-9]+)</var>}{
1247 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1248 }ge;
1249 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1250 UNIVERSAL::can ($node, 'get_attribute_ns')
1251 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1252 }ge;
1253 $msg =~ s{<var>{\@}</var>}{
1254 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1255 }ge;
1256 $msg =~ s{<var>{local-name}</var>}{
1257 UNIVERSAL::can ($node, 'manakai_local_name')
1258 ? htescape ($node->manakai_local_name) : ''
1259 }ge;
1260 $msg =~ s{<var>{element-local-name}</var>}{
1261 (UNIVERSAL::can ($node, 'owner_element') and
1262 $node->owner_element)
1263 ? htescape ($node->owner_element->manakai_local_name)
1264 : ''
1265 }ge;
1266 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1267 } elsif ($type =~ s/:([^:]*)$//) {
1268 unshift @arg, $1;
1269 redo;
1270 }
1271 }
1272 return ($type, 'level-'.$level, htescape ($_[0]));
1273 } # get_text
1274
1275 }
1276
1277 sub get_input_document ($$) {
1278 my ($http, $dom) = @_;
1279
1280 my $request_uri = $http->get_parameter ('uri');
1281 my $r = {};
1282 if (defined $request_uri and length $request_uri) {
1283 my $uri = $dom->create_uri_reference ($request_uri);
1284 unless ({
1285 http => 1,
1286 }->{lc $uri->uri_scheme}) {
1287 return {uri => $request_uri, request_uri => $request_uri,
1288 error_status_text => 'URI scheme not allowed'};
1289 }
1290
1291 require Message::Util::HostPermit;
1292 my $host_permit = new Message::Util::HostPermit;
1293 $host_permit->add_rule (<<EOH);
1294 Allow host=suika port=80
1295 Deny host=suika
1296 Allow host=suika.fam.cx port=80
1297 Deny host=suika.fam.cx
1298 Deny host=localhost
1299 Deny host=*.localdomain
1300 Deny ipv4=0.0.0.0/8
1301 Deny ipv4=10.0.0.0/8
1302 Deny ipv4=127.0.0.0/8
1303 Deny ipv4=169.254.0.0/16
1304 Deny ipv4=172.0.0.0/11
1305 Deny ipv4=192.0.2.0/24
1306 Deny ipv4=192.88.99.0/24
1307 Deny ipv4=192.168.0.0/16
1308 Deny ipv4=198.18.0.0/15
1309 Deny ipv4=224.0.0.0/4
1310 Deny ipv4=255.255.255.255/32
1311 Deny ipv6=0::0/0
1312 Allow host=*
1313 EOH
1314 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1315 return {uri => $request_uri, request_uri => $request_uri,
1316 error_status_text => 'Connection to the host is forbidden'};
1317 }
1318
1319 require LWP::UserAgent;
1320 my $ua = WDCC::LWPUA->new;
1321 $ua->{wdcc_dom} = $dom;
1322 $ua->{wdcc_host_permit} = $host_permit;
1323 $ua->agent ('Mozilla'); ## TODO: for now.
1324 $ua->parse_head (0);
1325 $ua->protocols_allowed ([qw/http/]);
1326 $ua->max_size (1000_000);
1327 my $req = HTTP::Request->new (GET => $request_uri);
1328 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1329 my $res = $ua->request ($req);
1330 ## TODO: 401 sets |is_success| true.
1331 if ($res->is_success or $http->get_parameter ('error-page')) {
1332 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1333 $r->{uri} = $res->request->uri;
1334 $r->{request_uri} = $request_uri;
1335
1336 ## TODO: More strict parsing...
1337 my $ct = $res->header ('Content-Type');
1338 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1339 $r->{charset} = lc $1;
1340 $r->{charset} =~ tr/\\//d;
1341 $r->{official_charset} = $r->{charset};
1342 }
1343
1344 my $input_charset = $http->get_parameter ('charset');
1345 if (defined $input_charset and length $input_charset) {
1346 $r->{charset_overridden}
1347 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1348 $r->{charset} = $input_charset;
1349 }
1350
1351 ## TODO: Support for HTTP Content-Encoding
1352
1353 $r->{s} = ''.$res->content;
1354
1355 require Whatpm::ContentType;
1356 ($r->{official_type}, $r->{media_type})
1357 = Whatpm::ContentType->get_sniffed_type
1358 (get_file_head => sub {
1359 return substr $r->{s}, 0, shift;
1360 },
1361 http_content_type_byte => $ct,
1362 has_http_content_encoding =>
1363 defined $res->header ('Content-Encoding'),
1364 supported_image_types => {});
1365 } else {
1366 $r->{uri} = $res->request->uri;
1367 $r->{request_uri} = $request_uri;
1368 $r->{error_status_text} = $res->status_line;
1369 }
1370
1371 $r->{header_field} = [];
1372 $res->scan (sub {
1373 push @{$r->{header_field}}, [$_[0], $_[1]];
1374 });
1375 $r->{header_status_code} = $res->code;
1376 $r->{header_status_text} = $res->message;
1377 } else {
1378 $r->{s} = ''.$http->get_parameter ('s');
1379 $r->{uri} = q<thismessage:/>;
1380 $r->{request_uri} = q<thismessage:/>;
1381 $r->{base_uri} = q<thismessage:/>;
1382 $r->{charset} = ''.$http->get_parameter ('_charset_');
1383 $r->{charset} =~ s/\s+//g;
1384 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1385 $r->{official_charset} = $r->{charset};
1386 $r->{header_field} = [];
1387
1388 require Whatpm::ContentType;
1389 ($r->{official_type}, $r->{media_type})
1390 = Whatpm::ContentType->get_sniffed_type
1391 (get_file_head => sub {
1392 return substr $r->{s}, 0, shift;
1393 },
1394 http_content_type_byte => undef,
1395 has_http_content_encoding => 0,
1396 supported_image_types => {});
1397 }
1398
1399 my $input_format = $http->get_parameter ('i');
1400 if (defined $input_format and length $input_format) {
1401 $r->{media_type_overridden}
1402 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1403 $r->{media_type} = $input_format;
1404 }
1405 if (defined $r->{s} and not defined $r->{media_type}) {
1406 $r->{media_type} = 'text/html';
1407 $r->{media_type_overridden} = 1;
1408 }
1409
1410 if ($r->{media_type} eq 'text/xml') {
1411 unless (defined $r->{charset}) {
1412 $r->{charset} = 'us-ascii';
1413 $r->{official_charset} = $r->{charset};
1414 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1415 $r->{charset_overridden} = 0;
1416 }
1417 }
1418
1419 if (length $r->{s} > 1000_000) {
1420 $r->{error_status_text} = 'Entity-body too large';
1421 delete $r->{s};
1422 return $r;
1423 }
1424
1425 $r->{inner_html_element} = $http->get_parameter ('e');
1426
1427 return $r;
1428 } # get_input_document
1429
1430 package WDCC::LWPUA;
1431 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1432
1433 sub redirect_ok {
1434 my $ua = shift;
1435 unless ($ua->SUPER::redirect_ok (@_)) {
1436 return 0;
1437 }
1438
1439 my $uris = $_[1]->header ('Location');
1440 return 0 unless $uris;
1441 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1442 unless ({
1443 http => 1,
1444 }->{lc $uri->uri_scheme}) {
1445 return 0;
1446 }
1447 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1448 return 0;
1449 }
1450 return 1;
1451 } # redirect_ok
1452
1453 =head1 AUTHOR
1454
1455 Wakaba <w@suika.fam.cx>.
1456
1457 =head1 LICENSE
1458
1459 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1460
1461 This library is free software; you can redistribute it
1462 and/or modify it under the same terms as Perl itself.
1463
1464 =cut
1465
1466 ## $Date: 2008/03/16 07:08:34 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24