/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.43 - (show annotations) (download)
Mon Mar 17 13:45:35 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.42: +5 -1 lines
++ ChangeLog	17 Mar 2008 13:45:32 -0000
	* cc.cgi (get_error_label): Use the error location
	of the parent node, if the node does not have one.

2008-03-17  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89
90 <script src="../cc-script.js"></script>
91 ];
92
93 $input->{id_prefix} = '';
94 #$input->{nested} = 0;
95 my $result = {conforming_min => 1, conforming_max => 1};
96 check_and_print ($input => $result);
97 print_result_section ($result);
98 } else {
99 print STDOUT qq[</dl></div>];
100 print_result_input_error_section ($input);
101 }
102
103 print STDOUT qq[
104 <ul class="navigation" id="nav-items">
105 ];
106 for (@nav) {
107 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108 }
109 print STDOUT qq[
110 </ul>
111 </body>
112 </html>
113 ];
114
115 for (qw/decode parse parse_html parse_xml parse_manifest
116 check check_manifest/) {
117 next unless defined $time{$_};
118 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119 print $file $char_length, "\t", $time{$_}, "\n";
120 }
121
122 exit;
123 }
124
125 sub add_error ($$$) {
126 my ($layer, $err, $result) = @_;
127 if (defined $err->{level}) {
128 if ($err->{level} eq 's') {
129 $result->{$layer}->{should}++;
130 $result->{$layer}->{score_min} -= 2;
131 $result->{conforming_min} = 0;
132 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133 $result->{$layer}->{warning}++;
134 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 $result->{$layer}->{unsupported}++;
136 $result->{unsupported} = 1;
137 } elsif ($err->{level} eq 'i') {
138 #
139 } else {
140 $result->{$layer}->{must}++;
141 $result->{$layer}->{score_max} -= 2;
142 $result->{$layer}->{score_min} -= 2;
143 $result->{conforming_min} = 0;
144 $result->{conforming_max} = 0;
145 }
146 } else {
147 $result->{$layer}->{must}++;
148 $result->{$layer}->{score_max} -= 2;
149 $result->{$layer}->{score_min} -= 2;
150 $result->{conforming_min} = 0;
151 $result->{conforming_max} = 0;
152 }
153 } # add_error
154
155 sub check_and_print ($$) {
156 my ($input, $result) = @_;
157
158 print_http_header_section ($input, $result);
159
160 my $doc;
161 my $el;
162 my $cssom;
163 my $manifest;
164 my @subdoc;
165
166 if ($input->{media_type} eq 'text/html') {
167 ($doc, $el) = print_syntax_error_html_section ($input, $result);
168 print_source_string_section
169 ($input,
170 \($input->{s}),
171 $input->{charset} || $doc->input_encoding);
172 } elsif ({
173 'text/xml' => 1,
174 'application/atom+xml' => 1,
175 'application/rss+xml' => 1,
176 'application/svg+xml' => 1,
177 'application/xhtml+xml' => 1,
178 'application/xml' => 1,
179 }->{$input->{media_type}}) {
180 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
181 print_source_string_section ($input,
182 \($input->{s}),
183 $doc->input_encoding);
184 } elsif ($input->{media_type} eq 'text/css') {
185 $cssom = print_syntax_error_css_section ($input, $result);
186 print_source_string_section
187 ($input, \($input->{s}),
188 $cssom->manakai_input_encoding);
189 } elsif ($input->{media_type} eq 'text/cache-manifest') {
190 ## TODO: MUST be text/cache-manifest
191 $manifest = print_syntax_error_manifest_section ($input, $result);
192 print_source_string_section ($input, \($input->{s}),
193 'utf-8');
194 } else {
195 ## TODO: Change HTTP status code??
196 print_result_unknown_type_section ($input, $result);
197 }
198
199 if (defined $doc or defined $el) {
200 $doc->document_uri ($input->{uri});
201 $doc->manakai_entity_base_uri ($input->{base_uri});
202 print_structure_dump_dom_section ($input, $doc, $el);
203 my $elements = print_structure_error_dom_section
204 ($input, $doc, $el, $result, sub {
205 push @subdoc, shift;
206 });
207 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
208 print_listing_section ({
209 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
210 }, $input, $elements->{id}) if keys %{$elements->{id}};
211 print_listing_section ({
212 id => 'terms', label => 'Terms', heading => 'Terms',
213 }, $input, $elements->{term}) if keys %{$elements->{term}};
214 print_listing_section ({
215 id => 'classes', label => 'Classes', heading => 'Classes',
216 }, $input, $elements->{class}) if keys %{$elements->{class}};
217 } elsif (defined $cssom) {
218 print_structure_dump_cssom_section ($input, $cssom);
219 ## TODO: CSSOM validation
220 add_error ('structure', {level => 'u'} => $result);
221 } elsif (defined $manifest) {
222 print_structure_dump_manifest_section ($input, $manifest);
223 print_structure_error_manifest_section ($input, $manifest, $result);
224 }
225
226 my $id_prefix = 0;
227 for my $subinput (@subdoc) {
228 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
229 $subinput->{nested} = 1;
230 $subinput->{base_uri} = $subinput->{container_node}->base_uri
231 unless defined $subinput->{base_uri};
232 my $ebaseuri = htescape ($subinput->{base_uri});
233 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
234 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
235 <h2>Subdocument #$id_prefix</h2>
236
237 <dl>
238 <dt>Internet Media Type</dt>
239 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
240 <dt>Container Node</dt>
241 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
242 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
243 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
244 </dl>];
245
246 $subinput->{id_prefix} .= '-';
247 check_and_print ($subinput => $result);
248
249 print STDOUT qq[</div>];
250 }
251 } # check_and_print
252
253 sub print_http_header_section ($$) {
254 my ($input, $result) = @_;
255 return unless defined $input->{header_status_code} or
256 defined $input->{header_status_text} or
257 @{$input->{header_field} or []};
258
259 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
260 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
261 <h2>HTTP Header</h2>
262
263 <p><strong>Note</strong>: Due to the limitation of the
264 network library in use, the content of this section might
265 not be the real header.</p>
266
267 <table><tbody>
268 ];
269
270 if (defined $input->{header_status_code}) {
271 print STDOUT qq[<tr><th scope="row">Status code</th>];
272 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
273 }
274 if (defined $input->{header_status_text}) {
275 print STDOUT qq[<tr><th scope="row">Status text</th>];
276 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
277 }
278
279 for (@{$input->{header_field}}) {
280 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
281 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
282 }
283
284 print STDOUT qq[</tbody></table></div>];
285 } # print_http_header_section
286
287 sub print_syntax_error_html_section ($$) {
288 my ($input, $result) = @_;
289
290 require Encode;
291 require Whatpm::HTML;
292
293 print STDOUT qq[
294 <div id="$input->{id_prefix}parse-errors" class="section">
295 <h2>Parse Errors</h2>
296
297 <dl id="$input->{id_prefix}parse-errors-list">];
298 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
299
300 my $onerror = sub {
301 my (%opt) = @_;
302 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
303 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
304 qq[</dt>];
305 $type =~ tr/ /-/;
306 $type =~ s/\|/%7C/g;
307 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
308 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
309 print STDOUT qq[$msg</dd>\n];
310
311 add_error ('syntax', \%opt => $result);
312 };
313
314 my $doc = $dom->create_document;
315 my $el;
316 my $inner_html_element = $input->{inner_html_element};
317 if (defined $inner_html_element and length $inner_html_element) {
318 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
319 my $time1 = time;
320 my $t = Encode::decode ($input->{charset}, $input->{s});
321 $time{decode} = time - $time1;
322
323 $el = $doc->create_element_ns
324 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
325 $time1 = time;
326 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
327 $time{parse} = time - $time1;
328 } else {
329 my $time1 = time;
330 Whatpm::HTML->parse_byte_string
331 ($input->{charset}, $input->{s} => $doc, $onerror);
332 $time{parse_html} = time - $time1;
333 }
334 $doc->manakai_charset ($input->{official_charset})
335 if defined $input->{official_charset};
336
337 print STDOUT qq[</dl></div>];
338
339 return ($doc, $el);
340 } # print_syntax_error_html_section
341
342 sub print_syntax_error_xml_section ($$) {
343 my ($input, $result) = @_;
344
345 require Message::DOM::XMLParserTemp;
346
347 print STDOUT qq[
348 <div id="$input->{id_prefix}parse-errors" class="section">
349 <h2>Parse Errors</h2>
350
351 <dl id="$input->{id_prefix}parse-errors-list">];
352 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
353
354 my $onerror = sub {
355 my $err = shift;
356 my $line = $err->location->line_number;
357 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
358 print STDOUT $err->location->column_number, "</dt><dd>";
359 print STDOUT htescape $err->text, "</dd>\n";
360
361 add_error ('syntax', {type => $err->text,
362 level => [
363 $err->SEVERITY_FATAL_ERROR => 'm',
364 $err->SEVERITY_ERROR => 'm',
365 $err->SEVERITY_WARNING => 's',
366 ]->[$err->severity]} => $result);
367
368 return 1;
369 };
370
371 my $time1 = time;
372 open my $fh, '<', \($input->{s});
373 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
374 ($fh => $dom, $onerror, charset => $input->{charset});
375 $time{parse_xml} = time - $time1;
376 $doc->manakai_charset ($input->{official_charset})
377 if defined $input->{official_charset};
378
379 print STDOUT qq[</dl></div>];
380
381 return ($doc, undef);
382 } # print_syntax_error_xml_section
383
384 sub get_css_parser () {
385 our $CSSParser;
386 return $CSSParser if $CSSParser;
387
388 require Whatpm::CSS::Parser;
389 my $p = Whatpm::CSS::Parser->new;
390
391 $p->{prop}->{$_} = 1 for qw/
392 alignment-baseline
393 background background-attachment background-color background-image
394 background-position background-position-x background-position-y
395 background-repeat border border-bottom border-bottom-color
396 border-bottom-style border-bottom-width border-collapse border-color
397 border-left border-left-color
398 border-left-style border-left-width border-right border-right-color
399 border-right-style border-right-width
400 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
401 border-style border-top border-top-color border-top-style border-top-width
402 border-width bottom
403 caption-side clear clip color content counter-increment counter-reset
404 cursor direction display dominant-baseline empty-cells float font
405 font-family font-size font-size-adjust font-stretch
406 font-style font-variant font-weight height left
407 letter-spacing line-height
408 list-style list-style-image list-style-position list-style-type
409 margin margin-bottom margin-left margin-right margin-top marker-offset
410 marks max-height max-width min-height min-width opacity -moz-opacity
411 orphans outline outline-color outline-style outline-width overflow
412 overflow-x overflow-y
413 padding padding-bottom padding-left padding-right padding-top
414 page page-break-after page-break-before page-break-inside
415 position quotes right size table-layout
416 text-align text-anchor text-decoration text-indent text-transform
417 top unicode-bidi vertical-align visibility white-space width widows
418 word-spacing writing-mode z-index
419 /;
420 $p->{prop_value}->{display}->{$_} = 1 for qw/
421 block clip inline inline-block inline-table list-item none
422 table table-caption table-cell table-column table-column-group
423 table-header-group table-footer-group table-row table-row-group
424 compact marker
425 /;
426 $p->{prop_value}->{position}->{$_} = 1 for qw/
427 absolute fixed relative static
428 /;
429 $p->{prop_value}->{float}->{$_} = 1 for qw/
430 left right none
431 /;
432 $p->{prop_value}->{clear}->{$_} = 1 for qw/
433 left right none both
434 /;
435 $p->{prop_value}->{direction}->{ltr} = 1;
436 $p->{prop_value}->{direction}->{rtl} = 1;
437 $p->{prop_value}->{marks}->{crop} = 1;
438 $p->{prop_value}->{marks}->{cross} = 1;
439 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
440 normal bidi-override embed
441 /;
442 for my $prop_name (qw/overflow overflow-x overflow-y/) {
443 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
444 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
445 /;
446 }
447 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
448 visible hidden collapse
449 /;
450 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
451 disc circle square decimal decimal-leading-zero
452 lower-roman upper-roman lower-greek lower-latin
453 upper-latin armenian georgian lower-alpha upper-alpha none
454 hebrew cjk-ideographic hiragana katakana hiragana-iroha
455 katakana-iroha
456 /;
457 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
458 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
459 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
460 auto always avoid left right
461 /;
462 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
463 auto always avoid left right
464 /;
465 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
466 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
467 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
468 repeat repeat-x repeat-y no-repeat
469 /;
470 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
471 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
472 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
473 xx-small x-small small medium large x-large xx-large
474 -manakai-xxx-large -webkit-xxx-large
475 larger smaller
476 /;
477 $p->{prop_value}->{'font-style'}->{normal} = 1;
478 $p->{prop_value}->{'font-style'}->{italic} = 1;
479 $p->{prop_value}->{'font-style'}->{oblique} = 1;
480 $p->{prop_value}->{'font-variant'}->{normal} = 1;
481 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
482 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
483 qw/normal wider narrower ultra-condensed extra-condensed
484 condensed semi-condensed semi-expanded expanded
485 extra-expanded ultra-expanded/;
486 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
487 left right center justify begin end
488 /;
489 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
490 capitalize uppercase lowercase none
491 /;
492 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
493 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
494 /;
495 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
496 lr rl tb lr-tb rl-tb tb-rl
497 /;
498 $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
499 start middle end
500 /;
501 $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
502 auto use-script no-change reset-size ideographic alphabetic
503 hanging mathematical central middle text-after-edge text-before-edge
504 /;
505 $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
506 auto baseline before-edge text-before-edge middle central
507 after-edge text-after-edge ideographic alphabetic hanging
508 mathematical
509 /;
510 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
511 none blink underline overline line-through
512 /;
513 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
514 top bottom left right
515 /;
516 $p->{prop_value}->{'table-layout'}->{auto} = 1;
517 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
518 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
519 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
520 $p->{prop_value}->{'empty-cells'}->{show} = 1;
521 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
522 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
523 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
524 se-resize sw-resize s-resize w-resize text wait help progress
525 /;
526 for my $prop (qw/border-top-style border-left-style
527 border-bottom-style border-right-style outline-style/) {
528 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
529 none hidden dotted dashed solid double groove ridge inset outset
530 /;
531 }
532 for my $prop (qw/color background-color
533 border-bottom-color border-left-color border-right-color
534 border-top-color border-color/) {
535 $p->{prop_value}->{$prop}->{transparent} = 1;
536 $p->{prop_value}->{$prop}->{flavor} = 1;
537 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
538 }
539 $p->{prop_value}->{'outline-color'}->{invert} = 1;
540 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
541 $p->{pseudo_class}->{$_} = 1 for qw/
542 active checked disabled empty enabled first-child first-of-type
543 focus hover indeterminate last-child last-of-type link only-child
544 only-of-type root target visited
545 lang nth-child nth-last-child nth-of-type nth-last-of-type not
546 -manakai-contains -manakai-current
547 /;
548 $p->{pseudo_element}->{$_} = 1 for qw/
549 after before first-letter first-line
550 /;
551
552 return $CSSParser = $p;
553 } # get_css_parser
554
555 sub print_syntax_error_css_section ($$) {
556 my ($input, $result) = @_;
557
558 print STDOUT qq[
559 <div id="$input->{id_prefix}parse-errors" class="section">
560 <h2>Parse Errors</h2>
561
562 <dl id="$input->{id_prefix}parse-errors-list">];
563 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
564
565 my $p = get_css_parser ();
566 $p->init;
567 $p->{onerror} = sub {
568 my (%opt) = @_;
569 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
570 if ($opt{token}) {
571 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
572 } else {
573 print STDOUT qq[<dt class="$cls">Unknown location];
574 }
575 if (defined $opt{value}) {
576 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
577 } elsif (defined $opt{token}) {
578 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
579 }
580 $type =~ tr/ /-/;
581 $type =~ s/\|/%7C/g;
582 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
583 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
584 print STDOUT qq[$msg</dd>\n];
585
586 add_error ('syntax', \%opt => $result);
587 };
588 $p->{href} = $input->{uri};
589 $p->{base_uri} = $input->{base_uri};
590
591 # if ($parse_mode eq 'q') {
592 # $p->{unitless_px} = 1;
593 # $p->{hashless_color} = 1;
594 # }
595
596 ## TODO: Make $input->{s} a ref.
597
598 my $s = \$input->{s};
599 my $charset;
600 unless ($input->{is_char_string}) {
601 require Encode;
602 if (defined $input->{charset}) {## TODO: IANA->Perl
603 $charset = $input->{charset};
604 $s = \(Encode::decode ($input->{charset}, $$s));
605 } else {
606 ## TODO: charset detection
607 $s = \(Encode::decode ($charset = 'utf-8', $$s));
608 }
609 }
610
611 my $cssom = $p->parse_char_string ($$s);
612 $cssom->manakai_input_encoding ($charset) if defined $charset;
613
614 print STDOUT qq[</dl></div>];
615
616 return $cssom;
617 } # print_syntax_error_css_section
618
619 sub print_syntax_error_manifest_section ($$) {
620 my ($input, $result) = @_;
621
622 require Whatpm::CacheManifest;
623
624 print STDOUT qq[
625 <div id="$input->{id_prefix}parse-errors" class="section">
626 <h2>Parse Errors</h2>
627
628 <dl id="$input->{id_prefix}parse-errors-list">];
629 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
630
631 my $onerror = sub {
632 my (%opt) = @_;
633 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
634 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
635 qq[</dt>];
636 $type =~ tr/ /-/;
637 $type =~ s/\|/%7C/g;
638 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
639 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
640 print STDOUT qq[$msg</dd>\n];
641
642 add_error ('syntax', \%opt => $result);
643 };
644
645 my $time1 = time;
646 my $manifest = Whatpm::CacheManifest->parse_byte_string
647 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
648 $time{parse_manifest} = time - $time1;
649
650 print STDOUT qq[</dl></div>];
651
652 return $manifest;
653 } # print_syntax_error_manifest_section
654
655 sub print_source_string_section ($$$) {
656 my $input = shift;
657 my $s;
658 unless ($input->{is_char_string}) {
659 require Encode;
660 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
661 return unless $enc;
662
663 $s = \($enc->decode (${$_[0]}));
664 } else {
665 $s = $_[0];
666 }
667
668 my $i = 1;
669 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
670 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
671 <h2>Document Source</h2>
672 <ol lang="">\n];
673 if (length $$s) {
674 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
675 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
676 "</li>\n";
677 $i++;
678 }
679 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
680 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
681 "</li>\n";
682 }
683 } else {
684 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
685 }
686 print STDOUT "</ol></div>
687 <script>
688 addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
689 </script>";
690 } # print_input_string_section
691
692 sub print_document_tree ($$) {
693 my ($input, $node) = @_;
694
695 my $r = '<ol class="xoxo">';
696
697 my @node = ($node);
698 while (@node) {
699 my $child = shift @node;
700 unless (ref $child) {
701 $r .= $child;
702 next;
703 }
704
705 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
706 my $nt = $child->node_type;
707 if ($nt == $child->ELEMENT_NODE) {
708 my $child_nsuri = $child->namespace_uri;
709 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
710 '</code>'; ## ISSUE: case
711
712 if ($child->has_attributes) {
713 $r .= '<ul class="attributes">';
714 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
715 @{$child->attributes}) {
716 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
717 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
718 }
719 $r .= '</ul>';
720 }
721
722 if ($child->has_child_nodes) {
723 $r .= '<ol class="children">';
724 unshift @node, @{$child->child_nodes}, '</ol></li>';
725 } else {
726 $r .= '</li>';
727 }
728 } elsif ($nt == $child->TEXT_NODE) {
729 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
730 } elsif ($nt == $child->CDATA_SECTION_NODE) {
731 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
732 } elsif ($nt == $child->COMMENT_NODE) {
733 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
734 } elsif ($nt == $child->DOCUMENT_NODE) {
735 $r .= qq'<li id="$node_id" class="tree-document">Document';
736 $r .= qq[<ul class="attributes">];
737 my $cp = $child->manakai_charset;
738 if (defined $cp) {
739 $r .= qq[<li><code>charset</code> parameter = <code>];
740 $r .= htescape ($cp) . qq[</code></li>];
741 }
742 $r .= qq[<li><code>inputEncoding</code> = ];
743 my $ie = $child->input_encoding;
744 if (defined $ie) {
745 $r .= qq[<code>@{[htescape ($ie)]}</code>];
746 if ($child->manakai_has_bom) {
747 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
748 }
749 } else {
750 $r .= qq[(<code>null</code>)];
751 }
752 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
753 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
754 unless ($child->manakai_is_html) {
755 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
756 if (defined $child->xml_encoding) {
757 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
758 } else {
759 $r .= qq[<li>XML encoding = (null)</li>];
760 }
761 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
762 }
763 $r .= qq[</ul>];
764 if ($child->has_child_nodes) {
765 $r .= '<ol class="children">';
766 unshift @node, @{$child->child_nodes}, '</ol></li>';
767 }
768 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
769 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
770 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
771 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
772 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
773 $r .= '</ul></li>';
774 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
775 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
776 } else {
777 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
778 }
779 }
780
781 $r .= '</ol>';
782 print STDOUT $r;
783 } # print_document_tree
784
785 sub print_structure_dump_dom_section ($$$) {
786 my ($input, $doc, $el) = @_;
787
788 print STDOUT qq[
789 <div id="$input->{id_prefix}document-tree" class="section">
790 <h2>Document Tree</h2>
791 ];
792 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
793 unless $input->{nested};
794
795 print_document_tree ($input, $el || $doc);
796
797 print STDOUT qq[</div>];
798 } # print_structure_dump_dom_section
799
800 sub print_structure_dump_cssom_section ($$) {
801 my ($input, $cssom) = @_;
802
803 print STDOUT qq[
804 <div id="$input->{id_prefix}document-tree" class="section">
805 <h2>Document Tree</h2>
806 ];
807 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
808 unless $input->{nested};
809
810 ## TODO:
811 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
812
813 print STDOUT qq[</div>];
814 } # print_structure_dump_cssom_section
815
816 sub print_structure_dump_manifest_section ($$) {
817 my ($input, $manifest) = @_;
818
819 print STDOUT qq[
820 <div id="$input->{id_prefix}dump-manifest" class="section">
821 <h2>Cache Manifest</h2>
822 ];
823 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
824 unless $input->{nested};
825
826 print STDOUT qq[<dl><dt>Explicit entries</dt>];
827 my $i = 0;
828 for my $uri (@{$manifest->[0]}) {
829 my $euri = htescape ($uri);
830 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
831 }
832
833 print STDOUT qq[<dt>Fallback entries</dt><dd>
834 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
835 <th scope=row>Fallback Entry</tr><tbody>];
836 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
837 my $euri = htescape ($uri);
838 my $euri2 = htescape ($manifest->[1]->{$uri});
839 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
840 <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
841 }
842
843 print STDOUT qq[</table><dt>Online whitelist</dt>];
844 for my $uri (@{$manifest->[2]}) {
845 my $euri = htescape ($uri);
846 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
847 }
848
849 print STDOUT qq[</dl></div>];
850 } # print_structure_dump_manifest_section
851
852 sub print_structure_error_dom_section ($$$$$) {
853 my ($input, $doc, $el, $result, $onsubdoc) = @_;
854
855 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
856 <h2>Document Errors</h2>
857
858 <dl id=document-errors-list>];
859 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
860 unless $input->{nested};
861
862 require Whatpm::ContentChecker;
863 my $onerror = sub {
864 my %opt = @_;
865 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
866 $type =~ tr/ /-/;
867 $type =~ s/\|/%7C/g;
868 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
869 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
870 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
871 print STDOUT $msg, "</dd>\n";
872 add_error ('structure', \%opt => $result);
873 };
874
875 my $elements;
876 my $time1 = time;
877 if ($el) {
878 $elements = Whatpm::ContentChecker->check_element
879 ($el, $onerror, $onsubdoc);
880 } else {
881 $elements = Whatpm::ContentChecker->check_document
882 ($doc, $onerror, $onsubdoc);
883 }
884 $time{check} = time - $time1;
885
886 print STDOUT qq[</dl>
887 <script>
888 addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
889 </script></div>];
890
891 return $elements;
892 } # print_structure_error_dom_section
893
894 sub print_structure_error_manifest_section ($$$) {
895 my ($input, $manifest, $result) = @_;
896
897 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
898 <h2>Document Errors</h2>
899
900 <dl>];
901 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
902 unless $input->{nested};
903
904 require Whatpm::CacheManifest;
905 Whatpm::CacheManifest->check_manifest ($manifest, sub {
906 my %opt = @_;
907 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
908 $type =~ tr/ /-/;
909 $type =~ s/\|/%7C/g;
910 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
911 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
912 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
913 add_error ('structure', \%opt => $result);
914 });
915
916 print STDOUT qq[</div>];
917 } # print_structure_error_manifest_section
918
919 sub print_table_section ($$) {
920 my ($input, $tables) = @_;
921
922 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
923 unless $input->{nested};
924 print STDOUT qq[
925 <div id="$input->{id_prefix}tables" class="section">
926 <h2>Tables</h2>
927
928 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
929 <script src="../table-script.js" type="text/javascript"></script>
930 <noscript>
931 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
932 </noscript>
933 ];
934
935 require JSON;
936
937 my $i = 0;
938 for my $table_el (@$tables) {
939 $i++;
940 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
941 get_node_link ($input, $table_el) . q[</h3>];
942
943 ## TODO: Make |ContentChecker| return |form_table| result
944 ## so that this script don't have to run the algorithm twice.
945 my $table = Whatpm::HTMLTable->form_table ($table_el);
946
947 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
948 next unless $_;
949 delete $_->{element};
950 }
951
952 for (@{$table->{row_group}}) {
953 next unless $_;
954 next unless $_->{element};
955 $_->{type} = $_->{element}->manakai_local_name;
956 delete $_->{element};
957 }
958
959 for (@{$table->{cell}}) {
960 next unless $_;
961 for (@{$_}) {
962 next unless $_;
963 for (@$_) {
964 $_->{id} = refaddr $_->{element} if defined $_->{element};
965 delete $_->{element};
966 $_->{is_header} = $_->{is_header} ? 1 : 0;
967 }
968 }
969 }
970
971 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
972 print STDOUT JSON::objToJson ($table);
973 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
974 print STDOUT qq[, '$input->{id_prefix}');</script>];
975 }
976
977 print STDOUT qq[</div>];
978 } # print_table_section
979
980 sub print_listing_section ($$$) {
981 my ($opt, $input, $ids) = @_;
982
983 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
984 unless $input->{nested};
985 print STDOUT qq[
986 <div id="$input->{id_prefix}$opt->{id}" class="section">
987 <h2>$opt->{heading}</h2>
988
989 <dl>
990 ];
991 for my $id (sort {$a cmp $b} keys %$ids) {
992 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
993 for (@{$ids->{$id}}) {
994 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
995 }
996 }
997 print STDOUT qq[</dl></div>];
998 } # print_listing_section
999
1000 sub print_result_section ($) {
1001 my $result = shift;
1002
1003 print STDOUT qq[
1004 <div id="result-summary" class="section">
1005 <h2>Result</h2>];
1006
1007 if ($result->{unsupported} and $result->{conforming_max}) {
1008 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1009 checker cannot decide whether the document is conforming or
1010 not, since the document contains one or more unsupported
1011 features. The document might or might not be conforming.</p>];
1012 } elsif ($result->{conforming_min}) {
1013 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1014 found in this document.</p>];
1015 } elsif ($result->{conforming_max}) {
1016 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1017 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1018 it might be conforming.</p>];
1019 } else {
1020 print STDOUT qq[<p class=FAIL id=result-para>This document is
1021 <strong><em>non</em>-conforming</strong>.</p>];
1022 }
1023
1024 print STDOUT qq[<table>
1025 <colgroup><col><colgroup><col><col><col><colgroup><col>
1026 <thead>
1027 <tr><th scope=col></th>
1028 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1029 Errors</a></th>
1030 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1031 Errors</a></th>
1032 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1033 <th scope=col>Score</th></tr></thead><tbody>];
1034
1035 my $must_error = 0;
1036 my $should_error = 0;
1037 my $warning = 0;
1038 my $score_min = 0;
1039 my $score_max = 0;
1040 my $score_base = 20;
1041 my $score_unit = $score_base / 100;
1042 for (
1043 [Transfer => 'transfer', ''],
1044 [Character => 'char', ''],
1045 [Syntax => 'syntax', '#parse-errors'],
1046 [Structure => 'structure', '#document-errors'],
1047 ) {
1048 $must_error += ($result->{$_->[1]}->{must} += 0);
1049 $should_error += ($result->{$_->[1]}->{should} += 0);
1050 $warning += ($result->{$_->[1]}->{warning} += 0);
1051 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1052 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1053
1054 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1055 my $label = $_->[0];
1056 if ($result->{$_->[1]}->{must} or
1057 $result->{$_->[1]}->{should} or
1058 $result->{$_->[1]}->{warning} or
1059 $result->{$_->[1]}->{unsupported}) {
1060 $label = qq[<a href="$_->[2]">$label</a>];
1061 }
1062
1063 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1064 if ($uncertain) {
1065 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1066 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1067 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1068 } else {
1069 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1070 }
1071 }
1072
1073 $score_max += $score_base;
1074
1075 print STDOUT qq[
1076 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1077 </tbody>
1078 <tfoot><tr class=uncertain><th scope=row>Total</th>
1079 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1080 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1081 <td>$warning?</td>
1082 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1083 </table>
1084
1085 <p><strong>Important</strong>: This conformance checking service
1086 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1087 </div>];
1088 push @nav, ['#result-summary' => 'Result'];
1089 } # print_result_section
1090
1091 sub print_result_unknown_type_section ($$) {
1092 my ($input, $result) = @_;
1093
1094 my $euri = htescape ($input->{uri});
1095 print STDOUT qq[
1096 <div id="$input->{id_prefix}parse-errors" class="section">
1097 <h2>Errors</h2>
1098
1099 <dl>
1100 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1101 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1102 supported</a></strong>:
1103 Media type
1104 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1105 is not supported.</dd>
1106 </dl>
1107 </div>
1108 ];
1109 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1110 unless $input->{nested};
1111 add_error (char => {level => 'u'} => $result);
1112 add_error (syntax => {level => 'u'} => $result);
1113 add_error (structure => {level => 'u'} => $result);
1114 } # print_result_unknown_type_section
1115
1116 sub print_result_input_error_section ($) {
1117 my $input = shift;
1118 print STDOUT qq[<div class="section" id="result-summary">
1119 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1120 </div>];
1121 push @nav, ['#result-summary' => 'Result'];
1122 } # print_result_input_error_section
1123
1124 sub get_error_label ($$) {
1125 my ($input, $err) = @_;
1126
1127 my $r = '';
1128
1129 my $line;
1130 my $column;
1131
1132 if (defined $err->{node}) {
1133 $line = $err->{node}->get_user_data ('manakai_source_line');
1134 if (defined $line) {
1135 $column = $err->{node}->get_user_data ('manakai_source_column');
1136 } else {
1137 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1138 my $owner = $err->{node}->owner_element;
1139 $line = $owner->get_user_data ('manakai_source_line');
1140 $column = $owner->get_user_data ('manakai_source_column');
1141 } else {
1142 my $parent = $err->{node}->parent_node;
1143 $line = $parent->get_user_data ('manakai_source_line');
1144 $column = $parent->get_user_data ('manakai_source_column');
1145 }
1146 }
1147 }
1148 unless (defined $line) {
1149 if (defined $err->{token} and defined $err->{token}->{line}) {
1150 $line = $err->{token}->{line};
1151 $column = $err->{token}->{column};
1152 } elsif (defined $err->{line}) {
1153 $line = $err->{line};
1154 $column = $err->{column};
1155 }
1156 }
1157
1158 if (defined $line) {
1159 if (defined $column and $column > 0) {
1160 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1161 } else {
1162 $line = $line - 1 || 1;
1163 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1164 }
1165 }
1166
1167 if (defined $err->{node}) {
1168 $r .= ' ' if length $r;
1169 $r .= get_node_link ($input, $err->{node});
1170 }
1171
1172 if (defined $err->{index}) {
1173 if (length $r) {
1174 $r .= ', Index ' . (0+$err->{index});
1175 } else {
1176 $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1177 . (0+$err->{index}) . '</a>';
1178 }
1179 }
1180
1181 if (defined $err->{value}) {
1182 $r .= ' ' if length $r;
1183 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1184 }
1185
1186 return $r;
1187 } # get_error_label
1188
1189 sub get_error_level_label ($) {
1190 my $err = shift;
1191
1192 my $r = '';
1193
1194 if (not defined $err->{level} or $err->{level} eq 'm') {
1195 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1196 error</a></strong>: ];
1197 } elsif ($err->{level} eq 's') {
1198 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1199 error</a></strong>: ];
1200 } elsif ($err->{level} eq 'w') {
1201 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1202 ];
1203 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1204 $r = qq[<strong><a href="../error-description#level-u">Not
1205 supported</a></strong>: ];
1206 } elsif ($err->{level} eq 'i') {
1207 $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1208 } else {
1209 my $elevel = htescape ($err->{level});
1210 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1211 ];
1212 }
1213
1214 return $r;
1215 } # get_error_level_label
1216
1217 sub get_node_path ($) {
1218 my $node = shift;
1219 my @r;
1220 while (defined $node) {
1221 my $rs;
1222 if ($node->node_type == 1) {
1223 $rs = $node->manakai_local_name;
1224 $node = $node->parent_node;
1225 } elsif ($node->node_type == 2) {
1226 $rs = '@' . $node->manakai_local_name;
1227 $node = $node->owner_element;
1228 } elsif ($node->node_type == 3) {
1229 $rs = '"' . $node->data . '"';
1230 $node = $node->parent_node;
1231 } elsif ($node->node_type == 9) {
1232 @r = ('') unless @r;
1233 $rs = '';
1234 $node = $node->parent_node;
1235 } else {
1236 $rs = '#' . $node->node_type;
1237 $node = $node->parent_node;
1238 }
1239 unshift @r, $rs;
1240 }
1241 return join '/', @r;
1242 } # get_node_path
1243
1244 sub get_node_link ($$) {
1245 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1246 htescape (get_node_path ($_[1])) . qq[</a>];
1247 } # get_node_link
1248
1249 {
1250 my $Msg = {};
1251
1252 sub load_text_catalog ($) {
1253 my $lang = shift; # MUST be a canonical lang name
1254 open my $file, '<:utf8', "cc-msg.$lang.txt"
1255 or die "$0: cc-msg.$lang.txt: $!";
1256 while (<$file>) {
1257 if (s/^([^;]+);([^;]*);//) {
1258 my ($type, $cls, $msg) = ($1, $2, $_);
1259 $msg =~ tr/\x0D\x0A//d;
1260 $Msg->{$type} = [$cls, $msg];
1261 }
1262 }
1263 } # load_text_catalog
1264
1265 sub get_text ($) {
1266 my ($type, $level, $node) = @_;
1267 $type = $level . ':' . $type if defined $level;
1268 $level = 'm' unless defined $level;
1269 my @arg;
1270 {
1271 if (defined $Msg->{$type}) {
1272 my $msg = $Msg->{$type}->[1];
1273 $msg =~ s{<var>\$([0-9]+)</var>}{
1274 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1275 }ge;
1276 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1277 UNIVERSAL::can ($node, 'get_attribute_ns')
1278 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1279 }ge;
1280 $msg =~ s{<var>{\@}</var>}{
1281 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1282 }ge;
1283 $msg =~ s{<var>{local-name}</var>}{
1284 UNIVERSAL::can ($node, 'manakai_local_name')
1285 ? htescape ($node->manakai_local_name) : ''
1286 }ge;
1287 $msg =~ s{<var>{element-local-name}</var>}{
1288 (UNIVERSAL::can ($node, 'owner_element') and
1289 $node->owner_element)
1290 ? htescape ($node->owner_element->manakai_local_name)
1291 : ''
1292 }ge;
1293 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1294 } elsif ($type =~ s/:([^:]*)$//) {
1295 unshift @arg, $1;
1296 redo;
1297 }
1298 }
1299 return ($type, 'level-'.$level, htescape ($_[0]));
1300 } # get_text
1301
1302 }
1303
1304 sub get_input_document ($$) {
1305 my ($http, $dom) = @_;
1306
1307 my $request_uri = $http->get_parameter ('uri');
1308 my $r = {};
1309 if (defined $request_uri and length $request_uri) {
1310 my $uri = $dom->create_uri_reference ($request_uri);
1311 unless ({
1312 http => 1,
1313 }->{lc $uri->uri_scheme}) {
1314 return {uri => $request_uri, request_uri => $request_uri,
1315 error_status_text => 'URI scheme not allowed'};
1316 }
1317
1318 require Message::Util::HostPermit;
1319 my $host_permit = new Message::Util::HostPermit;
1320 $host_permit->add_rule (<<EOH);
1321 Allow host=suika port=80
1322 Deny host=suika
1323 Allow host=suika.fam.cx port=80
1324 Deny host=suika.fam.cx
1325 Deny host=localhost
1326 Deny host=*.localdomain
1327 Deny ipv4=0.0.0.0/8
1328 Deny ipv4=10.0.0.0/8
1329 Deny ipv4=127.0.0.0/8
1330 Deny ipv4=169.254.0.0/16
1331 Deny ipv4=172.0.0.0/11
1332 Deny ipv4=192.0.2.0/24
1333 Deny ipv4=192.88.99.0/24
1334 Deny ipv4=192.168.0.0/16
1335 Deny ipv4=198.18.0.0/15
1336 Deny ipv4=224.0.0.0/4
1337 Deny ipv4=255.255.255.255/32
1338 Deny ipv6=0::0/0
1339 Allow host=*
1340 EOH
1341 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1342 return {uri => $request_uri, request_uri => $request_uri,
1343 error_status_text => 'Connection to the host is forbidden'};
1344 }
1345
1346 require LWP::UserAgent;
1347 my $ua = WDCC::LWPUA->new;
1348 $ua->{wdcc_dom} = $dom;
1349 $ua->{wdcc_host_permit} = $host_permit;
1350 $ua->agent ('Mozilla'); ## TODO: for now.
1351 $ua->parse_head (0);
1352 $ua->protocols_allowed ([qw/http/]);
1353 $ua->max_size (1000_000);
1354 my $req = HTTP::Request->new (GET => $request_uri);
1355 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1356 my $res = $ua->request ($req);
1357 ## TODO: 401 sets |is_success| true.
1358 if ($res->is_success or $http->get_parameter ('error-page')) {
1359 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1360 $r->{uri} = $res->request->uri;
1361 $r->{request_uri} = $request_uri;
1362
1363 ## TODO: More strict parsing...
1364 my $ct = $res->header ('Content-Type');
1365 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1366 $r->{charset} = lc $1;
1367 $r->{charset} =~ tr/\\//d;
1368 $r->{official_charset} = $r->{charset};
1369 }
1370
1371 my $input_charset = $http->get_parameter ('charset');
1372 if (defined $input_charset and length $input_charset) {
1373 $r->{charset_overridden}
1374 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1375 $r->{charset} = $input_charset;
1376 }
1377
1378 ## TODO: Support for HTTP Content-Encoding
1379
1380 $r->{s} = ''.$res->content;
1381
1382 require Whatpm::ContentType;
1383 ($r->{official_type}, $r->{media_type})
1384 = Whatpm::ContentType->get_sniffed_type
1385 (get_file_head => sub {
1386 return substr $r->{s}, 0, shift;
1387 },
1388 http_content_type_byte => $ct,
1389 has_http_content_encoding =>
1390 defined $res->header ('Content-Encoding'),
1391 supported_image_types => {});
1392 } else {
1393 $r->{uri} = $res->request->uri;
1394 $r->{request_uri} = $request_uri;
1395 $r->{error_status_text} = $res->status_line;
1396 }
1397
1398 $r->{header_field} = [];
1399 $res->scan (sub {
1400 push @{$r->{header_field}}, [$_[0], $_[1]];
1401 });
1402 $r->{header_status_code} = $res->code;
1403 $r->{header_status_text} = $res->message;
1404 } else {
1405 $r->{s} = ''.$http->get_parameter ('s');
1406 $r->{uri} = q<thismessage:/>;
1407 $r->{request_uri} = q<thismessage:/>;
1408 $r->{base_uri} = q<thismessage:/>;
1409 $r->{charset} = ''.$http->get_parameter ('_charset_');
1410 $r->{charset} =~ s/\s+//g;
1411 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1412 $r->{official_charset} = $r->{charset};
1413 $r->{header_field} = [];
1414
1415 require Whatpm::ContentType;
1416 ($r->{official_type}, $r->{media_type})
1417 = Whatpm::ContentType->get_sniffed_type
1418 (get_file_head => sub {
1419 return substr $r->{s}, 0, shift;
1420 },
1421 http_content_type_byte => undef,
1422 has_http_content_encoding => 0,
1423 supported_image_types => {});
1424 }
1425
1426 my $input_format = $http->get_parameter ('i');
1427 if (defined $input_format and length $input_format) {
1428 $r->{media_type_overridden}
1429 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1430 $r->{media_type} = $input_format;
1431 }
1432 if (defined $r->{s} and not defined $r->{media_type}) {
1433 $r->{media_type} = 'text/html';
1434 $r->{media_type_overridden} = 1;
1435 }
1436
1437 if ($r->{media_type} eq 'text/xml') {
1438 unless (defined $r->{charset}) {
1439 $r->{charset} = 'us-ascii';
1440 $r->{official_charset} = $r->{charset};
1441 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1442 $r->{charset_overridden} = 0;
1443 }
1444 }
1445
1446 if (length $r->{s} > 1000_000) {
1447 $r->{error_status_text} = 'Entity-body too large';
1448 delete $r->{s};
1449 return $r;
1450 }
1451
1452 $r->{inner_html_element} = $http->get_parameter ('e');
1453
1454 return $r;
1455 } # get_input_document
1456
1457 package WDCC::LWPUA;
1458 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1459
1460 sub redirect_ok {
1461 my $ua = shift;
1462 unless ($ua->SUPER::redirect_ok (@_)) {
1463 return 0;
1464 }
1465
1466 my $uris = $_[1]->header ('Location');
1467 return 0 unless $uris;
1468 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1469 unless ({
1470 http => 1,
1471 }->{lc $uri->uri_scheme}) {
1472 return 0;
1473 }
1474 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1475 return 0;
1476 }
1477 return 1;
1478 } # redirect_ok
1479
1480 =head1 AUTHOR
1481
1482 Wakaba <w@suika.fam.cx>.
1483
1484 =head1 LICENSE
1485
1486 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1487
1488 This library is free software; you can redistribute it
1489 and/or modify it under the same terms as Perl itself.
1490
1491 =cut
1492
1493 ## $Date: 2008/03/17 13:25:19 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24