/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.44 - (show annotations) (download)
Mon Mar 17 13:52:48 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.43: +5 -3 lines
++ ChangeLog	17 Mar 2008 13:52:22 -0000
	* cc.cgi (get_error_label): No-parent case was not considered.

2008-03-17  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89
90 <script src="../cc-script.js"></script>
91 ];
92
93 $input->{id_prefix} = '';
94 #$input->{nested} = 0;
95 my $result = {conforming_min => 1, conforming_max => 1};
96 check_and_print ($input => $result);
97 print_result_section ($result);
98 } else {
99 print STDOUT qq[</dl></div>];
100 print_result_input_error_section ($input);
101 }
102
103 print STDOUT qq[
104 <ul class="navigation" id="nav-items">
105 ];
106 for (@nav) {
107 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108 }
109 print STDOUT qq[
110 </ul>
111 </body>
112 </html>
113 ];
114
115 for (qw/decode parse parse_html parse_xml parse_manifest
116 check check_manifest/) {
117 next unless defined $time{$_};
118 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119 print $file $char_length, "\t", $time{$_}, "\n";
120 }
121
122 exit;
123 }
124
125 sub add_error ($$$) {
126 my ($layer, $err, $result) = @_;
127 if (defined $err->{level}) {
128 if ($err->{level} eq 's') {
129 $result->{$layer}->{should}++;
130 $result->{$layer}->{score_min} -= 2;
131 $result->{conforming_min} = 0;
132 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133 $result->{$layer}->{warning}++;
134 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 $result->{$layer}->{unsupported}++;
136 $result->{unsupported} = 1;
137 } elsif ($err->{level} eq 'i') {
138 #
139 } else {
140 $result->{$layer}->{must}++;
141 $result->{$layer}->{score_max} -= 2;
142 $result->{$layer}->{score_min} -= 2;
143 $result->{conforming_min} = 0;
144 $result->{conforming_max} = 0;
145 }
146 } else {
147 $result->{$layer}->{must}++;
148 $result->{$layer}->{score_max} -= 2;
149 $result->{$layer}->{score_min} -= 2;
150 $result->{conforming_min} = 0;
151 $result->{conforming_max} = 0;
152 }
153 } # add_error
154
155 sub check_and_print ($$) {
156 my ($input, $result) = @_;
157
158 print_http_header_section ($input, $result);
159
160 my $doc;
161 my $el;
162 my $cssom;
163 my $manifest;
164 my @subdoc;
165
166 if ($input->{media_type} eq 'text/html') {
167 ($doc, $el) = print_syntax_error_html_section ($input, $result);
168 print_source_string_section
169 ($input,
170 \($input->{s}),
171 $input->{charset} || $doc->input_encoding);
172 } elsif ({
173 'text/xml' => 1,
174 'application/atom+xml' => 1,
175 'application/rss+xml' => 1,
176 'application/svg+xml' => 1,
177 'application/xhtml+xml' => 1,
178 'application/xml' => 1,
179 }->{$input->{media_type}}) {
180 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
181 print_source_string_section ($input,
182 \($input->{s}),
183 $doc->input_encoding);
184 } elsif ($input->{media_type} eq 'text/css') {
185 $cssom = print_syntax_error_css_section ($input, $result);
186 print_source_string_section
187 ($input, \($input->{s}),
188 $cssom->manakai_input_encoding);
189 } elsif ($input->{media_type} eq 'text/cache-manifest') {
190 ## TODO: MUST be text/cache-manifest
191 $manifest = print_syntax_error_manifest_section ($input, $result);
192 print_source_string_section ($input, \($input->{s}),
193 'utf-8');
194 } else {
195 ## TODO: Change HTTP status code??
196 print_result_unknown_type_section ($input, $result);
197 }
198
199 if (defined $doc or defined $el) {
200 $doc->document_uri ($input->{uri});
201 $doc->manakai_entity_base_uri ($input->{base_uri});
202 print_structure_dump_dom_section ($input, $doc, $el);
203 my $elements = print_structure_error_dom_section
204 ($input, $doc, $el, $result, sub {
205 push @subdoc, shift;
206 });
207 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
208 print_listing_section ({
209 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
210 }, $input, $elements->{id}) if keys %{$elements->{id}};
211 print_listing_section ({
212 id => 'terms', label => 'Terms', heading => 'Terms',
213 }, $input, $elements->{term}) if keys %{$elements->{term}};
214 print_listing_section ({
215 id => 'classes', label => 'Classes', heading => 'Classes',
216 }, $input, $elements->{class}) if keys %{$elements->{class}};
217 } elsif (defined $cssom) {
218 print_structure_dump_cssom_section ($input, $cssom);
219 ## TODO: CSSOM validation
220 add_error ('structure', {level => 'u'} => $result);
221 } elsif (defined $manifest) {
222 print_structure_dump_manifest_section ($input, $manifest);
223 print_structure_error_manifest_section ($input, $manifest, $result);
224 }
225
226 my $id_prefix = 0;
227 for my $subinput (@subdoc) {
228 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
229 $subinput->{nested} = 1;
230 $subinput->{base_uri} = $subinput->{container_node}->base_uri
231 unless defined $subinput->{base_uri};
232 my $ebaseuri = htescape ($subinput->{base_uri});
233 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
234 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
235 <h2>Subdocument #$id_prefix</h2>
236
237 <dl>
238 <dt>Internet Media Type</dt>
239 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
240 <dt>Container Node</dt>
241 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
242 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
243 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
244 </dl>];
245
246 $subinput->{id_prefix} .= '-';
247 check_and_print ($subinput => $result);
248
249 print STDOUT qq[</div>];
250 }
251 } # check_and_print
252
253 sub print_http_header_section ($$) {
254 my ($input, $result) = @_;
255 return unless defined $input->{header_status_code} or
256 defined $input->{header_status_text} or
257 @{$input->{header_field} or []};
258
259 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
260 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
261 <h2>HTTP Header</h2>
262
263 <p><strong>Note</strong>: Due to the limitation of the
264 network library in use, the content of this section might
265 not be the real header.</p>
266
267 <table><tbody>
268 ];
269
270 if (defined $input->{header_status_code}) {
271 print STDOUT qq[<tr><th scope="row">Status code</th>];
272 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
273 }
274 if (defined $input->{header_status_text}) {
275 print STDOUT qq[<tr><th scope="row">Status text</th>];
276 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
277 }
278
279 for (@{$input->{header_field}}) {
280 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
281 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
282 }
283
284 print STDOUT qq[</tbody></table></div>];
285 } # print_http_header_section
286
287 sub print_syntax_error_html_section ($$) {
288 my ($input, $result) = @_;
289
290 require Encode;
291 require Whatpm::HTML;
292
293 print STDOUT qq[
294 <div id="$input->{id_prefix}parse-errors" class="section">
295 <h2>Parse Errors</h2>
296
297 <dl id="$input->{id_prefix}parse-errors-list">];
298 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
299
300 my $onerror = sub {
301 my (%opt) = @_;
302 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
303 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
304 qq[</dt>];
305 $type =~ tr/ /-/;
306 $type =~ s/\|/%7C/g;
307 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
308 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
309 print STDOUT qq[$msg</dd>\n];
310
311 add_error ('syntax', \%opt => $result);
312 };
313
314 my $doc = $dom->create_document;
315 my $el;
316 my $inner_html_element = $input->{inner_html_element};
317 if (defined $inner_html_element and length $inner_html_element) {
318 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
319 my $time1 = time;
320 my $t = Encode::decode ($input->{charset}, $input->{s});
321 $time{decode} = time - $time1;
322
323 $el = $doc->create_element_ns
324 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
325 $time1 = time;
326 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
327 $time{parse} = time - $time1;
328 } else {
329 my $time1 = time;
330 Whatpm::HTML->parse_byte_string
331 ($input->{charset}, $input->{s} => $doc, $onerror);
332 $time{parse_html} = time - $time1;
333 }
334 $doc->manakai_charset ($input->{official_charset})
335 if defined $input->{official_charset};
336
337 print STDOUT qq[</dl></div>];
338
339 return ($doc, $el);
340 } # print_syntax_error_html_section
341
342 sub print_syntax_error_xml_section ($$) {
343 my ($input, $result) = @_;
344
345 require Message::DOM::XMLParserTemp;
346
347 print STDOUT qq[
348 <div id="$input->{id_prefix}parse-errors" class="section">
349 <h2>Parse Errors</h2>
350
351 <dl id="$input->{id_prefix}parse-errors-list">];
352 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
353
354 my $onerror = sub {
355 my $err = shift;
356 my $line = $err->location->line_number;
357 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
358 print STDOUT $err->location->column_number, "</dt><dd>";
359 print STDOUT htescape $err->text, "</dd>\n";
360
361 add_error ('syntax', {type => $err->text,
362 level => [
363 $err->SEVERITY_FATAL_ERROR => 'm',
364 $err->SEVERITY_ERROR => 'm',
365 $err->SEVERITY_WARNING => 's',
366 ]->[$err->severity]} => $result);
367
368 return 1;
369 };
370
371 my $time1 = time;
372 open my $fh, '<', \($input->{s});
373 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
374 ($fh => $dom, $onerror, charset => $input->{charset});
375 $time{parse_xml} = time - $time1;
376 $doc->manakai_charset ($input->{official_charset})
377 if defined $input->{official_charset};
378
379 print STDOUT qq[</dl></div>];
380
381 return ($doc, undef);
382 } # print_syntax_error_xml_section
383
384 sub get_css_parser () {
385 our $CSSParser;
386 return $CSSParser if $CSSParser;
387
388 require Whatpm::CSS::Parser;
389 my $p = Whatpm::CSS::Parser->new;
390
391 $p->{prop}->{$_} = 1 for qw/
392 alignment-baseline
393 background background-attachment background-color background-image
394 background-position background-position-x background-position-y
395 background-repeat border border-bottom border-bottom-color
396 border-bottom-style border-bottom-width border-collapse border-color
397 border-left border-left-color
398 border-left-style border-left-width border-right border-right-color
399 border-right-style border-right-width
400 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
401 border-style border-top border-top-color border-top-style border-top-width
402 border-width bottom
403 caption-side clear clip color content counter-increment counter-reset
404 cursor direction display dominant-baseline empty-cells float font
405 font-family font-size font-size-adjust font-stretch
406 font-style font-variant font-weight height left
407 letter-spacing line-height
408 list-style list-style-image list-style-position list-style-type
409 margin margin-bottom margin-left margin-right margin-top marker-offset
410 marks max-height max-width min-height min-width opacity -moz-opacity
411 orphans outline outline-color outline-style outline-width overflow
412 overflow-x overflow-y
413 padding padding-bottom padding-left padding-right padding-top
414 page page-break-after page-break-before page-break-inside
415 position quotes right size table-layout
416 text-align text-anchor text-decoration text-indent text-transform
417 top unicode-bidi vertical-align visibility white-space width widows
418 word-spacing writing-mode z-index
419 /;
420 $p->{prop_value}->{display}->{$_} = 1 for qw/
421 block clip inline inline-block inline-table list-item none
422 table table-caption table-cell table-column table-column-group
423 table-header-group table-footer-group table-row table-row-group
424 compact marker
425 /;
426 $p->{prop_value}->{position}->{$_} = 1 for qw/
427 absolute fixed relative static
428 /;
429 $p->{prop_value}->{float}->{$_} = 1 for qw/
430 left right none
431 /;
432 $p->{prop_value}->{clear}->{$_} = 1 for qw/
433 left right none both
434 /;
435 $p->{prop_value}->{direction}->{ltr} = 1;
436 $p->{prop_value}->{direction}->{rtl} = 1;
437 $p->{prop_value}->{marks}->{crop} = 1;
438 $p->{prop_value}->{marks}->{cross} = 1;
439 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
440 normal bidi-override embed
441 /;
442 for my $prop_name (qw/overflow overflow-x overflow-y/) {
443 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
444 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
445 /;
446 }
447 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
448 visible hidden collapse
449 /;
450 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
451 disc circle square decimal decimal-leading-zero
452 lower-roman upper-roman lower-greek lower-latin
453 upper-latin armenian georgian lower-alpha upper-alpha none
454 hebrew cjk-ideographic hiragana katakana hiragana-iroha
455 katakana-iroha
456 /;
457 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
458 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
459 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
460 auto always avoid left right
461 /;
462 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
463 auto always avoid left right
464 /;
465 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
466 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
467 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
468 repeat repeat-x repeat-y no-repeat
469 /;
470 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
471 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
472 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
473 xx-small x-small small medium large x-large xx-large
474 -manakai-xxx-large -webkit-xxx-large
475 larger smaller
476 /;
477 $p->{prop_value}->{'font-style'}->{normal} = 1;
478 $p->{prop_value}->{'font-style'}->{italic} = 1;
479 $p->{prop_value}->{'font-style'}->{oblique} = 1;
480 $p->{prop_value}->{'font-variant'}->{normal} = 1;
481 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
482 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
483 qw/normal wider narrower ultra-condensed extra-condensed
484 condensed semi-condensed semi-expanded expanded
485 extra-expanded ultra-expanded/;
486 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
487 left right center justify begin end
488 /;
489 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
490 capitalize uppercase lowercase none
491 /;
492 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
493 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
494 /;
495 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
496 lr rl tb lr-tb rl-tb tb-rl
497 /;
498 $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
499 start middle end
500 /;
501 $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
502 auto use-script no-change reset-size ideographic alphabetic
503 hanging mathematical central middle text-after-edge text-before-edge
504 /;
505 $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
506 auto baseline before-edge text-before-edge middle central
507 after-edge text-after-edge ideographic alphabetic hanging
508 mathematical
509 /;
510 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
511 none blink underline overline line-through
512 /;
513 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
514 top bottom left right
515 /;
516 $p->{prop_value}->{'table-layout'}->{auto} = 1;
517 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
518 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
519 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
520 $p->{prop_value}->{'empty-cells'}->{show} = 1;
521 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
522 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
523 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
524 se-resize sw-resize s-resize w-resize text wait help progress
525 /;
526 for my $prop (qw/border-top-style border-left-style
527 border-bottom-style border-right-style outline-style/) {
528 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
529 none hidden dotted dashed solid double groove ridge inset outset
530 /;
531 }
532 for my $prop (qw/color background-color
533 border-bottom-color border-left-color border-right-color
534 border-top-color border-color/) {
535 $p->{prop_value}->{$prop}->{transparent} = 1;
536 $p->{prop_value}->{$prop}->{flavor} = 1;
537 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
538 }
539 $p->{prop_value}->{'outline-color'}->{invert} = 1;
540 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
541 $p->{pseudo_class}->{$_} = 1 for qw/
542 active checked disabled empty enabled first-child first-of-type
543 focus hover indeterminate last-child last-of-type link only-child
544 only-of-type root target visited
545 lang nth-child nth-last-child nth-of-type nth-last-of-type not
546 -manakai-contains -manakai-current
547 /;
548 $p->{pseudo_element}->{$_} = 1 for qw/
549 after before first-letter first-line
550 /;
551
552 return $CSSParser = $p;
553 } # get_css_parser
554
555 sub print_syntax_error_css_section ($$) {
556 my ($input, $result) = @_;
557
558 print STDOUT qq[
559 <div id="$input->{id_prefix}parse-errors" class="section">
560 <h2>Parse Errors</h2>
561
562 <dl id="$input->{id_prefix}parse-errors-list">];
563 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
564
565 my $p = get_css_parser ();
566 $p->init;
567 $p->{onerror} = sub {
568 my (%opt) = @_;
569 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
570 if ($opt{token}) {
571 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
572 } else {
573 print STDOUT qq[<dt class="$cls">Unknown location];
574 }
575 if (defined $opt{value}) {
576 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
577 } elsif (defined $opt{token}) {
578 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
579 }
580 $type =~ tr/ /-/;
581 $type =~ s/\|/%7C/g;
582 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
583 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
584 print STDOUT qq[$msg</dd>\n];
585
586 add_error ('syntax', \%opt => $result);
587 };
588 $p->{href} = $input->{uri};
589 $p->{base_uri} = $input->{base_uri};
590
591 # if ($parse_mode eq 'q') {
592 # $p->{unitless_px} = 1;
593 # $p->{hashless_color} = 1;
594 # }
595
596 ## TODO: Make $input->{s} a ref.
597
598 my $s = \$input->{s};
599 my $charset;
600 unless ($input->{is_char_string}) {
601 require Encode;
602 if (defined $input->{charset}) {## TODO: IANA->Perl
603 $charset = $input->{charset};
604 $s = \(Encode::decode ($input->{charset}, $$s));
605 } else {
606 ## TODO: charset detection
607 $s = \(Encode::decode ($charset = 'utf-8', $$s));
608 }
609 }
610
611 my $cssom = $p->parse_char_string ($$s);
612 $cssom->manakai_input_encoding ($charset) if defined $charset;
613
614 print STDOUT qq[</dl></div>];
615
616 return $cssom;
617 } # print_syntax_error_css_section
618
619 sub print_syntax_error_manifest_section ($$) {
620 my ($input, $result) = @_;
621
622 require Whatpm::CacheManifest;
623
624 print STDOUT qq[
625 <div id="$input->{id_prefix}parse-errors" class="section">
626 <h2>Parse Errors</h2>
627
628 <dl id="$input->{id_prefix}parse-errors-list">];
629 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
630
631 my $onerror = sub {
632 my (%opt) = @_;
633 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
634 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
635 qq[</dt>];
636 $type =~ tr/ /-/;
637 $type =~ s/\|/%7C/g;
638 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
639 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
640 print STDOUT qq[$msg</dd>\n];
641
642 add_error ('syntax', \%opt => $result);
643 };
644
645 my $time1 = time;
646 my $manifest = Whatpm::CacheManifest->parse_byte_string
647 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
648 $time{parse_manifest} = time - $time1;
649
650 print STDOUT qq[</dl></div>];
651
652 return $manifest;
653 } # print_syntax_error_manifest_section
654
655 sub print_source_string_section ($$$) {
656 my $input = shift;
657 my $s;
658 unless ($input->{is_char_string}) {
659 require Encode;
660 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
661 return unless $enc;
662
663 $s = \($enc->decode (${$_[0]}));
664 } else {
665 $s = $_[0];
666 }
667
668 my $i = 1;
669 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
670 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
671 <h2>Document Source</h2>
672 <ol lang="">\n];
673 if (length $$s) {
674 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
675 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
676 "</li>\n";
677 $i++;
678 }
679 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
680 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
681 "</li>\n";
682 }
683 } else {
684 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
685 }
686 print STDOUT "</ol></div>
687 <script>
688 addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
689 </script>";
690 } # print_input_string_section
691
692 sub print_document_tree ($$) {
693 my ($input, $node) = @_;
694
695 my $r = '<ol class="xoxo">';
696
697 my @node = ($node);
698 while (@node) {
699 my $child = shift @node;
700 unless (ref $child) {
701 $r .= $child;
702 next;
703 }
704
705 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
706 my $nt = $child->node_type;
707 if ($nt == $child->ELEMENT_NODE) {
708 my $child_nsuri = $child->namespace_uri;
709 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
710 '</code>'; ## ISSUE: case
711
712 if ($child->has_attributes) {
713 $r .= '<ul class="attributes">';
714 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
715 @{$child->attributes}) {
716 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
717 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
718 }
719 $r .= '</ul>';
720 }
721
722 if ($child->has_child_nodes) {
723 $r .= '<ol class="children">';
724 unshift @node, @{$child->child_nodes}, '</ol></li>';
725 } else {
726 $r .= '</li>';
727 }
728 } elsif ($nt == $child->TEXT_NODE) {
729 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
730 } elsif ($nt == $child->CDATA_SECTION_NODE) {
731 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
732 } elsif ($nt == $child->COMMENT_NODE) {
733 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
734 } elsif ($nt == $child->DOCUMENT_NODE) {
735 $r .= qq'<li id="$node_id" class="tree-document">Document';
736 $r .= qq[<ul class="attributes">];
737 my $cp = $child->manakai_charset;
738 if (defined $cp) {
739 $r .= qq[<li><code>charset</code> parameter = <code>];
740 $r .= htescape ($cp) . qq[</code></li>];
741 }
742 $r .= qq[<li><code>inputEncoding</code> = ];
743 my $ie = $child->input_encoding;
744 if (defined $ie) {
745 $r .= qq[<code>@{[htescape ($ie)]}</code>];
746 if ($child->manakai_has_bom) {
747 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
748 }
749 } else {
750 $r .= qq[(<code>null</code>)];
751 }
752 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
753 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
754 unless ($child->manakai_is_html) {
755 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
756 if (defined $child->xml_encoding) {
757 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
758 } else {
759 $r .= qq[<li>XML encoding = (null)</li>];
760 }
761 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
762 }
763 $r .= qq[</ul>];
764 if ($child->has_child_nodes) {
765 $r .= '<ol class="children">';
766 unshift @node, @{$child->child_nodes}, '</ol></li>';
767 }
768 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
769 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
770 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
771 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
772 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
773 $r .= '</ul></li>';
774 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
775 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
776 } else {
777 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
778 }
779 }
780
781 $r .= '</ol>';
782 print STDOUT $r;
783 } # print_document_tree
784
785 sub print_structure_dump_dom_section ($$$) {
786 my ($input, $doc, $el) = @_;
787
788 print STDOUT qq[
789 <div id="$input->{id_prefix}document-tree" class="section">
790 <h2>Document Tree</h2>
791 ];
792 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
793 unless $input->{nested};
794
795 print_document_tree ($input, $el || $doc);
796
797 print STDOUT qq[</div>];
798 } # print_structure_dump_dom_section
799
800 sub print_structure_dump_cssom_section ($$) {
801 my ($input, $cssom) = @_;
802
803 print STDOUT qq[
804 <div id="$input->{id_prefix}document-tree" class="section">
805 <h2>Document Tree</h2>
806 ];
807 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
808 unless $input->{nested};
809
810 ## TODO:
811 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
812
813 print STDOUT qq[</div>];
814 } # print_structure_dump_cssom_section
815
816 sub print_structure_dump_manifest_section ($$) {
817 my ($input, $manifest) = @_;
818
819 print STDOUT qq[
820 <div id="$input->{id_prefix}dump-manifest" class="section">
821 <h2>Cache Manifest</h2>
822 ];
823 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
824 unless $input->{nested};
825
826 print STDOUT qq[<dl><dt>Explicit entries</dt>];
827 my $i = 0;
828 for my $uri (@{$manifest->[0]}) {
829 my $euri = htescape ($uri);
830 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
831 }
832
833 print STDOUT qq[<dt>Fallback entries</dt><dd>
834 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
835 <th scope=row>Fallback Entry</tr><tbody>];
836 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
837 my $euri = htescape ($uri);
838 my $euri2 = htescape ($manifest->[1]->{$uri});
839 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
840 <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
841 }
842
843 print STDOUT qq[</table><dt>Online whitelist</dt>];
844 for my $uri (@{$manifest->[2]}) {
845 my $euri = htescape ($uri);
846 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
847 }
848
849 print STDOUT qq[</dl></div>];
850 } # print_structure_dump_manifest_section
851
852 sub print_structure_error_dom_section ($$$$$) {
853 my ($input, $doc, $el, $result, $onsubdoc) = @_;
854
855 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
856 <h2>Document Errors</h2>
857
858 <dl id=document-errors-list>];
859 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
860 unless $input->{nested};
861
862 require Whatpm::ContentChecker;
863 my $onerror = sub {
864 my %opt = @_;
865 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
866 $type =~ tr/ /-/;
867 $type =~ s/\|/%7C/g;
868 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
869 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
870 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
871 print STDOUT $msg, "</dd>\n";
872 add_error ('structure', \%opt => $result);
873 };
874
875 my $elements;
876 my $time1 = time;
877 if ($el) {
878 $elements = Whatpm::ContentChecker->check_element
879 ($el, $onerror, $onsubdoc);
880 } else {
881 $elements = Whatpm::ContentChecker->check_document
882 ($doc, $onerror, $onsubdoc);
883 }
884 $time{check} = time - $time1;
885
886 print STDOUT qq[</dl>
887 <script>
888 addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
889 </script></div>];
890
891 return $elements;
892 } # print_structure_error_dom_section
893
894 sub print_structure_error_manifest_section ($$$) {
895 my ($input, $manifest, $result) = @_;
896
897 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
898 <h2>Document Errors</h2>
899
900 <dl>];
901 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
902 unless $input->{nested};
903
904 require Whatpm::CacheManifest;
905 Whatpm::CacheManifest->check_manifest ($manifest, sub {
906 my %opt = @_;
907 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
908 $type =~ tr/ /-/;
909 $type =~ s/\|/%7C/g;
910 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
911 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
912 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
913 add_error ('structure', \%opt => $result);
914 });
915
916 print STDOUT qq[</div>];
917 } # print_structure_error_manifest_section
918
919 sub print_table_section ($$) {
920 my ($input, $tables) = @_;
921
922 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
923 unless $input->{nested};
924 print STDOUT qq[
925 <div id="$input->{id_prefix}tables" class="section">
926 <h2>Tables</h2>
927
928 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
929 <script src="../table-script.js" type="text/javascript"></script>
930 <noscript>
931 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
932 </noscript>
933 ];
934
935 require JSON;
936
937 my $i = 0;
938 for my $table_el (@$tables) {
939 $i++;
940 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
941 get_node_link ($input, $table_el) . q[</h3>];
942
943 ## TODO: Make |ContentChecker| return |form_table| result
944 ## so that this script don't have to run the algorithm twice.
945 my $table = Whatpm::HTMLTable->form_table ($table_el);
946
947 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
948 next unless $_;
949 delete $_->{element};
950 }
951
952 for (@{$table->{row_group}}) {
953 next unless $_;
954 next unless $_->{element};
955 $_->{type} = $_->{element}->manakai_local_name;
956 delete $_->{element};
957 }
958
959 for (@{$table->{cell}}) {
960 next unless $_;
961 for (@{$_}) {
962 next unless $_;
963 for (@$_) {
964 $_->{id} = refaddr $_->{element} if defined $_->{element};
965 delete $_->{element};
966 $_->{is_header} = $_->{is_header} ? 1 : 0;
967 }
968 }
969 }
970
971 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
972 print STDOUT JSON::objToJson ($table);
973 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
974 print STDOUT qq[, '$input->{id_prefix}');</script>];
975 }
976
977 print STDOUT qq[</div>];
978 } # print_table_section
979
980 sub print_listing_section ($$$) {
981 my ($opt, $input, $ids) = @_;
982
983 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
984 unless $input->{nested};
985 print STDOUT qq[
986 <div id="$input->{id_prefix}$opt->{id}" class="section">
987 <h2>$opt->{heading}</h2>
988
989 <dl>
990 ];
991 for my $id (sort {$a cmp $b} keys %$ids) {
992 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
993 for (@{$ids->{$id}}) {
994 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
995 }
996 }
997 print STDOUT qq[</dl></div>];
998 } # print_listing_section
999
1000 sub print_result_section ($) {
1001 my $result = shift;
1002
1003 print STDOUT qq[
1004 <div id="result-summary" class="section">
1005 <h2>Result</h2>];
1006
1007 if ($result->{unsupported} and $result->{conforming_max}) {
1008 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1009 checker cannot decide whether the document is conforming or
1010 not, since the document contains one or more unsupported
1011 features. The document might or might not be conforming.</p>];
1012 } elsif ($result->{conforming_min}) {
1013 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1014 found in this document.</p>];
1015 } elsif ($result->{conforming_max}) {
1016 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1017 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1018 it might be conforming.</p>];
1019 } else {
1020 print STDOUT qq[<p class=FAIL id=result-para>This document is
1021 <strong><em>non</em>-conforming</strong>.</p>];
1022 }
1023
1024 print STDOUT qq[<table>
1025 <colgroup><col><colgroup><col><col><col><colgroup><col>
1026 <thead>
1027 <tr><th scope=col></th>
1028 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1029 Errors</a></th>
1030 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1031 Errors</a></th>
1032 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1033 <th scope=col>Score</th></tr></thead><tbody>];
1034
1035 my $must_error = 0;
1036 my $should_error = 0;
1037 my $warning = 0;
1038 my $score_min = 0;
1039 my $score_max = 0;
1040 my $score_base = 20;
1041 my $score_unit = $score_base / 100;
1042 for (
1043 [Transfer => 'transfer', ''],
1044 [Character => 'char', ''],
1045 [Syntax => 'syntax', '#parse-errors'],
1046 [Structure => 'structure', '#document-errors'],
1047 ) {
1048 $must_error += ($result->{$_->[1]}->{must} += 0);
1049 $should_error += ($result->{$_->[1]}->{should} += 0);
1050 $warning += ($result->{$_->[1]}->{warning} += 0);
1051 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1052 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1053
1054 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1055 my $label = $_->[0];
1056 if ($result->{$_->[1]}->{must} or
1057 $result->{$_->[1]}->{should} or
1058 $result->{$_->[1]}->{warning} or
1059 $result->{$_->[1]}->{unsupported}) {
1060 $label = qq[<a href="$_->[2]">$label</a>];
1061 }
1062
1063 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1064 if ($uncertain) {
1065 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1066 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1067 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1068 } else {
1069 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1070 }
1071 }
1072
1073 $score_max += $score_base;
1074
1075 print STDOUT qq[
1076 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1077 </tbody>
1078 <tfoot><tr class=uncertain><th scope=row>Total</th>
1079 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1080 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1081 <td>$warning?</td>
1082 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1083 </table>
1084
1085 <p><strong>Important</strong>: This conformance checking service
1086 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1087 </div>];
1088 push @nav, ['#result-summary' => 'Result'];
1089 } # print_result_section
1090
1091 sub print_result_unknown_type_section ($$) {
1092 my ($input, $result) = @_;
1093
1094 my $euri = htescape ($input->{uri});
1095 print STDOUT qq[
1096 <div id="$input->{id_prefix}parse-errors" class="section">
1097 <h2>Errors</h2>
1098
1099 <dl>
1100 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1101 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1102 supported</a></strong>:
1103 Media type
1104 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1105 is not supported.</dd>
1106 </dl>
1107 </div>
1108 ];
1109 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1110 unless $input->{nested};
1111 add_error (char => {level => 'u'} => $result);
1112 add_error (syntax => {level => 'u'} => $result);
1113 add_error (structure => {level => 'u'} => $result);
1114 } # print_result_unknown_type_section
1115
1116 sub print_result_input_error_section ($) {
1117 my $input = shift;
1118 print STDOUT qq[<div class="section" id="result-summary">
1119 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1120 </div>];
1121 push @nav, ['#result-summary' => 'Result'];
1122 } # print_result_input_error_section
1123
1124 sub get_error_label ($$) {
1125 my ($input, $err) = @_;
1126
1127 my $r = '';
1128
1129 my $line;
1130 my $column;
1131
1132 if (defined $err->{node}) {
1133 $line = $err->{node}->get_user_data ('manakai_source_line');
1134 if (defined $line) {
1135 $column = $err->{node}->get_user_data ('manakai_source_column');
1136 } else {
1137 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1138 my $owner = $err->{node}->owner_element;
1139 $line = $owner->get_user_data ('manakai_source_line');
1140 $column = $owner->get_user_data ('manakai_source_column');
1141 } else {
1142 my $parent = $err->{node}->parent_node;
1143 if ($parent) {
1144 $line = $parent->get_user_data ('manakai_source_line');
1145 $column = $parent->get_user_data ('manakai_source_column');
1146 }
1147 }
1148 }
1149 }
1150 unless (defined $line) {
1151 if (defined $err->{token} and defined $err->{token}->{line}) {
1152 $line = $err->{token}->{line};
1153 $column = $err->{token}->{column};
1154 } elsif (defined $err->{line}) {
1155 $line = $err->{line};
1156 $column = $err->{column};
1157 }
1158 }
1159
1160 if (defined $line) {
1161 if (defined $column and $column > 0) {
1162 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1163 } else {
1164 $line = $line - 1 || 1;
1165 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1166 }
1167 }
1168
1169 if (defined $err->{node}) {
1170 $r .= ' ' if length $r;
1171 $r .= get_node_link ($input, $err->{node});
1172 }
1173
1174 if (defined $err->{index}) {
1175 if (length $r) {
1176 $r .= ', Index ' . (0+$err->{index});
1177 } else {
1178 $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1179 . (0+$err->{index}) . '</a>';
1180 }
1181 }
1182
1183 if (defined $err->{value}) {
1184 $r .= ' ' if length $r;
1185 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1186 }
1187
1188 return $r;
1189 } # get_error_label
1190
1191 sub get_error_level_label ($) {
1192 my $err = shift;
1193
1194 my $r = '';
1195
1196 if (not defined $err->{level} or $err->{level} eq 'm') {
1197 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1198 error</a></strong>: ];
1199 } elsif ($err->{level} eq 's') {
1200 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1201 error</a></strong>: ];
1202 } elsif ($err->{level} eq 'w') {
1203 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1204 ];
1205 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1206 $r = qq[<strong><a href="../error-description#level-u">Not
1207 supported</a></strong>: ];
1208 } elsif ($err->{level} eq 'i') {
1209 $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1210 } else {
1211 my $elevel = htescape ($err->{level});
1212 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1213 ];
1214 }
1215
1216 return $r;
1217 } # get_error_level_label
1218
1219 sub get_node_path ($) {
1220 my $node = shift;
1221 my @r;
1222 while (defined $node) {
1223 my $rs;
1224 if ($node->node_type == 1) {
1225 $rs = $node->manakai_local_name;
1226 $node = $node->parent_node;
1227 } elsif ($node->node_type == 2) {
1228 $rs = '@' . $node->manakai_local_name;
1229 $node = $node->owner_element;
1230 } elsif ($node->node_type == 3) {
1231 $rs = '"' . $node->data . '"';
1232 $node = $node->parent_node;
1233 } elsif ($node->node_type == 9) {
1234 @r = ('') unless @r;
1235 $rs = '';
1236 $node = $node->parent_node;
1237 } else {
1238 $rs = '#' . $node->node_type;
1239 $node = $node->parent_node;
1240 }
1241 unshift @r, $rs;
1242 }
1243 return join '/', @r;
1244 } # get_node_path
1245
1246 sub get_node_link ($$) {
1247 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1248 htescape (get_node_path ($_[1])) . qq[</a>];
1249 } # get_node_link
1250
1251 {
1252 my $Msg = {};
1253
1254 sub load_text_catalog ($) {
1255 my $lang = shift; # MUST be a canonical lang name
1256 open my $file, '<:utf8', "cc-msg.$lang.txt"
1257 or die "$0: cc-msg.$lang.txt: $!";
1258 while (<$file>) {
1259 if (s/^([^;]+);([^;]*);//) {
1260 my ($type, $cls, $msg) = ($1, $2, $_);
1261 $msg =~ tr/\x0D\x0A//d;
1262 $Msg->{$type} = [$cls, $msg];
1263 }
1264 }
1265 } # load_text_catalog
1266
1267 sub get_text ($) {
1268 my ($type, $level, $node) = @_;
1269 $type = $level . ':' . $type if defined $level;
1270 $level = 'm' unless defined $level;
1271 my @arg;
1272 {
1273 if (defined $Msg->{$type}) {
1274 my $msg = $Msg->{$type}->[1];
1275 $msg =~ s{<var>\$([0-9]+)</var>}{
1276 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1277 }ge;
1278 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1279 UNIVERSAL::can ($node, 'get_attribute_ns')
1280 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1281 }ge;
1282 $msg =~ s{<var>{\@}</var>}{
1283 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1284 }ge;
1285 $msg =~ s{<var>{local-name}</var>}{
1286 UNIVERSAL::can ($node, 'manakai_local_name')
1287 ? htescape ($node->manakai_local_name) : ''
1288 }ge;
1289 $msg =~ s{<var>{element-local-name}</var>}{
1290 (UNIVERSAL::can ($node, 'owner_element') and
1291 $node->owner_element)
1292 ? htescape ($node->owner_element->manakai_local_name)
1293 : ''
1294 }ge;
1295 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1296 } elsif ($type =~ s/:([^:]*)$//) {
1297 unshift @arg, $1;
1298 redo;
1299 }
1300 }
1301 return ($type, 'level-'.$level, htescape ($_[0]));
1302 } # get_text
1303
1304 }
1305
1306 sub get_input_document ($$) {
1307 my ($http, $dom) = @_;
1308
1309 my $request_uri = $http->get_parameter ('uri');
1310 my $r = {};
1311 if (defined $request_uri and length $request_uri) {
1312 my $uri = $dom->create_uri_reference ($request_uri);
1313 unless ({
1314 http => 1,
1315 }->{lc $uri->uri_scheme}) {
1316 return {uri => $request_uri, request_uri => $request_uri,
1317 error_status_text => 'URI scheme not allowed'};
1318 }
1319
1320 require Message::Util::HostPermit;
1321 my $host_permit = new Message::Util::HostPermit;
1322 $host_permit->add_rule (<<EOH);
1323 Allow host=suika port=80
1324 Deny host=suika
1325 Allow host=suika.fam.cx port=80
1326 Deny host=suika.fam.cx
1327 Deny host=localhost
1328 Deny host=*.localdomain
1329 Deny ipv4=0.0.0.0/8
1330 Deny ipv4=10.0.0.0/8
1331 Deny ipv4=127.0.0.0/8
1332 Deny ipv4=169.254.0.0/16
1333 Deny ipv4=172.0.0.0/11
1334 Deny ipv4=192.0.2.0/24
1335 Deny ipv4=192.88.99.0/24
1336 Deny ipv4=192.168.0.0/16
1337 Deny ipv4=198.18.0.0/15
1338 Deny ipv4=224.0.0.0/4
1339 Deny ipv4=255.255.255.255/32
1340 Deny ipv6=0::0/0
1341 Allow host=*
1342 EOH
1343 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1344 return {uri => $request_uri, request_uri => $request_uri,
1345 error_status_text => 'Connection to the host is forbidden'};
1346 }
1347
1348 require LWP::UserAgent;
1349 my $ua = WDCC::LWPUA->new;
1350 $ua->{wdcc_dom} = $dom;
1351 $ua->{wdcc_host_permit} = $host_permit;
1352 $ua->agent ('Mozilla'); ## TODO: for now.
1353 $ua->parse_head (0);
1354 $ua->protocols_allowed ([qw/http/]);
1355 $ua->max_size (1000_000);
1356 my $req = HTTP::Request->new (GET => $request_uri);
1357 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1358 my $res = $ua->request ($req);
1359 ## TODO: 401 sets |is_success| true.
1360 if ($res->is_success or $http->get_parameter ('error-page')) {
1361 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1362 $r->{uri} = $res->request->uri;
1363 $r->{request_uri} = $request_uri;
1364
1365 ## TODO: More strict parsing...
1366 my $ct = $res->header ('Content-Type');
1367 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1368 $r->{charset} = lc $1;
1369 $r->{charset} =~ tr/\\//d;
1370 $r->{official_charset} = $r->{charset};
1371 }
1372
1373 my $input_charset = $http->get_parameter ('charset');
1374 if (defined $input_charset and length $input_charset) {
1375 $r->{charset_overridden}
1376 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1377 $r->{charset} = $input_charset;
1378 }
1379
1380 ## TODO: Support for HTTP Content-Encoding
1381
1382 $r->{s} = ''.$res->content;
1383
1384 require Whatpm::ContentType;
1385 ($r->{official_type}, $r->{media_type})
1386 = Whatpm::ContentType->get_sniffed_type
1387 (get_file_head => sub {
1388 return substr $r->{s}, 0, shift;
1389 },
1390 http_content_type_byte => $ct,
1391 has_http_content_encoding =>
1392 defined $res->header ('Content-Encoding'),
1393 supported_image_types => {});
1394 } else {
1395 $r->{uri} = $res->request->uri;
1396 $r->{request_uri} = $request_uri;
1397 $r->{error_status_text} = $res->status_line;
1398 }
1399
1400 $r->{header_field} = [];
1401 $res->scan (sub {
1402 push @{$r->{header_field}}, [$_[0], $_[1]];
1403 });
1404 $r->{header_status_code} = $res->code;
1405 $r->{header_status_text} = $res->message;
1406 } else {
1407 $r->{s} = ''.$http->get_parameter ('s');
1408 $r->{uri} = q<thismessage:/>;
1409 $r->{request_uri} = q<thismessage:/>;
1410 $r->{base_uri} = q<thismessage:/>;
1411 $r->{charset} = ''.$http->get_parameter ('_charset_');
1412 $r->{charset} =~ s/\s+//g;
1413 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1414 $r->{official_charset} = $r->{charset};
1415 $r->{header_field} = [];
1416
1417 require Whatpm::ContentType;
1418 ($r->{official_type}, $r->{media_type})
1419 = Whatpm::ContentType->get_sniffed_type
1420 (get_file_head => sub {
1421 return substr $r->{s}, 0, shift;
1422 },
1423 http_content_type_byte => undef,
1424 has_http_content_encoding => 0,
1425 supported_image_types => {});
1426 }
1427
1428 my $input_format = $http->get_parameter ('i');
1429 if (defined $input_format and length $input_format) {
1430 $r->{media_type_overridden}
1431 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1432 $r->{media_type} = $input_format;
1433 }
1434 if (defined $r->{s} and not defined $r->{media_type}) {
1435 $r->{media_type} = 'text/html';
1436 $r->{media_type_overridden} = 1;
1437 }
1438
1439 if ($r->{media_type} eq 'text/xml') {
1440 unless (defined $r->{charset}) {
1441 $r->{charset} = 'us-ascii';
1442 $r->{official_charset} = $r->{charset};
1443 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1444 $r->{charset_overridden} = 0;
1445 }
1446 }
1447
1448 if (length $r->{s} > 1000_000) {
1449 $r->{error_status_text} = 'Entity-body too large';
1450 delete $r->{s};
1451 return $r;
1452 }
1453
1454 $r->{inner_html_element} = $http->get_parameter ('e');
1455
1456 return $r;
1457 } # get_input_document
1458
1459 package WDCC::LWPUA;
1460 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1461
1462 sub redirect_ok {
1463 my $ua = shift;
1464 unless ($ua->SUPER::redirect_ok (@_)) {
1465 return 0;
1466 }
1467
1468 my $uris = $_[1]->header ('Location');
1469 return 0 unless $uris;
1470 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1471 unless ({
1472 http => 1,
1473 }->{lc $uri->uri_scheme}) {
1474 return 0;
1475 }
1476 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1477 return 0;
1478 }
1479 return 1;
1480 } # redirect_ok
1481
1482 =head1 AUTHOR
1483
1484 Wakaba <w@suika.fam.cx>.
1485
1486 =head1 LICENSE
1487
1488 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1489
1490 This library is free software; you can redistribute it
1491 and/or modify it under the same terms as Perl itself.
1492
1493 =cut
1494
1495 ## $Date: 2008/03/17 13:45:35 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24