/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.37 - (show annotations) (download)
Sun Feb 24 02:17:51 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.36: +43 -15 lines
++ ChangeLog	24 Feb 2008 02:17:37 -0000
2008-02-24  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi: Support for level-i (informational).

	* cc-style.css: New style rules for informational messages added.

	* error-description-source.xml (#information, #level-i): Added.

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89 ];
90
91 $input->{id_prefix} = '';
92 #$input->{nested} = 0;
93 my $result = {conforming_min => 1, conforming_max => 1};
94 check_and_print ($input => $result);
95 print_result_section ($result);
96 } else {
97 print STDOUT qq[</dl></div>];
98 print_result_input_error_section ($input);
99 }
100
101 print STDOUT qq[
102 <ul class="navigation" id="nav-items">
103 ];
104 for (@nav) {
105 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
106 }
107 print STDOUT qq[
108 </ul>
109 </body>
110 </html>
111 ];
112
113 for (qw/decode parse parse_html parse_xml parse_manifest
114 check check_manifest/) {
115 next unless defined $time{$_};
116 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
117 print $file $char_length, "\t", $time{$_}, "\n";
118 }
119
120 exit;
121 }
122
123 sub add_error ($$$) {
124 my ($layer, $err, $result) = @_;
125 if (defined $err->{level}) {
126 if ($err->{level} eq 's') {
127 $result->{$layer}->{should}++;
128 $result->{$layer}->{score_min} -= 2;
129 $result->{conforming_min} = 0;
130 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
131 $result->{$layer}->{warning}++;
132 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
133 $result->{$layer}->{unsupported}++;
134 $result->{unsupported} = 1;
135 } elsif ($err->{level} eq 'i') {
136 #
137 } else {
138 $result->{$layer}->{must}++;
139 $result->{$layer}->{score_max} -= 2;
140 $result->{$layer}->{score_min} -= 2;
141 $result->{conforming_min} = 0;
142 $result->{conforming_max} = 0;
143 }
144 } else {
145 $result->{$layer}->{must}++;
146 $result->{$layer}->{score_max} -= 2;
147 $result->{$layer}->{score_min} -= 2;
148 $result->{conforming_min} = 0;
149 $result->{conforming_max} = 0;
150 }
151 } # add_error
152
153 sub check_and_print ($$) {
154 my ($input, $result) = @_;
155
156 print_http_header_section ($input, $result);
157
158 my $doc;
159 my $el;
160 my $cssom;
161 my $manifest;
162 my @subdoc;
163
164 if ($input->{media_type} eq 'text/html') {
165 ($doc, $el) = print_syntax_error_html_section ($input, $result);
166 print_source_string_section
167 ($input,
168 \($input->{s}),
169 $input->{charset} || $doc->input_encoding);
170 } elsif ({
171 'text/xml' => 1,
172 'application/atom+xml' => 1,
173 'application/rss+xml' => 1,
174 'application/svg+xml' => 1,
175 'application/xhtml+xml' => 1,
176 'application/xml' => 1,
177 }->{$input->{media_type}}) {
178 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
179 print_source_string_section ($input,
180 \($input->{s}),
181 $doc->input_encoding);
182 } elsif ($input->{media_type} eq 'text/css') {
183 $cssom = print_syntax_error_css_section ($input, $result);
184 print_source_string_section
185 ($input, \($input->{s}),
186 $cssom->manakai_input_encoding);
187 } elsif ($input->{media_type} eq 'text/cache-manifest') {
188 ## TODO: MUST be text/cache-manifest
189 $manifest = print_syntax_error_manifest_section ($input, $result);
190 print_source_string_section ($input, \($input->{s}),
191 'utf-8');
192 } else {
193 ## TODO: Change HTTP status code??
194 print_result_unknown_type_section ($input, $result);
195 }
196
197 if (defined $doc or defined $el) {
198 $doc->document_uri ($input->{uri});
199 $doc->manakai_entity_base_uri ($input->{base_uri});
200 print_structure_dump_dom_section ($input, $doc, $el);
201 my $elements = print_structure_error_dom_section
202 ($input, $doc, $el, $result, sub {
203 push @subdoc, shift;
204 });
205 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
206 print_listing_section ({
207 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
208 }, $input, $elements->{id}) if keys %{$elements->{id}};
209 print_listing_section ({
210 id => 'terms', label => 'Terms', heading => 'Terms',
211 }, $input, $elements->{term}) if keys %{$elements->{term}};
212 print_listing_section ({
213 id => 'classes', label => 'Classes', heading => 'Classes',
214 }, $input, $elements->{class}) if keys %{$elements->{class}};
215 } elsif (defined $cssom) {
216 print_structure_dump_cssom_section ($input, $cssom);
217 ## TODO: CSSOM validation
218 add_error ('structure', {level => 'u'} => $result);
219 } elsif (defined $manifest) {
220 print_structure_dump_manifest_section ($input, $manifest);
221 print_structure_error_manifest_section ($input, $manifest, $result);
222 }
223
224 my $id_prefix = 0;
225 for my $subinput (@subdoc) {
226 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
227 $subinput->{nested} = 1;
228 $subinput->{base_uri} = $subinput->{container_node}->base_uri
229 unless defined $subinput->{base_uri};
230 my $ebaseuri = htescape ($subinput->{base_uri});
231 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
232 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
233 <h2>Subdocument #$id_prefix</h2>
234
235 <dl>
236 <dt>Internet Media Type</dt>
237 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
238 <dt>Container Node</dt>
239 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
240 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
241 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
242 </dl>];
243
244 $subinput->{id_prefix} .= '-';
245 check_and_print ($subinput => $result);
246
247 print STDOUT qq[</div>];
248 }
249 } # check_and_print
250
251 sub print_http_header_section ($$) {
252 my ($input, $result) = @_;
253 return unless defined $input->{header_status_code} or
254 defined $input->{header_status_text} or
255 @{$input->{header_field} or []};
256
257 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
258 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
259 <h2>HTTP Header</h2>
260
261 <p><strong>Note</strong>: Due to the limitation of the
262 network library in use, the content of this section might
263 not be the real header.</p>
264
265 <table><tbody>
266 ];
267
268 if (defined $input->{header_status_code}) {
269 print STDOUT qq[<tr><th scope="row">Status code</th>];
270 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
271 }
272 if (defined $input->{header_status_text}) {
273 print STDOUT qq[<tr><th scope="row">Status text</th>];
274 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
275 }
276
277 for (@{$input->{header_field}}) {
278 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
279 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
280 }
281
282 print STDOUT qq[</tbody></table></div>];
283 } # print_http_header_section
284
285 sub print_syntax_error_html_section ($$) {
286 my ($input, $result) = @_;
287
288 require Encode;
289 require Whatpm::HTML;
290
291 print STDOUT qq[
292 <div id="$input->{id_prefix}parse-errors" class="section">
293 <h2>Parse Errors</h2>
294
295 <dl>];
296 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
297
298 my $onerror = sub {
299 my (%opt) = @_;
300 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
301 if ($opt{column} > 0) {
302 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
303 } else {
304 $opt{line} = $opt{line} - 1 || 1;
305 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{line}">Line $opt{line}</a></dt>\n];
306 }
307 $type =~ tr/ /-/;
308 $type =~ s/\|/%7C/g;
309 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
310 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
311 print STDOUT qq[$msg</dd>\n];
312
313 add_error ('syntax', \%opt => $result);
314 };
315
316 my $doc = $dom->create_document;
317 my $el;
318 my $inner_html_element = $input->{inner_html_element};
319 if (defined $inner_html_element and length $inner_html_element) {
320 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
321 my $time1 = time;
322 my $t = Encode::decode ($input->{charset}, $input->{s});
323 $time{decode} = time - $time1;
324
325 $el = $doc->create_element_ns
326 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
327 $time1 = time;
328 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
329 $time{parse} = time - $time1;
330 } else {
331 my $time1 = time;
332 Whatpm::HTML->parse_byte_string
333 ($input->{charset}, $input->{s} => $doc, $onerror);
334 $time{parse_html} = time - $time1;
335 }
336 $doc->manakai_charset ($input->{official_charset})
337 if defined $input->{official_charset};
338
339 print STDOUT qq[</dl></div>];
340
341 return ($doc, $el);
342 } # print_syntax_error_html_section
343
344 sub print_syntax_error_xml_section ($$) {
345 my ($input, $result) = @_;
346
347 require Message::DOM::XMLParserTemp;
348
349 print STDOUT qq[
350 <div id="$input->{id_prefix}parse-errors" class="section">
351 <h2>Parse Errors</h2>
352
353 <dl>];
354 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
355
356 my $onerror = sub {
357 my $err = shift;
358 my $line = $err->location->line_number;
359 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
360 print STDOUT $err->location->column_number, "</dt><dd>";
361 print STDOUT htescape $err->text, "</dd>\n";
362
363 add_error ('syntax', {type => $err->text,
364 level => [
365 $err->SEVERITY_FATAL_ERROR => 'm',
366 $err->SEVERITY_ERROR => 'm',
367 $err->SEVERITY_WARNING => 's',
368 ]->[$err->severity]} => $result);
369
370 return 1;
371 };
372
373 my $time1 = time;
374 open my $fh, '<', \($input->{s});
375 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
376 ($fh => $dom, $onerror, charset => $input->{charset});
377 $time{parse_xml} = time - $time1;
378 $doc->manakai_charset ($input->{official_charset})
379 if defined $input->{official_charset};
380
381 print STDOUT qq[</dl></div>];
382
383 return ($doc, undef);
384 } # print_syntax_error_xml_section
385
386 sub get_css_parser () {
387 our $CSSParser;
388 return $CSSParser if $CSSParser;
389
390 require Whatpm::CSS::Parser;
391 my $p = Whatpm::CSS::Parser->new;
392
393 $p->{prop}->{$_} = 1 for qw/
394 alignment-baseline
395 background background-attachment background-color background-image
396 background-position background-position-x background-position-y
397 background-repeat border border-bottom border-bottom-color
398 border-bottom-style border-bottom-width border-collapse border-color
399 border-left border-left-color
400 border-left-style border-left-width border-right border-right-color
401 border-right-style border-right-width
402 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
403 border-style border-top border-top-color border-top-style border-top-width
404 border-width bottom
405 caption-side clear clip color content counter-increment counter-reset
406 cursor direction display dominant-baseline empty-cells float font
407 font-family font-size font-size-adjust font-stretch
408 font-style font-variant font-weight height left
409 letter-spacing line-height
410 list-style list-style-image list-style-position list-style-type
411 margin margin-bottom margin-left margin-right margin-top marker-offset
412 marks max-height max-width min-height min-width opacity -moz-opacity
413 orphans outline outline-color outline-style outline-width overflow
414 overflow-x overflow-y
415 padding padding-bottom padding-left padding-right padding-top
416 page page-break-after page-break-before page-break-inside
417 position quotes right size table-layout
418 text-align text-anchor text-decoration text-indent text-transform
419 top unicode-bidi vertical-align visibility white-space width widows
420 word-spacing writing-mode z-index
421 /;
422 $p->{prop_value}->{display}->{$_} = 1 for qw/
423 block clip inline inline-block inline-table list-item none
424 table table-caption table-cell table-column table-column-group
425 table-header-group table-footer-group table-row table-row-group
426 compact marker
427 /;
428 $p->{prop_value}->{position}->{$_} = 1 for qw/
429 absolute fixed relative static
430 /;
431 $p->{prop_value}->{float}->{$_} = 1 for qw/
432 left right none
433 /;
434 $p->{prop_value}->{clear}->{$_} = 1 for qw/
435 left right none both
436 /;
437 $p->{prop_value}->{direction}->{ltr} = 1;
438 $p->{prop_value}->{direction}->{rtl} = 1;
439 $p->{prop_value}->{marks}->{crop} = 1;
440 $p->{prop_value}->{marks}->{cross} = 1;
441 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
442 normal bidi-override embed
443 /;
444 for my $prop_name (qw/overflow overflow-x overflow-y/) {
445 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
446 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
447 /;
448 }
449 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
450 visible hidden collapse
451 /;
452 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
453 disc circle square decimal decimal-leading-zero
454 lower-roman upper-roman lower-greek lower-latin
455 upper-latin armenian georgian lower-alpha upper-alpha none
456 hebrew cjk-ideographic hiragana katakana hiragana-iroha
457 katakana-iroha
458 /;
459 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
460 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
461 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
462 auto always avoid left right
463 /;
464 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
465 auto always avoid left right
466 /;
467 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
468 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
469 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
470 repeat repeat-x repeat-y no-repeat
471 /;
472 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
473 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
474 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
475 xx-small x-small small medium large x-large xx-large
476 -manakai-xxx-large -webkit-xxx-large
477 larger smaller
478 /;
479 $p->{prop_value}->{'font-style'}->{normal} = 1;
480 $p->{prop_value}->{'font-style'}->{italic} = 1;
481 $p->{prop_value}->{'font-style'}->{oblique} = 1;
482 $p->{prop_value}->{'font-variant'}->{normal} = 1;
483 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
484 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
485 qw/normal wider narrower ultra-condensed extra-condensed
486 condensed semi-condensed semi-expanded expanded
487 extra-expanded ultra-expanded/;
488 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
489 left right center justify begin end
490 /;
491 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
492 capitalize uppercase lowercase none
493 /;
494 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
495 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
496 /;
497 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
498 lr rl tb lr-tb rl-tb tb-rl
499 /;
500 $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
501 start middle end
502 /;
503 $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
504 auto use-script no-change reset-size ideographic alphabetic
505 hanging mathematical central middle text-after-edge text-before-edge
506 /;
507 $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
508 auto baseline before-edge text-before-edge middle central
509 after-edge text-after-edge ideographic alphabetic hanging
510 mathematical
511 /;
512 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
513 none blink underline overline line-through
514 /;
515 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
516 top bottom left right
517 /;
518 $p->{prop_value}->{'table-layout'}->{auto} = 1;
519 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
520 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
521 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
522 $p->{prop_value}->{'empty-cells'}->{show} = 1;
523 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
524 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
525 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
526 se-resize sw-resize s-resize w-resize text wait help progress
527 /;
528 for my $prop (qw/border-top-style border-left-style
529 border-bottom-style border-right-style outline-style/) {
530 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
531 none hidden dotted dashed solid double groove ridge inset outset
532 /;
533 }
534 for my $prop (qw/color background-color
535 border-bottom-color border-left-color border-right-color
536 border-top-color border-color/) {
537 $p->{prop_value}->{$prop}->{transparent} = 1;
538 $p->{prop_value}->{$prop}->{flavor} = 1;
539 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
540 }
541 $p->{prop_value}->{'outline-color'}->{invert} = 1;
542 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
543 $p->{pseudo_class}->{$_} = 1 for qw/
544 active checked disabled empty enabled first-child first-of-type
545 focus hover indeterminate last-child last-of-type link only-child
546 only-of-type root target visited
547 lang nth-child nth-last-child nth-of-type nth-last-of-type not
548 -manakai-contains -manakai-current
549 /;
550 $p->{pseudo_element}->{$_} = 1 for qw/
551 after before first-letter first-line
552 /;
553
554 return $CSSParser = $p;
555 } # get_css_parser
556
557 sub print_syntax_error_css_section ($$) {
558 my ($input, $result) = @_;
559
560 print STDOUT qq[
561 <div id="$input->{id_prefix}parse-errors" class="section">
562 <h2>Parse Errors</h2>
563
564 <dl>];
565 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
566
567 my $p = get_css_parser ();
568 $p->init;
569 $p->{onerror} = sub {
570 my (%opt) = @_;
571 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
572 if ($opt{token}) {
573 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
574 } else {
575 print STDOUT qq[<dt class="$cls">Unknown location];
576 }
577 if (defined $opt{value}) {
578 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
579 } elsif (defined $opt{token}) {
580 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
581 }
582 $type =~ tr/ /-/;
583 $type =~ s/\|/%7C/g;
584 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
585 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
586 print STDOUT qq[$msg</dd>\n];
587
588 add_error ('syntax', \%opt => $result);
589 };
590 $p->{href} = $input->{uri};
591 $p->{base_uri} = $input->{base_uri};
592
593 # if ($parse_mode eq 'q') {
594 # $p->{unitless_px} = 1;
595 # $p->{hashless_color} = 1;
596 # }
597
598 ## TODO: Make $input->{s} a ref.
599
600 my $s = \$input->{s};
601 my $charset;
602 unless ($input->{is_char_string}) {
603 require Encode;
604 if (defined $input->{charset}) {## TODO: IANA->Perl
605 $charset = $input->{charset};
606 $s = \(Encode::decode ($input->{charset}, $$s));
607 } else {
608 ## TODO: charset detection
609 $s = \(Encode::decode ($charset = 'utf-8', $$s));
610 }
611 }
612
613 my $cssom = $p->parse_char_string ($$s);
614 $cssom->manakai_input_encoding ($charset) if defined $charset;
615
616 print STDOUT qq[</dl></div>];
617
618 return $cssom;
619 } # print_syntax_error_css_section
620
621 sub print_syntax_error_manifest_section ($$) {
622 my ($input, $result) = @_;
623
624 require Whatpm::CacheManifest;
625
626 print STDOUT qq[
627 <div id="$input->{id_prefix}parse-errors" class="section">
628 <h2>Parse Errors</h2>
629
630 <dl>];
631 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
632
633 my $onerror = sub {
634 my (%opt) = @_;
635 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
636 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
637 qq[</dt>];
638 $type =~ tr/ /-/;
639 $type =~ s/\|/%7C/g;
640 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
641 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
642 print STDOUT qq[$msg</dd>\n];
643
644 add_error ('syntax', \%opt => $result);
645 };
646
647 my $time1 = time;
648 my $manifest = Whatpm::CacheManifest->parse_byte_string
649 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
650 $time{parse_manifest} = time - $time1;
651
652 print STDOUT qq[</dl></div>];
653
654 return $manifest;
655 } # print_syntax_error_manifest_section
656
657 sub print_source_string_section ($$$) {
658 my $input = shift;
659 my $s;
660 unless ($input->{is_char_string}) {
661 require Encode;
662 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
663 return unless $enc;
664
665 $s = \($enc->decode (${$_[0]}));
666 } else {
667 $s = $_[0];
668 }
669
670 my $i = 1;
671 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
672 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
673 <h2>Document Source</h2>
674 <ol lang="">\n];
675 if (length $$s) {
676 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
677 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
678 "</li>\n";
679 $i++;
680 }
681 if ($$s =~ /\G([^\x0A]+)/gc) {
682 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
683 "</li>\n";
684 }
685 } else {
686 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
687 }
688 print STDOUT "</ol></div>";
689 } # print_input_string_section
690
691 sub print_document_tree ($$) {
692 my ($input, $node) = @_;
693
694 my $r = '<ol class="xoxo">';
695
696 my @node = ($node);
697 while (@node) {
698 my $child = shift @node;
699 unless (ref $child) {
700 $r .= $child;
701 next;
702 }
703
704 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
705 my $nt = $child->node_type;
706 if ($nt == $child->ELEMENT_NODE) {
707 my $child_nsuri = $child->namespace_uri;
708 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
709 '</code>'; ## ISSUE: case
710
711 if ($child->has_attributes) {
712 $r .= '<ul class="attributes">';
713 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
714 @{$child->attributes}) {
715 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
716 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
717 }
718 $r .= '</ul>';
719 }
720
721 if ($child->has_child_nodes) {
722 $r .= '<ol class="children">';
723 unshift @node, @{$child->child_nodes}, '</ol></li>';
724 } else {
725 $r .= '</li>';
726 }
727 } elsif ($nt == $child->TEXT_NODE) {
728 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
729 } elsif ($nt == $child->CDATA_SECTION_NODE) {
730 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
731 } elsif ($nt == $child->COMMENT_NODE) {
732 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
733 } elsif ($nt == $child->DOCUMENT_NODE) {
734 $r .= qq'<li id="$node_id" class="tree-document">Document';
735 $r .= qq[<ul class="attributes">];
736 my $cp = $child->manakai_charset;
737 if (defined $cp) {
738 $r .= qq[<li><code>charset</code> parameter = <code>];
739 $r .= htescape ($cp) . qq[</code></li>];
740 }
741 $r .= qq[<li><code>inputEncoding</code> = ];
742 my $ie = $child->input_encoding;
743 if (defined $ie) {
744 $r .= qq[<code>@{[htescape ($ie)]}</code>];
745 if ($child->manakai_has_bom) {
746 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
747 }
748 } else {
749 $r .= qq[(<code>null</code>)];
750 }
751 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
752 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
753 unless ($child->manakai_is_html) {
754 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
755 if (defined $child->xml_encoding) {
756 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
757 } else {
758 $r .= qq[<li>XML encoding = (null)</li>];
759 }
760 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
761 }
762 $r .= qq[</ul>];
763 if ($child->has_child_nodes) {
764 $r .= '<ol class="children">';
765 unshift @node, @{$child->child_nodes}, '</ol></li>';
766 }
767 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
768 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
769 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
770 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
771 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
772 $r .= '</ul></li>';
773 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
774 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
775 } else {
776 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
777 }
778 }
779
780 $r .= '</ol>';
781 print STDOUT $r;
782 } # print_document_tree
783
784 sub print_structure_dump_dom_section ($$$) {
785 my ($input, $doc, $el) = @_;
786
787 print STDOUT qq[
788 <div id="$input->{id_prefix}document-tree" class="section">
789 <h2>Document Tree</h2>
790 ];
791 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
792 unless $input->{nested};
793
794 print_document_tree ($input, $el || $doc);
795
796 print STDOUT qq[</div>];
797 } # print_structure_dump_dom_section
798
799 sub print_structure_dump_cssom_section ($$) {
800 my ($input, $cssom) = @_;
801
802 print STDOUT qq[
803 <div id="$input->{id_prefix}document-tree" class="section">
804 <h2>Document Tree</h2>
805 ];
806 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
807 unless $input->{nested};
808
809 ## TODO:
810 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
811
812 print STDOUT qq[</div>];
813 } # print_structure_dump_cssom_section
814
815 sub print_structure_dump_manifest_section ($$) {
816 my ($input, $manifest) = @_;
817
818 print STDOUT qq[
819 <div id="$input->{id_prefix}dump-manifest" class="section">
820 <h2>Cache Manifest</h2>
821 ];
822 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
823 unless $input->{nested};
824
825 print STDOUT qq[<dl><dt>Explicit entries</dt>];
826 my $i = 0;
827 for my $uri (@{$manifest->[0]}) {
828 my $euri = htescape ($uri);
829 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
830 }
831
832 print STDOUT qq[<dt>Fallback entries</dt><dd>
833 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
834 <th scope=row>Fallback Entry</tr><tbody>];
835 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
836 my $euri = htescape ($uri);
837 my $euri2 = htescape ($manifest->[1]->{$uri});
838 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
839 <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
840 }
841
842 print STDOUT qq[</table><dt>Online whitelist</dt>];
843 for my $uri (@{$manifest->[2]}) {
844 my $euri = htescape ($uri);
845 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
846 }
847
848 print STDOUT qq[</dl></div>];
849 } # print_structure_dump_manifest_section
850
851 sub print_structure_error_dom_section ($$$$$) {
852 my ($input, $doc, $el, $result, $onsubdoc) = @_;
853
854 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
855 <h2>Document Errors</h2>
856
857 <dl>];
858 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
859 unless $input->{nested};
860
861 require Whatpm::ContentChecker;
862 my $onerror = sub {
863 my %opt = @_;
864 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
865 $type =~ tr/ /-/;
866 $type =~ s/\|/%7C/g;
867 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
868 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
869 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
870 print STDOUT $msg, "</dd>\n";
871 add_error ('structure', \%opt => $result);
872 };
873
874 my $elements;
875 my $time1 = time;
876 if ($el) {
877 $elements = Whatpm::ContentChecker->check_element
878 ($el, $onerror, $onsubdoc);
879 } else {
880 $elements = Whatpm::ContentChecker->check_document
881 ($doc, $onerror, $onsubdoc);
882 }
883 $time{check} = time - $time1;
884
885 print STDOUT qq[</dl></div>];
886
887 return $elements;
888 } # print_structure_error_dom_section
889
890 sub print_structure_error_manifest_section ($$$) {
891 my ($input, $manifest, $result) = @_;
892
893 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
894 <h2>Document Errors</h2>
895
896 <dl>];
897 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
898 unless $input->{nested};
899
900 require Whatpm::CacheManifest;
901 Whatpm::CacheManifest->check_manifest ($manifest, sub {
902 my %opt = @_;
903 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
904 $type =~ tr/ /-/;
905 $type =~ s/\|/%7C/g;
906 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
907 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
908 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
909 add_error ('structure', \%opt => $result);
910 });
911
912 print STDOUT qq[</div>];
913 } # print_structure_error_manifest_section
914
915 sub print_table_section ($$) {
916 my ($input, $tables) = @_;
917
918 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
919 unless $input->{nested};
920 print STDOUT qq[
921 <div id="$input->{id_prefix}tables" class="section">
922 <h2>Tables</h2>
923
924 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
925 <script src="../table-script.js" type="text/javascript"></script>
926 <noscript>
927 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
928 </noscript>
929 ];
930
931 require JSON;
932
933 my $i = 0;
934 for my $table_el (@$tables) {
935 $i++;
936 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
937 get_node_link ($input, $table_el) . q[</h3>];
938
939 ## TODO: Make |ContentChecker| return |form_table| result
940 ## so that this script don't have to run the algorithm twice.
941 my $table = Whatpm::HTMLTable->form_table ($table_el);
942
943 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
944 next unless $_;
945 delete $_->{element};
946 }
947
948 for (@{$table->{row_group}}) {
949 next unless $_;
950 next unless $_->{element};
951 $_->{type} = $_->{element}->manakai_local_name;
952 delete $_->{element};
953 }
954
955 for (@{$table->{cell}}) {
956 next unless $_;
957 for (@{$_}) {
958 next unless $_;
959 for (@$_) {
960 $_->{id} = refaddr $_->{element} if defined $_->{element};
961 delete $_->{element};
962 $_->{is_header} = $_->{is_header} ? 1 : 0;
963 }
964 }
965 }
966
967 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
968 print STDOUT JSON::objToJson ($table);
969 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
970 print STDOUT qq[, '$input->{id_prefix}');</script>];
971 }
972
973 print STDOUT qq[</div>];
974 } # print_table_section
975
976 sub print_listing_section ($$$) {
977 my ($opt, $input, $ids) = @_;
978
979 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
980 unless $input->{nested};
981 print STDOUT qq[
982 <div id="$input->{id_prefix}$opt->{id}" class="section">
983 <h2>$opt->{heading}</h2>
984
985 <dl>
986 ];
987 for my $id (sort {$a cmp $b} keys %$ids) {
988 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
989 for (@{$ids->{$id}}) {
990 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
991 }
992 }
993 print STDOUT qq[</dl></div>];
994 } # print_listing_section
995
996 sub print_result_section ($) {
997 my $result = shift;
998
999 print STDOUT qq[
1000 <div id="result-summary" class="section">
1001 <h2>Result</h2>];
1002
1003 if ($result->{unsupported} and $result->{conforming_max}) {
1004 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1005 checker cannot decide whether the document is conforming or
1006 not, since the document contains one or more unsupported
1007 features. The document might or might not be conforming.</p>];
1008 } elsif ($result->{conforming_min}) {
1009 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1010 found in this document.</p>];
1011 } elsif ($result->{conforming_max}) {
1012 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1013 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1014 it might be conforming.</p>];
1015 } else {
1016 print STDOUT qq[<p class=FAIL id=result-para>This document is
1017 <strong><em>non</em>-conforming</strong>.</p>];
1018 }
1019
1020 print STDOUT qq[<table>
1021 <colgroup><col><colgroup><col><col><col><colgroup><col>
1022 <thead>
1023 <tr><th scope=col></th>
1024 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1025 Errors</a></th>
1026 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1027 Errors</a></th>
1028 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1029 <th scope=col>Score</th></tr></thead><tbody>];
1030
1031 my $must_error = 0;
1032 my $should_error = 0;
1033 my $warning = 0;
1034 my $score_min = 0;
1035 my $score_max = 0;
1036 my $score_base = 20;
1037 my $score_unit = $score_base / 100;
1038 for (
1039 [Transfer => 'transfer', ''],
1040 [Character => 'char', ''],
1041 [Syntax => 'syntax', '#parse-errors'],
1042 [Structure => 'structure', '#document-errors'],
1043 ) {
1044 $must_error += ($result->{$_->[1]}->{must} += 0);
1045 $should_error += ($result->{$_->[1]}->{should} += 0);
1046 $warning += ($result->{$_->[1]}->{warning} += 0);
1047 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1048 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1049
1050 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1051 my $label = $_->[0];
1052 if ($result->{$_->[1]}->{must} or
1053 $result->{$_->[1]}->{should} or
1054 $result->{$_->[1]}->{warning} or
1055 $result->{$_->[1]}->{unsupported}) {
1056 $label = qq[<a href="$_->[2]">$label</a>];
1057 }
1058
1059 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1060 if ($uncertain) {
1061 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1062 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1063 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1064 } else {
1065 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1066 }
1067 }
1068
1069 $score_max += $score_base;
1070
1071 print STDOUT qq[
1072 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1073 </tbody>
1074 <tfoot><tr class=uncertain><th scope=row>Total</th>
1075 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1076 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1077 <td>$warning?</td>
1078 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1079 </table>
1080
1081 <p><strong>Important</strong>: This conformance checking service
1082 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1083 </div>];
1084 push @nav, ['#result-summary' => 'Result'];
1085 } # print_result_section
1086
1087 sub print_result_unknown_type_section ($$) {
1088 my ($input, $result) = @_;
1089
1090 my $euri = htescape ($input->{uri});
1091 print STDOUT qq[
1092 <div id="$input->{id_prefix}parse-errors" class="section">
1093 <h2>Errors</h2>
1094
1095 <dl>
1096 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1097 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1098 supported</a></strong>:
1099 Media type
1100 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1101 is not supported.</dd>
1102 </dl>
1103 </div>
1104 ];
1105 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1106 unless $input->{nested};
1107 add_error (char => {level => 'u'} => $result);
1108 add_error (syntax => {level => 'u'} => $result);
1109 add_error (structure => {level => 'u'} => $result);
1110 } # print_result_unknown_type_section
1111
1112 sub print_result_input_error_section ($) {
1113 my $input = shift;
1114 print STDOUT qq[<div class="section" id="result-summary">
1115 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1116 </div>];
1117 push @nav, ['#result-summary' => 'Result'];
1118 } # print_result_input_error_section
1119
1120 sub get_error_label ($$) {
1121 my ($input, $err) = @_;
1122
1123 my $r = '';
1124
1125 if (defined $err->{line}) {
1126 if ($err->{column} > 0) {
1127 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a> column $err->{column}];
1128 } else {
1129 $err->{line} = $err->{line} - 1 || 1;
1130 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a>];
1131 }
1132 }
1133
1134 if (defined $err->{node}) {
1135 $r .= ' ' if length $r;
1136 $r = get_node_link ($input, $err->{node});
1137 }
1138
1139 if (defined $err->{index}) {
1140 if (length $r) {
1141 $r .= ', Index ' . (0+$err->{index});
1142 } else {
1143 $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1144 . (0+$err->{index}) . '</a>';
1145 }
1146 }
1147
1148 if (defined $err->{value}) {
1149 $r .= ' ' if length $r;
1150 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1151 }
1152
1153 return $r;
1154 } # get_error_label
1155
1156 sub get_error_level_label ($) {
1157 my $err = shift;
1158
1159 my $r = '';
1160
1161 if (not defined $err->{level} or $err->{level} eq 'm') {
1162 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1163 error</a></strong>: ];
1164 } elsif ($err->{level} eq 's') {
1165 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1166 error</a></strong>: ];
1167 } elsif ($err->{level} eq 'w') {
1168 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1169 ];
1170 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1171 $r = qq[<strong><a href="../error-description#level-u">Not
1172 supported</a></strong>: ];
1173 } elsif ($err->{level} eq 'i') {
1174 $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1175 } else {
1176 my $elevel = htescape ($err->{level});
1177 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1178 ];
1179 }
1180
1181 return $r;
1182 } # get_error_level_label
1183
1184 sub get_node_path ($) {
1185 my $node = shift;
1186 my @r;
1187 while (defined $node) {
1188 my $rs;
1189 if ($node->node_type == 1) {
1190 $rs = $node->manakai_local_name;
1191 $node = $node->parent_node;
1192 } elsif ($node->node_type == 2) {
1193 $rs = '@' . $node->manakai_local_name;
1194 $node = $node->owner_element;
1195 } elsif ($node->node_type == 3) {
1196 $rs = '"' . $node->data . '"';
1197 $node = $node->parent_node;
1198 } elsif ($node->node_type == 9) {
1199 @r = ('') unless @r;
1200 $rs = '';
1201 $node = $node->parent_node;
1202 } else {
1203 $rs = '#' . $node->node_type;
1204 $node = $node->parent_node;
1205 }
1206 unshift @r, $rs;
1207 }
1208 return join '/', @r;
1209 } # get_node_path
1210
1211 sub get_node_link ($$) {
1212 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1213 htescape (get_node_path ($_[1])) . qq[</a>];
1214 } # get_node_link
1215
1216 {
1217 my $Msg = {};
1218
1219 sub load_text_catalog ($) {
1220 my $lang = shift; # MUST be a canonical lang name
1221 open my $file, '<:utf8', "cc-msg.$lang.txt"
1222 or die "$0: cc-msg.$lang.txt: $!";
1223 while (<$file>) {
1224 if (s/^([^;]+);([^;]*);//) {
1225 my ($type, $cls, $msg) = ($1, $2, $_);
1226 $msg =~ tr/\x0D\x0A//d;
1227 $Msg->{$type} = [$cls, $msg];
1228 }
1229 }
1230 } # load_text_catalog
1231
1232 sub get_text ($) {
1233 my ($type, $level, $node) = @_;
1234 $type = $level . ':' . $type if defined $level;
1235 $level = 'm' unless defined $level;
1236 my @arg;
1237 {
1238 if (defined $Msg->{$type}) {
1239 my $msg = $Msg->{$type}->[1];
1240 $msg =~ s{<var>\$([0-9]+)</var>}{
1241 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1242 }ge;
1243 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1244 UNIVERSAL::can ($node, 'get_attribute_ns')
1245 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1246 }ge;
1247 $msg =~ s{<var>{\@}</var>}{
1248 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1249 }ge;
1250 $msg =~ s{<var>{local-name}</var>}{
1251 UNIVERSAL::can ($node, 'manakai_local_name')
1252 ? htescape ($node->manakai_local_name) : ''
1253 }ge;
1254 $msg =~ s{<var>{element-local-name}</var>}{
1255 (UNIVERSAL::can ($node, 'owner_element') and
1256 $node->owner_element)
1257 ? htescape ($node->owner_element->manakai_local_name)
1258 : ''
1259 }ge;
1260 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1261 } elsif ($type =~ s/:([^:]*)$//) {
1262 unshift @arg, $1;
1263 redo;
1264 }
1265 }
1266 return ($type, 'level-'.$level, htescape ($_[0]));
1267 } # get_text
1268
1269 }
1270
1271 sub get_input_document ($$) {
1272 my ($http, $dom) = @_;
1273
1274 my $request_uri = $http->get_parameter ('uri');
1275 my $r = {};
1276 if (defined $request_uri and length $request_uri) {
1277 my $uri = $dom->create_uri_reference ($request_uri);
1278 unless ({
1279 http => 1,
1280 }->{lc $uri->uri_scheme}) {
1281 return {uri => $request_uri, request_uri => $request_uri,
1282 error_status_text => 'URI scheme not allowed'};
1283 }
1284
1285 require Message::Util::HostPermit;
1286 my $host_permit = new Message::Util::HostPermit;
1287 $host_permit->add_rule (<<EOH);
1288 Allow host=suika port=80
1289 Deny host=suika
1290 Allow host=suika.fam.cx port=80
1291 Deny host=suika.fam.cx
1292 Deny host=localhost
1293 Deny host=*.localdomain
1294 Deny ipv4=0.0.0.0/8
1295 Deny ipv4=10.0.0.0/8
1296 Deny ipv4=127.0.0.0/8
1297 Deny ipv4=169.254.0.0/16
1298 Deny ipv4=172.0.0.0/11
1299 Deny ipv4=192.0.2.0/24
1300 Deny ipv4=192.88.99.0/24
1301 Deny ipv4=192.168.0.0/16
1302 Deny ipv4=198.18.0.0/15
1303 Deny ipv4=224.0.0.0/4
1304 Deny ipv4=255.255.255.255/32
1305 Deny ipv6=0::0/0
1306 Allow host=*
1307 EOH
1308 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1309 return {uri => $request_uri, request_uri => $request_uri,
1310 error_status_text => 'Connection to the host is forbidden'};
1311 }
1312
1313 require LWP::UserAgent;
1314 my $ua = WDCC::LWPUA->new;
1315 $ua->{wdcc_dom} = $dom;
1316 $ua->{wdcc_host_permit} = $host_permit;
1317 $ua->agent ('Mozilla'); ## TODO: for now.
1318 $ua->parse_head (0);
1319 $ua->protocols_allowed ([qw/http/]);
1320 $ua->max_size (1000_000);
1321 my $req = HTTP::Request->new (GET => $request_uri);
1322 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1323 my $res = $ua->request ($req);
1324 ## TODO: 401 sets |is_success| true.
1325 if ($res->is_success or $http->get_parameter ('error-page')) {
1326 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1327 $r->{uri} = $res->request->uri;
1328 $r->{request_uri} = $request_uri;
1329
1330 ## TODO: More strict parsing...
1331 my $ct = $res->header ('Content-Type');
1332 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1333 $r->{charset} = lc $1;
1334 $r->{charset} =~ tr/\\//d;
1335 $r->{official_charset} = $r->{charset};
1336 }
1337
1338 my $input_charset = $http->get_parameter ('charset');
1339 if (defined $input_charset and length $input_charset) {
1340 $r->{charset_overridden}
1341 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1342 $r->{charset} = $input_charset;
1343 }
1344
1345 ## TODO: Support for HTTP Content-Encoding
1346
1347 $r->{s} = ''.$res->content;
1348
1349 require Whatpm::ContentType;
1350 ($r->{official_type}, $r->{media_type})
1351 = Whatpm::ContentType->get_sniffed_type
1352 (get_file_head => sub {
1353 return substr $r->{s}, 0, shift;
1354 },
1355 http_content_type_byte => $ct,
1356 has_http_content_encoding =>
1357 defined $res->header ('Content-Encoding'),
1358 supported_image_types => {});
1359 } else {
1360 $r->{uri} = $res->request->uri;
1361 $r->{request_uri} = $request_uri;
1362 $r->{error_status_text} = $res->status_line;
1363 }
1364
1365 $r->{header_field} = [];
1366 $res->scan (sub {
1367 push @{$r->{header_field}}, [$_[0], $_[1]];
1368 });
1369 $r->{header_status_code} = $res->code;
1370 $r->{header_status_text} = $res->message;
1371 } else {
1372 $r->{s} = ''.$http->get_parameter ('s');
1373 $r->{uri} = q<thismessage:/>;
1374 $r->{request_uri} = q<thismessage:/>;
1375 $r->{base_uri} = q<thismessage:/>;
1376 $r->{charset} = ''.$http->get_parameter ('_charset_');
1377 $r->{charset} =~ s/\s+//g;
1378 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1379 $r->{official_charset} = $r->{charset};
1380 $r->{header_field} = [];
1381
1382 require Whatpm::ContentType;
1383 ($r->{official_type}, $r->{media_type})
1384 = Whatpm::ContentType->get_sniffed_type
1385 (get_file_head => sub {
1386 return substr $r->{s}, 0, shift;
1387 },
1388 http_content_type_byte => undef,
1389 has_http_content_encoding => 0,
1390 supported_image_types => {});
1391 }
1392
1393 my $input_format = $http->get_parameter ('i');
1394 if (defined $input_format and length $input_format) {
1395 $r->{media_type_overridden}
1396 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1397 $r->{media_type} = $input_format;
1398 }
1399 if (defined $r->{s} and not defined $r->{media_type}) {
1400 $r->{media_type} = 'text/html';
1401 $r->{media_type_overridden} = 1;
1402 }
1403
1404 if ($r->{media_type} eq 'text/xml') {
1405 unless (defined $r->{charset}) {
1406 $r->{charset} = 'us-ascii';
1407 $r->{official_charset} = $r->{charset};
1408 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1409 $r->{charset_overridden} = 0;
1410 }
1411 }
1412
1413 if (length $r->{s} > 1000_000) {
1414 $r->{error_status_text} = 'Entity-body too large';
1415 delete $r->{s};
1416 return $r;
1417 }
1418
1419 $r->{inner_html_element} = $http->get_parameter ('e');
1420
1421 return $r;
1422 } # get_input_document
1423
1424 package WDCC::LWPUA;
1425 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1426
1427 sub redirect_ok {
1428 my $ua = shift;
1429 unless ($ua->SUPER::redirect_ok (@_)) {
1430 return 0;
1431 }
1432
1433 my $uris = $_[1]->header ('Location');
1434 return 0 unless $uris;
1435 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1436 unless ({
1437 http => 1,
1438 }->{lc $uri->uri_scheme}) {
1439 return 0;
1440 }
1441 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1442 return 0;
1443 }
1444 return 1;
1445 } # redirect_ok
1446
1447 =head1 AUTHOR
1448
1449 Wakaba <w@suika.fam.cx>.
1450
1451 =head1 LICENSE
1452
1453 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1454
1455 This library is free software; you can redistribute it
1456 and/or modify it under the same terms as Perl itself.
1457
1458 =cut
1459
1460 ## $Date: 2008/02/10 07:35:23 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24