/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.35 - (show annotations) (download)
Sun Feb 10 04:08:04 2008 UTC (16 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.34: +299 -34 lines
++ ChangeLog	10 Feb 2008 04:07:28 -0000
	* cc.cgi: |text/css| support.  |id_prefix| support was
	partially broken.

2008-02-10  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89 ];
90
91 $input->{id_prefix} = '';
92 #$input->{nested} = 0;
93 my $result = {conforming_min => 1, conforming_max => 1};
94 check_and_print ($input => $result);
95 print_result_section ($result);
96 } else {
97 print STDOUT qq[</dl></div>];
98 print_result_input_error_section ($input);
99 }
100
101 print STDOUT qq[
102 <ul class="navigation" id="nav-items">
103 ];
104 for (@nav) {
105 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
106 }
107 print STDOUT qq[
108 </ul>
109 </body>
110 </html>
111 ];
112
113 for (qw/decode parse parse_html parse_xml parse_manifest
114 check check_manifest/) {
115 next unless defined $time{$_};
116 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
117 print $file $char_length, "\t", $time{$_}, "\n";
118 }
119
120 exit;
121 }
122
123 sub add_error ($$$) {
124 my ($layer, $err, $result) = @_;
125 if (defined $err->{level}) {
126 if ($err->{level} eq 's') {
127 $result->{$layer}->{should}++;
128 $result->{$layer}->{score_min} -= 2;
129 $result->{conforming_min} = 0;
130 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
131 $result->{$layer}->{warning}++;
132 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
133 $result->{$layer}->{unsupported}++;
134 $result->{unsupported} = 1;
135 } else {
136 $result->{$layer}->{must}++;
137 $result->{$layer}->{score_max} -= 2;
138 $result->{$layer}->{score_min} -= 2;
139 $result->{conforming_min} = 0;
140 $result->{conforming_max} = 0;
141 }
142 } else {
143 $result->{$layer}->{must}++;
144 $result->{$layer}->{score_max} -= 2;
145 $result->{$layer}->{score_min} -= 2;
146 $result->{conforming_min} = 0;
147 $result->{conforming_max} = 0;
148 }
149 } # add_error
150
151 sub check_and_print ($$) {
152 my ($input, $result) = @_;
153
154 print_http_header_section ($input, $result);
155
156 my $doc;
157 my $el;
158 my $cssom;
159 my $manifest;
160 my @subdoc;
161
162 if ($input->{media_type} eq 'text/html') {
163 ($doc, $el) = print_syntax_error_html_section ($input, $result);
164 print_source_string_section
165 ($input,
166 \($input->{s}),
167 $input->{charset} || $doc->input_encoding);
168 } elsif ({
169 'text/xml' => 1,
170 'application/atom+xml' => 1,
171 'application/rss+xml' => 1,
172 'application/svg+xml' => 1,
173 'application/xhtml+xml' => 1,
174 'application/xml' => 1,
175 }->{$input->{media_type}}) {
176 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
177 print_source_string_section ($input,
178 \($input->{s}),
179 $doc->input_encoding);
180 } elsif ($input->{media_type} eq 'text/css') {
181 $cssom = print_syntax_error_css_section ($input, $result);
182 print_source_string_section
183 ($input, \($input->{s}),
184 $cssom->manakai_input_encoding);
185 } elsif ($input->{media_type} eq 'text/cache-manifest') {
186 ## TODO: MUST be text/cache-manifest
187 $manifest = print_syntax_error_manifest_section ($input, $result);
188 print_source_string_section ($input, \($input->{s}),
189 'utf-8');
190 } else {
191 ## TODO: Change HTTP status code??
192 print_result_unknown_type_section ($input, $result);
193 }
194
195 if (defined $doc or defined $el) {
196 $doc->document_uri ($input->{uri});
197 $doc->manakai_entity_base_uri ($input->{base_uri});
198 print_structure_dump_dom_section ($input, $doc, $el);
199 my $elements = print_structure_error_dom_section
200 ($input, $doc, $el, $result, sub {
201 push @subdoc, shift;
202 });
203 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
204 print_listing_section ({
205 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
206 }, $input, $elements->{id}) if keys %{$elements->{id}};
207 print_listing_section ({
208 id => 'terms', label => 'Terms', heading => 'Terms',
209 }, $input, $elements->{term}) if keys %{$elements->{term}};
210 print_listing_section ({
211 id => 'classes', label => 'Classes', heading => 'Classes',
212 }, $input, $elements->{class}) if keys %{$elements->{class}};
213 } elsif (defined $cssom) {
214 print_structure_dump_cssom_section ($input, $cssom);
215 ## TODO: CSSOM validation
216 } elsif (defined $manifest) {
217 print_structure_dump_manifest_section ($input, $manifest);
218 print_structure_error_manifest_section ($input, $manifest, $result);
219 }
220
221 my $id_prefix = 0;
222 for my $subinput (@subdoc) {
223 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
224 $subinput->{nested} = 1;
225 $subinput->{base_uri} = $subinput->{container_node}->base_uri
226 unless defined $subinput->{base_uri};
227 my $ebaseuri = htescape ($subinput->{base_uri});
228 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
229 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
230 <h2>Subdocument #$id_prefix</h2>
231
232 <dl>
233 <dt>Internet Media Type</dt>
234 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
235 <dt>Container Node</dt>
236 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
237 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
238 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
239 </dl>];
240
241 $subinput->{id_prefix} .= '-';
242 check_and_print ($subinput => $result);
243
244 print STDOUT qq[</div>];
245 }
246 } # check_and_print
247
248 sub print_http_header_section ($$) {
249 my ($input, $result) = @_;
250 return unless defined $input->{header_status_code} or
251 defined $input->{header_status_text} or
252 @{$input->{header_field} or []};
253
254 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
255 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
256 <h2>HTTP Header</h2>
257
258 <p><strong>Note</strong>: Due to the limitation of the
259 network library in use, the content of this section might
260 not be the real header.</p>
261
262 <table><tbody>
263 ];
264
265 if (defined $input->{header_status_code}) {
266 print STDOUT qq[<tr><th scope="row">Status code</th>];
267 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
268 }
269 if (defined $input->{header_status_text}) {
270 print STDOUT qq[<tr><th scope="row">Status text</th>];
271 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
272 }
273
274 for (@{$input->{header_field}}) {
275 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
276 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
277 }
278
279 print STDOUT qq[</tbody></table></div>];
280 } # print_http_header_section
281
282 sub print_syntax_error_html_section ($$) {
283 my ($input, $result) = @_;
284
285 require Encode;
286 require Whatpm::HTML;
287
288 print STDOUT qq[
289 <div id="$input->{id_prefix}parse-errors" class="section">
290 <h2>Parse Errors</h2>
291
292 <dl>];
293 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
294
295 my $onerror = sub {
296 my (%opt) = @_;
297 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
298 if ($opt{column} > 0) {
299 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
300 } else {
301 $opt{line} = $opt{line} - 1 || 1;
302 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{line}">Line $opt{line}</a></dt>\n];
303 }
304 $type =~ tr/ /-/;
305 $type =~ s/\|/%7C/g;
306 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
307 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
308 print STDOUT qq[$msg</dd>\n];
309
310 add_error ('syntax', \%opt => $result);
311 };
312
313 my $doc = $dom->create_document;
314 my $el;
315 my $inner_html_element = $input->{inner_html_element};
316 if (defined $inner_html_element and length $inner_html_element) {
317 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
318 my $time1 = time;
319 my $t = Encode::decode ($input->{charset}, $input->{s});
320 $time{decode} = time - $time1;
321
322 $el = $doc->create_element_ns
323 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
324 $time1 = time;
325 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
326 $time{parse} = time - $time1;
327 } else {
328 my $time1 = time;
329 Whatpm::HTML->parse_byte_string
330 ($input->{charset}, $input->{s} => $doc, $onerror);
331 $time{parse_html} = time - $time1;
332 }
333 $doc->manakai_charset ($input->{official_charset})
334 if defined $input->{official_charset};
335
336 print STDOUT qq[</dl></div>];
337
338 return ($doc, $el);
339 } # print_syntax_error_html_section
340
341 sub print_syntax_error_xml_section ($$) {
342 my ($input, $result) = @_;
343
344 require Message::DOM::XMLParserTemp;
345
346 print STDOUT qq[
347 <div id="$input->{id_prefix}parse-errors" class="section">
348 <h2>Parse Errors</h2>
349
350 <dl>];
351 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
352
353 my $onerror = sub {
354 my $err = shift;
355 my $line = $err->location->line_number;
356 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
357 print STDOUT $err->location->column_number, "</dt><dd>";
358 print STDOUT htescape $err->text, "</dd>\n";
359
360 add_error ('syntax', {type => $err->text,
361 level => [
362 $err->SEVERITY_FATAL_ERROR => 'm',
363 $err->SEVERITY_ERROR => 'm',
364 $err->SEVERITY_WARNING => 's',
365 ]->[$err->severity]} => $result);
366
367 return 1;
368 };
369
370 my $time1 = time;
371 open my $fh, '<', \($input->{s});
372 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
373 ($fh => $dom, $onerror, charset => $input->{charset});
374 $time{parse_xml} = time - $time1;
375 $doc->manakai_charset ($input->{official_charset})
376 if defined $input->{official_charset};
377
378 print STDOUT qq[</dl></div>];
379
380 return ($doc, undef);
381 } # print_syntax_error_xml_section
382
383 sub get_css_parser () {
384 our $CSSParser;
385 return $CSSParser if $CSSParser;
386
387 require Whatpm::CSS::Parser;
388 my $p = Whatpm::CSS::Parser->new;
389
390 # if ($parse_mode eq 'q') {
391 # $p->{unitless_px} = 1;
392 # $p->{hashless_color} = 1;
393 # }
394
395 $p->{prop}->{$_} = 1 for qw/
396 background background-attachment background-color background-image
397 background-position background-position-x background-position-y
398 background-repeat border border-bottom border-bottom-color
399 border-bottom-style border-bottom-width border-collapse border-color
400 border-left border-left-color
401 border-left-style border-left-width border-right border-right-color
402 border-right-style border-right-width
403 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
404 border-style border-top border-top-color border-top-style border-top-width
405 border-width bottom
406 caption-side clear clip color content counter-increment counter-reset
407 cursor direction display empty-cells float font
408 font-family font-size font-size-adjust font-stretch
409 font-style font-variant font-weight height left
410 letter-spacing line-height
411 list-style list-style-image list-style-position list-style-type
412 margin margin-bottom margin-left margin-right margin-top marker-offset
413 marks max-height max-width min-height min-width opacity -moz-opacity
414 orphans outline outline-color outline-style outline-width overflow
415 overflow-x overflow-y
416 padding padding-bottom padding-left padding-right padding-top
417 page page-break-after page-break-before page-break-inside
418 position quotes right size table-layout
419 text-align text-decoration text-indent text-transform
420 top unicode-bidi vertical-align visibility white-space width widows
421 word-spacing z-index
422 /;
423 $p->{prop_value}->{display}->{$_} = 1 for qw/
424 block clip inline inline-block inline-table list-item none
425 table table-caption table-cell table-column table-column-group
426 table-header-group table-footer-group table-row table-row-group
427 compact marker
428 /;
429 $p->{prop_value}->{position}->{$_} = 1 for qw/
430 absolute fixed relative static
431 /;
432 $p->{prop_value}->{float}->{$_} = 1 for qw/
433 left right none
434 /;
435 $p->{prop_value}->{clear}->{$_} = 1 for qw/
436 left right none both
437 /;
438 $p->{prop_value}->{direction}->{ltr} = 1;
439 $p->{prop_value}->{direction}->{rtl} = 1;
440 $p->{prop_value}->{marks}->{crop} = 1;
441 $p->{prop_value}->{marks}->{cross} = 1;
442 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
443 normal bidi-override embed
444 /;
445 for my $prop_name (qw/overflow overflow-x overflow-y/) {
446 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
447 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
448 /;
449 }
450 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
451 visible hidden collapse
452 /;
453 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
454 disc circle square decimal decimal-leading-zero
455 lower-roman upper-roman lower-greek lower-latin
456 upper-latin armenian georgian lower-alpha upper-alpha none
457 hebrew cjk-ideographic hiragana katakana hiragana-iroha
458 katakana-iroha
459 /;
460 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
461 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
462 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
463 auto always avoid left right
464 /;
465 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
466 auto always avoid left right
467 /;
468 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
469 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
470 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
471 repeat repeat-x repeat-y no-repeat
472 /;
473 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
474 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
475 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
476 xx-small x-small small medium large x-large xx-large
477 -manakai-xxx-large -webkit-xxx-large
478 larger smaller
479 /;
480 $p->{prop_value}->{'font-style'}->{normal} = 1;
481 $p->{prop_value}->{'font-style'}->{italic} = 1;
482 $p->{prop_value}->{'font-style'}->{oblique} = 1;
483 $p->{prop_value}->{'font-variant'}->{normal} = 1;
484 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
485 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
486 qw/normal wider narrower ultra-condensed extra-condensed
487 condensed semi-condensed semi-expanded expanded
488 extra-expanded ultra-expanded/;
489 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
490 left right center justify begin end
491 /;
492 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
493 capitalize uppercase lowercase none
494 /;
495 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
496 normal pre nowrap pre-line pre-wrap
497 /;
498 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
499 none blink underline overline line-through
500 /;
501 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
502 top bottom left right
503 /;
504 $p->{prop_value}->{'table-layout'}->{auto} = 1;
505 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
506 $p->{prop_value}->{'border-collapse'}->{collapase} = 1;
507 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
508 $p->{prop_value}->{'empty-cells'}->{show} = 1;
509 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
510 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
511 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
512 se-resize sw-resize s-resize w-resize text wait help progress
513 /;
514 for my $prop (qw/border-top-style border-left-style
515 border-bottom-style border-right-style outline-style/) {
516 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
517 none hidden dotted dashed solid double groove ridge inset outset
518 /;
519 }
520 for my $prop (qw/color background-color
521 border-bottom-color border-left-color border-right-color
522 border-top-color border-color/) {
523 $p->{prop_value}->{$prop}->{transparent} = 1;
524 $p->{prop_value}->{$prop}->{flavor} = 1;
525 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
526 }
527 $p->{prop_value}->{'outline-color'}->{invert} = 1;
528 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
529 $p->{pseudo_class}->{$_} = 1 for qw/
530 active checked disabled empty enabled first-child first-of-type
531 focus hover indeterminate last-child last-of-type link only-child
532 only-of-type root target visited
533 lang nth-child nth-last-child nth-of-type nth-last-of-type not
534 -manakai-contains -manakai-current
535 /;
536 $p->{pseudo_element}->{$_} = 1 for qw/
537 after before first-letter first-line
538 /;
539
540 return $CSSParser = $p;
541 } # get_css_parser
542
543 sub print_syntax_error_css_section ($$) {
544 my ($input, $result) = @_;
545
546 print STDOUT qq[
547 <div id="$input->{id_prefix}parse-errors" class="section">
548 <h2>Parse Errors</h2>
549
550 <dl>];
551 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
552
553 my $p = get_css_parser ();
554 $p->{onerror} = sub {
555 my (%opt) = @_;
556 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
557 if ($opt{token}) {
558 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
559 } else {
560 print STDOUT qq[<dt class="$cls">Unknown location];
561 }
562 if (defined $opt{value}) {
563 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
564 } elsif (defined $opt{token}) {
565 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
566 }
567 $type =~ tr/ /-/;
568 $type =~ s/\|/%7C/g;
569 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
570 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
571 print STDOUT qq[$msg</dd>\n];
572
573 add_error ('syntax', \%opt => $result);
574 };
575 $p->{href} = $input->{uri};
576 $p->{base_uri} = $input->{base_uri};
577
578 my $s = \$input->{s};
579 my $charset;
580 unless ($input->{is_char_string}) {
581 require Encode;
582 if (defined $input->{charset}) {## TODO: IANA->Perl
583 $charset = $input->{charset};
584 $s = \(Encode::decode ($input->{charset}, $$s));
585 } else {
586 ## TODO: charset detection
587 $s = \(Encode::decode ($charset = 'utf-8', $$s));
588 }
589 }
590
591 my $cssom = $p->parse_char_string ($$s);
592 $cssom->manakai_input_encoding ($charset) if defined $charset;
593
594 print STDOUT qq[</dl></div>];
595
596 return $cssom;
597 } # print_syntax_error_css_section
598
599 sub print_syntax_error_manifest_section ($$) {
600 my ($input, $result) = @_;
601
602 require Whatpm::CacheManifest;
603
604 print STDOUT qq[
605 <div id="$input->{id_prefix}parse-errors" class="section">
606 <h2>Parse Errors</h2>
607
608 <dl>];
609 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
610
611 my $onerror = sub {
612 my (%opt) = @_;
613 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
614 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
615 qq[</dt>];
616 $type =~ tr/ /-/;
617 $type =~ s/\|/%7C/g;
618 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
619 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
620 print STDOUT qq[$msg</dd>\n];
621
622 add_error ('syntax', \%opt => $result);
623 };
624
625 my $time1 = time;
626 my $manifest = Whatpm::CacheManifest->parse_byte_string
627 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
628 $time{parse_manifest} = time - $time1;
629
630 print STDOUT qq[</dl></div>];
631
632 return $manifest;
633 } # print_syntax_error_manifest_section
634
635 sub print_source_string_section ($$$) {
636 my $input = shift;
637 my $s;
638 unless ($input->{is_char_string}) {
639 require Encode;
640 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
641 return unless $enc;
642
643 $s = \($enc->decode (${$_[0]}));
644 } else {
645 $s = $_[0];
646 }
647
648 my $i = 1;
649 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
650 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
651 <h2>Document Source</h2>
652 <ol lang="">\n];
653 if (length $$s) {
654 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
655 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
656 "</li>\n";
657 $i++;
658 }
659 if ($$s =~ /\G([^\x0A]+)/gc) {
660 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
661 "</li>\n";
662 }
663 } else {
664 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
665 }
666 print STDOUT "</ol></div>";
667 } # print_input_string_section
668
669 sub print_document_tree ($$) {
670 my ($input, $node) = @_;
671
672 my $r = '<ol class="xoxo">';
673
674 my @node = ($node);
675 while (@node) {
676 my $child = shift @node;
677 unless (ref $child) {
678 $r .= $child;
679 next;
680 }
681
682 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
683 my $nt = $child->node_type;
684 if ($nt == $child->ELEMENT_NODE) {
685 my $child_nsuri = $child->namespace_uri;
686 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
687 '</code>'; ## ISSUE: case
688
689 if ($child->has_attributes) {
690 $r .= '<ul class="attributes">';
691 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
692 @{$child->attributes}) {
693 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
694 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
695 }
696 $r .= '</ul>';
697 }
698
699 if ($child->has_child_nodes) {
700 $r .= '<ol class="children">';
701 unshift @node, @{$child->child_nodes}, '</ol></li>';
702 } else {
703 $r .= '</li>';
704 }
705 } elsif ($nt == $child->TEXT_NODE) {
706 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
707 } elsif ($nt == $child->CDATA_SECTION_NODE) {
708 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
709 } elsif ($nt == $child->COMMENT_NODE) {
710 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
711 } elsif ($nt == $child->DOCUMENT_NODE) {
712 $r .= qq'<li id="$node_id" class="tree-document">Document';
713 $r .= qq[<ul class="attributes">];
714 my $cp = $child->manakai_charset;
715 if (defined $cp) {
716 $r .= qq[<li><code>charset</code> parameter = <code>];
717 $r .= htescape ($cp) . qq[</code></li>];
718 }
719 $r .= qq[<li><code>inputEncoding</code> = ];
720 my $ie = $child->input_encoding;
721 if (defined $ie) {
722 $r .= qq[<code>@{[htescape ($ie)]}</code>];
723 if ($child->manakai_has_bom) {
724 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
725 }
726 } else {
727 $r .= qq[(<code>null</code>)];
728 }
729 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
730 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
731 unless ($child->manakai_is_html) {
732 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
733 if (defined $child->xml_encoding) {
734 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
735 } else {
736 $r .= qq[<li>XML encoding = (null)</li>];
737 }
738 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
739 }
740 $r .= qq[</ul>];
741 if ($child->has_child_nodes) {
742 $r .= '<ol class="children">';
743 unshift @node, @{$child->child_nodes}, '</ol></li>';
744 }
745 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
746 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
747 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
748 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
749 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
750 $r .= '</ul></li>';
751 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
752 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
753 } else {
754 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
755 }
756 }
757
758 $r .= '</ol>';
759 print STDOUT $r;
760 } # print_document_tree
761
762 sub print_structure_dump_dom_section ($$$) {
763 my ($input, $doc, $el) = @_;
764
765 print STDOUT qq[
766 <div id="$input->{id_prefix}document-tree" class="section">
767 <h2>Document Tree</h2>
768 ];
769 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
770 unless $input->{nested};
771
772 print_document_tree ($input, $el || $doc);
773
774 print STDOUT qq[</div>];
775 } # print_structure_dump_dom_section
776
777 sub print_structure_dump_cssom_section ($$) {
778 my ($input, $cssom) = @_;
779
780 print STDOUT qq[
781 <div id="$input->{id_prefix}document-tree" class="section">
782 <h2>Document Tree</h2>
783 ];
784 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
785 unless $input->{nested};
786
787 ## TODO:
788 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
789
790 print STDOUT qq[</div>];
791 } # print_structure_dump_cssom_section
792
793 sub print_structure_dump_manifest_section ($$) {
794 my ($input, $manifest) = @_;
795
796 print STDOUT qq[
797 <div id="$input->{id_prefix}dump-manifest" class="section">
798 <h2>Cache Manifest</h2>
799 ];
800 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
801 unless $input->{nested};
802
803 print STDOUT qq[<dl><dt>Explicit entries</dt>];
804 for my $uri (@{$manifest->[0]}) {
805 my $euri = htescape ($uri);
806 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
807 }
808
809 print STDOUT qq[<dt>Fallback entries</dt><dd>
810 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
811 <th scope=row>Fallback Entry</tr><tbody>];
812 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
813 my $euri = htescape ($uri);
814 my $euri2 = htescape ($manifest->[1]->{$uri});
815 print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
816 <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
817 }
818
819 print STDOUT qq[</table><dt>Online whitelist</dt>];
820 for my $uri (@{$manifest->[2]}) {
821 my $euri = htescape ($uri);
822 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
823 }
824
825 print STDOUT qq[</dl></div>];
826 } # print_structure_dump_manifest_section
827
828 sub print_structure_error_dom_section ($$$$$) {
829 my ($input, $doc, $el, $result, $onsubdoc) = @_;
830
831 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
832 <h2>Document Errors</h2>
833
834 <dl>];
835 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
836 unless $input->{nested};
837
838 require Whatpm::ContentChecker;
839 my $onerror = sub {
840 my %opt = @_;
841 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
842 $type =~ tr/ /-/;
843 $type =~ s/\|/%7C/g;
844 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
845 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
846 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
847 print STDOUT $msg, "</dd>\n";
848 add_error ('structure', \%opt => $result);
849 };
850
851 my $elements;
852 my $time1 = time;
853 if ($el) {
854 $elements = Whatpm::ContentChecker->check_element
855 ($el, $onerror, $onsubdoc);
856 } else {
857 $elements = Whatpm::ContentChecker->check_document
858 ($doc, $onerror, $onsubdoc);
859 }
860 $time{check} = time - $time1;
861
862 print STDOUT qq[</dl></div>];
863
864 return $elements;
865 } # print_structure_error_dom_section
866
867 sub print_structure_error_manifest_section ($$$) {
868 my ($input, $manifest, $result) = @_;
869
870 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
871 <h2>Document Errors</h2>
872
873 <dl>];
874 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
875 unless $input->{nested};
876
877 require Whatpm::CacheManifest;
878 Whatpm::CacheManifest->check_manifest ($manifest, sub {
879 my %opt = @_;
880 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
881 $type =~ tr/ /-/;
882 $type =~ s/\|/%7C/g;
883 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
884 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
885 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
886 add_error ('structure', \%opt => $result);
887 });
888
889 print STDOUT qq[</div>];
890 } # print_structure_error_manifest_section
891
892 sub print_table_section ($$) {
893 my ($input, $tables) = @_;
894
895 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
896 unless $input->{nested};
897 print STDOUT qq[
898 <div id="$input->{id_prefix}tables" class="section">
899 <h2>Tables</h2>
900
901 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
902 <script src="../table-script.js" type="text/javascript"></script>
903 <noscript>
904 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
905 </noscript>
906 ];
907
908 require JSON;
909
910 my $i = 0;
911 for my $table_el (@$tables) {
912 $i++;
913 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
914 get_node_link ($input, $table_el) . q[</h3>];
915
916 ## TODO: Make |ContentChecker| return |form_table| result
917 ## so that this script don't have to run the algorithm twice.
918 my $table = Whatpm::HTMLTable->form_table ($table_el);
919
920 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
921 next unless $_;
922 delete $_->{element};
923 }
924
925 for (@{$table->{row_group}}) {
926 next unless $_;
927 next unless $_->{element};
928 $_->{type} = $_->{element}->manakai_local_name;
929 delete $_->{element};
930 }
931
932 for (@{$table->{cell}}) {
933 next unless $_;
934 for (@{$_}) {
935 next unless $_;
936 for (@$_) {
937 $_->{id} = refaddr $_->{element} if defined $_->{element};
938 delete $_->{element};
939 $_->{is_header} = $_->{is_header} ? 1 : 0;
940 }
941 }
942 }
943
944 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
945 print STDOUT JSON::objToJson ($table);
946 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
947 print STDOUT qq[, '$input->{id_prefix}');</script>];
948 }
949
950 print STDOUT qq[</div>];
951 } # print_table_section
952
953 sub print_listing_section ($$$) {
954 my ($opt, $input, $ids) = @_;
955
956 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
957 unless $input->{nested};
958 print STDOUT qq[
959 <div id="$input->{id_prefix}$opt->{id}" class="section">
960 <h2>$opt->{heading}</h2>
961
962 <dl>
963 ];
964 for my $id (sort {$a cmp $b} keys %$ids) {
965 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
966 for (@{$ids->{$id}}) {
967 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
968 }
969 }
970 print STDOUT qq[</dl></div>];
971 } # print_listing_section
972
973 sub print_result_section ($) {
974 my $result = shift;
975
976 print STDOUT qq[
977 <div id="result-summary" class="section">
978 <h2>Result</h2>];
979
980 if ($result->{unsupported} and $result->{conforming_max}) {
981 print STDOUT qq[<p class=uncertain id=result-para>The conformance
982 checker cannot decide whether the document is conforming or
983 not, since the document contains one or more unsupported
984 features. The document might or might not be conforming.</p>];
985 } elsif ($result->{conforming_min}) {
986 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
987 found in this document.</p>];
988 } elsif ($result->{conforming_max}) {
989 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
990 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
991 it might be conforming.</p>];
992 } else {
993 print STDOUT qq[<p class=FAIL id=result-para>This document is
994 <strong><em>non</em>-conforming</strong>.</p>];
995 }
996
997 print STDOUT qq[<table>
998 <colgroup><col><colgroup><col><col><col><colgroup><col>
999 <thead>
1000 <tr><th scope=col></th>
1001 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1002 Errors</a></th>
1003 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1004 Errors</a></th>
1005 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1006 <th scope=col>Score</th></tr></thead><tbody>];
1007
1008 my $must_error = 0;
1009 my $should_error = 0;
1010 my $warning = 0;
1011 my $score_min = 0;
1012 my $score_max = 0;
1013 my $score_base = 20;
1014 my $score_unit = $score_base / 100;
1015 for (
1016 [Transfer => 'transfer', ''],
1017 [Character => 'char', ''],
1018 [Syntax => 'syntax', '#parse-errors'],
1019 [Structure => 'structure', '#document-errors'],
1020 ) {
1021 $must_error += ($result->{$_->[1]}->{must} += 0);
1022 $should_error += ($result->{$_->[1]}->{should} += 0);
1023 $warning += ($result->{$_->[1]}->{warning} += 0);
1024 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1025 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1026
1027 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1028 my $label = $_->[0];
1029 if ($result->{$_->[1]}->{must} or
1030 $result->{$_->[1]}->{should} or
1031 $result->{$_->[1]}->{warning} or
1032 $result->{$_->[1]}->{unsupported}) {
1033 $label = qq[<a href="$_->[2]">$label</a>];
1034 }
1035
1036 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1037 if ($uncertain) {
1038 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1039 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1040 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1041 } else {
1042 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1043 }
1044 }
1045
1046 $score_max += $score_base;
1047
1048 print STDOUT qq[
1049 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1050 </tbody>
1051 <tfoot><tr class=uncertain><th scope=row>Total</th>
1052 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1053 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1054 <td>$warning?</td>
1055 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1056 </table>
1057
1058 <p><strong>Important</strong>: This conformance checking service
1059 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1060 </div>];
1061 push @nav, ['#result-summary' => 'Result'];
1062 } # print_result_section
1063
1064 sub print_result_unknown_type_section ($$) {
1065 my ($input, $result) = @_;
1066
1067 my $euri = htescape ($input->{uri});
1068 print STDOUT qq[
1069 <div id="$input->{id_prefix}parse-errors" class="section">
1070 <h2>Errors</h2>
1071
1072 <dl>
1073 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1074 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1075 supported</a></strong>:
1076 Media type
1077 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1078 is not supported.</dd>
1079 </dl>
1080 </div>
1081 ];
1082 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1083 unless $input->{nested};
1084 add_error (char => {level => 'u'} => $result);
1085 add_error (syntax => {level => 'u'} => $result);
1086 add_error (structure => {level => 'u'} => $result);
1087 } # print_result_unknown_type_section
1088
1089 sub print_result_input_error_section ($) {
1090 my $input = shift;
1091 print STDOUT qq[<div class="section" id="result-summary">
1092 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1093 </div>];
1094 push @nav, ['#result-summary' => 'Result'];
1095 } # print_result_input_error_section
1096
1097 sub get_error_label ($$) {
1098 my ($input, $err) = @_;
1099
1100 my $r = '';
1101
1102 if (defined $err->{line}) {
1103 if ($err->{column} > 0) {
1104 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a> column $err->{column}];
1105 } else {
1106 $err->{line} = $err->{line} - 1 || 1;
1107 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a>];
1108 }
1109 }
1110
1111 if (defined $err->{node}) {
1112 $r .= ' ' if length $r;
1113 $r = get_node_link ($input, $err->{node});
1114 }
1115
1116 if (defined $err->{index}) {
1117 $r .= ' ' if length $r;
1118 $r .= 'Index ' . (0+$err->{index});
1119 }
1120
1121 if (defined $err->{value}) {
1122 $r .= ' ' if length $r;
1123 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1124 }
1125
1126 return $r;
1127 } # get_error_label
1128
1129 sub get_error_level_label ($) {
1130 my $err = shift;
1131
1132 my $r = '';
1133
1134 if (not defined $err->{level} or $err->{level} eq 'm') {
1135 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1136 error</a></strong>: ];
1137 } elsif ($err->{level} eq 's') {
1138 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1139 error</a></strong>: ];
1140 } elsif ($err->{level} eq 'w') {
1141 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1142 ];
1143 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1144 $r = qq[<strong><a href="../error-description#level-u">Not
1145 supported</a></strong>: ];
1146 } else {
1147 my $elevel = htescape ($err->{level});
1148 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1149 ];
1150 }
1151
1152 return $r;
1153 } # get_error_level_label
1154
1155 sub get_node_path ($) {
1156 my $node = shift;
1157 my @r;
1158 while (defined $node) {
1159 my $rs;
1160 if ($node->node_type == 1) {
1161 $rs = $node->manakai_local_name;
1162 $node = $node->parent_node;
1163 } elsif ($node->node_type == 2) {
1164 $rs = '@' . $node->manakai_local_name;
1165 $node = $node->owner_element;
1166 } elsif ($node->node_type == 3) {
1167 $rs = '"' . $node->data . '"';
1168 $node = $node->parent_node;
1169 } elsif ($node->node_type == 9) {
1170 @r = ('') unless @r;
1171 $rs = '';
1172 $node = $node->parent_node;
1173 } else {
1174 $rs = '#' . $node->node_type;
1175 $node = $node->parent_node;
1176 }
1177 unshift @r, $rs;
1178 }
1179 return join '/', @r;
1180 } # get_node_path
1181
1182 sub get_node_link ($$) {
1183 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1184 htescape (get_node_path ($_[1])) . qq[</a>];
1185 } # get_node_link
1186
1187 {
1188 my $Msg = {};
1189
1190 sub load_text_catalog ($) {
1191 my $lang = shift; # MUST be a canonical lang name
1192 open my $file, '<:utf8', "cc-msg.$lang.txt"
1193 or die "$0: cc-msg.$lang.txt: $!";
1194 while (<$file>) {
1195 if (s/^([^;]+);([^;]*);//) {
1196 my ($type, $cls, $msg) = ($1, $2, $_);
1197 $msg =~ tr/\x0D\x0A//d;
1198 $Msg->{$type} = [$cls, $msg];
1199 }
1200 }
1201 } # load_text_catalog
1202
1203 sub get_text ($) {
1204 my ($type, $level, $node) = @_;
1205 $type = $level . ':' . $type if defined $level;
1206 $level = 'm' unless defined $level;
1207 my @arg;
1208 {
1209 if (defined $Msg->{$type}) {
1210 my $msg = $Msg->{$type}->[1];
1211 $msg =~ s{<var>\$([0-9]+)</var>}{
1212 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1213 }ge;
1214 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1215 UNIVERSAL::can ($node, 'get_attribute_ns')
1216 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1217 }ge;
1218 $msg =~ s{<var>{\@}</var>}{
1219 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1220 }ge;
1221 $msg =~ s{<var>{local-name}</var>}{
1222 UNIVERSAL::can ($node, 'manakai_local_name')
1223 ? htescape ($node->manakai_local_name) : ''
1224 }ge;
1225 $msg =~ s{<var>{element-local-name}</var>}{
1226 (UNIVERSAL::can ($node, 'owner_element') and
1227 $node->owner_element)
1228 ? htescape ($node->owner_element->manakai_local_name)
1229 : ''
1230 }ge;
1231 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1232 } elsif ($type =~ s/:([^:]*)$//) {
1233 unshift @arg, $1;
1234 redo;
1235 }
1236 }
1237 return ($type, 'level-'.$level, htescape ($_[0]));
1238 } # get_text
1239
1240 }
1241
1242 sub get_input_document ($$) {
1243 my ($http, $dom) = @_;
1244
1245 my $request_uri = $http->get_parameter ('uri');
1246 my $r = {};
1247 if (defined $request_uri and length $request_uri) {
1248 my $uri = $dom->create_uri_reference ($request_uri);
1249 unless ({
1250 http => 1,
1251 }->{lc $uri->uri_scheme}) {
1252 return {uri => $request_uri, request_uri => $request_uri,
1253 error_status_text => 'URI scheme not allowed'};
1254 }
1255
1256 require Message::Util::HostPermit;
1257 my $host_permit = new Message::Util::HostPermit;
1258 $host_permit->add_rule (<<EOH);
1259 Allow host=suika port=80
1260 Deny host=suika
1261 Allow host=suika.fam.cx port=80
1262 Deny host=suika.fam.cx
1263 Deny host=localhost
1264 Deny host=*.localdomain
1265 Deny ipv4=0.0.0.0/8
1266 Deny ipv4=10.0.0.0/8
1267 Deny ipv4=127.0.0.0/8
1268 Deny ipv4=169.254.0.0/16
1269 Deny ipv4=172.0.0.0/11
1270 Deny ipv4=192.0.2.0/24
1271 Deny ipv4=192.88.99.0/24
1272 Deny ipv4=192.168.0.0/16
1273 Deny ipv4=198.18.0.0/15
1274 Deny ipv4=224.0.0.0/4
1275 Deny ipv4=255.255.255.255/32
1276 Deny ipv6=0::0/0
1277 Allow host=*
1278 EOH
1279 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1280 return {uri => $request_uri, request_uri => $request_uri,
1281 error_status_text => 'Connection to the host is forbidden'};
1282 }
1283
1284 require LWP::UserAgent;
1285 my $ua = WDCC::LWPUA->new;
1286 $ua->{wdcc_dom} = $dom;
1287 $ua->{wdcc_host_permit} = $host_permit;
1288 $ua->agent ('Mozilla'); ## TODO: for now.
1289 $ua->parse_head (0);
1290 $ua->protocols_allowed ([qw/http/]);
1291 $ua->max_size (1000_000);
1292 my $req = HTTP::Request->new (GET => $request_uri);
1293 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1294 my $res = $ua->request ($req);
1295 ## TODO: 401 sets |is_success| true.
1296 if ($res->is_success or $http->get_parameter ('error-page')) {
1297 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1298 $r->{uri} = $res->request->uri;
1299 $r->{request_uri} = $request_uri;
1300
1301 ## TODO: More strict parsing...
1302 my $ct = $res->header ('Content-Type');
1303 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1304 $r->{charset} = lc $1;
1305 $r->{charset} =~ tr/\\//d;
1306 $r->{official_charset} = $r->{charset};
1307 }
1308
1309 my $input_charset = $http->get_parameter ('charset');
1310 if (defined $input_charset and length $input_charset) {
1311 $r->{charset_overridden}
1312 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1313 $r->{charset} = $input_charset;
1314 }
1315
1316 ## TODO: Support for HTTP Content-Encoding
1317
1318 $r->{s} = ''.$res->content;
1319
1320 require Whatpm::ContentType;
1321 ($r->{official_type}, $r->{media_type})
1322 = Whatpm::ContentType->get_sniffed_type
1323 (get_file_head => sub {
1324 return substr $r->{s}, 0, shift;
1325 },
1326 http_content_type_byte => $ct,
1327 has_http_content_encoding =>
1328 defined $res->header ('Content-Encoding'),
1329 supported_image_types => {});
1330 } else {
1331 $r->{uri} = $res->request->uri;
1332 $r->{request_uri} = $request_uri;
1333 $r->{error_status_text} = $res->status_line;
1334 }
1335
1336 $r->{header_field} = [];
1337 $res->scan (sub {
1338 push @{$r->{header_field}}, [$_[0], $_[1]];
1339 });
1340 $r->{header_status_code} = $res->code;
1341 $r->{header_status_text} = $res->message;
1342 } else {
1343 $r->{s} = ''.$http->get_parameter ('s');
1344 $r->{uri} = q<thismessage:/>;
1345 $r->{request_uri} = q<thismessage:/>;
1346 $r->{base_uri} = q<thismessage:/>;
1347 $r->{charset} = ''.$http->get_parameter ('_charset_');
1348 $r->{charset} =~ s/\s+//g;
1349 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1350 $r->{official_charset} = $r->{charset};
1351 $r->{header_field} = [];
1352
1353 require Whatpm::ContentType;
1354 ($r->{official_type}, $r->{media_type})
1355 = Whatpm::ContentType->get_sniffed_type
1356 (get_file_head => sub {
1357 return substr $r->{s}, 0, shift;
1358 },
1359 http_content_type_byte => undef,
1360 has_http_content_encoding => 0,
1361 supported_image_types => {});
1362 }
1363
1364 my $input_format = $http->get_parameter ('i');
1365 if (defined $input_format and length $input_format) {
1366 $r->{media_type_overridden}
1367 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1368 $r->{media_type} = $input_format;
1369 }
1370 if (defined $r->{s} and not defined $r->{media_type}) {
1371 $r->{media_type} = 'text/html';
1372 $r->{media_type_overridden} = 1;
1373 }
1374
1375 if ($r->{media_type} eq 'text/xml') {
1376 unless (defined $r->{charset}) {
1377 $r->{charset} = 'us-ascii';
1378 $r->{official_charset} = $r->{charset};
1379 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1380 $r->{charset_overridden} = 0;
1381 }
1382 }
1383
1384 if (length $r->{s} > 1000_000) {
1385 $r->{error_status_text} = 'Entity-body too large';
1386 delete $r->{s};
1387 return $r;
1388 }
1389
1390 $r->{inner_html_element} = $http->get_parameter ('e');
1391
1392 return $r;
1393 } # get_input_document
1394
1395 package WDCC::LWPUA;
1396 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1397
1398 sub redirect_ok {
1399 my $ua = shift;
1400 unless ($ua->SUPER::redirect_ok (@_)) {
1401 return 0;
1402 }
1403
1404 my $uris = $_[1]->header ('Location');
1405 return 0 unless $uris;
1406 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1407 unless ({
1408 http => 1,
1409 }->{lc $uri->uri_scheme}) {
1410 return 0;
1411 }
1412 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1413 return 0;
1414 }
1415 return 1;
1416 } # redirect_ok
1417
1418 =head1 AUTHOR
1419
1420 Wakaba <w@suika.fam.cx>.
1421
1422 =head1 LICENSE
1423
1424 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1425
1426 This library is free software; you can redistribute it
1427 and/or modify it under the same terms as Perl itself.
1428
1429 =cut
1430
1431 ## $Date: 2008/02/10 03:11:06 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24