/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.38 - (show annotations) (download)
Tue Mar 11 14:10:11 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.37: +3 -7 lines
++ ChangeLog	11 Mar 2008 14:10:08 -0000
2008-03-11  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi: |value| should be output in |dt| in Parse Errors
	section.

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89 ];
90
91 $input->{id_prefix} = '';
92 #$input->{nested} = 0;
93 my $result = {conforming_min => 1, conforming_max => 1};
94 check_and_print ($input => $result);
95 print_result_section ($result);
96 } else {
97 print STDOUT qq[</dl></div>];
98 print_result_input_error_section ($input);
99 }
100
101 print STDOUT qq[
102 <ul class="navigation" id="nav-items">
103 ];
104 for (@nav) {
105 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
106 }
107 print STDOUT qq[
108 </ul>
109 </body>
110 </html>
111 ];
112
113 for (qw/decode parse parse_html parse_xml parse_manifest
114 check check_manifest/) {
115 next unless defined $time{$_};
116 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
117 print $file $char_length, "\t", $time{$_}, "\n";
118 }
119
120 exit;
121 }
122
123 sub add_error ($$$) {
124 my ($layer, $err, $result) = @_;
125 if (defined $err->{level}) {
126 if ($err->{level} eq 's') {
127 $result->{$layer}->{should}++;
128 $result->{$layer}->{score_min} -= 2;
129 $result->{conforming_min} = 0;
130 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
131 $result->{$layer}->{warning}++;
132 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
133 $result->{$layer}->{unsupported}++;
134 $result->{unsupported} = 1;
135 } elsif ($err->{level} eq 'i') {
136 #
137 } else {
138 $result->{$layer}->{must}++;
139 $result->{$layer}->{score_max} -= 2;
140 $result->{$layer}->{score_min} -= 2;
141 $result->{conforming_min} = 0;
142 $result->{conforming_max} = 0;
143 }
144 } else {
145 $result->{$layer}->{must}++;
146 $result->{$layer}->{score_max} -= 2;
147 $result->{$layer}->{score_min} -= 2;
148 $result->{conforming_min} = 0;
149 $result->{conforming_max} = 0;
150 }
151 } # add_error
152
153 sub check_and_print ($$) {
154 my ($input, $result) = @_;
155
156 print_http_header_section ($input, $result);
157
158 my $doc;
159 my $el;
160 my $cssom;
161 my $manifest;
162 my @subdoc;
163
164 if ($input->{media_type} eq 'text/html') {
165 ($doc, $el) = print_syntax_error_html_section ($input, $result);
166 print_source_string_section
167 ($input,
168 \($input->{s}),
169 $input->{charset} || $doc->input_encoding);
170 } elsif ({
171 'text/xml' => 1,
172 'application/atom+xml' => 1,
173 'application/rss+xml' => 1,
174 'application/svg+xml' => 1,
175 'application/xhtml+xml' => 1,
176 'application/xml' => 1,
177 }->{$input->{media_type}}) {
178 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
179 print_source_string_section ($input,
180 \($input->{s}),
181 $doc->input_encoding);
182 } elsif ($input->{media_type} eq 'text/css') {
183 $cssom = print_syntax_error_css_section ($input, $result);
184 print_source_string_section
185 ($input, \($input->{s}),
186 $cssom->manakai_input_encoding);
187 } elsif ($input->{media_type} eq 'text/cache-manifest') {
188 ## TODO: MUST be text/cache-manifest
189 $manifest = print_syntax_error_manifest_section ($input, $result);
190 print_source_string_section ($input, \($input->{s}),
191 'utf-8');
192 } else {
193 ## TODO: Change HTTP status code??
194 print_result_unknown_type_section ($input, $result);
195 }
196
197 if (defined $doc or defined $el) {
198 $doc->document_uri ($input->{uri});
199 $doc->manakai_entity_base_uri ($input->{base_uri});
200 print_structure_dump_dom_section ($input, $doc, $el);
201 my $elements = print_structure_error_dom_section
202 ($input, $doc, $el, $result, sub {
203 push @subdoc, shift;
204 });
205 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
206 print_listing_section ({
207 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
208 }, $input, $elements->{id}) if keys %{$elements->{id}};
209 print_listing_section ({
210 id => 'terms', label => 'Terms', heading => 'Terms',
211 }, $input, $elements->{term}) if keys %{$elements->{term}};
212 print_listing_section ({
213 id => 'classes', label => 'Classes', heading => 'Classes',
214 }, $input, $elements->{class}) if keys %{$elements->{class}};
215 } elsif (defined $cssom) {
216 print_structure_dump_cssom_section ($input, $cssom);
217 ## TODO: CSSOM validation
218 add_error ('structure', {level => 'u'} => $result);
219 } elsif (defined $manifest) {
220 print_structure_dump_manifest_section ($input, $manifest);
221 print_structure_error_manifest_section ($input, $manifest, $result);
222 }
223
224 my $id_prefix = 0;
225 for my $subinput (@subdoc) {
226 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
227 $subinput->{nested} = 1;
228 $subinput->{base_uri} = $subinput->{container_node}->base_uri
229 unless defined $subinput->{base_uri};
230 my $ebaseuri = htescape ($subinput->{base_uri});
231 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
232 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
233 <h2>Subdocument #$id_prefix</h2>
234
235 <dl>
236 <dt>Internet Media Type</dt>
237 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
238 <dt>Container Node</dt>
239 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
240 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
241 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
242 </dl>];
243
244 $subinput->{id_prefix} .= '-';
245 check_and_print ($subinput => $result);
246
247 print STDOUT qq[</div>];
248 }
249 } # check_and_print
250
251 sub print_http_header_section ($$) {
252 my ($input, $result) = @_;
253 return unless defined $input->{header_status_code} or
254 defined $input->{header_status_text} or
255 @{$input->{header_field} or []};
256
257 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
258 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
259 <h2>HTTP Header</h2>
260
261 <p><strong>Note</strong>: Due to the limitation of the
262 network library in use, the content of this section might
263 not be the real header.</p>
264
265 <table><tbody>
266 ];
267
268 if (defined $input->{header_status_code}) {
269 print STDOUT qq[<tr><th scope="row">Status code</th>];
270 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
271 }
272 if (defined $input->{header_status_text}) {
273 print STDOUT qq[<tr><th scope="row">Status text</th>];
274 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
275 }
276
277 for (@{$input->{header_field}}) {
278 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
279 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
280 }
281
282 print STDOUT qq[</tbody></table></div>];
283 } # print_http_header_section
284
285 sub print_syntax_error_html_section ($$) {
286 my ($input, $result) = @_;
287
288 require Encode;
289 require Whatpm::HTML;
290
291 print STDOUT qq[
292 <div id="$input->{id_prefix}parse-errors" class="section">
293 <h2>Parse Errors</h2>
294
295 <dl>];
296 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
297
298 my $onerror = sub {
299 my (%opt) = @_;
300 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
301 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
302 qq[</dt>];
303 $type =~ tr/ /-/;
304 $type =~ s/\|/%7C/g;
305 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
306 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
307 print STDOUT qq[$msg</dd>\n];
308
309 add_error ('syntax', \%opt => $result);
310 };
311
312 my $doc = $dom->create_document;
313 my $el;
314 my $inner_html_element = $input->{inner_html_element};
315 if (defined $inner_html_element and length $inner_html_element) {
316 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
317 my $time1 = time;
318 my $t = Encode::decode ($input->{charset}, $input->{s});
319 $time{decode} = time - $time1;
320
321 $el = $doc->create_element_ns
322 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
323 $time1 = time;
324 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
325 $time{parse} = time - $time1;
326 } else {
327 my $time1 = time;
328 Whatpm::HTML->parse_byte_string
329 ($input->{charset}, $input->{s} => $doc, $onerror);
330 $time{parse_html} = time - $time1;
331 }
332 $doc->manakai_charset ($input->{official_charset})
333 if defined $input->{official_charset};
334
335 print STDOUT qq[</dl></div>];
336
337 return ($doc, $el);
338 } # print_syntax_error_html_section
339
340 sub print_syntax_error_xml_section ($$) {
341 my ($input, $result) = @_;
342
343 require Message::DOM::XMLParserTemp;
344
345 print STDOUT qq[
346 <div id="$input->{id_prefix}parse-errors" class="section">
347 <h2>Parse Errors</h2>
348
349 <dl>];
350 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
351
352 my $onerror = sub {
353 my $err = shift;
354 my $line = $err->location->line_number;
355 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
356 print STDOUT $err->location->column_number, "</dt><dd>";
357 print STDOUT htescape $err->text, "</dd>\n";
358
359 add_error ('syntax', {type => $err->text,
360 level => [
361 $err->SEVERITY_FATAL_ERROR => 'm',
362 $err->SEVERITY_ERROR => 'm',
363 $err->SEVERITY_WARNING => 's',
364 ]->[$err->severity]} => $result);
365
366 return 1;
367 };
368
369 my $time1 = time;
370 open my $fh, '<', \($input->{s});
371 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
372 ($fh => $dom, $onerror, charset => $input->{charset});
373 $time{parse_xml} = time - $time1;
374 $doc->manakai_charset ($input->{official_charset})
375 if defined $input->{official_charset};
376
377 print STDOUT qq[</dl></div>];
378
379 return ($doc, undef);
380 } # print_syntax_error_xml_section
381
382 sub get_css_parser () {
383 our $CSSParser;
384 return $CSSParser if $CSSParser;
385
386 require Whatpm::CSS::Parser;
387 my $p = Whatpm::CSS::Parser->new;
388
389 $p->{prop}->{$_} = 1 for qw/
390 alignment-baseline
391 background background-attachment background-color background-image
392 background-position background-position-x background-position-y
393 background-repeat border border-bottom border-bottom-color
394 border-bottom-style border-bottom-width border-collapse border-color
395 border-left border-left-color
396 border-left-style border-left-width border-right border-right-color
397 border-right-style border-right-width
398 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
399 border-style border-top border-top-color border-top-style border-top-width
400 border-width bottom
401 caption-side clear clip color content counter-increment counter-reset
402 cursor direction display dominant-baseline empty-cells float font
403 font-family font-size font-size-adjust font-stretch
404 font-style font-variant font-weight height left
405 letter-spacing line-height
406 list-style list-style-image list-style-position list-style-type
407 margin margin-bottom margin-left margin-right margin-top marker-offset
408 marks max-height max-width min-height min-width opacity -moz-opacity
409 orphans outline outline-color outline-style outline-width overflow
410 overflow-x overflow-y
411 padding padding-bottom padding-left padding-right padding-top
412 page page-break-after page-break-before page-break-inside
413 position quotes right size table-layout
414 text-align text-anchor text-decoration text-indent text-transform
415 top unicode-bidi vertical-align visibility white-space width widows
416 word-spacing writing-mode z-index
417 /;
418 $p->{prop_value}->{display}->{$_} = 1 for qw/
419 block clip inline inline-block inline-table list-item none
420 table table-caption table-cell table-column table-column-group
421 table-header-group table-footer-group table-row table-row-group
422 compact marker
423 /;
424 $p->{prop_value}->{position}->{$_} = 1 for qw/
425 absolute fixed relative static
426 /;
427 $p->{prop_value}->{float}->{$_} = 1 for qw/
428 left right none
429 /;
430 $p->{prop_value}->{clear}->{$_} = 1 for qw/
431 left right none both
432 /;
433 $p->{prop_value}->{direction}->{ltr} = 1;
434 $p->{prop_value}->{direction}->{rtl} = 1;
435 $p->{prop_value}->{marks}->{crop} = 1;
436 $p->{prop_value}->{marks}->{cross} = 1;
437 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
438 normal bidi-override embed
439 /;
440 for my $prop_name (qw/overflow overflow-x overflow-y/) {
441 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
442 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
443 /;
444 }
445 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
446 visible hidden collapse
447 /;
448 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
449 disc circle square decimal decimal-leading-zero
450 lower-roman upper-roman lower-greek lower-latin
451 upper-latin armenian georgian lower-alpha upper-alpha none
452 hebrew cjk-ideographic hiragana katakana hiragana-iroha
453 katakana-iroha
454 /;
455 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
456 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
457 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
458 auto always avoid left right
459 /;
460 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
461 auto always avoid left right
462 /;
463 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
464 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
465 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
466 repeat repeat-x repeat-y no-repeat
467 /;
468 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
469 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
470 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
471 xx-small x-small small medium large x-large xx-large
472 -manakai-xxx-large -webkit-xxx-large
473 larger smaller
474 /;
475 $p->{prop_value}->{'font-style'}->{normal} = 1;
476 $p->{prop_value}->{'font-style'}->{italic} = 1;
477 $p->{prop_value}->{'font-style'}->{oblique} = 1;
478 $p->{prop_value}->{'font-variant'}->{normal} = 1;
479 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
480 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
481 qw/normal wider narrower ultra-condensed extra-condensed
482 condensed semi-condensed semi-expanded expanded
483 extra-expanded ultra-expanded/;
484 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
485 left right center justify begin end
486 /;
487 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
488 capitalize uppercase lowercase none
489 /;
490 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
491 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
492 /;
493 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
494 lr rl tb lr-tb rl-tb tb-rl
495 /;
496 $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
497 start middle end
498 /;
499 $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
500 auto use-script no-change reset-size ideographic alphabetic
501 hanging mathematical central middle text-after-edge text-before-edge
502 /;
503 $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
504 auto baseline before-edge text-before-edge middle central
505 after-edge text-after-edge ideographic alphabetic hanging
506 mathematical
507 /;
508 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
509 none blink underline overline line-through
510 /;
511 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
512 top bottom left right
513 /;
514 $p->{prop_value}->{'table-layout'}->{auto} = 1;
515 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
516 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
517 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
518 $p->{prop_value}->{'empty-cells'}->{show} = 1;
519 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
520 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
521 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
522 se-resize sw-resize s-resize w-resize text wait help progress
523 /;
524 for my $prop (qw/border-top-style border-left-style
525 border-bottom-style border-right-style outline-style/) {
526 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
527 none hidden dotted dashed solid double groove ridge inset outset
528 /;
529 }
530 for my $prop (qw/color background-color
531 border-bottom-color border-left-color border-right-color
532 border-top-color border-color/) {
533 $p->{prop_value}->{$prop}->{transparent} = 1;
534 $p->{prop_value}->{$prop}->{flavor} = 1;
535 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
536 }
537 $p->{prop_value}->{'outline-color'}->{invert} = 1;
538 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
539 $p->{pseudo_class}->{$_} = 1 for qw/
540 active checked disabled empty enabled first-child first-of-type
541 focus hover indeterminate last-child last-of-type link only-child
542 only-of-type root target visited
543 lang nth-child nth-last-child nth-of-type nth-last-of-type not
544 -manakai-contains -manakai-current
545 /;
546 $p->{pseudo_element}->{$_} = 1 for qw/
547 after before first-letter first-line
548 /;
549
550 return $CSSParser = $p;
551 } # get_css_parser
552
553 sub print_syntax_error_css_section ($$) {
554 my ($input, $result) = @_;
555
556 print STDOUT qq[
557 <div id="$input->{id_prefix}parse-errors" class="section">
558 <h2>Parse Errors</h2>
559
560 <dl>];
561 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
562
563 my $p = get_css_parser ();
564 $p->init;
565 $p->{onerror} = sub {
566 my (%opt) = @_;
567 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
568 if ($opt{token}) {
569 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
570 } else {
571 print STDOUT qq[<dt class="$cls">Unknown location];
572 }
573 if (defined $opt{value}) {
574 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
575 } elsif (defined $opt{token}) {
576 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
577 }
578 $type =~ tr/ /-/;
579 $type =~ s/\|/%7C/g;
580 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
581 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
582 print STDOUT qq[$msg</dd>\n];
583
584 add_error ('syntax', \%opt => $result);
585 };
586 $p->{href} = $input->{uri};
587 $p->{base_uri} = $input->{base_uri};
588
589 # if ($parse_mode eq 'q') {
590 # $p->{unitless_px} = 1;
591 # $p->{hashless_color} = 1;
592 # }
593
594 ## TODO: Make $input->{s} a ref.
595
596 my $s = \$input->{s};
597 my $charset;
598 unless ($input->{is_char_string}) {
599 require Encode;
600 if (defined $input->{charset}) {## TODO: IANA->Perl
601 $charset = $input->{charset};
602 $s = \(Encode::decode ($input->{charset}, $$s));
603 } else {
604 ## TODO: charset detection
605 $s = \(Encode::decode ($charset = 'utf-8', $$s));
606 }
607 }
608
609 my $cssom = $p->parse_char_string ($$s);
610 $cssom->manakai_input_encoding ($charset) if defined $charset;
611
612 print STDOUT qq[</dl></div>];
613
614 return $cssom;
615 } # print_syntax_error_css_section
616
617 sub print_syntax_error_manifest_section ($$) {
618 my ($input, $result) = @_;
619
620 require Whatpm::CacheManifest;
621
622 print STDOUT qq[
623 <div id="$input->{id_prefix}parse-errors" class="section">
624 <h2>Parse Errors</h2>
625
626 <dl>];
627 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
628
629 my $onerror = sub {
630 my (%opt) = @_;
631 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
632 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
633 qq[</dt>];
634 $type =~ tr/ /-/;
635 $type =~ s/\|/%7C/g;
636 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
637 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
638 print STDOUT qq[$msg</dd>\n];
639
640 add_error ('syntax', \%opt => $result);
641 };
642
643 my $time1 = time;
644 my $manifest = Whatpm::CacheManifest->parse_byte_string
645 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
646 $time{parse_manifest} = time - $time1;
647
648 print STDOUT qq[</dl></div>];
649
650 return $manifest;
651 } # print_syntax_error_manifest_section
652
653 sub print_source_string_section ($$$) {
654 my $input = shift;
655 my $s;
656 unless ($input->{is_char_string}) {
657 require Encode;
658 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
659 return unless $enc;
660
661 $s = \($enc->decode (${$_[0]}));
662 } else {
663 $s = $_[0];
664 }
665
666 my $i = 1;
667 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
668 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
669 <h2>Document Source</h2>
670 <ol lang="">\n];
671 if (length $$s) {
672 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
673 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
674 "</li>\n";
675 $i++;
676 }
677 if ($$s =~ /\G([^\x0A]+)/gc) {
678 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
679 "</li>\n";
680 }
681 } else {
682 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
683 }
684 print STDOUT "</ol></div>";
685 } # print_input_string_section
686
687 sub print_document_tree ($$) {
688 my ($input, $node) = @_;
689
690 my $r = '<ol class="xoxo">';
691
692 my @node = ($node);
693 while (@node) {
694 my $child = shift @node;
695 unless (ref $child) {
696 $r .= $child;
697 next;
698 }
699
700 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
701 my $nt = $child->node_type;
702 if ($nt == $child->ELEMENT_NODE) {
703 my $child_nsuri = $child->namespace_uri;
704 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
705 '</code>'; ## ISSUE: case
706
707 if ($child->has_attributes) {
708 $r .= '<ul class="attributes">';
709 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
710 @{$child->attributes}) {
711 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
712 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
713 }
714 $r .= '</ul>';
715 }
716
717 if ($child->has_child_nodes) {
718 $r .= '<ol class="children">';
719 unshift @node, @{$child->child_nodes}, '</ol></li>';
720 } else {
721 $r .= '</li>';
722 }
723 } elsif ($nt == $child->TEXT_NODE) {
724 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
725 } elsif ($nt == $child->CDATA_SECTION_NODE) {
726 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
727 } elsif ($nt == $child->COMMENT_NODE) {
728 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
729 } elsif ($nt == $child->DOCUMENT_NODE) {
730 $r .= qq'<li id="$node_id" class="tree-document">Document';
731 $r .= qq[<ul class="attributes">];
732 my $cp = $child->manakai_charset;
733 if (defined $cp) {
734 $r .= qq[<li><code>charset</code> parameter = <code>];
735 $r .= htescape ($cp) . qq[</code></li>];
736 }
737 $r .= qq[<li><code>inputEncoding</code> = ];
738 my $ie = $child->input_encoding;
739 if (defined $ie) {
740 $r .= qq[<code>@{[htescape ($ie)]}</code>];
741 if ($child->manakai_has_bom) {
742 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
743 }
744 } else {
745 $r .= qq[(<code>null</code>)];
746 }
747 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
748 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
749 unless ($child->manakai_is_html) {
750 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
751 if (defined $child->xml_encoding) {
752 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
753 } else {
754 $r .= qq[<li>XML encoding = (null)</li>];
755 }
756 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
757 }
758 $r .= qq[</ul>];
759 if ($child->has_child_nodes) {
760 $r .= '<ol class="children">';
761 unshift @node, @{$child->child_nodes}, '</ol></li>';
762 }
763 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
764 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
765 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
766 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
767 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
768 $r .= '</ul></li>';
769 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
770 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
771 } else {
772 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
773 }
774 }
775
776 $r .= '</ol>';
777 print STDOUT $r;
778 } # print_document_tree
779
780 sub print_structure_dump_dom_section ($$$) {
781 my ($input, $doc, $el) = @_;
782
783 print STDOUT qq[
784 <div id="$input->{id_prefix}document-tree" class="section">
785 <h2>Document Tree</h2>
786 ];
787 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
788 unless $input->{nested};
789
790 print_document_tree ($input, $el || $doc);
791
792 print STDOUT qq[</div>];
793 } # print_structure_dump_dom_section
794
795 sub print_structure_dump_cssom_section ($$) {
796 my ($input, $cssom) = @_;
797
798 print STDOUT qq[
799 <div id="$input->{id_prefix}document-tree" class="section">
800 <h2>Document Tree</h2>
801 ];
802 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
803 unless $input->{nested};
804
805 ## TODO:
806 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
807
808 print STDOUT qq[</div>];
809 } # print_structure_dump_cssom_section
810
811 sub print_structure_dump_manifest_section ($$) {
812 my ($input, $manifest) = @_;
813
814 print STDOUT qq[
815 <div id="$input->{id_prefix}dump-manifest" class="section">
816 <h2>Cache Manifest</h2>
817 ];
818 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
819 unless $input->{nested};
820
821 print STDOUT qq[<dl><dt>Explicit entries</dt>];
822 my $i = 0;
823 for my $uri (@{$manifest->[0]}) {
824 my $euri = htescape ($uri);
825 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
826 }
827
828 print STDOUT qq[<dt>Fallback entries</dt><dd>
829 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
830 <th scope=row>Fallback Entry</tr><tbody>];
831 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
832 my $euri = htescape ($uri);
833 my $euri2 = htescape ($manifest->[1]->{$uri});
834 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
835 <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
836 }
837
838 print STDOUT qq[</table><dt>Online whitelist</dt>];
839 for my $uri (@{$manifest->[2]}) {
840 my $euri = htescape ($uri);
841 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
842 }
843
844 print STDOUT qq[</dl></div>];
845 } # print_structure_dump_manifest_section
846
847 sub print_structure_error_dom_section ($$$$$) {
848 my ($input, $doc, $el, $result, $onsubdoc) = @_;
849
850 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
851 <h2>Document Errors</h2>
852
853 <dl>];
854 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
855 unless $input->{nested};
856
857 require Whatpm::ContentChecker;
858 my $onerror = sub {
859 my %opt = @_;
860 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
861 $type =~ tr/ /-/;
862 $type =~ s/\|/%7C/g;
863 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
864 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
865 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
866 print STDOUT $msg, "</dd>\n";
867 add_error ('structure', \%opt => $result);
868 };
869
870 my $elements;
871 my $time1 = time;
872 if ($el) {
873 $elements = Whatpm::ContentChecker->check_element
874 ($el, $onerror, $onsubdoc);
875 } else {
876 $elements = Whatpm::ContentChecker->check_document
877 ($doc, $onerror, $onsubdoc);
878 }
879 $time{check} = time - $time1;
880
881 print STDOUT qq[</dl></div>];
882
883 return $elements;
884 } # print_structure_error_dom_section
885
886 sub print_structure_error_manifest_section ($$$) {
887 my ($input, $manifest, $result) = @_;
888
889 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
890 <h2>Document Errors</h2>
891
892 <dl>];
893 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
894 unless $input->{nested};
895
896 require Whatpm::CacheManifest;
897 Whatpm::CacheManifest->check_manifest ($manifest, sub {
898 my %opt = @_;
899 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
900 $type =~ tr/ /-/;
901 $type =~ s/\|/%7C/g;
902 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
903 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
904 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
905 add_error ('structure', \%opt => $result);
906 });
907
908 print STDOUT qq[</div>];
909 } # print_structure_error_manifest_section
910
911 sub print_table_section ($$) {
912 my ($input, $tables) = @_;
913
914 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
915 unless $input->{nested};
916 print STDOUT qq[
917 <div id="$input->{id_prefix}tables" class="section">
918 <h2>Tables</h2>
919
920 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
921 <script src="../table-script.js" type="text/javascript"></script>
922 <noscript>
923 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
924 </noscript>
925 ];
926
927 require JSON;
928
929 my $i = 0;
930 for my $table_el (@$tables) {
931 $i++;
932 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
933 get_node_link ($input, $table_el) . q[</h3>];
934
935 ## TODO: Make |ContentChecker| return |form_table| result
936 ## so that this script don't have to run the algorithm twice.
937 my $table = Whatpm::HTMLTable->form_table ($table_el);
938
939 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
940 next unless $_;
941 delete $_->{element};
942 }
943
944 for (@{$table->{row_group}}) {
945 next unless $_;
946 next unless $_->{element};
947 $_->{type} = $_->{element}->manakai_local_name;
948 delete $_->{element};
949 }
950
951 for (@{$table->{cell}}) {
952 next unless $_;
953 for (@{$_}) {
954 next unless $_;
955 for (@$_) {
956 $_->{id} = refaddr $_->{element} if defined $_->{element};
957 delete $_->{element};
958 $_->{is_header} = $_->{is_header} ? 1 : 0;
959 }
960 }
961 }
962
963 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
964 print STDOUT JSON::objToJson ($table);
965 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
966 print STDOUT qq[, '$input->{id_prefix}');</script>];
967 }
968
969 print STDOUT qq[</div>];
970 } # print_table_section
971
972 sub print_listing_section ($$$) {
973 my ($opt, $input, $ids) = @_;
974
975 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
976 unless $input->{nested};
977 print STDOUT qq[
978 <div id="$input->{id_prefix}$opt->{id}" class="section">
979 <h2>$opt->{heading}</h2>
980
981 <dl>
982 ];
983 for my $id (sort {$a cmp $b} keys %$ids) {
984 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
985 for (@{$ids->{$id}}) {
986 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
987 }
988 }
989 print STDOUT qq[</dl></div>];
990 } # print_listing_section
991
992 sub print_result_section ($) {
993 my $result = shift;
994
995 print STDOUT qq[
996 <div id="result-summary" class="section">
997 <h2>Result</h2>];
998
999 if ($result->{unsupported} and $result->{conforming_max}) {
1000 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1001 checker cannot decide whether the document is conforming or
1002 not, since the document contains one or more unsupported
1003 features. The document might or might not be conforming.</p>];
1004 } elsif ($result->{conforming_min}) {
1005 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1006 found in this document.</p>];
1007 } elsif ($result->{conforming_max}) {
1008 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1009 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1010 it might be conforming.</p>];
1011 } else {
1012 print STDOUT qq[<p class=FAIL id=result-para>This document is
1013 <strong><em>non</em>-conforming</strong>.</p>];
1014 }
1015
1016 print STDOUT qq[<table>
1017 <colgroup><col><colgroup><col><col><col><colgroup><col>
1018 <thead>
1019 <tr><th scope=col></th>
1020 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1021 Errors</a></th>
1022 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1023 Errors</a></th>
1024 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1025 <th scope=col>Score</th></tr></thead><tbody>];
1026
1027 my $must_error = 0;
1028 my $should_error = 0;
1029 my $warning = 0;
1030 my $score_min = 0;
1031 my $score_max = 0;
1032 my $score_base = 20;
1033 my $score_unit = $score_base / 100;
1034 for (
1035 [Transfer => 'transfer', ''],
1036 [Character => 'char', ''],
1037 [Syntax => 'syntax', '#parse-errors'],
1038 [Structure => 'structure', '#document-errors'],
1039 ) {
1040 $must_error += ($result->{$_->[1]}->{must} += 0);
1041 $should_error += ($result->{$_->[1]}->{should} += 0);
1042 $warning += ($result->{$_->[1]}->{warning} += 0);
1043 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1044 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1045
1046 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1047 my $label = $_->[0];
1048 if ($result->{$_->[1]}->{must} or
1049 $result->{$_->[1]}->{should} or
1050 $result->{$_->[1]}->{warning} or
1051 $result->{$_->[1]}->{unsupported}) {
1052 $label = qq[<a href="$_->[2]">$label</a>];
1053 }
1054
1055 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1056 if ($uncertain) {
1057 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1058 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1059 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1060 } else {
1061 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1062 }
1063 }
1064
1065 $score_max += $score_base;
1066
1067 print STDOUT qq[
1068 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1069 </tbody>
1070 <tfoot><tr class=uncertain><th scope=row>Total</th>
1071 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1072 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1073 <td>$warning?</td>
1074 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1075 </table>
1076
1077 <p><strong>Important</strong>: This conformance checking service
1078 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1079 </div>];
1080 push @nav, ['#result-summary' => 'Result'];
1081 } # print_result_section
1082
1083 sub print_result_unknown_type_section ($$) {
1084 my ($input, $result) = @_;
1085
1086 my $euri = htescape ($input->{uri});
1087 print STDOUT qq[
1088 <div id="$input->{id_prefix}parse-errors" class="section">
1089 <h2>Errors</h2>
1090
1091 <dl>
1092 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1093 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1094 supported</a></strong>:
1095 Media type
1096 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1097 is not supported.</dd>
1098 </dl>
1099 </div>
1100 ];
1101 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1102 unless $input->{nested};
1103 add_error (char => {level => 'u'} => $result);
1104 add_error (syntax => {level => 'u'} => $result);
1105 add_error (structure => {level => 'u'} => $result);
1106 } # print_result_unknown_type_section
1107
1108 sub print_result_input_error_section ($) {
1109 my $input = shift;
1110 print STDOUT qq[<div class="section" id="result-summary">
1111 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1112 </div>];
1113 push @nav, ['#result-summary' => 'Result'];
1114 } # print_result_input_error_section
1115
1116 sub get_error_label ($$) {
1117 my ($input, $err) = @_;
1118
1119 my $r = '';
1120
1121 if (defined $err->{line}) {
1122 if ($err->{column} > 0) {
1123 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a> column $err->{column}];
1124 } else {
1125 $err->{line} = $err->{line} - 1 || 1;
1126 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a>];
1127 }
1128 }
1129
1130 if (defined $err->{node}) {
1131 $r .= ' ' if length $r;
1132 $r = get_node_link ($input, $err->{node});
1133 }
1134
1135 if (defined $err->{index}) {
1136 if (length $r) {
1137 $r .= ', Index ' . (0+$err->{index});
1138 } else {
1139 $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1140 . (0+$err->{index}) . '</a>';
1141 }
1142 }
1143
1144 if (defined $err->{value}) {
1145 $r .= ' ' if length $r;
1146 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1147 }
1148
1149 return $r;
1150 } # get_error_label
1151
1152 sub get_error_level_label ($) {
1153 my $err = shift;
1154
1155 my $r = '';
1156
1157 if (not defined $err->{level} or $err->{level} eq 'm') {
1158 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1159 error</a></strong>: ];
1160 } elsif ($err->{level} eq 's') {
1161 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1162 error</a></strong>: ];
1163 } elsif ($err->{level} eq 'w') {
1164 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1165 ];
1166 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1167 $r = qq[<strong><a href="../error-description#level-u">Not
1168 supported</a></strong>: ];
1169 } elsif ($err->{level} eq 'i') {
1170 $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1171 } else {
1172 my $elevel = htescape ($err->{level});
1173 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1174 ];
1175 }
1176
1177 return $r;
1178 } # get_error_level_label
1179
1180 sub get_node_path ($) {
1181 my $node = shift;
1182 my @r;
1183 while (defined $node) {
1184 my $rs;
1185 if ($node->node_type == 1) {
1186 $rs = $node->manakai_local_name;
1187 $node = $node->parent_node;
1188 } elsif ($node->node_type == 2) {
1189 $rs = '@' . $node->manakai_local_name;
1190 $node = $node->owner_element;
1191 } elsif ($node->node_type == 3) {
1192 $rs = '"' . $node->data . '"';
1193 $node = $node->parent_node;
1194 } elsif ($node->node_type == 9) {
1195 @r = ('') unless @r;
1196 $rs = '';
1197 $node = $node->parent_node;
1198 } else {
1199 $rs = '#' . $node->node_type;
1200 $node = $node->parent_node;
1201 }
1202 unshift @r, $rs;
1203 }
1204 return join '/', @r;
1205 } # get_node_path
1206
1207 sub get_node_link ($$) {
1208 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1209 htescape (get_node_path ($_[1])) . qq[</a>];
1210 } # get_node_link
1211
1212 {
1213 my $Msg = {};
1214
1215 sub load_text_catalog ($) {
1216 my $lang = shift; # MUST be a canonical lang name
1217 open my $file, '<:utf8', "cc-msg.$lang.txt"
1218 or die "$0: cc-msg.$lang.txt: $!";
1219 while (<$file>) {
1220 if (s/^([^;]+);([^;]*);//) {
1221 my ($type, $cls, $msg) = ($1, $2, $_);
1222 $msg =~ tr/\x0D\x0A//d;
1223 $Msg->{$type} = [$cls, $msg];
1224 }
1225 }
1226 } # load_text_catalog
1227
1228 sub get_text ($) {
1229 my ($type, $level, $node) = @_;
1230 $type = $level . ':' . $type if defined $level;
1231 $level = 'm' unless defined $level;
1232 my @arg;
1233 {
1234 if (defined $Msg->{$type}) {
1235 my $msg = $Msg->{$type}->[1];
1236 $msg =~ s{<var>\$([0-9]+)</var>}{
1237 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1238 }ge;
1239 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1240 UNIVERSAL::can ($node, 'get_attribute_ns')
1241 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1242 }ge;
1243 $msg =~ s{<var>{\@}</var>}{
1244 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1245 }ge;
1246 $msg =~ s{<var>{local-name}</var>}{
1247 UNIVERSAL::can ($node, 'manakai_local_name')
1248 ? htescape ($node->manakai_local_name) : ''
1249 }ge;
1250 $msg =~ s{<var>{element-local-name}</var>}{
1251 (UNIVERSAL::can ($node, 'owner_element') and
1252 $node->owner_element)
1253 ? htescape ($node->owner_element->manakai_local_name)
1254 : ''
1255 }ge;
1256 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1257 } elsif ($type =~ s/:([^:]*)$//) {
1258 unshift @arg, $1;
1259 redo;
1260 }
1261 }
1262 return ($type, 'level-'.$level, htescape ($_[0]));
1263 } # get_text
1264
1265 }
1266
1267 sub get_input_document ($$) {
1268 my ($http, $dom) = @_;
1269
1270 my $request_uri = $http->get_parameter ('uri');
1271 my $r = {};
1272 if (defined $request_uri and length $request_uri) {
1273 my $uri = $dom->create_uri_reference ($request_uri);
1274 unless ({
1275 http => 1,
1276 }->{lc $uri->uri_scheme}) {
1277 return {uri => $request_uri, request_uri => $request_uri,
1278 error_status_text => 'URI scheme not allowed'};
1279 }
1280
1281 require Message::Util::HostPermit;
1282 my $host_permit = new Message::Util::HostPermit;
1283 $host_permit->add_rule (<<EOH);
1284 Allow host=suika port=80
1285 Deny host=suika
1286 Allow host=suika.fam.cx port=80
1287 Deny host=suika.fam.cx
1288 Deny host=localhost
1289 Deny host=*.localdomain
1290 Deny ipv4=0.0.0.0/8
1291 Deny ipv4=10.0.0.0/8
1292 Deny ipv4=127.0.0.0/8
1293 Deny ipv4=169.254.0.0/16
1294 Deny ipv4=172.0.0.0/11
1295 Deny ipv4=192.0.2.0/24
1296 Deny ipv4=192.88.99.0/24
1297 Deny ipv4=192.168.0.0/16
1298 Deny ipv4=198.18.0.0/15
1299 Deny ipv4=224.0.0.0/4
1300 Deny ipv4=255.255.255.255/32
1301 Deny ipv6=0::0/0
1302 Allow host=*
1303 EOH
1304 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1305 return {uri => $request_uri, request_uri => $request_uri,
1306 error_status_text => 'Connection to the host is forbidden'};
1307 }
1308
1309 require LWP::UserAgent;
1310 my $ua = WDCC::LWPUA->new;
1311 $ua->{wdcc_dom} = $dom;
1312 $ua->{wdcc_host_permit} = $host_permit;
1313 $ua->agent ('Mozilla'); ## TODO: for now.
1314 $ua->parse_head (0);
1315 $ua->protocols_allowed ([qw/http/]);
1316 $ua->max_size (1000_000);
1317 my $req = HTTP::Request->new (GET => $request_uri);
1318 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1319 my $res = $ua->request ($req);
1320 ## TODO: 401 sets |is_success| true.
1321 if ($res->is_success or $http->get_parameter ('error-page')) {
1322 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1323 $r->{uri} = $res->request->uri;
1324 $r->{request_uri} = $request_uri;
1325
1326 ## TODO: More strict parsing...
1327 my $ct = $res->header ('Content-Type');
1328 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1329 $r->{charset} = lc $1;
1330 $r->{charset} =~ tr/\\//d;
1331 $r->{official_charset} = $r->{charset};
1332 }
1333
1334 my $input_charset = $http->get_parameter ('charset');
1335 if (defined $input_charset and length $input_charset) {
1336 $r->{charset_overridden}
1337 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1338 $r->{charset} = $input_charset;
1339 }
1340
1341 ## TODO: Support for HTTP Content-Encoding
1342
1343 $r->{s} = ''.$res->content;
1344
1345 require Whatpm::ContentType;
1346 ($r->{official_type}, $r->{media_type})
1347 = Whatpm::ContentType->get_sniffed_type
1348 (get_file_head => sub {
1349 return substr $r->{s}, 0, shift;
1350 },
1351 http_content_type_byte => $ct,
1352 has_http_content_encoding =>
1353 defined $res->header ('Content-Encoding'),
1354 supported_image_types => {});
1355 } else {
1356 $r->{uri} = $res->request->uri;
1357 $r->{request_uri} = $request_uri;
1358 $r->{error_status_text} = $res->status_line;
1359 }
1360
1361 $r->{header_field} = [];
1362 $res->scan (sub {
1363 push @{$r->{header_field}}, [$_[0], $_[1]];
1364 });
1365 $r->{header_status_code} = $res->code;
1366 $r->{header_status_text} = $res->message;
1367 } else {
1368 $r->{s} = ''.$http->get_parameter ('s');
1369 $r->{uri} = q<thismessage:/>;
1370 $r->{request_uri} = q<thismessage:/>;
1371 $r->{base_uri} = q<thismessage:/>;
1372 $r->{charset} = ''.$http->get_parameter ('_charset_');
1373 $r->{charset} =~ s/\s+//g;
1374 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1375 $r->{official_charset} = $r->{charset};
1376 $r->{header_field} = [];
1377
1378 require Whatpm::ContentType;
1379 ($r->{official_type}, $r->{media_type})
1380 = Whatpm::ContentType->get_sniffed_type
1381 (get_file_head => sub {
1382 return substr $r->{s}, 0, shift;
1383 },
1384 http_content_type_byte => undef,
1385 has_http_content_encoding => 0,
1386 supported_image_types => {});
1387 }
1388
1389 my $input_format = $http->get_parameter ('i');
1390 if (defined $input_format and length $input_format) {
1391 $r->{media_type_overridden}
1392 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1393 $r->{media_type} = $input_format;
1394 }
1395 if (defined $r->{s} and not defined $r->{media_type}) {
1396 $r->{media_type} = 'text/html';
1397 $r->{media_type_overridden} = 1;
1398 }
1399
1400 if ($r->{media_type} eq 'text/xml') {
1401 unless (defined $r->{charset}) {
1402 $r->{charset} = 'us-ascii';
1403 $r->{official_charset} = $r->{charset};
1404 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1405 $r->{charset_overridden} = 0;
1406 }
1407 }
1408
1409 if (length $r->{s} > 1000_000) {
1410 $r->{error_status_text} = 'Entity-body too large';
1411 delete $r->{s};
1412 return $r;
1413 }
1414
1415 $r->{inner_html_element} = $http->get_parameter ('e');
1416
1417 return $r;
1418 } # get_input_document
1419
1420 package WDCC::LWPUA;
1421 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1422
1423 sub redirect_ok {
1424 my $ua = shift;
1425 unless ($ua->SUPER::redirect_ok (@_)) {
1426 return 0;
1427 }
1428
1429 my $uris = $_[1]->header ('Location');
1430 return 0 unless $uris;
1431 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1432 unless ({
1433 http => 1,
1434 }->{lc $uri->uri_scheme}) {
1435 return 0;
1436 }
1437 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1438 return 0;
1439 }
1440 return 1;
1441 } # redirect_ok
1442
1443 =head1 AUTHOR
1444
1445 Wakaba <w@suika.fam.cx>.
1446
1447 =head1 LICENSE
1448
1449 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1450
1451 This library is free software; you can redistribute it
1452 and/or modify it under the same terms as Perl itself.
1453
1454 =cut
1455
1456 ## $Date: 2008/02/24 02:17:51 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24