/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.45 - (show annotations) (download)
Fri Mar 21 08:59:47 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.44: +62 -2 lines
++ ChangeLog	21 Mar 2008 08:59:37 -0000
2008-03-21  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi: Experimental support for application/rdf+xml type.
	Generate section on RDF triples extracted from the document.
	(print_rdf_section): New function.
	(get_rdf_resource_html): New function.

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89
90 <script src="../cc-script.js"></script>
91 ];
92
93 $input->{id_prefix} = '';
94 #$input->{nested} = 0;
95 my $result = {conforming_min => 1, conforming_max => 1};
96 check_and_print ($input => $result);
97 print_result_section ($result);
98 } else {
99 print STDOUT qq[</dl></div>];
100 print_result_input_error_section ($input);
101 }
102
103 print STDOUT qq[
104 <ul class="navigation" id="nav-items">
105 ];
106 for (@nav) {
107 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108 }
109 print STDOUT qq[
110 </ul>
111 </body>
112 </html>
113 ];
114
115 for (qw/decode parse parse_html parse_xml parse_manifest
116 check check_manifest/) {
117 next unless defined $time{$_};
118 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119 print $file $char_length, "\t", $time{$_}, "\n";
120 }
121
122 exit;
123 }
124
125 sub add_error ($$$) {
126 my ($layer, $err, $result) = @_;
127 if (defined $err->{level}) {
128 if ($err->{level} eq 's') {
129 $result->{$layer}->{should}++;
130 $result->{$layer}->{score_min} -= 2;
131 $result->{conforming_min} = 0;
132 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133 $result->{$layer}->{warning}++;
134 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 $result->{$layer}->{unsupported}++;
136 $result->{unsupported} = 1;
137 } elsif ($err->{level} eq 'i') {
138 #
139 } else {
140 $result->{$layer}->{must}++;
141 $result->{$layer}->{score_max} -= 2;
142 $result->{$layer}->{score_min} -= 2;
143 $result->{conforming_min} = 0;
144 $result->{conforming_max} = 0;
145 }
146 } else {
147 $result->{$layer}->{must}++;
148 $result->{$layer}->{score_max} -= 2;
149 $result->{$layer}->{score_min} -= 2;
150 $result->{conforming_min} = 0;
151 $result->{conforming_max} = 0;
152 }
153 } # add_error
154
155 sub check_and_print ($$) {
156 my ($input, $result) = @_;
157
158 print_http_header_section ($input, $result);
159
160 my $doc;
161 my $el;
162 my $cssom;
163 my $manifest;
164 my @subdoc;
165
166 if ($input->{media_type} eq 'text/html') {
167 ($doc, $el) = print_syntax_error_html_section ($input, $result);
168 print_source_string_section
169 ($input,
170 \($input->{s}),
171 $input->{charset} || $doc->input_encoding);
172 } elsif ({
173 'text/xml' => 1,
174 'application/atom+xml' => 1,
175 'application/rss+xml' => 1,
176 'image/svg+xml' => 1,
177 'application/xhtml+xml' => 1,
178 'application/xml' => 1,
179 ## TODO: Should we make all XML MIME Types fall
180 ## into this category?
181
182 'application/rdf+xml' => 1, ## NOTE: This type has different model.
183 }->{$input->{media_type}}) {
184 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
185 print_source_string_section ($input,
186 \($input->{s}),
187 $doc->input_encoding);
188 } elsif ($input->{media_type} eq 'text/css') {
189 $cssom = print_syntax_error_css_section ($input, $result);
190 print_source_string_section
191 ($input, \($input->{s}),
192 $cssom->manakai_input_encoding);
193 } elsif ($input->{media_type} eq 'text/cache-manifest') {
194 ## TODO: MUST be text/cache-manifest
195 $manifest = print_syntax_error_manifest_section ($input, $result);
196 print_source_string_section ($input, \($input->{s}),
197 'utf-8');
198 } else {
199 ## TODO: Change HTTP status code??
200 print_result_unknown_type_section ($input, $result);
201 }
202
203 if (defined $doc or defined $el) {
204 $doc->document_uri ($input->{uri});
205 $doc->manakai_entity_base_uri ($input->{base_uri});
206 print_structure_dump_dom_section ($input, $doc, $el);
207 my $elements = print_structure_error_dom_section
208 ($input, $doc, $el, $result, sub {
209 push @subdoc, shift;
210 });
211 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
212 print_listing_section ({
213 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
214 }, $input, $elements->{id}) if keys %{$elements->{id}};
215 print_listing_section ({
216 id => 'terms', label => 'Terms', heading => 'Terms',
217 }, $input, $elements->{term}) if keys %{$elements->{term}};
218 print_listing_section ({
219 id => 'classes', label => 'Classes', heading => 'Classes',
220 }, $input, $elements->{class}) if keys %{$elements->{class}};
221 print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
222 } elsif (defined $cssom) {
223 print_structure_dump_cssom_section ($input, $cssom);
224 ## TODO: CSSOM validation
225 add_error ('structure', {level => 'u'} => $result);
226 } elsif (defined $manifest) {
227 print_structure_dump_manifest_section ($input, $manifest);
228 print_structure_error_manifest_section ($input, $manifest, $result);
229 }
230
231 my $id_prefix = 0;
232 for my $subinput (@subdoc) {
233 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
234 $subinput->{nested} = 1;
235 $subinput->{base_uri} = $subinput->{container_node}->base_uri
236 unless defined $subinput->{base_uri};
237 my $ebaseuri = htescape ($subinput->{base_uri});
238 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
239 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
240 <h2>Subdocument #$id_prefix</h2>
241
242 <dl>
243 <dt>Internet Media Type</dt>
244 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
245 <dt>Container Node</dt>
246 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
247 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
248 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
249 </dl>];
250
251 $subinput->{id_prefix} .= '-';
252 check_and_print ($subinput => $result);
253
254 print STDOUT qq[</div>];
255 }
256 } # check_and_print
257
258 sub print_http_header_section ($$) {
259 my ($input, $result) = @_;
260 return unless defined $input->{header_status_code} or
261 defined $input->{header_status_text} or
262 @{$input->{header_field} or []};
263
264 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
265 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
266 <h2>HTTP Header</h2>
267
268 <p><strong>Note</strong>: Due to the limitation of the
269 network library in use, the content of this section might
270 not be the real header.</p>
271
272 <table><tbody>
273 ];
274
275 if (defined $input->{header_status_code}) {
276 print STDOUT qq[<tr><th scope="row">Status code</th>];
277 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
278 }
279 if (defined $input->{header_status_text}) {
280 print STDOUT qq[<tr><th scope="row">Status text</th>];
281 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
282 }
283
284 for (@{$input->{header_field}}) {
285 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
286 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
287 }
288
289 print STDOUT qq[</tbody></table></div>];
290 } # print_http_header_section
291
292 sub print_syntax_error_html_section ($$) {
293 my ($input, $result) = @_;
294
295 require Encode;
296 require Whatpm::HTML;
297
298 print STDOUT qq[
299 <div id="$input->{id_prefix}parse-errors" class="section">
300 <h2>Parse Errors</h2>
301
302 <dl id="$input->{id_prefix}parse-errors-list">];
303 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
304
305 my $onerror = sub {
306 my (%opt) = @_;
307 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
308 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
309 qq[</dt>];
310 $type =~ tr/ /-/;
311 $type =~ s/\|/%7C/g;
312 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
313 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
314 print STDOUT qq[$msg</dd>\n];
315
316 add_error ('syntax', \%opt => $result);
317 };
318
319 my $doc = $dom->create_document;
320 my $el;
321 my $inner_html_element = $input->{inner_html_element};
322 if (defined $inner_html_element and length $inner_html_element) {
323 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
324 my $time1 = time;
325 my $t = Encode::decode ($input->{charset}, $input->{s});
326 $time{decode} = time - $time1;
327
328 $el = $doc->create_element_ns
329 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
330 $time1 = time;
331 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
332 $time{parse} = time - $time1;
333 } else {
334 my $time1 = time;
335 Whatpm::HTML->parse_byte_string
336 ($input->{charset}, $input->{s} => $doc, $onerror);
337 $time{parse_html} = time - $time1;
338 }
339 $doc->manakai_charset ($input->{official_charset})
340 if defined $input->{official_charset};
341
342 print STDOUT qq[</dl></div>];
343
344 return ($doc, $el);
345 } # print_syntax_error_html_section
346
347 sub print_syntax_error_xml_section ($$) {
348 my ($input, $result) = @_;
349
350 require Message::DOM::XMLParserTemp;
351
352 print STDOUT qq[
353 <div id="$input->{id_prefix}parse-errors" class="section">
354 <h2>Parse Errors</h2>
355
356 <dl id="$input->{id_prefix}parse-errors-list">];
357 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
358
359 my $onerror = sub {
360 my $err = shift;
361 my $line = $err->location->line_number;
362 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
363 print STDOUT $err->location->column_number, "</dt><dd>";
364 print STDOUT htescape $err->text, "</dd>\n";
365
366 add_error ('syntax', {type => $err->text,
367 level => [
368 $err->SEVERITY_FATAL_ERROR => 'm',
369 $err->SEVERITY_ERROR => 'm',
370 $err->SEVERITY_WARNING => 's',
371 ]->[$err->severity]} => $result);
372
373 return 1;
374 };
375
376 my $time1 = time;
377 open my $fh, '<', \($input->{s});
378 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
379 ($fh => $dom, $onerror, charset => $input->{charset});
380 $time{parse_xml} = time - $time1;
381 $doc->manakai_charset ($input->{official_charset})
382 if defined $input->{official_charset};
383
384 print STDOUT qq[</dl></div>];
385
386 return ($doc, undef);
387 } # print_syntax_error_xml_section
388
389 sub get_css_parser () {
390 our $CSSParser;
391 return $CSSParser if $CSSParser;
392
393 require Whatpm::CSS::Parser;
394 my $p = Whatpm::CSS::Parser->new;
395
396 $p->{prop}->{$_} = 1 for qw/
397 alignment-baseline
398 background background-attachment background-color background-image
399 background-position background-position-x background-position-y
400 background-repeat border border-bottom border-bottom-color
401 border-bottom-style border-bottom-width border-collapse border-color
402 border-left border-left-color
403 border-left-style border-left-width border-right border-right-color
404 border-right-style border-right-width
405 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
406 border-style border-top border-top-color border-top-style border-top-width
407 border-width bottom
408 caption-side clear clip color content counter-increment counter-reset
409 cursor direction display dominant-baseline empty-cells float font
410 font-family font-size font-size-adjust font-stretch
411 font-style font-variant font-weight height left
412 letter-spacing line-height
413 list-style list-style-image list-style-position list-style-type
414 margin margin-bottom margin-left margin-right margin-top marker-offset
415 marks max-height max-width min-height min-width opacity -moz-opacity
416 orphans outline outline-color outline-style outline-width overflow
417 overflow-x overflow-y
418 padding padding-bottom padding-left padding-right padding-top
419 page page-break-after page-break-before page-break-inside
420 position quotes right size table-layout
421 text-align text-anchor text-decoration text-indent text-transform
422 top unicode-bidi vertical-align visibility white-space width widows
423 word-spacing writing-mode z-index
424 /;
425 $p->{prop_value}->{display}->{$_} = 1 for qw/
426 block clip inline inline-block inline-table list-item none
427 table table-caption table-cell table-column table-column-group
428 table-header-group table-footer-group table-row table-row-group
429 compact marker
430 /;
431 $p->{prop_value}->{position}->{$_} = 1 for qw/
432 absolute fixed relative static
433 /;
434 $p->{prop_value}->{float}->{$_} = 1 for qw/
435 left right none
436 /;
437 $p->{prop_value}->{clear}->{$_} = 1 for qw/
438 left right none both
439 /;
440 $p->{prop_value}->{direction}->{ltr} = 1;
441 $p->{prop_value}->{direction}->{rtl} = 1;
442 $p->{prop_value}->{marks}->{crop} = 1;
443 $p->{prop_value}->{marks}->{cross} = 1;
444 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
445 normal bidi-override embed
446 /;
447 for my $prop_name (qw/overflow overflow-x overflow-y/) {
448 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
449 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
450 /;
451 }
452 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
453 visible hidden collapse
454 /;
455 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
456 disc circle square decimal decimal-leading-zero
457 lower-roman upper-roman lower-greek lower-latin
458 upper-latin armenian georgian lower-alpha upper-alpha none
459 hebrew cjk-ideographic hiragana katakana hiragana-iroha
460 katakana-iroha
461 /;
462 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
463 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
464 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
465 auto always avoid left right
466 /;
467 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
468 auto always avoid left right
469 /;
470 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
471 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
472 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
473 repeat repeat-x repeat-y no-repeat
474 /;
475 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
476 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
477 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
478 xx-small x-small small medium large x-large xx-large
479 -manakai-xxx-large -webkit-xxx-large
480 larger smaller
481 /;
482 $p->{prop_value}->{'font-style'}->{normal} = 1;
483 $p->{prop_value}->{'font-style'}->{italic} = 1;
484 $p->{prop_value}->{'font-style'}->{oblique} = 1;
485 $p->{prop_value}->{'font-variant'}->{normal} = 1;
486 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
487 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
488 qw/normal wider narrower ultra-condensed extra-condensed
489 condensed semi-condensed semi-expanded expanded
490 extra-expanded ultra-expanded/;
491 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
492 left right center justify begin end
493 /;
494 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
495 capitalize uppercase lowercase none
496 /;
497 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
498 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
499 /;
500 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
501 lr rl tb lr-tb rl-tb tb-rl
502 /;
503 $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
504 start middle end
505 /;
506 $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
507 auto use-script no-change reset-size ideographic alphabetic
508 hanging mathematical central middle text-after-edge text-before-edge
509 /;
510 $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
511 auto baseline before-edge text-before-edge middle central
512 after-edge text-after-edge ideographic alphabetic hanging
513 mathematical
514 /;
515 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
516 none blink underline overline line-through
517 /;
518 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
519 top bottom left right
520 /;
521 $p->{prop_value}->{'table-layout'}->{auto} = 1;
522 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
523 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
524 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
525 $p->{prop_value}->{'empty-cells'}->{show} = 1;
526 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
527 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
528 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
529 se-resize sw-resize s-resize w-resize text wait help progress
530 /;
531 for my $prop (qw/border-top-style border-left-style
532 border-bottom-style border-right-style outline-style/) {
533 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
534 none hidden dotted dashed solid double groove ridge inset outset
535 /;
536 }
537 for my $prop (qw/color background-color
538 border-bottom-color border-left-color border-right-color
539 border-top-color border-color/) {
540 $p->{prop_value}->{$prop}->{transparent} = 1;
541 $p->{prop_value}->{$prop}->{flavor} = 1;
542 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
543 }
544 $p->{prop_value}->{'outline-color'}->{invert} = 1;
545 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
546 $p->{pseudo_class}->{$_} = 1 for qw/
547 active checked disabled empty enabled first-child first-of-type
548 focus hover indeterminate last-child last-of-type link only-child
549 only-of-type root target visited
550 lang nth-child nth-last-child nth-of-type nth-last-of-type not
551 -manakai-contains -manakai-current
552 /;
553 $p->{pseudo_element}->{$_} = 1 for qw/
554 after before first-letter first-line
555 /;
556
557 return $CSSParser = $p;
558 } # get_css_parser
559
560 sub print_syntax_error_css_section ($$) {
561 my ($input, $result) = @_;
562
563 print STDOUT qq[
564 <div id="$input->{id_prefix}parse-errors" class="section">
565 <h2>Parse Errors</h2>
566
567 <dl id="$input->{id_prefix}parse-errors-list">];
568 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
569
570 my $p = get_css_parser ();
571 $p->init;
572 $p->{onerror} = sub {
573 my (%opt) = @_;
574 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
575 if ($opt{token}) {
576 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
577 } else {
578 print STDOUT qq[<dt class="$cls">Unknown location];
579 }
580 if (defined $opt{value}) {
581 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
582 } elsif (defined $opt{token}) {
583 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
584 }
585 $type =~ tr/ /-/;
586 $type =~ s/\|/%7C/g;
587 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
588 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
589 print STDOUT qq[$msg</dd>\n];
590
591 add_error ('syntax', \%opt => $result);
592 };
593 $p->{href} = $input->{uri};
594 $p->{base_uri} = $input->{base_uri};
595
596 # if ($parse_mode eq 'q') {
597 # $p->{unitless_px} = 1;
598 # $p->{hashless_color} = 1;
599 # }
600
601 ## TODO: Make $input->{s} a ref.
602
603 my $s = \$input->{s};
604 my $charset;
605 unless ($input->{is_char_string}) {
606 require Encode;
607 if (defined $input->{charset}) {## TODO: IANA->Perl
608 $charset = $input->{charset};
609 $s = \(Encode::decode ($input->{charset}, $$s));
610 } else {
611 ## TODO: charset detection
612 $s = \(Encode::decode ($charset = 'utf-8', $$s));
613 }
614 }
615
616 my $cssom = $p->parse_char_string ($$s);
617 $cssom->manakai_input_encoding ($charset) if defined $charset;
618
619 print STDOUT qq[</dl></div>];
620
621 return $cssom;
622 } # print_syntax_error_css_section
623
624 sub print_syntax_error_manifest_section ($$) {
625 my ($input, $result) = @_;
626
627 require Whatpm::CacheManifest;
628
629 print STDOUT qq[
630 <div id="$input->{id_prefix}parse-errors" class="section">
631 <h2>Parse Errors</h2>
632
633 <dl id="$input->{id_prefix}parse-errors-list">];
634 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
635
636 my $onerror = sub {
637 my (%opt) = @_;
638 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
639 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
640 qq[</dt>];
641 $type =~ tr/ /-/;
642 $type =~ s/\|/%7C/g;
643 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
644 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
645 print STDOUT qq[$msg</dd>\n];
646
647 add_error ('syntax', \%opt => $result);
648 };
649
650 my $time1 = time;
651 my $manifest = Whatpm::CacheManifest->parse_byte_string
652 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
653 $time{parse_manifest} = time - $time1;
654
655 print STDOUT qq[</dl></div>];
656
657 return $manifest;
658 } # print_syntax_error_manifest_section
659
660 sub print_source_string_section ($$$) {
661 my $input = shift;
662 my $s;
663 unless ($input->{is_char_string}) {
664 require Encode;
665 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
666 return unless $enc;
667
668 $s = \($enc->decode (${$_[0]}));
669 } else {
670 $s = $_[0];
671 }
672
673 my $i = 1;
674 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
675 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
676 <h2>Document Source</h2>
677 <ol lang="">\n];
678 if (length $$s) {
679 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
680 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
681 "</li>\n";
682 $i++;
683 }
684 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
685 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
686 "</li>\n";
687 }
688 } else {
689 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
690 }
691 print STDOUT "</ol></div>
692 <script>
693 addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
694 </script>";
695 } # print_input_string_section
696
697 sub print_document_tree ($$) {
698 my ($input, $node) = @_;
699
700 my $r = '<ol class="xoxo">';
701
702 my @node = ($node);
703 while (@node) {
704 my $child = shift @node;
705 unless (ref $child) {
706 $r .= $child;
707 next;
708 }
709
710 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
711 my $nt = $child->node_type;
712 if ($nt == $child->ELEMENT_NODE) {
713 my $child_nsuri = $child->namespace_uri;
714 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
715 '</code>'; ## ISSUE: case
716
717 if ($child->has_attributes) {
718 $r .= '<ul class="attributes">';
719 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
720 @{$child->attributes}) {
721 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
722 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
723 }
724 $r .= '</ul>';
725 }
726
727 if ($child->has_child_nodes) {
728 $r .= '<ol class="children">';
729 unshift @node, @{$child->child_nodes}, '</ol></li>';
730 } else {
731 $r .= '</li>';
732 }
733 } elsif ($nt == $child->TEXT_NODE) {
734 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
735 } elsif ($nt == $child->CDATA_SECTION_NODE) {
736 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
737 } elsif ($nt == $child->COMMENT_NODE) {
738 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
739 } elsif ($nt == $child->DOCUMENT_NODE) {
740 $r .= qq'<li id="$node_id" class="tree-document">Document';
741 $r .= qq[<ul class="attributes">];
742 my $cp = $child->manakai_charset;
743 if (defined $cp) {
744 $r .= qq[<li><code>charset</code> parameter = <code>];
745 $r .= htescape ($cp) . qq[</code></li>];
746 }
747 $r .= qq[<li><code>inputEncoding</code> = ];
748 my $ie = $child->input_encoding;
749 if (defined $ie) {
750 $r .= qq[<code>@{[htescape ($ie)]}</code>];
751 if ($child->manakai_has_bom) {
752 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
753 }
754 } else {
755 $r .= qq[(<code>null</code>)];
756 }
757 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
758 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
759 unless ($child->manakai_is_html) {
760 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
761 if (defined $child->xml_encoding) {
762 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
763 } else {
764 $r .= qq[<li>XML encoding = (null)</li>];
765 }
766 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
767 }
768 $r .= qq[</ul>];
769 if ($child->has_child_nodes) {
770 $r .= '<ol class="children">';
771 unshift @node, @{$child->child_nodes}, '</ol></li>';
772 }
773 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
774 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
775 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
776 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
777 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
778 $r .= '</ul></li>';
779 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
780 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
781 } else {
782 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
783 }
784 }
785
786 $r .= '</ol>';
787 print STDOUT $r;
788 } # print_document_tree
789
790 sub print_structure_dump_dom_section ($$$) {
791 my ($input, $doc, $el) = @_;
792
793 print STDOUT qq[
794 <div id="$input->{id_prefix}document-tree" class="section">
795 <h2>Document Tree</h2>
796 ];
797 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
798 unless $input->{nested};
799
800 print_document_tree ($input, $el || $doc);
801
802 print STDOUT qq[</div>];
803 } # print_structure_dump_dom_section
804
805 sub print_structure_dump_cssom_section ($$) {
806 my ($input, $cssom) = @_;
807
808 print STDOUT qq[
809 <div id="$input->{id_prefix}document-tree" class="section">
810 <h2>Document Tree</h2>
811 ];
812 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
813 unless $input->{nested};
814
815 ## TODO:
816 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
817
818 print STDOUT qq[</div>];
819 } # print_structure_dump_cssom_section
820
821 sub print_structure_dump_manifest_section ($$) {
822 my ($input, $manifest) = @_;
823
824 print STDOUT qq[
825 <div id="$input->{id_prefix}dump-manifest" class="section">
826 <h2>Cache Manifest</h2>
827 ];
828 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
829 unless $input->{nested};
830
831 print STDOUT qq[<dl><dt>Explicit entries</dt>];
832 my $i = 0;
833 for my $uri (@{$manifest->[0]}) {
834 my $euri = htescape ($uri);
835 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
836 }
837
838 print STDOUT qq[<dt>Fallback entries</dt><dd>
839 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
840 <th scope=row>Fallback Entry</tr><tbody>];
841 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
842 my $euri = htescape ($uri);
843 my $euri2 = htescape ($manifest->[1]->{$uri});
844 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
845 <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
846 }
847
848 print STDOUT qq[</table><dt>Online whitelist</dt>];
849 for my $uri (@{$manifest->[2]}) {
850 my $euri = htescape ($uri);
851 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
852 }
853
854 print STDOUT qq[</dl></div>];
855 } # print_structure_dump_manifest_section
856
857 sub print_structure_error_dom_section ($$$$$) {
858 my ($input, $doc, $el, $result, $onsubdoc) = @_;
859
860 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
861 <h2>Document Errors</h2>
862
863 <dl id=document-errors-list>];
864 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
865 unless $input->{nested};
866
867 require Whatpm::ContentChecker;
868 my $onerror = sub {
869 my %opt = @_;
870 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
871 $type =~ tr/ /-/;
872 $type =~ s/\|/%7C/g;
873 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
874 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
875 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
876 print STDOUT $msg, "</dd>\n";
877 add_error ('structure', \%opt => $result);
878 };
879
880 my $elements;
881 my $time1 = time;
882 if ($el) {
883 $elements = Whatpm::ContentChecker->check_element
884 ($el, $onerror, $onsubdoc);
885 } else {
886 $elements = Whatpm::ContentChecker->check_document
887 ($doc, $onerror, $onsubdoc);
888 }
889 $time{check} = time - $time1;
890
891 print STDOUT qq[</dl>
892 <script>
893 addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
894 </script></div>];
895
896 return $elements;
897 } # print_structure_error_dom_section
898
899 sub print_structure_error_manifest_section ($$$) {
900 my ($input, $manifest, $result) = @_;
901
902 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
903 <h2>Document Errors</h2>
904
905 <dl>];
906 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
907 unless $input->{nested};
908
909 require Whatpm::CacheManifest;
910 Whatpm::CacheManifest->check_manifest ($manifest, sub {
911 my %opt = @_;
912 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
913 $type =~ tr/ /-/;
914 $type =~ s/\|/%7C/g;
915 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
916 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
917 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
918 add_error ('structure', \%opt => $result);
919 });
920
921 print STDOUT qq[</div>];
922 } # print_structure_error_manifest_section
923
924 sub print_table_section ($$) {
925 my ($input, $tables) = @_;
926
927 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
928 unless $input->{nested};
929 print STDOUT qq[
930 <div id="$input->{id_prefix}tables" class="section">
931 <h2>Tables</h2>
932
933 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
934 <script src="../table-script.js" type="text/javascript"></script>
935 <noscript>
936 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
937 </noscript>
938 ];
939
940 require JSON;
941
942 my $i = 0;
943 for my $table_el (@$tables) {
944 $i++;
945 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
946 get_node_link ($input, $table_el) . q[</h3>];
947
948 ## TODO: Make |ContentChecker| return |form_table| result
949 ## so that this script don't have to run the algorithm twice.
950 my $table = Whatpm::HTMLTable->form_table ($table_el);
951
952 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
953 next unless $_;
954 delete $_->{element};
955 }
956
957 for (@{$table->{row_group}}) {
958 next unless $_;
959 next unless $_->{element};
960 $_->{type} = $_->{element}->manakai_local_name;
961 delete $_->{element};
962 }
963
964 for (@{$table->{cell}}) {
965 next unless $_;
966 for (@{$_}) {
967 next unless $_;
968 for (@$_) {
969 $_->{id} = refaddr $_->{element} if defined $_->{element};
970 delete $_->{element};
971 $_->{is_header} = $_->{is_header} ? 1 : 0;
972 }
973 }
974 }
975
976 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
977 print STDOUT JSON::objToJson ($table);
978 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
979 print STDOUT qq[, '$input->{id_prefix}');</script>];
980 }
981
982 print STDOUT qq[</div>];
983 } # print_table_section
984
985 sub print_listing_section ($$$) {
986 my ($opt, $input, $ids) = @_;
987
988 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
989 unless $input->{nested};
990 print STDOUT qq[
991 <div id="$input->{id_prefix}$opt->{id}" class="section">
992 <h2>$opt->{heading}</h2>
993
994 <dl>
995 ];
996 for my $id (sort {$a cmp $b} keys %$ids) {
997 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
998 for (@{$ids->{$id}}) {
999 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
1000 }
1001 }
1002 print STDOUT qq[</dl></div>];
1003 } # print_listing_section
1004
1005 sub print_rdf_section ($$$) {
1006 my ($input, $rdfs) = @_;
1007
1008 push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
1009 unless $input->{nested};
1010 print STDOUT qq[
1011 <div id="$input->{id_prefix}rdf" class="section">
1012 <h2>RDF Triples</h2>
1013
1014 <dl>];
1015 my $i = 0;
1016 for my $rdf (@$rdfs) {
1017 print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
1018 print STDOUT get_node_link ($input, $rdf->[0]);
1019 print STDOUT qq[<dd><dl>];
1020 for my $triple (@{$rdf->[1]}) {
1021 print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
1022 print STDOUT get_rdf_resource_html ($triple->[1]);
1023 print STDOUT ' ';
1024 print STDOUT get_rdf_resource_html ($triple->[2]);
1025 print STDOUT ' ';
1026 print STDOUT get_rdf_resource_html ($triple->[3]);
1027 }
1028 print STDOUT qq[</dl>];
1029 }
1030 print STDOUT qq[</dl></div>];
1031 } # print_rdf_section
1032
1033 sub get_rdf_resource_html ($) {
1034 my $resource = shift;
1035 if ($resource->{uri}) {
1036 my $euri = htescape ($resource->{uri});
1037 return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1038 '</a>></code>';
1039 } elsif ($resource->{bnodeid}) {
1040 return htescape ('_:' . $resource->{bnodeid});
1041 } elsif ($resource->{nodes}) {
1042 return '(rdf:XMLLiteral)';
1043 } elsif (defined $resource->{value}) {
1044 my $elang = htescape (defined $resource->{language}
1045 ? $resource->{language} : '');
1046 my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
1047 if (defined $resource->{datatype}) {
1048 my $euri = htescape ($resource->{datatype});
1049 $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1050 '</a>></code>';
1051 } elsif (length $resource->{language}) {
1052 $r .= '@' . htescape ($resource->{language});
1053 }
1054 return $r;
1055 } else {
1056 return '??';
1057 }
1058 } # get_rdf_resource_html
1059
1060 sub print_result_section ($) {
1061 my $result = shift;
1062
1063 print STDOUT qq[
1064 <div id="result-summary" class="section">
1065 <h2>Result</h2>];
1066
1067 if ($result->{unsupported} and $result->{conforming_max}) {
1068 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1069 checker cannot decide whether the document is conforming or
1070 not, since the document contains one or more unsupported
1071 features. The document might or might not be conforming.</p>];
1072 } elsif ($result->{conforming_min}) {
1073 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1074 found in this document.</p>];
1075 } elsif ($result->{conforming_max}) {
1076 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1077 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1078 it might be conforming.</p>];
1079 } else {
1080 print STDOUT qq[<p class=FAIL id=result-para>This document is
1081 <strong><em>non</em>-conforming</strong>.</p>];
1082 }
1083
1084 print STDOUT qq[<table>
1085 <colgroup><col><colgroup><col><col><col><colgroup><col>
1086 <thead>
1087 <tr><th scope=col></th>
1088 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1089 Errors</a></th>
1090 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1091 Errors</a></th>
1092 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1093 <th scope=col>Score</th></tr></thead><tbody>];
1094
1095 my $must_error = 0;
1096 my $should_error = 0;
1097 my $warning = 0;
1098 my $score_min = 0;
1099 my $score_max = 0;
1100 my $score_base = 20;
1101 my $score_unit = $score_base / 100;
1102 for (
1103 [Transfer => 'transfer', ''],
1104 [Character => 'char', ''],
1105 [Syntax => 'syntax', '#parse-errors'],
1106 [Structure => 'structure', '#document-errors'],
1107 ) {
1108 $must_error += ($result->{$_->[1]}->{must} += 0);
1109 $should_error += ($result->{$_->[1]}->{should} += 0);
1110 $warning += ($result->{$_->[1]}->{warning} += 0);
1111 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1112 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1113
1114 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1115 my $label = $_->[0];
1116 if ($result->{$_->[1]}->{must} or
1117 $result->{$_->[1]}->{should} or
1118 $result->{$_->[1]}->{warning} or
1119 $result->{$_->[1]}->{unsupported}) {
1120 $label = qq[<a href="$_->[2]">$label</a>];
1121 }
1122
1123 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1124 if ($uncertain) {
1125 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1126 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1127 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1128 } else {
1129 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1130 }
1131 }
1132
1133 $score_max += $score_base;
1134
1135 print STDOUT qq[
1136 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1137 </tbody>
1138 <tfoot><tr class=uncertain><th scope=row>Total</th>
1139 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1140 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1141 <td>$warning?</td>
1142 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1143 </table>
1144
1145 <p><strong>Important</strong>: This conformance checking service
1146 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1147 </div>];
1148 push @nav, ['#result-summary' => 'Result'];
1149 } # print_result_section
1150
1151 sub print_result_unknown_type_section ($$) {
1152 my ($input, $result) = @_;
1153
1154 my $euri = htescape ($input->{uri});
1155 print STDOUT qq[
1156 <div id="$input->{id_prefix}parse-errors" class="section">
1157 <h2>Errors</h2>
1158
1159 <dl>
1160 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1161 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1162 supported</a></strong>:
1163 Media type
1164 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1165 is not supported.</dd>
1166 </dl>
1167 </div>
1168 ];
1169 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1170 unless $input->{nested};
1171 add_error (char => {level => 'u'} => $result);
1172 add_error (syntax => {level => 'u'} => $result);
1173 add_error (structure => {level => 'u'} => $result);
1174 } # print_result_unknown_type_section
1175
1176 sub print_result_input_error_section ($) {
1177 my $input = shift;
1178 print STDOUT qq[<div class="section" id="result-summary">
1179 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1180 </div>];
1181 push @nav, ['#result-summary' => 'Result'];
1182 } # print_result_input_error_section
1183
1184 sub get_error_label ($$) {
1185 my ($input, $err) = @_;
1186
1187 my $r = '';
1188
1189 my $line;
1190 my $column;
1191
1192 if (defined $err->{node}) {
1193 $line = $err->{node}->get_user_data ('manakai_source_line');
1194 if (defined $line) {
1195 $column = $err->{node}->get_user_data ('manakai_source_column');
1196 } else {
1197 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1198 my $owner = $err->{node}->owner_element;
1199 $line = $owner->get_user_data ('manakai_source_line');
1200 $column = $owner->get_user_data ('manakai_source_column');
1201 } else {
1202 my $parent = $err->{node}->parent_node;
1203 if ($parent) {
1204 $line = $parent->get_user_data ('manakai_source_line');
1205 $column = $parent->get_user_data ('manakai_source_column');
1206 }
1207 }
1208 }
1209 }
1210 unless (defined $line) {
1211 if (defined $err->{token} and defined $err->{token}->{line}) {
1212 $line = $err->{token}->{line};
1213 $column = $err->{token}->{column};
1214 } elsif (defined $err->{line}) {
1215 $line = $err->{line};
1216 $column = $err->{column};
1217 }
1218 }
1219
1220 if (defined $line) {
1221 if (defined $column and $column > 0) {
1222 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1223 } else {
1224 $line = $line - 1 || 1;
1225 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1226 }
1227 }
1228
1229 if (defined $err->{node}) {
1230 $r .= ' ' if length $r;
1231 $r .= get_node_link ($input, $err->{node});
1232 }
1233
1234 if (defined $err->{index}) {
1235 if (length $r) {
1236 $r .= ', Index ' . (0+$err->{index});
1237 } else {
1238 $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1239 . (0+$err->{index}) . '</a>';
1240 }
1241 }
1242
1243 if (defined $err->{value}) {
1244 $r .= ' ' if length $r;
1245 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1246 }
1247
1248 return $r;
1249 } # get_error_label
1250
1251 sub get_error_level_label ($) {
1252 my $err = shift;
1253
1254 my $r = '';
1255
1256 if (not defined $err->{level} or $err->{level} eq 'm') {
1257 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1258 error</a></strong>: ];
1259 } elsif ($err->{level} eq 's') {
1260 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1261 error</a></strong>: ];
1262 } elsif ($err->{level} eq 'w') {
1263 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1264 ];
1265 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1266 $r = qq[<strong><a href="../error-description#level-u">Not
1267 supported</a></strong>: ];
1268 } elsif ($err->{level} eq 'i') {
1269 $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1270 } else {
1271 my $elevel = htescape ($err->{level});
1272 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1273 ];
1274 }
1275
1276 return $r;
1277 } # get_error_level_label
1278
1279 sub get_node_path ($) {
1280 my $node = shift;
1281 my @r;
1282 while (defined $node) {
1283 my $rs;
1284 if ($node->node_type == 1) {
1285 $rs = $node->manakai_local_name;
1286 $node = $node->parent_node;
1287 } elsif ($node->node_type == 2) {
1288 $rs = '@' . $node->manakai_local_name;
1289 $node = $node->owner_element;
1290 } elsif ($node->node_type == 3) {
1291 $rs = '"' . $node->data . '"';
1292 $node = $node->parent_node;
1293 } elsif ($node->node_type == 9) {
1294 @r = ('') unless @r;
1295 $rs = '';
1296 $node = $node->parent_node;
1297 } else {
1298 $rs = '#' . $node->node_type;
1299 $node = $node->parent_node;
1300 }
1301 unshift @r, $rs;
1302 }
1303 return join '/', @r;
1304 } # get_node_path
1305
1306 sub get_node_link ($$) {
1307 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1308 htescape (get_node_path ($_[1])) . qq[</a>];
1309 } # get_node_link
1310
1311 {
1312 my $Msg = {};
1313
1314 sub load_text_catalog ($) {
1315 my $lang = shift; # MUST be a canonical lang name
1316 open my $file, '<:utf8', "cc-msg.$lang.txt"
1317 or die "$0: cc-msg.$lang.txt: $!";
1318 while (<$file>) {
1319 if (s/^([^;]+);([^;]*);//) {
1320 my ($type, $cls, $msg) = ($1, $2, $_);
1321 $msg =~ tr/\x0D\x0A//d;
1322 $Msg->{$type} = [$cls, $msg];
1323 }
1324 }
1325 } # load_text_catalog
1326
1327 sub get_text ($) {
1328 my ($type, $level, $node) = @_;
1329 $type = $level . ':' . $type if defined $level;
1330 $level = 'm' unless defined $level;
1331 my @arg;
1332 {
1333 if (defined $Msg->{$type}) {
1334 my $msg = $Msg->{$type}->[1];
1335 $msg =~ s{<var>\$([0-9]+)</var>}{
1336 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1337 }ge;
1338 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1339 UNIVERSAL::can ($node, 'get_attribute_ns')
1340 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1341 }ge;
1342 $msg =~ s{<var>{\@}</var>}{
1343 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1344 }ge;
1345 $msg =~ s{<var>{local-name}</var>}{
1346 UNIVERSAL::can ($node, 'manakai_local_name')
1347 ? htescape ($node->manakai_local_name) : ''
1348 }ge;
1349 $msg =~ s{<var>{element-local-name}</var>}{
1350 (UNIVERSAL::can ($node, 'owner_element') and
1351 $node->owner_element)
1352 ? htescape ($node->owner_element->manakai_local_name)
1353 : ''
1354 }ge;
1355 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1356 } elsif ($type =~ s/:([^:]*)$//) {
1357 unshift @arg, $1;
1358 redo;
1359 }
1360 }
1361 return ($type, 'level-'.$level, htescape ($_[0]));
1362 } # get_text
1363
1364 }
1365
1366 sub get_input_document ($$) {
1367 my ($http, $dom) = @_;
1368
1369 my $request_uri = $http->get_parameter ('uri');
1370 my $r = {};
1371 if (defined $request_uri and length $request_uri) {
1372 my $uri = $dom->create_uri_reference ($request_uri);
1373 unless ({
1374 http => 1,
1375 }->{lc $uri->uri_scheme}) {
1376 return {uri => $request_uri, request_uri => $request_uri,
1377 error_status_text => 'URI scheme not allowed'};
1378 }
1379
1380 require Message::Util::HostPermit;
1381 my $host_permit = new Message::Util::HostPermit;
1382 $host_permit->add_rule (<<EOH);
1383 Allow host=suika port=80
1384 Deny host=suika
1385 Allow host=suika.fam.cx port=80
1386 Deny host=suika.fam.cx
1387 Deny host=localhost
1388 Deny host=*.localdomain
1389 Deny ipv4=0.0.0.0/8
1390 Deny ipv4=10.0.0.0/8
1391 Deny ipv4=127.0.0.0/8
1392 Deny ipv4=169.254.0.0/16
1393 Deny ipv4=172.0.0.0/11
1394 Deny ipv4=192.0.2.0/24
1395 Deny ipv4=192.88.99.0/24
1396 Deny ipv4=192.168.0.0/16
1397 Deny ipv4=198.18.0.0/15
1398 Deny ipv4=224.0.0.0/4
1399 Deny ipv4=255.255.255.255/32
1400 Deny ipv6=0::0/0
1401 Allow host=*
1402 EOH
1403 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1404 return {uri => $request_uri, request_uri => $request_uri,
1405 error_status_text => 'Connection to the host is forbidden'};
1406 }
1407
1408 require LWP::UserAgent;
1409 my $ua = WDCC::LWPUA->new;
1410 $ua->{wdcc_dom} = $dom;
1411 $ua->{wdcc_host_permit} = $host_permit;
1412 $ua->agent ('Mozilla'); ## TODO: for now.
1413 $ua->parse_head (0);
1414 $ua->protocols_allowed ([qw/http/]);
1415 $ua->max_size (1000_000);
1416 my $req = HTTP::Request->new (GET => $request_uri);
1417 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1418 my $res = $ua->request ($req);
1419 ## TODO: 401 sets |is_success| true.
1420 if ($res->is_success or $http->get_parameter ('error-page')) {
1421 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1422 $r->{uri} = $res->request->uri;
1423 $r->{request_uri} = $request_uri;
1424
1425 ## TODO: More strict parsing...
1426 my $ct = $res->header ('Content-Type');
1427 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1428 $r->{charset} = lc $1;
1429 $r->{charset} =~ tr/\\//d;
1430 $r->{official_charset} = $r->{charset};
1431 }
1432
1433 my $input_charset = $http->get_parameter ('charset');
1434 if (defined $input_charset and length $input_charset) {
1435 $r->{charset_overridden}
1436 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1437 $r->{charset} = $input_charset;
1438 }
1439
1440 ## TODO: Support for HTTP Content-Encoding
1441
1442 $r->{s} = ''.$res->content;
1443
1444 require Whatpm::ContentType;
1445 ($r->{official_type}, $r->{media_type})
1446 = Whatpm::ContentType->get_sniffed_type
1447 (get_file_head => sub {
1448 return substr $r->{s}, 0, shift;
1449 },
1450 http_content_type_byte => $ct,
1451 has_http_content_encoding =>
1452 defined $res->header ('Content-Encoding'),
1453 supported_image_types => {});
1454 } else {
1455 $r->{uri} = $res->request->uri;
1456 $r->{request_uri} = $request_uri;
1457 $r->{error_status_text} = $res->status_line;
1458 }
1459
1460 $r->{header_field} = [];
1461 $res->scan (sub {
1462 push @{$r->{header_field}}, [$_[0], $_[1]];
1463 });
1464 $r->{header_status_code} = $res->code;
1465 $r->{header_status_text} = $res->message;
1466 } else {
1467 $r->{s} = ''.$http->get_parameter ('s');
1468 $r->{uri} = q<thismessage:/>;
1469 $r->{request_uri} = q<thismessage:/>;
1470 $r->{base_uri} = q<thismessage:/>;
1471 $r->{charset} = ''.$http->get_parameter ('_charset_');
1472 $r->{charset} =~ s/\s+//g;
1473 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1474 $r->{official_charset} = $r->{charset};
1475 $r->{header_field} = [];
1476
1477 require Whatpm::ContentType;
1478 ($r->{official_type}, $r->{media_type})
1479 = Whatpm::ContentType->get_sniffed_type
1480 (get_file_head => sub {
1481 return substr $r->{s}, 0, shift;
1482 },
1483 http_content_type_byte => undef,
1484 has_http_content_encoding => 0,
1485 supported_image_types => {});
1486 }
1487
1488 my $input_format = $http->get_parameter ('i');
1489 if (defined $input_format and length $input_format) {
1490 $r->{media_type_overridden}
1491 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1492 $r->{media_type} = $input_format;
1493 }
1494 if (defined $r->{s} and not defined $r->{media_type}) {
1495 $r->{media_type} = 'text/html';
1496 $r->{media_type_overridden} = 1;
1497 }
1498
1499 if ($r->{media_type} eq 'text/xml') {
1500 unless (defined $r->{charset}) {
1501 $r->{charset} = 'us-ascii';
1502 $r->{official_charset} = $r->{charset};
1503 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1504 $r->{charset_overridden} = 0;
1505 }
1506 }
1507
1508 if (length $r->{s} > 1000_000) {
1509 $r->{error_status_text} = 'Entity-body too large';
1510 delete $r->{s};
1511 return $r;
1512 }
1513
1514 $r->{inner_html_element} = $http->get_parameter ('e');
1515
1516 return $r;
1517 } # get_input_document
1518
1519 package WDCC::LWPUA;
1520 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1521
1522 sub redirect_ok {
1523 my $ua = shift;
1524 unless ($ua->SUPER::redirect_ok (@_)) {
1525 return 0;
1526 }
1527
1528 my $uris = $_[1]->header ('Location');
1529 return 0 unless $uris;
1530 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1531 unless ({
1532 http => 1,
1533 }->{lc $uri->uri_scheme}) {
1534 return 0;
1535 }
1536 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1537 return 0;
1538 }
1539 return 1;
1540 } # redirect_ok
1541
1542 =head1 AUTHOR
1543
1544 Wakaba <w@suika.fam.cx>.
1545
1546 =head1 LICENSE
1547
1548 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1549
1550 This library is free software; you can redistribute it
1551 and/or modify it under the same terms as Perl itself.
1552
1553 =cut
1554
1555 ## $Date: 2008/03/17 13:52:48 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24