/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.50 - (show annotations) (download)
Tue May 6 08:47:09 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.49: +5 -7 lines
++ ChangeLog	6 May 2008 08:47:05 -0000
	* cc.cgi: Use table object returned by the checker; don't
	form a table by itself.

	* table-script.js: Use different coloring for empty data cells.

	* cc.cgi, table.cgi: Remove table reference for JSON convertion.

2008-05-06  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89
90 <script src="../cc-script.js"></script>
91 ];
92
93 $input->{id_prefix} = '';
94 #$input->{nested} = 0;
95 my $result = {conforming_min => 1, conforming_max => 1};
96 check_and_print ($input => $result);
97 print_result_section ($result);
98 } else {
99 print STDOUT qq[</dl></div>];
100 print_result_input_error_section ($input);
101 }
102
103 print STDOUT qq[
104 <ul class="navigation" id="nav-items">
105 ];
106 for (@nav) {
107 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108 }
109 print STDOUT qq[
110 </ul>
111 </body>
112 </html>
113 ];
114
115 for (qw/decode parse parse_html parse_xml parse_manifest
116 check check_manifest/) {
117 next unless defined $time{$_};
118 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119 print $file $char_length, "\t", $time{$_}, "\n";
120 }
121
122 exit;
123 }
124
125 sub add_error ($$$) {
126 my ($layer, $err, $result) = @_;
127 if (defined $err->{level}) {
128 if ($err->{level} eq 's') {
129 $result->{$layer}->{should}++;
130 $result->{$layer}->{score_min} -= 2;
131 $result->{conforming_min} = 0;
132 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133 $result->{$layer}->{warning}++;
134 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 $result->{$layer}->{unsupported}++;
136 $result->{unsupported} = 1;
137 } elsif ($err->{level} eq 'i') {
138 #
139 } else {
140 $result->{$layer}->{must}++;
141 $result->{$layer}->{score_max} -= 2;
142 $result->{$layer}->{score_min} -= 2;
143 $result->{conforming_min} = 0;
144 $result->{conforming_max} = 0;
145 }
146 } else {
147 $result->{$layer}->{must}++;
148 $result->{$layer}->{score_max} -= 2;
149 $result->{$layer}->{score_min} -= 2;
150 $result->{conforming_min} = 0;
151 $result->{conforming_max} = 0;
152 }
153 } # add_error
154
155 sub check_and_print ($$) {
156 my ($input, $result) = @_;
157
158 print_http_header_section ($input, $result);
159
160 my $doc;
161 my $el;
162 my $cssom;
163 my $manifest;
164 my @subdoc;
165
166 if ($input->{media_type} eq 'text/html') {
167 ($doc, $el) = print_syntax_error_html_section ($input, $result);
168 print_source_string_section
169 ($input,
170 \($input->{s}),
171 $input->{charset} || $doc->input_encoding);
172 } elsif ({
173 'text/xml' => 1,
174 'application/atom+xml' => 1,
175 'application/rss+xml' => 1,
176 'image/svg+xml' => 1,
177 'application/xhtml+xml' => 1,
178 'application/xml' => 1,
179 ## TODO: Should we make all XML MIME Types fall
180 ## into this category?
181
182 'application/rdf+xml' => 1, ## NOTE: This type has different model.
183 }->{$input->{media_type}}) {
184 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
185 print_source_string_section ($input,
186 \($input->{s}),
187 $doc->input_encoding);
188 } elsif ($input->{media_type} eq 'text/css') {
189 $cssom = print_syntax_error_css_section ($input, $result);
190 print_source_string_section
191 ($input, \($input->{s}),
192 $cssom->manakai_input_encoding);
193 } elsif ($input->{media_type} eq 'text/cache-manifest') {
194 ## TODO: MUST be text/cache-manifest
195 $manifest = print_syntax_error_manifest_section ($input, $result);
196 print_source_string_section ($input, \($input->{s}),
197 'utf-8');
198 } else {
199 ## TODO: Change HTTP status code??
200 print_result_unknown_type_section ($input, $result);
201 }
202
203 if (defined $doc or defined $el) {
204 $doc->document_uri ($input->{uri});
205 $doc->manakai_entity_base_uri ($input->{base_uri});
206 print_structure_dump_dom_section ($input, $doc, $el);
207 my $elements = print_structure_error_dom_section
208 ($input, $doc, $el, $result, sub {
209 push @subdoc, shift;
210 });
211 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
212 print_listing_section ({
213 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
214 }, $input, $elements->{id}) if keys %{$elements->{id}};
215 print_listing_section ({
216 id => 'terms', label => 'Terms', heading => 'Terms',
217 }, $input, $elements->{term}) if keys %{$elements->{term}};
218 print_listing_section ({
219 id => 'classes', label => 'Classes', heading => 'Classes',
220 }, $input, $elements->{class}) if keys %{$elements->{class}};
221 print_uri_section ($input, $elements->{uri}) if keys %{$elements->{uri}};
222 print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
223 } elsif (defined $cssom) {
224 print_structure_dump_cssom_section ($input, $cssom);
225 ## TODO: CSSOM validation
226 add_error ('structure', {level => 'u'} => $result);
227 } elsif (defined $manifest) {
228 print_structure_dump_manifest_section ($input, $manifest);
229 print_structure_error_manifest_section ($input, $manifest, $result);
230 }
231
232 my $id_prefix = 0;
233 for my $subinput (@subdoc) {
234 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
235 $subinput->{nested} = 1;
236 $subinput->{base_uri} = $subinput->{container_node}->base_uri
237 unless defined $subinput->{base_uri};
238 my $ebaseuri = htescape ($subinput->{base_uri});
239 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
240 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
241 <h2>Subdocument #$id_prefix</h2>
242
243 <dl>
244 <dt>Internet Media Type</dt>
245 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
246 <dt>Container Node</dt>
247 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
248 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
249 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
250 </dl>];
251
252 $subinput->{id_prefix} .= '-';
253 check_and_print ($subinput => $result);
254
255 print STDOUT qq[</div>];
256 }
257 } # check_and_print
258
259 sub print_http_header_section ($$) {
260 my ($input, $result) = @_;
261 return unless defined $input->{header_status_code} or
262 defined $input->{header_status_text} or
263 @{$input->{header_field} or []};
264
265 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
266 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
267 <h2>HTTP Header</h2>
268
269 <p><strong>Note</strong>: Due to the limitation of the
270 network library in use, the content of this section might
271 not be the real header.</p>
272
273 <table><tbody>
274 ];
275
276 if (defined $input->{header_status_code}) {
277 print STDOUT qq[<tr><th scope="row">Status code</th>];
278 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
279 }
280 if (defined $input->{header_status_text}) {
281 print STDOUT qq[<tr><th scope="row">Status text</th>];
282 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
283 }
284
285 for (@{$input->{header_field}}) {
286 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
287 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
288 }
289
290 print STDOUT qq[</tbody></table></div>];
291 } # print_http_header_section
292
293 sub print_syntax_error_html_section ($$) {
294 my ($input, $result) = @_;
295
296 require Encode;
297 require Whatpm::HTML;
298
299 print STDOUT qq[
300 <div id="$input->{id_prefix}parse-errors" class="section">
301 <h2>Parse Errors</h2>
302
303 <dl id="$input->{id_prefix}parse-errors-list">];
304 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
305
306 my $onerror = sub {
307 my (%opt) = @_;
308 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
309 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
310 qq[</dt>];
311 $type =~ tr/ /-/;
312 $type =~ s/\|/%7C/g;
313 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
314 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
315 print STDOUT qq[$msg</dd>\n];
316
317 add_error ('syntax', \%opt => $result);
318 };
319
320 my $doc = $dom->create_document;
321 my $el;
322 my $inner_html_element = $input->{inner_html_element};
323 if (defined $inner_html_element and length $inner_html_element) {
324 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
325 my $time1 = time;
326 my $t = \($input->{s});
327 unless ($input->{is_char_string}) {
328 $t = \(Encode::decode ($input->{charset}, $$t));
329 }
330 $time{decode} = time - $time1;
331
332 $el = $doc->create_element_ns
333 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
334 $time1 = time;
335 Whatpm::HTML->set_inner_html ($el, $$t, $onerror);
336 $time{parse} = time - $time1;
337 } else {
338 my $time1 = time;
339 if ($input->{is_char_string}) {
340 Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror);
341 } else {
342 Whatpm::HTML->parse_byte_string
343 ($input->{charset}, $input->{s} => $doc, $onerror);
344 }
345 $time{parse_html} = time - $time1;
346 }
347 $doc->manakai_charset ($input->{official_charset})
348 if defined $input->{official_charset};
349
350 print STDOUT qq[</dl></div>];
351
352 return ($doc, $el);
353 } # print_syntax_error_html_section
354
355 sub print_syntax_error_xml_section ($$) {
356 my ($input, $result) = @_;
357
358 require Message::DOM::XMLParserTemp;
359
360 print STDOUT qq[
361 <div id="$input->{id_prefix}parse-errors" class="section">
362 <h2>Parse Errors</h2>
363
364 <dl id="$input->{id_prefix}parse-errors-list">];
365 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
366
367 my $onerror = sub {
368 my $err = shift;
369 my $line = $err->location->line_number;
370 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
371 print STDOUT $err->location->column_number, "</dt><dd>";
372 print STDOUT htescape $err->text, "</dd>\n";
373
374 add_error ('syntax', {type => $err->text,
375 level => [
376 $err->SEVERITY_FATAL_ERROR => 'm',
377 $err->SEVERITY_ERROR => 'm',
378 $err->SEVERITY_WARNING => 's',
379 ]->[$err->severity]} => $result);
380
381 return 1;
382 };
383
384 my $t = \($input->{s});
385 if ($input->{is_char_string}) {
386 require Encode;
387 $t = \(Encode::encode ('utf8', $$t));
388 $input->{charset} = 'utf-8';
389 }
390
391 my $time1 = time;
392 open my $fh, '<', $t;
393 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
394 ($fh => $dom, $onerror, charset => $input->{charset});
395 $time{parse_xml} = time - $time1;
396 $doc->manakai_charset ($input->{official_charset})
397 if defined $input->{official_charset};
398
399 print STDOUT qq[</dl></div>];
400
401 return ($doc, undef);
402 } # print_syntax_error_xml_section
403
404 sub get_css_parser () {
405 our $CSSParser;
406 return $CSSParser if $CSSParser;
407
408 require Whatpm::CSS::Parser;
409 my $p = Whatpm::CSS::Parser->new;
410
411 $p->{prop}->{$_} = 1 for qw/
412 alignment-baseline
413 background background-attachment background-color background-image
414 background-position background-position-x background-position-y
415 background-repeat border border-bottom border-bottom-color
416 border-bottom-style border-bottom-width border-collapse border-color
417 border-left border-left-color
418 border-left-style border-left-width border-right border-right-color
419 border-right-style border-right-width
420 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
421 border-style border-top border-top-color border-top-style border-top-width
422 border-width bottom
423 caption-side clear clip color content counter-increment counter-reset
424 cursor direction display dominant-baseline empty-cells float font
425 font-family font-size font-size-adjust font-stretch
426 font-style font-variant font-weight height left
427 letter-spacing line-height
428 list-style list-style-image list-style-position list-style-type
429 margin margin-bottom margin-left margin-right margin-top marker-offset
430 marks max-height max-width min-height min-width opacity -moz-opacity
431 orphans outline outline-color outline-style outline-width overflow
432 overflow-x overflow-y
433 padding padding-bottom padding-left padding-right padding-top
434 page page-break-after page-break-before page-break-inside
435 position quotes right size table-layout
436 text-align text-anchor text-decoration text-indent text-transform
437 top unicode-bidi vertical-align visibility white-space width widows
438 word-spacing writing-mode z-index
439 /;
440 $p->{prop_value}->{display}->{$_} = 1 for qw/
441 block clip inline inline-block inline-table list-item none
442 table table-caption table-cell table-column table-column-group
443 table-header-group table-footer-group table-row table-row-group
444 compact marker
445 /;
446 $p->{prop_value}->{position}->{$_} = 1 for qw/
447 absolute fixed relative static
448 /;
449 $p->{prop_value}->{float}->{$_} = 1 for qw/
450 left right none
451 /;
452 $p->{prop_value}->{clear}->{$_} = 1 for qw/
453 left right none both
454 /;
455 $p->{prop_value}->{direction}->{ltr} = 1;
456 $p->{prop_value}->{direction}->{rtl} = 1;
457 $p->{prop_value}->{marks}->{crop} = 1;
458 $p->{prop_value}->{marks}->{cross} = 1;
459 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
460 normal bidi-override embed
461 /;
462 for my $prop_name (qw/overflow overflow-x overflow-y/) {
463 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
464 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
465 /;
466 }
467 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
468 visible hidden collapse
469 /;
470 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
471 disc circle square decimal decimal-leading-zero
472 lower-roman upper-roman lower-greek lower-latin
473 upper-latin armenian georgian lower-alpha upper-alpha none
474 hebrew cjk-ideographic hiragana katakana hiragana-iroha
475 katakana-iroha
476 /;
477 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
478 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
479 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
480 auto always avoid left right
481 /;
482 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
483 auto always avoid left right
484 /;
485 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
486 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
487 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
488 repeat repeat-x repeat-y no-repeat
489 /;
490 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
491 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
492 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
493 xx-small x-small small medium large x-large xx-large
494 -manakai-xxx-large -webkit-xxx-large
495 larger smaller
496 /;
497 $p->{prop_value}->{'font-style'}->{normal} = 1;
498 $p->{prop_value}->{'font-style'}->{italic} = 1;
499 $p->{prop_value}->{'font-style'}->{oblique} = 1;
500 $p->{prop_value}->{'font-variant'}->{normal} = 1;
501 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
502 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
503 qw/normal wider narrower ultra-condensed extra-condensed
504 condensed semi-condensed semi-expanded expanded
505 extra-expanded ultra-expanded/;
506 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
507 left right center justify begin end
508 /;
509 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
510 capitalize uppercase lowercase none
511 /;
512 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
513 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
514 /;
515 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
516 lr rl tb lr-tb rl-tb tb-rl
517 /;
518 $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
519 start middle end
520 /;
521 $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
522 auto use-script no-change reset-size ideographic alphabetic
523 hanging mathematical central middle text-after-edge text-before-edge
524 /;
525 $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
526 auto baseline before-edge text-before-edge middle central
527 after-edge text-after-edge ideographic alphabetic hanging
528 mathematical
529 /;
530 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
531 none blink underline overline line-through
532 /;
533 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
534 top bottom left right
535 /;
536 $p->{prop_value}->{'table-layout'}->{auto} = 1;
537 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
538 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
539 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
540 $p->{prop_value}->{'empty-cells'}->{show} = 1;
541 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
542 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
543 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
544 se-resize sw-resize s-resize w-resize text wait help progress
545 /;
546 for my $prop (qw/border-top-style border-left-style
547 border-bottom-style border-right-style outline-style/) {
548 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
549 none hidden dotted dashed solid double groove ridge inset outset
550 /;
551 }
552 for my $prop (qw/color background-color
553 border-bottom-color border-left-color border-right-color
554 border-top-color border-color/) {
555 $p->{prop_value}->{$prop}->{transparent} = 1;
556 $p->{prop_value}->{$prop}->{flavor} = 1;
557 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
558 }
559 $p->{prop_value}->{'outline-color'}->{invert} = 1;
560 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
561 $p->{pseudo_class}->{$_} = 1 for qw/
562 active checked disabled empty enabled first-child first-of-type
563 focus hover indeterminate last-child last-of-type link only-child
564 only-of-type root target visited
565 lang nth-child nth-last-child nth-of-type nth-last-of-type not
566 -manakai-contains -manakai-current
567 /;
568 $p->{pseudo_element}->{$_} = 1 for qw/
569 after before first-letter first-line
570 /;
571
572 return $CSSParser = $p;
573 } # get_css_parser
574
575 sub print_syntax_error_css_section ($$) {
576 my ($input, $result) = @_;
577
578 print STDOUT qq[
579 <div id="$input->{id_prefix}parse-errors" class="section">
580 <h2>Parse Errors</h2>
581
582 <dl id="$input->{id_prefix}parse-errors-list">];
583 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
584
585 my $p = get_css_parser ();
586 $p->init;
587 $p->{onerror} = sub {
588 my (%opt) = @_;
589 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
590 if ($opt{token}) {
591 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
592 } else {
593 print STDOUT qq[<dt class="$cls">Unknown location];
594 }
595 if (defined $opt{value}) {
596 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
597 } elsif (defined $opt{token}) {
598 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
599 }
600 $type =~ tr/ /-/;
601 $type =~ s/\|/%7C/g;
602 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
603 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
604 print STDOUT qq[$msg</dd>\n];
605
606 add_error ('syntax', \%opt => $result);
607 };
608 $p->{href} = $input->{uri};
609 $p->{base_uri} = $input->{base_uri};
610
611 # if ($parse_mode eq 'q') {
612 # $p->{unitless_px} = 1;
613 # $p->{hashless_color} = 1;
614 # }
615
616 ## TODO: Make $input->{s} a ref.
617
618 my $s = \$input->{s};
619 my $charset;
620 unless ($input->{is_char_string}) {
621 require Encode;
622 if (defined $input->{charset}) {## TODO: IANA->Perl
623 $charset = $input->{charset};
624 $s = \(Encode::decode ($input->{charset}, $$s));
625 } else {
626 ## TODO: charset detection
627 $s = \(Encode::decode ($charset = 'utf-8', $$s));
628 }
629 }
630
631 my $cssom = $p->parse_char_string ($$s);
632 $cssom->manakai_input_encoding ($charset) if defined $charset;
633
634 print STDOUT qq[</dl></div>];
635
636 return $cssom;
637 } # print_syntax_error_css_section
638
639 sub print_syntax_error_manifest_section ($$) {
640 my ($input, $result) = @_;
641
642 require Whatpm::CacheManifest;
643
644 print STDOUT qq[
645 <div id="$input->{id_prefix}parse-errors" class="section">
646 <h2>Parse Errors</h2>
647
648 <dl id="$input->{id_prefix}parse-errors-list">];
649 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
650
651 my $onerror = sub {
652 my (%opt) = @_;
653 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
654 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
655 qq[</dt>];
656 $type =~ tr/ /-/;
657 $type =~ s/\|/%7C/g;
658 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
659 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
660 print STDOUT qq[$msg</dd>\n];
661
662 add_error ('syntax', \%opt => $result);
663 };
664
665 my $m = $input->{is_char_string} ? 'parse_char_string' : 'parse_byte_string';
666 my $time1 = time;
667 my $manifest = Whatpm::CacheManifest->$m
668 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
669 $time{parse_manifest} = time - $time1;
670
671 print STDOUT qq[</dl></div>];
672
673 return $manifest;
674 } # print_syntax_error_manifest_section
675
676 sub print_source_string_section ($$$) {
677 my $input = shift;
678 my $s;
679 unless ($input->{is_char_string}) {
680 require Encode;
681 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
682 return unless $enc;
683
684 $s = \($enc->decode (${$_[0]}));
685 } else {
686 $s = $_[0];
687 }
688
689 my $i = 1;
690 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
691 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
692 <h2>Document Source</h2>
693 <ol lang="">\n];
694 if (length $$s) {
695 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
696 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
697 "</li>\n";
698 $i++;
699 }
700 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
701 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
702 "</li>\n";
703 }
704 } else {
705 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
706 }
707 print STDOUT "</ol></div>
708 <script>
709 addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
710 </script>";
711 } # print_input_string_section
712
713 sub print_document_tree ($$) {
714 my ($input, $node) = @_;
715
716 my $r = '<ol class="xoxo">';
717
718 my @node = ($node);
719 while (@node) {
720 my $child = shift @node;
721 unless (ref $child) {
722 $r .= $child;
723 next;
724 }
725
726 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
727 my $nt = $child->node_type;
728 if ($nt == $child->ELEMENT_NODE) {
729 my $child_nsuri = $child->namespace_uri;
730 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
731 '</code>'; ## ISSUE: case
732
733 if ($child->has_attributes) {
734 $r .= '<ul class="attributes">';
735 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
736 @{$child->attributes}) {
737 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
738 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
739 }
740 $r .= '</ul>';
741 }
742
743 if ($child->has_child_nodes) {
744 $r .= '<ol class="children">';
745 unshift @node, @{$child->child_nodes}, '</ol></li>';
746 } else {
747 $r .= '</li>';
748 }
749 } elsif ($nt == $child->TEXT_NODE) {
750 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
751 } elsif ($nt == $child->CDATA_SECTION_NODE) {
752 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
753 } elsif ($nt == $child->COMMENT_NODE) {
754 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
755 } elsif ($nt == $child->DOCUMENT_NODE) {
756 $r .= qq'<li id="$node_id" class="tree-document">Document';
757 $r .= qq[<ul class="attributes">];
758 my $cp = $child->manakai_charset;
759 if (defined $cp) {
760 $r .= qq[<li><code>charset</code> parameter = <code>];
761 $r .= htescape ($cp) . qq[</code></li>];
762 }
763 $r .= qq[<li><code>inputEncoding</code> = ];
764 my $ie = $child->input_encoding;
765 if (defined $ie) {
766 $r .= qq[<code>@{[htescape ($ie)]}</code>];
767 if ($child->manakai_has_bom) {
768 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
769 }
770 } else {
771 $r .= qq[(<code>null</code>)];
772 }
773 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
774 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
775 unless ($child->manakai_is_html) {
776 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
777 if (defined $child->xml_encoding) {
778 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
779 } else {
780 $r .= qq[<li>XML encoding = (null)</li>];
781 }
782 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
783 }
784 $r .= qq[</ul>];
785 if ($child->has_child_nodes) {
786 $r .= '<ol class="children">';
787 unshift @node, @{$child->child_nodes}, '</ol></li>';
788 }
789 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
790 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
791 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
792 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
793 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
794 $r .= '</ul></li>';
795 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
796 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
797 } else {
798 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
799 }
800 }
801
802 $r .= '</ol>';
803 print STDOUT $r;
804 } # print_document_tree
805
806 sub print_structure_dump_dom_section ($$$) {
807 my ($input, $doc, $el) = @_;
808
809 print STDOUT qq[
810 <div id="$input->{id_prefix}document-tree" class="section">
811 <h2>Document Tree</h2>
812 ];
813 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
814 unless $input->{nested};
815
816 print_document_tree ($input, $el || $doc);
817
818 print STDOUT qq[</div>];
819 } # print_structure_dump_dom_section
820
821 sub print_structure_dump_cssom_section ($$) {
822 my ($input, $cssom) = @_;
823
824 print STDOUT qq[
825 <div id="$input->{id_prefix}document-tree" class="section">
826 <h2>Document Tree</h2>
827 ];
828 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
829 unless $input->{nested};
830
831 ## TODO:
832 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
833
834 print STDOUT qq[</div>];
835 } # print_structure_dump_cssom_section
836
837 sub print_structure_dump_manifest_section ($$) {
838 my ($input, $manifest) = @_;
839
840 print STDOUT qq[
841 <div id="$input->{id_prefix}dump-manifest" class="section">
842 <h2>Cache Manifest</h2>
843 ];
844 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
845 unless $input->{nested};
846
847 print STDOUT qq[<dl><dt>Explicit entries</dt>];
848 my $i = 0;
849 for my $uri (@{$manifest->[0]}) {
850 my $euri = htescape ($uri);
851 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
852 }
853
854 print STDOUT qq[<dt>Fallback entries</dt><dd>
855 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
856 <th scope=row>Fallback Entry</tr><tbody>];
857 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
858 my $euri = htescape ($uri);
859 my $euri2 = htescape ($manifest->[1]->{$uri});
860 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
861 <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
862 }
863
864 print STDOUT qq[</table><dt>Online whitelist</dt>];
865 for my $uri (@{$manifest->[2]}) {
866 my $euri = htescape ($uri);
867 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
868 }
869
870 print STDOUT qq[</dl></div>];
871 } # print_structure_dump_manifest_section
872
873 sub print_structure_error_dom_section ($$$$$) {
874 my ($input, $doc, $el, $result, $onsubdoc) = @_;
875
876 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
877 <h2>Document Errors</h2>
878
879 <dl id=document-errors-list>];
880 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
881 unless $input->{nested};
882
883 require Whatpm::ContentChecker;
884 my $onerror = sub {
885 my %opt = @_;
886 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
887 $type =~ tr/ /-/;
888 $type =~ s/\|/%7C/g;
889 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
890 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
891 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
892 print STDOUT $msg, "</dd>\n";
893 add_error ('structure', \%opt => $result);
894 };
895
896 my $elements;
897 my $time1 = time;
898 if ($el) {
899 $elements = Whatpm::ContentChecker->check_element
900 ($el, $onerror, $onsubdoc);
901 } else {
902 $elements = Whatpm::ContentChecker->check_document
903 ($doc, $onerror, $onsubdoc);
904 }
905 $time{check} = time - $time1;
906
907 print STDOUT qq[</dl>
908 <script>
909 addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
910 </script></div>];
911
912 return $elements;
913 } # print_structure_error_dom_section
914
915 sub print_structure_error_manifest_section ($$$) {
916 my ($input, $manifest, $result) = @_;
917
918 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
919 <h2>Document Errors</h2>
920
921 <dl>];
922 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
923 unless $input->{nested};
924
925 require Whatpm::CacheManifest;
926 Whatpm::CacheManifest->check_manifest ($manifest, sub {
927 my %opt = @_;
928 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
929 $type =~ tr/ /-/;
930 $type =~ s/\|/%7C/g;
931 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
932 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
933 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
934 add_error ('structure', \%opt => $result);
935 });
936
937 print STDOUT qq[</div>];
938 } # print_structure_error_manifest_section
939
940 sub print_table_section ($$) {
941 my ($input, $tables) = @_;
942
943 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
944 unless $input->{nested};
945 print STDOUT qq[
946 <div id="$input->{id_prefix}tables" class="section">
947 <h2>Tables</h2>
948
949 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
950 <script src="../table-script.js" type="text/javascript"></script>
951 <noscript>
952 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
953 </noscript>
954 ];
955
956 require JSON;
957
958 my $i = 0;
959 for my $table (@$tables) {
960 $i++;
961 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
962 get_node_link ($input, $table->{element}) . q[</h3>];
963
964 delete $table->{element};
965
966 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption},
967 @{$table->{row}}) {
968 next unless $_;
969 delete $_->{element};
970 }
971
972 for (@{$table->{row_group}}) {
973 next unless $_;
974 next unless $_->{element};
975 $_->{type} = $_->{element}->manakai_local_name;
976 delete $_->{element};
977 }
978
979 for (@{$table->{cell}}) {
980 next unless $_;
981 for (@{$_}) {
982 next unless $_;
983 for (@$_) {
984 $_->{id} = refaddr $_->{element} if defined $_->{element};
985 delete $_->{element};
986 $_->{is_header} = $_->{is_header} ? 1 : 0;
987 }
988 }
989 }
990
991 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
992 print STDOUT JSON::objToJson ($table);
993 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
994 print STDOUT qq[, '$input->{id_prefix}');</script>];
995 }
996
997 print STDOUT qq[</div>];
998 } # print_table_section
999
1000 sub print_listing_section ($$$) {
1001 my ($opt, $input, $ids) = @_;
1002
1003 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
1004 unless $input->{nested};
1005 print STDOUT qq[
1006 <div id="$input->{id_prefix}$opt->{id}" class="section">
1007 <h2>$opt->{heading}</h2>
1008
1009 <dl>
1010 ];
1011 for my $id (sort {$a cmp $b} keys %$ids) {
1012 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
1013 for (@{$ids->{$id}}) {
1014 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
1015 }
1016 }
1017 print STDOUT qq[</dl></div>];
1018 } # print_listing_section
1019
1020 sub print_uri_section ($$$) {
1021 my ($input, $uris) = @_;
1022
1023 ## NOTE: URIs contained in the DOM (i.e. in HTML or XML documents),
1024 ## except for those in RDF triples.
1025 ## TODO: URIs in CSS
1026
1027 push @nav, ['#' . $input->{id_prefix} . 'uris' => 'URIs']
1028 unless $input->{nested};
1029 print STDOUT qq[
1030 <div id="$input->{id_prefix}uris" class="section">
1031 <h2>URIs</h2>
1032
1033 <dl>];
1034 for my $uri (sort {$a cmp $b} keys %$uris) {
1035 my $euri = htescape ($uri);
1036 print STDOUT qq[<dt><code class=uri>&lt;<a href="$euri">$euri</a>></code>];
1037 my $eccuri = htescape (get_cc_uri ($uri));
1038 print STDOUT qq[<dd><a href="$eccuri">Check conformance of this document</a>];
1039 print STDOUT qq[<dd>Found at: <ul>];
1040 for my $entry (@{$uris->{$uri}}) {
1041 print STDOUT qq[<li>], get_node_link ($input, $entry->{node});
1042 if (keys %{$entry->{type} or {}}) {
1043 print STDOUT ' (';
1044 print STDOUT join ', ', map {
1045 {
1046 hyperlink => 'Hyperlink',
1047 resource => 'Link to an external resource',
1048 namespace => 'Namespace URI',
1049 cite => 'Citation or link to a long description',
1050 embedded => 'Link to an embedded content',
1051 base => 'Base URI',
1052 action => 'Submission URI',
1053 }->{$_}
1054 or
1055 htescape ($_)
1056 } keys %{$entry->{type}};
1057 print STDOUT ')';
1058 }
1059 }
1060 print STDOUT qq[</ul>];
1061 }
1062 print STDOUT qq[</dl></div>];
1063 } # print_uri_section
1064
1065 sub print_rdf_section ($$$) {
1066 my ($input, $rdfs) = @_;
1067
1068 push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
1069 unless $input->{nested};
1070 print STDOUT qq[
1071 <div id="$input->{id_prefix}rdf" class="section">
1072 <h2>RDF Triples</h2>
1073
1074 <dl>];
1075 my $i = 0;
1076 for my $rdf (@$rdfs) {
1077 print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
1078 print STDOUT get_node_link ($input, $rdf->[0]);
1079 print STDOUT qq[<dd><dl>];
1080 for my $triple (@{$rdf->[1]}) {
1081 print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
1082 print STDOUT get_rdf_resource_html ($triple->[1]);
1083 print STDOUT ' ';
1084 print STDOUT get_rdf_resource_html ($triple->[2]);
1085 print STDOUT ' ';
1086 print STDOUT get_rdf_resource_html ($triple->[3]);
1087 }
1088 print STDOUT qq[</dl>];
1089 }
1090 print STDOUT qq[</dl></div>];
1091 } # print_rdf_section
1092
1093 sub get_rdf_resource_html ($) {
1094 my $resource = shift;
1095 if (defined $resource->{uri}) {
1096 my $euri = htescape ($resource->{uri});
1097 return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1098 '</a>></code>';
1099 } elsif (defined $resource->{bnodeid}) {
1100 return htescape ('_:' . $resource->{bnodeid});
1101 } elsif ($resource->{nodes}) {
1102 return '(rdf:XMLLiteral)';
1103 } elsif (defined $resource->{value}) {
1104 my $elang = htescape (defined $resource->{language}
1105 ? $resource->{language} : '');
1106 my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
1107 if (defined $resource->{datatype}) {
1108 my $euri = htescape ($resource->{datatype});
1109 $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1110 '</a>></code>';
1111 } elsif (length $resource->{language}) {
1112 $r .= '@' . htescape ($resource->{language});
1113 }
1114 return $r;
1115 } else {
1116 return '??';
1117 }
1118 } # get_rdf_resource_html
1119
1120 sub print_result_section ($) {
1121 my $result = shift;
1122
1123 print STDOUT qq[
1124 <div id="result-summary" class="section">
1125 <h2>Result</h2>];
1126
1127 if ($result->{unsupported} and $result->{conforming_max}) {
1128 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1129 checker cannot decide whether the document is conforming or
1130 not, since the document contains one or more unsupported
1131 features. The document might or might not be conforming.</p>];
1132 } elsif ($result->{conforming_min}) {
1133 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1134 found in this document.</p>];
1135 } elsif ($result->{conforming_max}) {
1136 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1137 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1138 it might be conforming.</p>];
1139 } else {
1140 print STDOUT qq[<p class=FAIL id=result-para>This document is
1141 <strong><em>non</em>-conforming</strong>.</p>];
1142 }
1143
1144 print STDOUT qq[<table>
1145 <colgroup><col><colgroup><col><col><col><colgroup><col>
1146 <thead>
1147 <tr><th scope=col></th>
1148 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1149 Errors</a></th>
1150 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1151 Errors</a></th>
1152 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1153 <th scope=col>Score</th></tr></thead><tbody>];
1154
1155 my $must_error = 0;
1156 my $should_error = 0;
1157 my $warning = 0;
1158 my $score_min = 0;
1159 my $score_max = 0;
1160 my $score_base = 20;
1161 my $score_unit = $score_base / 100;
1162 for (
1163 [Transfer => 'transfer', ''],
1164 [Character => 'char', ''],
1165 [Syntax => 'syntax', '#parse-errors'],
1166 [Structure => 'structure', '#document-errors'],
1167 ) {
1168 $must_error += ($result->{$_->[1]}->{must} += 0);
1169 $should_error += ($result->{$_->[1]}->{should} += 0);
1170 $warning += ($result->{$_->[1]}->{warning} += 0);
1171 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1172 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1173
1174 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1175 my $label = $_->[0];
1176 if ($result->{$_->[1]}->{must} or
1177 $result->{$_->[1]}->{should} or
1178 $result->{$_->[1]}->{warning} or
1179 $result->{$_->[1]}->{unsupported}) {
1180 $label = qq[<a href="$_->[2]">$label</a>];
1181 }
1182
1183 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1184 if ($uncertain) {
1185 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1186 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1187 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1188 } else {
1189 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1190 }
1191 }
1192
1193 $score_max += $score_base;
1194
1195 print STDOUT qq[
1196 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1197 </tbody>
1198 <tfoot><tr class=uncertain><th scope=row>Total</th>
1199 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1200 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1201 <td>$warning?</td>
1202 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1203 </table>
1204
1205 <p><strong>Important</strong>: This conformance checking service
1206 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1207 </div>];
1208 push @nav, ['#result-summary' => 'Result'];
1209 } # print_result_section
1210
1211 sub print_result_unknown_type_section ($$) {
1212 my ($input, $result) = @_;
1213
1214 my $euri = htescape ($input->{uri});
1215 print STDOUT qq[
1216 <div id="$input->{id_prefix}parse-errors" class="section">
1217 <h2>Errors</h2>
1218
1219 <dl>
1220 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1221 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1222 supported</a></strong>:
1223 Media type
1224 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1225 is not supported.</dd>
1226 </dl>
1227 </div>
1228 ];
1229 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1230 unless $input->{nested};
1231 add_error (char => {level => 'u'} => $result);
1232 add_error (syntax => {level => 'u'} => $result);
1233 add_error (structure => {level => 'u'} => $result);
1234 } # print_result_unknown_type_section
1235
1236 sub print_result_input_error_section ($) {
1237 my $input = shift;
1238 print STDOUT qq[<div class="section" id="result-summary">
1239 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1240 </div>];
1241 push @nav, ['#result-summary' => 'Result'];
1242 } # print_result_input_error_section
1243
1244 sub get_error_label ($$) {
1245 my ($input, $err) = @_;
1246
1247 my $r = '';
1248
1249 my $line;
1250 my $column;
1251
1252 if (defined $err->{node}) {
1253 $line = $err->{node}->get_user_data ('manakai_source_line');
1254 if (defined $line) {
1255 $column = $err->{node}->get_user_data ('manakai_source_column');
1256 } else {
1257 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1258 my $owner = $err->{node}->owner_element;
1259 $line = $owner->get_user_data ('manakai_source_line');
1260 $column = $owner->get_user_data ('manakai_source_column');
1261 } else {
1262 my $parent = $err->{node}->parent_node;
1263 if ($parent) {
1264 $line = $parent->get_user_data ('manakai_source_line');
1265 $column = $parent->get_user_data ('manakai_source_column');
1266 }
1267 }
1268 }
1269 }
1270 unless (defined $line) {
1271 if (defined $err->{token} and defined $err->{token}->{line}) {
1272 $line = $err->{token}->{line};
1273 $column = $err->{token}->{column};
1274 } elsif (defined $err->{line}) {
1275 $line = $err->{line};
1276 $column = $err->{column};
1277 }
1278 }
1279
1280 if (defined $line) {
1281 if (defined $column and $column > 0) {
1282 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1283 } else {
1284 $line = $line - 1 || 1;
1285 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1286 }
1287 }
1288
1289 if (defined $err->{node}) {
1290 $r .= ' ' if length $r;
1291 $r .= get_node_link ($input, $err->{node});
1292 }
1293
1294 if (defined $err->{index}) {
1295 if (length $r) {
1296 $r .= ', Index ' . (0+$err->{index});
1297 } else {
1298 $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1299 . (0+$err->{index}) . '</a>';
1300 }
1301 }
1302
1303 if (defined $err->{value}) {
1304 $r .= ' ' if length $r;
1305 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1306 }
1307
1308 return $r;
1309 } # get_error_label
1310
1311 sub get_error_level_label ($) {
1312 my $err = shift;
1313
1314 my $r = '';
1315
1316 if (not defined $err->{level} or $err->{level} eq 'm') {
1317 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1318 error</a></strong>: ];
1319 } elsif ($err->{level} eq 's') {
1320 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1321 error</a></strong>: ];
1322 } elsif ($err->{level} eq 'w') {
1323 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1324 ];
1325 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1326 $r = qq[<strong><a href="../error-description#level-u">Not
1327 supported</a></strong>: ];
1328 } elsif ($err->{level} eq 'i') {
1329 $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1330 } else {
1331 my $elevel = htescape ($err->{level});
1332 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1333 ];
1334 }
1335
1336 return $r;
1337 } # get_error_level_label
1338
1339 sub get_node_path ($) {
1340 my $node = shift;
1341 my @r;
1342 while (defined $node) {
1343 my $rs;
1344 if ($node->node_type == 1) {
1345 $rs = $node->node_name;
1346 $node = $node->parent_node;
1347 } elsif ($node->node_type == 2) {
1348 $rs = '@' . $node->node_name;
1349 $node = $node->owner_element;
1350 } elsif ($node->node_type == 3) {
1351 $rs = '"' . $node->data . '"';
1352 $node = $node->parent_node;
1353 } elsif ($node->node_type == 9) {
1354 @r = ('') unless @r;
1355 $rs = '';
1356 $node = $node->parent_node;
1357 } else {
1358 $rs = '#' . $node->node_type;
1359 $node = $node->parent_node;
1360 }
1361 unshift @r, $rs;
1362 }
1363 return join '/', @r;
1364 } # get_node_path
1365
1366 sub get_node_link ($$) {
1367 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1368 htescape (get_node_path ($_[1])) . qq[</a>];
1369 } # get_node_link
1370
1371 {
1372 my $Msg = {};
1373
1374 sub load_text_catalog ($) {
1375 my $lang = shift; # MUST be a canonical lang name
1376 open my $file, '<:utf8', "cc-msg.$lang.txt"
1377 or die "$0: cc-msg.$lang.txt: $!";
1378 while (<$file>) {
1379 if (s/^([^;]+);([^;]*);//) {
1380 my ($type, $cls, $msg) = ($1, $2, $_);
1381 $msg =~ tr/\x0D\x0A//d;
1382 $Msg->{$type} = [$cls, $msg];
1383 }
1384 }
1385 } # load_text_catalog
1386
1387 sub get_text ($) {
1388 my ($type, $level, $node) = @_;
1389 $type = $level . ':' . $type if defined $level;
1390 $level = 'm' unless defined $level;
1391 my @arg;
1392 {
1393 if (defined $Msg->{$type}) {
1394 my $msg = $Msg->{$type}->[1];
1395 $msg =~ s{<var>\$([0-9]+)</var>}{
1396 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1397 }ge;
1398 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1399 UNIVERSAL::can ($node, 'get_attribute_ns')
1400 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1401 }ge;
1402 $msg =~ s{<var>{\@}</var>}{
1403 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1404 }ge;
1405 $msg =~ s{<var>{local-name}</var>}{
1406 UNIVERSAL::can ($node, 'manakai_local_name')
1407 ? htescape ($node->manakai_local_name) : ''
1408 }ge;
1409 $msg =~ s{<var>{element-local-name}</var>}{
1410 (UNIVERSAL::can ($node, 'owner_element') and
1411 $node->owner_element)
1412 ? htescape ($node->owner_element->manakai_local_name)
1413 : ''
1414 }ge;
1415 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1416 } elsif ($type =~ s/:([^:]*)$//) {
1417 unshift @arg, $1;
1418 redo;
1419 }
1420 }
1421 return ($type, 'level-'.$level, htescape ($_[0]));
1422 } # get_text
1423
1424 }
1425
1426 sub encode_uri_component ($) {
1427 require Encode;
1428 my $s = Encode::encode ('utf8', shift);
1429 $s =~ s/([^0-9A-Za-z_.~-])/sprintf '%%%02X', ord $1/ge;
1430 return $s;
1431 } # encode_uri_component
1432
1433 sub get_cc_uri ($) {
1434 return './?uri=' . encode_uri_component ($_[0]);
1435 } # get_cc_uri
1436
1437 sub get_input_document ($$) {
1438 my ($http, $dom) = @_;
1439
1440 my $request_uri = $http->get_parameter ('uri');
1441 my $r = {};
1442 if (defined $request_uri and length $request_uri) {
1443 my $uri = $dom->create_uri_reference ($request_uri);
1444 unless ({
1445 http => 1,
1446 }->{lc $uri->uri_scheme}) {
1447 return {uri => $request_uri, request_uri => $request_uri,
1448 error_status_text => 'URI scheme not allowed'};
1449 }
1450
1451 require Message::Util::HostPermit;
1452 my $host_permit = new Message::Util::HostPermit;
1453 $host_permit->add_rule (<<EOH);
1454 Allow host=suika port=80
1455 Deny host=suika
1456 Allow host=suika.fam.cx port=80
1457 Deny host=suika.fam.cx
1458 Deny host=localhost
1459 Deny host=*.localdomain
1460 Deny ipv4=0.0.0.0/8
1461 Deny ipv4=10.0.0.0/8
1462 Deny ipv4=127.0.0.0/8
1463 Deny ipv4=169.254.0.0/16
1464 Deny ipv4=172.0.0.0/11
1465 Deny ipv4=192.0.2.0/24
1466 Deny ipv4=192.88.99.0/24
1467 Deny ipv4=192.168.0.0/16
1468 Deny ipv4=198.18.0.0/15
1469 Deny ipv4=224.0.0.0/4
1470 Deny ipv4=255.255.255.255/32
1471 Deny ipv6=0::0/0
1472 Allow host=*
1473 EOH
1474 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1475 return {uri => $request_uri, request_uri => $request_uri,
1476 error_status_text => 'Connection to the host is forbidden'};
1477 }
1478
1479 require LWP::UserAgent;
1480 my $ua = WDCC::LWPUA->new;
1481 $ua->{wdcc_dom} = $dom;
1482 $ua->{wdcc_host_permit} = $host_permit;
1483 $ua->agent ('Mozilla'); ## TODO: for now.
1484 $ua->parse_head (0);
1485 $ua->protocols_allowed ([qw/http/]);
1486 $ua->max_size (1000_000);
1487 my $req = HTTP::Request->new (GET => $request_uri);
1488 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1489 my $res = $ua->request ($req);
1490 ## TODO: 401 sets |is_success| true.
1491 if ($res->is_success or $http->get_parameter ('error-page')) {
1492 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1493 $r->{uri} = $res->request->uri;
1494 $r->{request_uri} = $request_uri;
1495
1496 ## TODO: More strict parsing...
1497 my $ct = $res->header ('Content-Type');
1498 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1499 $r->{charset} = lc $1;
1500 $r->{charset} =~ tr/\\//d;
1501 $r->{official_charset} = $r->{charset};
1502 }
1503
1504 my $input_charset = $http->get_parameter ('charset');
1505 if (defined $input_charset and length $input_charset) {
1506 $r->{charset_overridden}
1507 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1508 $r->{charset} = $input_charset;
1509 }
1510
1511 ## TODO: Support for HTTP Content-Encoding
1512
1513 $r->{s} = ''.$res->content;
1514
1515 require Whatpm::ContentType;
1516 ($r->{official_type}, $r->{media_type})
1517 = Whatpm::ContentType->get_sniffed_type
1518 (get_file_head => sub {
1519 return substr $r->{s}, 0, shift;
1520 },
1521 http_content_type_byte => $ct,
1522 has_http_content_encoding =>
1523 defined $res->header ('Content-Encoding'),
1524 supported_image_types => {});
1525 } else {
1526 $r->{uri} = $res->request->uri;
1527 $r->{request_uri} = $request_uri;
1528 $r->{error_status_text} = $res->status_line;
1529 }
1530
1531 $r->{header_field} = [];
1532 $res->scan (sub {
1533 push @{$r->{header_field}}, [$_[0], $_[1]];
1534 });
1535 $r->{header_status_code} = $res->code;
1536 $r->{header_status_text} = $res->message;
1537 } else {
1538 $r->{s} = ''.$http->get_parameter ('s');
1539 $r->{uri} = q<thismessage:/>;
1540 $r->{request_uri} = q<thismessage:/>;
1541 $r->{base_uri} = q<thismessage:/>;
1542 $r->{charset} = ''.$http->get_parameter ('_charset_');
1543 $r->{charset} =~ s/\s+//g;
1544 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1545 $r->{official_charset} = $r->{charset};
1546 $r->{header_field} = [];
1547
1548 require Whatpm::ContentType;
1549 ($r->{official_type}, $r->{media_type})
1550 = Whatpm::ContentType->get_sniffed_type
1551 (get_file_head => sub {
1552 return substr $r->{s}, 0, shift;
1553 },
1554 http_content_type_byte => undef,
1555 has_http_content_encoding => 0,
1556 supported_image_types => {});
1557 }
1558
1559 my $input_format = $http->get_parameter ('i');
1560 if (defined $input_format and length $input_format) {
1561 $r->{media_type_overridden}
1562 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1563 $r->{media_type} = $input_format;
1564 }
1565 if (defined $r->{s} and not defined $r->{media_type}) {
1566 $r->{media_type} = 'text/html';
1567 $r->{media_type_overridden} = 1;
1568 }
1569
1570 if ($r->{media_type} eq 'text/xml') {
1571 unless (defined $r->{charset}) {
1572 $r->{charset} = 'us-ascii';
1573 $r->{official_charset} = $r->{charset};
1574 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1575 $r->{charset_overridden} = 0;
1576 }
1577 }
1578
1579 if (length $r->{s} > 1000_000) {
1580 $r->{error_status_text} = 'Entity-body too large';
1581 delete $r->{s};
1582 return $r;
1583 }
1584
1585 $r->{inner_html_element} = $http->get_parameter ('e');
1586
1587 return $r;
1588 } # get_input_document
1589
1590 package WDCC::LWPUA;
1591 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1592
1593 sub redirect_ok {
1594 my $ua = shift;
1595 unless ($ua->SUPER::redirect_ok (@_)) {
1596 return 0;
1597 }
1598
1599 my $uris = $_[1]->header ('Location');
1600 return 0 unless $uris;
1601 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1602 unless ({
1603 http => 1,
1604 }->{lc $uri->uri_scheme}) {
1605 return 0;
1606 }
1607 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1608 return 0;
1609 }
1610 return 1;
1611 } # redirect_ok
1612
1613 =head1 AUTHOR
1614
1615 Wakaba <w@suika.fam.cx>.
1616
1617 =head1 LICENSE
1618
1619 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1620
1621 This library is free software; you can redistribute it
1622 and/or modify it under the same terms as Perl itself.
1623
1624 =cut
1625
1626 ## $Date: 2008/05/06 07:50:28 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24