/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.49 - (show annotations) (download)
Tue May 6 07:50:28 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.48: +3 -2 lines
++ ChangeLog	6 May 2008 07:50:23 -0000
2008-05-06  Wakaba  <wakaba@suika.fam.cx>

	* table-script.js: Support for header cell highlighting.

	* table.cgi: Set |id| to cells; it enables the cell highlighting
	feature.

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89
90 <script src="../cc-script.js"></script>
91 ];
92
93 $input->{id_prefix} = '';
94 #$input->{nested} = 0;
95 my $result = {conforming_min => 1, conforming_max => 1};
96 check_and_print ($input => $result);
97 print_result_section ($result);
98 } else {
99 print STDOUT qq[</dl></div>];
100 print_result_input_error_section ($input);
101 }
102
103 print STDOUT qq[
104 <ul class="navigation" id="nav-items">
105 ];
106 for (@nav) {
107 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108 }
109 print STDOUT qq[
110 </ul>
111 </body>
112 </html>
113 ];
114
115 for (qw/decode parse parse_html parse_xml parse_manifest
116 check check_manifest/) {
117 next unless defined $time{$_};
118 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119 print $file $char_length, "\t", $time{$_}, "\n";
120 }
121
122 exit;
123 }
124
125 sub add_error ($$$) {
126 my ($layer, $err, $result) = @_;
127 if (defined $err->{level}) {
128 if ($err->{level} eq 's') {
129 $result->{$layer}->{should}++;
130 $result->{$layer}->{score_min} -= 2;
131 $result->{conforming_min} = 0;
132 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133 $result->{$layer}->{warning}++;
134 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 $result->{$layer}->{unsupported}++;
136 $result->{unsupported} = 1;
137 } elsif ($err->{level} eq 'i') {
138 #
139 } else {
140 $result->{$layer}->{must}++;
141 $result->{$layer}->{score_max} -= 2;
142 $result->{$layer}->{score_min} -= 2;
143 $result->{conforming_min} = 0;
144 $result->{conforming_max} = 0;
145 }
146 } else {
147 $result->{$layer}->{must}++;
148 $result->{$layer}->{score_max} -= 2;
149 $result->{$layer}->{score_min} -= 2;
150 $result->{conforming_min} = 0;
151 $result->{conforming_max} = 0;
152 }
153 } # add_error
154
155 sub check_and_print ($$) {
156 my ($input, $result) = @_;
157
158 print_http_header_section ($input, $result);
159
160 my $doc;
161 my $el;
162 my $cssom;
163 my $manifest;
164 my @subdoc;
165
166 if ($input->{media_type} eq 'text/html') {
167 ($doc, $el) = print_syntax_error_html_section ($input, $result);
168 print_source_string_section
169 ($input,
170 \($input->{s}),
171 $input->{charset} || $doc->input_encoding);
172 } elsif ({
173 'text/xml' => 1,
174 'application/atom+xml' => 1,
175 'application/rss+xml' => 1,
176 'image/svg+xml' => 1,
177 'application/xhtml+xml' => 1,
178 'application/xml' => 1,
179 ## TODO: Should we make all XML MIME Types fall
180 ## into this category?
181
182 'application/rdf+xml' => 1, ## NOTE: This type has different model.
183 }->{$input->{media_type}}) {
184 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
185 print_source_string_section ($input,
186 \($input->{s}),
187 $doc->input_encoding);
188 } elsif ($input->{media_type} eq 'text/css') {
189 $cssom = print_syntax_error_css_section ($input, $result);
190 print_source_string_section
191 ($input, \($input->{s}),
192 $cssom->manakai_input_encoding);
193 } elsif ($input->{media_type} eq 'text/cache-manifest') {
194 ## TODO: MUST be text/cache-manifest
195 $manifest = print_syntax_error_manifest_section ($input, $result);
196 print_source_string_section ($input, \($input->{s}),
197 'utf-8');
198 } else {
199 ## TODO: Change HTTP status code??
200 print_result_unknown_type_section ($input, $result);
201 }
202
203 if (defined $doc or defined $el) {
204 $doc->document_uri ($input->{uri});
205 $doc->manakai_entity_base_uri ($input->{base_uri});
206 print_structure_dump_dom_section ($input, $doc, $el);
207 my $elements = print_structure_error_dom_section
208 ($input, $doc, $el, $result, sub {
209 push @subdoc, shift;
210 });
211 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
212 print_listing_section ({
213 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
214 }, $input, $elements->{id}) if keys %{$elements->{id}};
215 print_listing_section ({
216 id => 'terms', label => 'Terms', heading => 'Terms',
217 }, $input, $elements->{term}) if keys %{$elements->{term}};
218 print_listing_section ({
219 id => 'classes', label => 'Classes', heading => 'Classes',
220 }, $input, $elements->{class}) if keys %{$elements->{class}};
221 print_uri_section ($input, $elements->{uri}) if keys %{$elements->{uri}};
222 print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
223 } elsif (defined $cssom) {
224 print_structure_dump_cssom_section ($input, $cssom);
225 ## TODO: CSSOM validation
226 add_error ('structure', {level => 'u'} => $result);
227 } elsif (defined $manifest) {
228 print_structure_dump_manifest_section ($input, $manifest);
229 print_structure_error_manifest_section ($input, $manifest, $result);
230 }
231
232 my $id_prefix = 0;
233 for my $subinput (@subdoc) {
234 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
235 $subinput->{nested} = 1;
236 $subinput->{base_uri} = $subinput->{container_node}->base_uri
237 unless defined $subinput->{base_uri};
238 my $ebaseuri = htescape ($subinput->{base_uri});
239 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
240 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
241 <h2>Subdocument #$id_prefix</h2>
242
243 <dl>
244 <dt>Internet Media Type</dt>
245 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
246 <dt>Container Node</dt>
247 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
248 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
249 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
250 </dl>];
251
252 $subinput->{id_prefix} .= '-';
253 check_and_print ($subinput => $result);
254
255 print STDOUT qq[</div>];
256 }
257 } # check_and_print
258
259 sub print_http_header_section ($$) {
260 my ($input, $result) = @_;
261 return unless defined $input->{header_status_code} or
262 defined $input->{header_status_text} or
263 @{$input->{header_field} or []};
264
265 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
266 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
267 <h2>HTTP Header</h2>
268
269 <p><strong>Note</strong>: Due to the limitation of the
270 network library in use, the content of this section might
271 not be the real header.</p>
272
273 <table><tbody>
274 ];
275
276 if (defined $input->{header_status_code}) {
277 print STDOUT qq[<tr><th scope="row">Status code</th>];
278 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
279 }
280 if (defined $input->{header_status_text}) {
281 print STDOUT qq[<tr><th scope="row">Status text</th>];
282 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
283 }
284
285 for (@{$input->{header_field}}) {
286 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
287 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
288 }
289
290 print STDOUT qq[</tbody></table></div>];
291 } # print_http_header_section
292
293 sub print_syntax_error_html_section ($$) {
294 my ($input, $result) = @_;
295
296 require Encode;
297 require Whatpm::HTML;
298
299 print STDOUT qq[
300 <div id="$input->{id_prefix}parse-errors" class="section">
301 <h2>Parse Errors</h2>
302
303 <dl id="$input->{id_prefix}parse-errors-list">];
304 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
305
306 my $onerror = sub {
307 my (%opt) = @_;
308 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
309 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
310 qq[</dt>];
311 $type =~ tr/ /-/;
312 $type =~ s/\|/%7C/g;
313 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
314 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
315 print STDOUT qq[$msg</dd>\n];
316
317 add_error ('syntax', \%opt => $result);
318 };
319
320 my $doc = $dom->create_document;
321 my $el;
322 my $inner_html_element = $input->{inner_html_element};
323 if (defined $inner_html_element and length $inner_html_element) {
324 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
325 my $time1 = time;
326 my $t = \($input->{s});
327 unless ($input->{is_char_string}) {
328 $t = \(Encode::decode ($input->{charset}, $$t));
329 }
330 $time{decode} = time - $time1;
331
332 $el = $doc->create_element_ns
333 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
334 $time1 = time;
335 Whatpm::HTML->set_inner_html ($el, $$t, $onerror);
336 $time{parse} = time - $time1;
337 } else {
338 my $time1 = time;
339 if ($input->{is_char_string}) {
340 Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror);
341 } else {
342 Whatpm::HTML->parse_byte_string
343 ($input->{charset}, $input->{s} => $doc, $onerror);
344 }
345 $time{parse_html} = time - $time1;
346 }
347 $doc->manakai_charset ($input->{official_charset})
348 if defined $input->{official_charset};
349
350 print STDOUT qq[</dl></div>];
351
352 return ($doc, $el);
353 } # print_syntax_error_html_section
354
355 sub print_syntax_error_xml_section ($$) {
356 my ($input, $result) = @_;
357
358 require Message::DOM::XMLParserTemp;
359
360 print STDOUT qq[
361 <div id="$input->{id_prefix}parse-errors" class="section">
362 <h2>Parse Errors</h2>
363
364 <dl id="$input->{id_prefix}parse-errors-list">];
365 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
366
367 my $onerror = sub {
368 my $err = shift;
369 my $line = $err->location->line_number;
370 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
371 print STDOUT $err->location->column_number, "</dt><dd>";
372 print STDOUT htescape $err->text, "</dd>\n";
373
374 add_error ('syntax', {type => $err->text,
375 level => [
376 $err->SEVERITY_FATAL_ERROR => 'm',
377 $err->SEVERITY_ERROR => 'm',
378 $err->SEVERITY_WARNING => 's',
379 ]->[$err->severity]} => $result);
380
381 return 1;
382 };
383
384 my $t = \($input->{s});
385 if ($input->{is_char_string}) {
386 require Encode;
387 $t = \(Encode::encode ('utf8', $$t));
388 $input->{charset} = 'utf-8';
389 }
390
391 my $time1 = time;
392 open my $fh, '<', $t;
393 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
394 ($fh => $dom, $onerror, charset => $input->{charset});
395 $time{parse_xml} = time - $time1;
396 $doc->manakai_charset ($input->{official_charset})
397 if defined $input->{official_charset};
398
399 print STDOUT qq[</dl></div>];
400
401 return ($doc, undef);
402 } # print_syntax_error_xml_section
403
404 sub get_css_parser () {
405 our $CSSParser;
406 return $CSSParser if $CSSParser;
407
408 require Whatpm::CSS::Parser;
409 my $p = Whatpm::CSS::Parser->new;
410
411 $p->{prop}->{$_} = 1 for qw/
412 alignment-baseline
413 background background-attachment background-color background-image
414 background-position background-position-x background-position-y
415 background-repeat border border-bottom border-bottom-color
416 border-bottom-style border-bottom-width border-collapse border-color
417 border-left border-left-color
418 border-left-style border-left-width border-right border-right-color
419 border-right-style border-right-width
420 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
421 border-style border-top border-top-color border-top-style border-top-width
422 border-width bottom
423 caption-side clear clip color content counter-increment counter-reset
424 cursor direction display dominant-baseline empty-cells float font
425 font-family font-size font-size-adjust font-stretch
426 font-style font-variant font-weight height left
427 letter-spacing line-height
428 list-style list-style-image list-style-position list-style-type
429 margin margin-bottom margin-left margin-right margin-top marker-offset
430 marks max-height max-width min-height min-width opacity -moz-opacity
431 orphans outline outline-color outline-style outline-width overflow
432 overflow-x overflow-y
433 padding padding-bottom padding-left padding-right padding-top
434 page page-break-after page-break-before page-break-inside
435 position quotes right size table-layout
436 text-align text-anchor text-decoration text-indent text-transform
437 top unicode-bidi vertical-align visibility white-space width widows
438 word-spacing writing-mode z-index
439 /;
440 $p->{prop_value}->{display}->{$_} = 1 for qw/
441 block clip inline inline-block inline-table list-item none
442 table table-caption table-cell table-column table-column-group
443 table-header-group table-footer-group table-row table-row-group
444 compact marker
445 /;
446 $p->{prop_value}->{position}->{$_} = 1 for qw/
447 absolute fixed relative static
448 /;
449 $p->{prop_value}->{float}->{$_} = 1 for qw/
450 left right none
451 /;
452 $p->{prop_value}->{clear}->{$_} = 1 for qw/
453 left right none both
454 /;
455 $p->{prop_value}->{direction}->{ltr} = 1;
456 $p->{prop_value}->{direction}->{rtl} = 1;
457 $p->{prop_value}->{marks}->{crop} = 1;
458 $p->{prop_value}->{marks}->{cross} = 1;
459 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
460 normal bidi-override embed
461 /;
462 for my $prop_name (qw/overflow overflow-x overflow-y/) {
463 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
464 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
465 /;
466 }
467 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
468 visible hidden collapse
469 /;
470 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
471 disc circle square decimal decimal-leading-zero
472 lower-roman upper-roman lower-greek lower-latin
473 upper-latin armenian georgian lower-alpha upper-alpha none
474 hebrew cjk-ideographic hiragana katakana hiragana-iroha
475 katakana-iroha
476 /;
477 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
478 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
479 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
480 auto always avoid left right
481 /;
482 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
483 auto always avoid left right
484 /;
485 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
486 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
487 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
488 repeat repeat-x repeat-y no-repeat
489 /;
490 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
491 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
492 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
493 xx-small x-small small medium large x-large xx-large
494 -manakai-xxx-large -webkit-xxx-large
495 larger smaller
496 /;
497 $p->{prop_value}->{'font-style'}->{normal} = 1;
498 $p->{prop_value}->{'font-style'}->{italic} = 1;
499 $p->{prop_value}->{'font-style'}->{oblique} = 1;
500 $p->{prop_value}->{'font-variant'}->{normal} = 1;
501 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
502 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
503 qw/normal wider narrower ultra-condensed extra-condensed
504 condensed semi-condensed semi-expanded expanded
505 extra-expanded ultra-expanded/;
506 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
507 left right center justify begin end
508 /;
509 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
510 capitalize uppercase lowercase none
511 /;
512 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
513 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
514 /;
515 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
516 lr rl tb lr-tb rl-tb tb-rl
517 /;
518 $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
519 start middle end
520 /;
521 $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
522 auto use-script no-change reset-size ideographic alphabetic
523 hanging mathematical central middle text-after-edge text-before-edge
524 /;
525 $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
526 auto baseline before-edge text-before-edge middle central
527 after-edge text-after-edge ideographic alphabetic hanging
528 mathematical
529 /;
530 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
531 none blink underline overline line-through
532 /;
533 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
534 top bottom left right
535 /;
536 $p->{prop_value}->{'table-layout'}->{auto} = 1;
537 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
538 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
539 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
540 $p->{prop_value}->{'empty-cells'}->{show} = 1;
541 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
542 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
543 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
544 se-resize sw-resize s-resize w-resize text wait help progress
545 /;
546 for my $prop (qw/border-top-style border-left-style
547 border-bottom-style border-right-style outline-style/) {
548 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
549 none hidden dotted dashed solid double groove ridge inset outset
550 /;
551 }
552 for my $prop (qw/color background-color
553 border-bottom-color border-left-color border-right-color
554 border-top-color border-color/) {
555 $p->{prop_value}->{$prop}->{transparent} = 1;
556 $p->{prop_value}->{$prop}->{flavor} = 1;
557 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
558 }
559 $p->{prop_value}->{'outline-color'}->{invert} = 1;
560 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
561 $p->{pseudo_class}->{$_} = 1 for qw/
562 active checked disabled empty enabled first-child first-of-type
563 focus hover indeterminate last-child last-of-type link only-child
564 only-of-type root target visited
565 lang nth-child nth-last-child nth-of-type nth-last-of-type not
566 -manakai-contains -manakai-current
567 /;
568 $p->{pseudo_element}->{$_} = 1 for qw/
569 after before first-letter first-line
570 /;
571
572 return $CSSParser = $p;
573 } # get_css_parser
574
575 sub print_syntax_error_css_section ($$) {
576 my ($input, $result) = @_;
577
578 print STDOUT qq[
579 <div id="$input->{id_prefix}parse-errors" class="section">
580 <h2>Parse Errors</h2>
581
582 <dl id="$input->{id_prefix}parse-errors-list">];
583 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
584
585 my $p = get_css_parser ();
586 $p->init;
587 $p->{onerror} = sub {
588 my (%opt) = @_;
589 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
590 if ($opt{token}) {
591 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
592 } else {
593 print STDOUT qq[<dt class="$cls">Unknown location];
594 }
595 if (defined $opt{value}) {
596 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
597 } elsif (defined $opt{token}) {
598 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
599 }
600 $type =~ tr/ /-/;
601 $type =~ s/\|/%7C/g;
602 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
603 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
604 print STDOUT qq[$msg</dd>\n];
605
606 add_error ('syntax', \%opt => $result);
607 };
608 $p->{href} = $input->{uri};
609 $p->{base_uri} = $input->{base_uri};
610
611 # if ($parse_mode eq 'q') {
612 # $p->{unitless_px} = 1;
613 # $p->{hashless_color} = 1;
614 # }
615
616 ## TODO: Make $input->{s} a ref.
617
618 my $s = \$input->{s};
619 my $charset;
620 unless ($input->{is_char_string}) {
621 require Encode;
622 if (defined $input->{charset}) {## TODO: IANA->Perl
623 $charset = $input->{charset};
624 $s = \(Encode::decode ($input->{charset}, $$s));
625 } else {
626 ## TODO: charset detection
627 $s = \(Encode::decode ($charset = 'utf-8', $$s));
628 }
629 }
630
631 my $cssom = $p->parse_char_string ($$s);
632 $cssom->manakai_input_encoding ($charset) if defined $charset;
633
634 print STDOUT qq[</dl></div>];
635
636 return $cssom;
637 } # print_syntax_error_css_section
638
639 sub print_syntax_error_manifest_section ($$) {
640 my ($input, $result) = @_;
641
642 require Whatpm::CacheManifest;
643
644 print STDOUT qq[
645 <div id="$input->{id_prefix}parse-errors" class="section">
646 <h2>Parse Errors</h2>
647
648 <dl id="$input->{id_prefix}parse-errors-list">];
649 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
650
651 my $onerror = sub {
652 my (%opt) = @_;
653 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
654 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
655 qq[</dt>];
656 $type =~ tr/ /-/;
657 $type =~ s/\|/%7C/g;
658 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
659 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
660 print STDOUT qq[$msg</dd>\n];
661
662 add_error ('syntax', \%opt => $result);
663 };
664
665 my $m = $input->{is_char_string} ? 'parse_char_string' : 'parse_byte_string';
666 my $time1 = time;
667 my $manifest = Whatpm::CacheManifest->$m
668 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
669 $time{parse_manifest} = time - $time1;
670
671 print STDOUT qq[</dl></div>];
672
673 return $manifest;
674 } # print_syntax_error_manifest_section
675
676 sub print_source_string_section ($$$) {
677 my $input = shift;
678 my $s;
679 unless ($input->{is_char_string}) {
680 require Encode;
681 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
682 return unless $enc;
683
684 $s = \($enc->decode (${$_[0]}));
685 } else {
686 $s = $_[0];
687 }
688
689 my $i = 1;
690 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
691 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
692 <h2>Document Source</h2>
693 <ol lang="">\n];
694 if (length $$s) {
695 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
696 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
697 "</li>\n";
698 $i++;
699 }
700 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
701 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
702 "</li>\n";
703 }
704 } else {
705 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
706 }
707 print STDOUT "</ol></div>
708 <script>
709 addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
710 </script>";
711 } # print_input_string_section
712
713 sub print_document_tree ($$) {
714 my ($input, $node) = @_;
715
716 my $r = '<ol class="xoxo">';
717
718 my @node = ($node);
719 while (@node) {
720 my $child = shift @node;
721 unless (ref $child) {
722 $r .= $child;
723 next;
724 }
725
726 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
727 my $nt = $child->node_type;
728 if ($nt == $child->ELEMENT_NODE) {
729 my $child_nsuri = $child->namespace_uri;
730 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
731 '</code>'; ## ISSUE: case
732
733 if ($child->has_attributes) {
734 $r .= '<ul class="attributes">';
735 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
736 @{$child->attributes}) {
737 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
738 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
739 }
740 $r .= '</ul>';
741 }
742
743 if ($child->has_child_nodes) {
744 $r .= '<ol class="children">';
745 unshift @node, @{$child->child_nodes}, '</ol></li>';
746 } else {
747 $r .= '</li>';
748 }
749 } elsif ($nt == $child->TEXT_NODE) {
750 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
751 } elsif ($nt == $child->CDATA_SECTION_NODE) {
752 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
753 } elsif ($nt == $child->COMMENT_NODE) {
754 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
755 } elsif ($nt == $child->DOCUMENT_NODE) {
756 $r .= qq'<li id="$node_id" class="tree-document">Document';
757 $r .= qq[<ul class="attributes">];
758 my $cp = $child->manakai_charset;
759 if (defined $cp) {
760 $r .= qq[<li><code>charset</code> parameter = <code>];
761 $r .= htescape ($cp) . qq[</code></li>];
762 }
763 $r .= qq[<li><code>inputEncoding</code> = ];
764 my $ie = $child->input_encoding;
765 if (defined $ie) {
766 $r .= qq[<code>@{[htescape ($ie)]}</code>];
767 if ($child->manakai_has_bom) {
768 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
769 }
770 } else {
771 $r .= qq[(<code>null</code>)];
772 }
773 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
774 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
775 unless ($child->manakai_is_html) {
776 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
777 if (defined $child->xml_encoding) {
778 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
779 } else {
780 $r .= qq[<li>XML encoding = (null)</li>];
781 }
782 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
783 }
784 $r .= qq[</ul>];
785 if ($child->has_child_nodes) {
786 $r .= '<ol class="children">';
787 unshift @node, @{$child->child_nodes}, '</ol></li>';
788 }
789 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
790 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
791 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
792 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
793 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
794 $r .= '</ul></li>';
795 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
796 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
797 } else {
798 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
799 }
800 }
801
802 $r .= '</ol>';
803 print STDOUT $r;
804 } # print_document_tree
805
806 sub print_structure_dump_dom_section ($$$) {
807 my ($input, $doc, $el) = @_;
808
809 print STDOUT qq[
810 <div id="$input->{id_prefix}document-tree" class="section">
811 <h2>Document Tree</h2>
812 ];
813 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
814 unless $input->{nested};
815
816 print_document_tree ($input, $el || $doc);
817
818 print STDOUT qq[</div>];
819 } # print_structure_dump_dom_section
820
821 sub print_structure_dump_cssom_section ($$) {
822 my ($input, $cssom) = @_;
823
824 print STDOUT qq[
825 <div id="$input->{id_prefix}document-tree" class="section">
826 <h2>Document Tree</h2>
827 ];
828 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
829 unless $input->{nested};
830
831 ## TODO:
832 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
833
834 print STDOUT qq[</div>];
835 } # print_structure_dump_cssom_section
836
837 sub print_structure_dump_manifest_section ($$) {
838 my ($input, $manifest) = @_;
839
840 print STDOUT qq[
841 <div id="$input->{id_prefix}dump-manifest" class="section">
842 <h2>Cache Manifest</h2>
843 ];
844 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
845 unless $input->{nested};
846
847 print STDOUT qq[<dl><dt>Explicit entries</dt>];
848 my $i = 0;
849 for my $uri (@{$manifest->[0]}) {
850 my $euri = htescape ($uri);
851 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
852 }
853
854 print STDOUT qq[<dt>Fallback entries</dt><dd>
855 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
856 <th scope=row>Fallback Entry</tr><tbody>];
857 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
858 my $euri = htescape ($uri);
859 my $euri2 = htescape ($manifest->[1]->{$uri});
860 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
861 <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
862 }
863
864 print STDOUT qq[</table><dt>Online whitelist</dt>];
865 for my $uri (@{$manifest->[2]}) {
866 my $euri = htescape ($uri);
867 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
868 }
869
870 print STDOUT qq[</dl></div>];
871 } # print_structure_dump_manifest_section
872
873 sub print_structure_error_dom_section ($$$$$) {
874 my ($input, $doc, $el, $result, $onsubdoc) = @_;
875
876 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
877 <h2>Document Errors</h2>
878
879 <dl id=document-errors-list>];
880 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
881 unless $input->{nested};
882
883 require Whatpm::ContentChecker;
884 my $onerror = sub {
885 my %opt = @_;
886 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
887 $type =~ tr/ /-/;
888 $type =~ s/\|/%7C/g;
889 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
890 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
891 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
892 print STDOUT $msg, "</dd>\n";
893 add_error ('structure', \%opt => $result);
894 };
895
896 my $elements;
897 my $time1 = time;
898 if ($el) {
899 $elements = Whatpm::ContentChecker->check_element
900 ($el, $onerror, $onsubdoc);
901 } else {
902 $elements = Whatpm::ContentChecker->check_document
903 ($doc, $onerror, $onsubdoc);
904 }
905 $time{check} = time - $time1;
906
907 print STDOUT qq[</dl>
908 <script>
909 addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
910 </script></div>];
911
912 return $elements;
913 } # print_structure_error_dom_section
914
915 sub print_structure_error_manifest_section ($$$) {
916 my ($input, $manifest, $result) = @_;
917
918 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
919 <h2>Document Errors</h2>
920
921 <dl>];
922 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
923 unless $input->{nested};
924
925 require Whatpm::CacheManifest;
926 Whatpm::CacheManifest->check_manifest ($manifest, sub {
927 my %opt = @_;
928 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
929 $type =~ tr/ /-/;
930 $type =~ s/\|/%7C/g;
931 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
932 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
933 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
934 add_error ('structure', \%opt => $result);
935 });
936
937 print STDOUT qq[</div>];
938 } # print_structure_error_manifest_section
939
940 sub print_table_section ($$) {
941 my ($input, $tables) = @_;
942
943 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
944 unless $input->{nested};
945 print STDOUT qq[
946 <div id="$input->{id_prefix}tables" class="section">
947 <h2>Tables</h2>
948
949 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
950 <script src="../table-script.js" type="text/javascript"></script>
951 <noscript>
952 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
953 </noscript>
954 ];
955
956 require JSON;
957
958 my $i = 0;
959 for my $table_el (@$tables) {
960 $i++;
961 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
962 get_node_link ($input, $table_el) . q[</h3>];
963
964 ## TODO: Make |ContentChecker| return |form_table| result
965 ## so that this script don't have to run the algorithm twice.
966 my $table = Whatpm::HTMLTable->form_table ($table_el);
967
968 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption},
969 @{$table->{row}}) {
970 next unless $_;
971 delete $_->{element};
972 }
973
974 for (@{$table->{row_group}}) {
975 next unless $_;
976 next unless $_->{element};
977 $_->{type} = $_->{element}->manakai_local_name;
978 delete $_->{element};
979 }
980
981 for (@{$table->{cell}}) {
982 next unless $_;
983 for (@{$_}) {
984 next unless $_;
985 for (@$_) {
986 $_->{id} = refaddr $_->{element} if defined $_->{element};
987 delete $_->{element};
988 $_->{is_header} = $_->{is_header} ? 1 : 0;
989 }
990 }
991 }
992
993 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
994 print STDOUT JSON::objToJson ($table);
995 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
996 print STDOUT qq[, '$input->{id_prefix}');</script>];
997 }
998
999 print STDOUT qq[</div>];
1000 } # print_table_section
1001
1002 sub print_listing_section ($$$) {
1003 my ($opt, $input, $ids) = @_;
1004
1005 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
1006 unless $input->{nested};
1007 print STDOUT qq[
1008 <div id="$input->{id_prefix}$opt->{id}" class="section">
1009 <h2>$opt->{heading}</h2>
1010
1011 <dl>
1012 ];
1013 for my $id (sort {$a cmp $b} keys %$ids) {
1014 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
1015 for (@{$ids->{$id}}) {
1016 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
1017 }
1018 }
1019 print STDOUT qq[</dl></div>];
1020 } # print_listing_section
1021
1022 sub print_uri_section ($$$) {
1023 my ($input, $uris) = @_;
1024
1025 ## NOTE: URIs contained in the DOM (i.e. in HTML or XML documents),
1026 ## except for those in RDF triples.
1027 ## TODO: URIs in CSS
1028
1029 push @nav, ['#' . $input->{id_prefix} . 'uris' => 'URIs']
1030 unless $input->{nested};
1031 print STDOUT qq[
1032 <div id="$input->{id_prefix}uris" class="section">
1033 <h2>URIs</h2>
1034
1035 <dl>];
1036 for my $uri (sort {$a cmp $b} keys %$uris) {
1037 my $euri = htescape ($uri);
1038 print STDOUT qq[<dt><code class=uri>&lt;<a href="$euri">$euri</a>></code>];
1039 my $eccuri = htescape (get_cc_uri ($uri));
1040 print STDOUT qq[<dd><a href="$eccuri">Check conformance of this document</a>];
1041 print STDOUT qq[<dd>Found at: <ul>];
1042 for my $entry (@{$uris->{$uri}}) {
1043 print STDOUT qq[<li>], get_node_link ($input, $entry->{node});
1044 if (keys %{$entry->{type} or {}}) {
1045 print STDOUT ' (';
1046 print STDOUT join ', ', map {
1047 {
1048 hyperlink => 'Hyperlink',
1049 resource => 'Link to an external resource',
1050 namespace => 'Namespace URI',
1051 cite => 'Citation or link to a long description',
1052 embedded => 'Link to an embedded content',
1053 base => 'Base URI',
1054 action => 'Submission URI',
1055 }->{$_}
1056 or
1057 htescape ($_)
1058 } keys %{$entry->{type}};
1059 print STDOUT ')';
1060 }
1061 }
1062 print STDOUT qq[</ul>];
1063 }
1064 print STDOUT qq[</dl></div>];
1065 } # print_uri_section
1066
1067 sub print_rdf_section ($$$) {
1068 my ($input, $rdfs) = @_;
1069
1070 push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
1071 unless $input->{nested};
1072 print STDOUT qq[
1073 <div id="$input->{id_prefix}rdf" class="section">
1074 <h2>RDF Triples</h2>
1075
1076 <dl>];
1077 my $i = 0;
1078 for my $rdf (@$rdfs) {
1079 print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
1080 print STDOUT get_node_link ($input, $rdf->[0]);
1081 print STDOUT qq[<dd><dl>];
1082 for my $triple (@{$rdf->[1]}) {
1083 print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
1084 print STDOUT get_rdf_resource_html ($triple->[1]);
1085 print STDOUT ' ';
1086 print STDOUT get_rdf_resource_html ($triple->[2]);
1087 print STDOUT ' ';
1088 print STDOUT get_rdf_resource_html ($triple->[3]);
1089 }
1090 print STDOUT qq[</dl>];
1091 }
1092 print STDOUT qq[</dl></div>];
1093 } # print_rdf_section
1094
1095 sub get_rdf_resource_html ($) {
1096 my $resource = shift;
1097 if (defined $resource->{uri}) {
1098 my $euri = htescape ($resource->{uri});
1099 return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1100 '</a>></code>';
1101 } elsif (defined $resource->{bnodeid}) {
1102 return htescape ('_:' . $resource->{bnodeid});
1103 } elsif ($resource->{nodes}) {
1104 return '(rdf:XMLLiteral)';
1105 } elsif (defined $resource->{value}) {
1106 my $elang = htescape (defined $resource->{language}
1107 ? $resource->{language} : '');
1108 my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
1109 if (defined $resource->{datatype}) {
1110 my $euri = htescape ($resource->{datatype});
1111 $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1112 '</a>></code>';
1113 } elsif (length $resource->{language}) {
1114 $r .= '@' . htescape ($resource->{language});
1115 }
1116 return $r;
1117 } else {
1118 return '??';
1119 }
1120 } # get_rdf_resource_html
1121
1122 sub print_result_section ($) {
1123 my $result = shift;
1124
1125 print STDOUT qq[
1126 <div id="result-summary" class="section">
1127 <h2>Result</h2>];
1128
1129 if ($result->{unsupported} and $result->{conforming_max}) {
1130 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1131 checker cannot decide whether the document is conforming or
1132 not, since the document contains one or more unsupported
1133 features. The document might or might not be conforming.</p>];
1134 } elsif ($result->{conforming_min}) {
1135 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1136 found in this document.</p>];
1137 } elsif ($result->{conforming_max}) {
1138 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1139 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1140 it might be conforming.</p>];
1141 } else {
1142 print STDOUT qq[<p class=FAIL id=result-para>This document is
1143 <strong><em>non</em>-conforming</strong>.</p>];
1144 }
1145
1146 print STDOUT qq[<table>
1147 <colgroup><col><colgroup><col><col><col><colgroup><col>
1148 <thead>
1149 <tr><th scope=col></th>
1150 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1151 Errors</a></th>
1152 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1153 Errors</a></th>
1154 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1155 <th scope=col>Score</th></tr></thead><tbody>];
1156
1157 my $must_error = 0;
1158 my $should_error = 0;
1159 my $warning = 0;
1160 my $score_min = 0;
1161 my $score_max = 0;
1162 my $score_base = 20;
1163 my $score_unit = $score_base / 100;
1164 for (
1165 [Transfer => 'transfer', ''],
1166 [Character => 'char', ''],
1167 [Syntax => 'syntax', '#parse-errors'],
1168 [Structure => 'structure', '#document-errors'],
1169 ) {
1170 $must_error += ($result->{$_->[1]}->{must} += 0);
1171 $should_error += ($result->{$_->[1]}->{should} += 0);
1172 $warning += ($result->{$_->[1]}->{warning} += 0);
1173 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1174 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1175
1176 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1177 my $label = $_->[0];
1178 if ($result->{$_->[1]}->{must} or
1179 $result->{$_->[1]}->{should} or
1180 $result->{$_->[1]}->{warning} or
1181 $result->{$_->[1]}->{unsupported}) {
1182 $label = qq[<a href="$_->[2]">$label</a>];
1183 }
1184
1185 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1186 if ($uncertain) {
1187 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1188 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1189 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1190 } else {
1191 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1192 }
1193 }
1194
1195 $score_max += $score_base;
1196
1197 print STDOUT qq[
1198 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1199 </tbody>
1200 <tfoot><tr class=uncertain><th scope=row>Total</th>
1201 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1202 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1203 <td>$warning?</td>
1204 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1205 </table>
1206
1207 <p><strong>Important</strong>: This conformance checking service
1208 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1209 </div>];
1210 push @nav, ['#result-summary' => 'Result'];
1211 } # print_result_section
1212
1213 sub print_result_unknown_type_section ($$) {
1214 my ($input, $result) = @_;
1215
1216 my $euri = htescape ($input->{uri});
1217 print STDOUT qq[
1218 <div id="$input->{id_prefix}parse-errors" class="section">
1219 <h2>Errors</h2>
1220
1221 <dl>
1222 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1223 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1224 supported</a></strong>:
1225 Media type
1226 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1227 is not supported.</dd>
1228 </dl>
1229 </div>
1230 ];
1231 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1232 unless $input->{nested};
1233 add_error (char => {level => 'u'} => $result);
1234 add_error (syntax => {level => 'u'} => $result);
1235 add_error (structure => {level => 'u'} => $result);
1236 } # print_result_unknown_type_section
1237
1238 sub print_result_input_error_section ($) {
1239 my $input = shift;
1240 print STDOUT qq[<div class="section" id="result-summary">
1241 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1242 </div>];
1243 push @nav, ['#result-summary' => 'Result'];
1244 } # print_result_input_error_section
1245
1246 sub get_error_label ($$) {
1247 my ($input, $err) = @_;
1248
1249 my $r = '';
1250
1251 my $line;
1252 my $column;
1253
1254 if (defined $err->{node}) {
1255 $line = $err->{node}->get_user_data ('manakai_source_line');
1256 if (defined $line) {
1257 $column = $err->{node}->get_user_data ('manakai_source_column');
1258 } else {
1259 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1260 my $owner = $err->{node}->owner_element;
1261 $line = $owner->get_user_data ('manakai_source_line');
1262 $column = $owner->get_user_data ('manakai_source_column');
1263 } else {
1264 my $parent = $err->{node}->parent_node;
1265 if ($parent) {
1266 $line = $parent->get_user_data ('manakai_source_line');
1267 $column = $parent->get_user_data ('manakai_source_column');
1268 }
1269 }
1270 }
1271 }
1272 unless (defined $line) {
1273 if (defined $err->{token} and defined $err->{token}->{line}) {
1274 $line = $err->{token}->{line};
1275 $column = $err->{token}->{column};
1276 } elsif (defined $err->{line}) {
1277 $line = $err->{line};
1278 $column = $err->{column};
1279 }
1280 }
1281
1282 if (defined $line) {
1283 if (defined $column and $column > 0) {
1284 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1285 } else {
1286 $line = $line - 1 || 1;
1287 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1288 }
1289 }
1290
1291 if (defined $err->{node}) {
1292 $r .= ' ' if length $r;
1293 $r .= get_node_link ($input, $err->{node});
1294 }
1295
1296 if (defined $err->{index}) {
1297 if (length $r) {
1298 $r .= ', Index ' . (0+$err->{index});
1299 } else {
1300 $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1301 . (0+$err->{index}) . '</a>';
1302 }
1303 }
1304
1305 if (defined $err->{value}) {
1306 $r .= ' ' if length $r;
1307 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1308 }
1309
1310 return $r;
1311 } # get_error_label
1312
1313 sub get_error_level_label ($) {
1314 my $err = shift;
1315
1316 my $r = '';
1317
1318 if (not defined $err->{level} or $err->{level} eq 'm') {
1319 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1320 error</a></strong>: ];
1321 } elsif ($err->{level} eq 's') {
1322 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1323 error</a></strong>: ];
1324 } elsif ($err->{level} eq 'w') {
1325 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1326 ];
1327 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1328 $r = qq[<strong><a href="../error-description#level-u">Not
1329 supported</a></strong>: ];
1330 } elsif ($err->{level} eq 'i') {
1331 $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1332 } else {
1333 my $elevel = htescape ($err->{level});
1334 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1335 ];
1336 }
1337
1338 return $r;
1339 } # get_error_level_label
1340
1341 sub get_node_path ($) {
1342 my $node = shift;
1343 my @r;
1344 while (defined $node) {
1345 my $rs;
1346 if ($node->node_type == 1) {
1347 $rs = $node->node_name;
1348 $node = $node->parent_node;
1349 } elsif ($node->node_type == 2) {
1350 $rs = '@' . $node->node_name;
1351 $node = $node->owner_element;
1352 } elsif ($node->node_type == 3) {
1353 $rs = '"' . $node->data . '"';
1354 $node = $node->parent_node;
1355 } elsif ($node->node_type == 9) {
1356 @r = ('') unless @r;
1357 $rs = '';
1358 $node = $node->parent_node;
1359 } else {
1360 $rs = '#' . $node->node_type;
1361 $node = $node->parent_node;
1362 }
1363 unshift @r, $rs;
1364 }
1365 return join '/', @r;
1366 } # get_node_path
1367
1368 sub get_node_link ($$) {
1369 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1370 htescape (get_node_path ($_[1])) . qq[</a>];
1371 } # get_node_link
1372
1373 {
1374 my $Msg = {};
1375
1376 sub load_text_catalog ($) {
1377 my $lang = shift; # MUST be a canonical lang name
1378 open my $file, '<:utf8', "cc-msg.$lang.txt"
1379 or die "$0: cc-msg.$lang.txt: $!";
1380 while (<$file>) {
1381 if (s/^([^;]+);([^;]*);//) {
1382 my ($type, $cls, $msg) = ($1, $2, $_);
1383 $msg =~ tr/\x0D\x0A//d;
1384 $Msg->{$type} = [$cls, $msg];
1385 }
1386 }
1387 } # load_text_catalog
1388
1389 sub get_text ($) {
1390 my ($type, $level, $node) = @_;
1391 $type = $level . ':' . $type if defined $level;
1392 $level = 'm' unless defined $level;
1393 my @arg;
1394 {
1395 if (defined $Msg->{$type}) {
1396 my $msg = $Msg->{$type}->[1];
1397 $msg =~ s{<var>\$([0-9]+)</var>}{
1398 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1399 }ge;
1400 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1401 UNIVERSAL::can ($node, 'get_attribute_ns')
1402 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1403 }ge;
1404 $msg =~ s{<var>{\@}</var>}{
1405 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1406 }ge;
1407 $msg =~ s{<var>{local-name}</var>}{
1408 UNIVERSAL::can ($node, 'manakai_local_name')
1409 ? htescape ($node->manakai_local_name) : ''
1410 }ge;
1411 $msg =~ s{<var>{element-local-name}</var>}{
1412 (UNIVERSAL::can ($node, 'owner_element') and
1413 $node->owner_element)
1414 ? htescape ($node->owner_element->manakai_local_name)
1415 : ''
1416 }ge;
1417 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1418 } elsif ($type =~ s/:([^:]*)$//) {
1419 unshift @arg, $1;
1420 redo;
1421 }
1422 }
1423 return ($type, 'level-'.$level, htescape ($_[0]));
1424 } # get_text
1425
1426 }
1427
1428 sub encode_uri_component ($) {
1429 require Encode;
1430 my $s = Encode::encode ('utf8', shift);
1431 $s =~ s/([^0-9A-Za-z_.~-])/sprintf '%%%02X', ord $1/ge;
1432 return $s;
1433 } # encode_uri_component
1434
1435 sub get_cc_uri ($) {
1436 return './?uri=' . encode_uri_component ($_[0]);
1437 } # get_cc_uri
1438
1439 sub get_input_document ($$) {
1440 my ($http, $dom) = @_;
1441
1442 my $request_uri = $http->get_parameter ('uri');
1443 my $r = {};
1444 if (defined $request_uri and length $request_uri) {
1445 my $uri = $dom->create_uri_reference ($request_uri);
1446 unless ({
1447 http => 1,
1448 }->{lc $uri->uri_scheme}) {
1449 return {uri => $request_uri, request_uri => $request_uri,
1450 error_status_text => 'URI scheme not allowed'};
1451 }
1452
1453 require Message::Util::HostPermit;
1454 my $host_permit = new Message::Util::HostPermit;
1455 $host_permit->add_rule (<<EOH);
1456 Allow host=suika port=80
1457 Deny host=suika
1458 Allow host=suika.fam.cx port=80
1459 Deny host=suika.fam.cx
1460 Deny host=localhost
1461 Deny host=*.localdomain
1462 Deny ipv4=0.0.0.0/8
1463 Deny ipv4=10.0.0.0/8
1464 Deny ipv4=127.0.0.0/8
1465 Deny ipv4=169.254.0.0/16
1466 Deny ipv4=172.0.0.0/11
1467 Deny ipv4=192.0.2.0/24
1468 Deny ipv4=192.88.99.0/24
1469 Deny ipv4=192.168.0.0/16
1470 Deny ipv4=198.18.0.0/15
1471 Deny ipv4=224.0.0.0/4
1472 Deny ipv4=255.255.255.255/32
1473 Deny ipv6=0::0/0
1474 Allow host=*
1475 EOH
1476 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1477 return {uri => $request_uri, request_uri => $request_uri,
1478 error_status_text => 'Connection to the host is forbidden'};
1479 }
1480
1481 require LWP::UserAgent;
1482 my $ua = WDCC::LWPUA->new;
1483 $ua->{wdcc_dom} = $dom;
1484 $ua->{wdcc_host_permit} = $host_permit;
1485 $ua->agent ('Mozilla'); ## TODO: for now.
1486 $ua->parse_head (0);
1487 $ua->protocols_allowed ([qw/http/]);
1488 $ua->max_size (1000_000);
1489 my $req = HTTP::Request->new (GET => $request_uri);
1490 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1491 my $res = $ua->request ($req);
1492 ## TODO: 401 sets |is_success| true.
1493 if ($res->is_success or $http->get_parameter ('error-page')) {
1494 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1495 $r->{uri} = $res->request->uri;
1496 $r->{request_uri} = $request_uri;
1497
1498 ## TODO: More strict parsing...
1499 my $ct = $res->header ('Content-Type');
1500 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1501 $r->{charset} = lc $1;
1502 $r->{charset} =~ tr/\\//d;
1503 $r->{official_charset} = $r->{charset};
1504 }
1505
1506 my $input_charset = $http->get_parameter ('charset');
1507 if (defined $input_charset and length $input_charset) {
1508 $r->{charset_overridden}
1509 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1510 $r->{charset} = $input_charset;
1511 }
1512
1513 ## TODO: Support for HTTP Content-Encoding
1514
1515 $r->{s} = ''.$res->content;
1516
1517 require Whatpm::ContentType;
1518 ($r->{official_type}, $r->{media_type})
1519 = Whatpm::ContentType->get_sniffed_type
1520 (get_file_head => sub {
1521 return substr $r->{s}, 0, shift;
1522 },
1523 http_content_type_byte => $ct,
1524 has_http_content_encoding =>
1525 defined $res->header ('Content-Encoding'),
1526 supported_image_types => {});
1527 } else {
1528 $r->{uri} = $res->request->uri;
1529 $r->{request_uri} = $request_uri;
1530 $r->{error_status_text} = $res->status_line;
1531 }
1532
1533 $r->{header_field} = [];
1534 $res->scan (sub {
1535 push @{$r->{header_field}}, [$_[0], $_[1]];
1536 });
1537 $r->{header_status_code} = $res->code;
1538 $r->{header_status_text} = $res->message;
1539 } else {
1540 $r->{s} = ''.$http->get_parameter ('s');
1541 $r->{uri} = q<thismessage:/>;
1542 $r->{request_uri} = q<thismessage:/>;
1543 $r->{base_uri} = q<thismessage:/>;
1544 $r->{charset} = ''.$http->get_parameter ('_charset_');
1545 $r->{charset} =~ s/\s+//g;
1546 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1547 $r->{official_charset} = $r->{charset};
1548 $r->{header_field} = [];
1549
1550 require Whatpm::ContentType;
1551 ($r->{official_type}, $r->{media_type})
1552 = Whatpm::ContentType->get_sniffed_type
1553 (get_file_head => sub {
1554 return substr $r->{s}, 0, shift;
1555 },
1556 http_content_type_byte => undef,
1557 has_http_content_encoding => 0,
1558 supported_image_types => {});
1559 }
1560
1561 my $input_format = $http->get_parameter ('i');
1562 if (defined $input_format and length $input_format) {
1563 $r->{media_type_overridden}
1564 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1565 $r->{media_type} = $input_format;
1566 }
1567 if (defined $r->{s} and not defined $r->{media_type}) {
1568 $r->{media_type} = 'text/html';
1569 $r->{media_type_overridden} = 1;
1570 }
1571
1572 if ($r->{media_type} eq 'text/xml') {
1573 unless (defined $r->{charset}) {
1574 $r->{charset} = 'us-ascii';
1575 $r->{official_charset} = $r->{charset};
1576 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1577 $r->{charset_overridden} = 0;
1578 }
1579 }
1580
1581 if (length $r->{s} > 1000_000) {
1582 $r->{error_status_text} = 'Entity-body too large';
1583 delete $r->{s};
1584 return $r;
1585 }
1586
1587 $r->{inner_html_element} = $http->get_parameter ('e');
1588
1589 return $r;
1590 } # get_input_document
1591
1592 package WDCC::LWPUA;
1593 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1594
1595 sub redirect_ok {
1596 my $ua = shift;
1597 unless ($ua->SUPER::redirect_ok (@_)) {
1598 return 0;
1599 }
1600
1601 my $uris = $_[1]->header ('Location');
1602 return 0 unless $uris;
1603 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1604 unless ({
1605 http => 1,
1606 }->{lc $uri->uri_scheme}) {
1607 return 0;
1608 }
1609 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1610 return 0;
1611 }
1612 return 1;
1613 } # redirect_ok
1614
1615 =head1 AUTHOR
1616
1617 Wakaba <w@suika.fam.cx>.
1618
1619 =head1 LICENSE
1620
1621 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1622
1623 This library is free software; you can redistribute it
1624 and/or modify it under the same terms as Perl itself.
1625
1626 =cut
1627
1628 ## $Date: 2008/04/12 15:57:56 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24