/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.48 - (show annotations) (download)
Sat Apr 12 15:57:56 2008 UTC (16 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.47: +79 -7 lines
++ ChangeLog	12 Apr 2008 15:57:44 -0000
2008-04-12  Wakaba  <wakaba@suika.fam.cx>

	* parser-manakai.cgi, parser-manakai-interface.en.html: The |innerHTML|
	output mode is split into "|innerHTML| (HTML)" and "|innerHTML| (XML)"
	output modes.

2008-03-29  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi: New "URI" section is implemented.

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89
90 <script src="../cc-script.js"></script>
91 ];
92
93 $input->{id_prefix} = '';
94 #$input->{nested} = 0;
95 my $result = {conforming_min => 1, conforming_max => 1};
96 check_and_print ($input => $result);
97 print_result_section ($result);
98 } else {
99 print STDOUT qq[</dl></div>];
100 print_result_input_error_section ($input);
101 }
102
103 print STDOUT qq[
104 <ul class="navigation" id="nav-items">
105 ];
106 for (@nav) {
107 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108 }
109 print STDOUT qq[
110 </ul>
111 </body>
112 </html>
113 ];
114
115 for (qw/decode parse parse_html parse_xml parse_manifest
116 check check_manifest/) {
117 next unless defined $time{$_};
118 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119 print $file $char_length, "\t", $time{$_}, "\n";
120 }
121
122 exit;
123 }
124
125 sub add_error ($$$) {
126 my ($layer, $err, $result) = @_;
127 if (defined $err->{level}) {
128 if ($err->{level} eq 's') {
129 $result->{$layer}->{should}++;
130 $result->{$layer}->{score_min} -= 2;
131 $result->{conforming_min} = 0;
132 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133 $result->{$layer}->{warning}++;
134 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 $result->{$layer}->{unsupported}++;
136 $result->{unsupported} = 1;
137 } elsif ($err->{level} eq 'i') {
138 #
139 } else {
140 $result->{$layer}->{must}++;
141 $result->{$layer}->{score_max} -= 2;
142 $result->{$layer}->{score_min} -= 2;
143 $result->{conforming_min} = 0;
144 $result->{conforming_max} = 0;
145 }
146 } else {
147 $result->{$layer}->{must}++;
148 $result->{$layer}->{score_max} -= 2;
149 $result->{$layer}->{score_min} -= 2;
150 $result->{conforming_min} = 0;
151 $result->{conforming_max} = 0;
152 }
153 } # add_error
154
155 sub check_and_print ($$) {
156 my ($input, $result) = @_;
157
158 print_http_header_section ($input, $result);
159
160 my $doc;
161 my $el;
162 my $cssom;
163 my $manifest;
164 my @subdoc;
165
166 if ($input->{media_type} eq 'text/html') {
167 ($doc, $el) = print_syntax_error_html_section ($input, $result);
168 print_source_string_section
169 ($input,
170 \($input->{s}),
171 $input->{charset} || $doc->input_encoding);
172 } elsif ({
173 'text/xml' => 1,
174 'application/atom+xml' => 1,
175 'application/rss+xml' => 1,
176 'image/svg+xml' => 1,
177 'application/xhtml+xml' => 1,
178 'application/xml' => 1,
179 ## TODO: Should we make all XML MIME Types fall
180 ## into this category?
181
182 'application/rdf+xml' => 1, ## NOTE: This type has different model.
183 }->{$input->{media_type}}) {
184 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
185 print_source_string_section ($input,
186 \($input->{s}),
187 $doc->input_encoding);
188 } elsif ($input->{media_type} eq 'text/css') {
189 $cssom = print_syntax_error_css_section ($input, $result);
190 print_source_string_section
191 ($input, \($input->{s}),
192 $cssom->manakai_input_encoding);
193 } elsif ($input->{media_type} eq 'text/cache-manifest') {
194 ## TODO: MUST be text/cache-manifest
195 $manifest = print_syntax_error_manifest_section ($input, $result);
196 print_source_string_section ($input, \($input->{s}),
197 'utf-8');
198 } else {
199 ## TODO: Change HTTP status code??
200 print_result_unknown_type_section ($input, $result);
201 }
202
203 if (defined $doc or defined $el) {
204 $doc->document_uri ($input->{uri});
205 $doc->manakai_entity_base_uri ($input->{base_uri});
206 print_structure_dump_dom_section ($input, $doc, $el);
207 my $elements = print_structure_error_dom_section
208 ($input, $doc, $el, $result, sub {
209 push @subdoc, shift;
210 });
211 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
212 print_listing_section ({
213 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
214 }, $input, $elements->{id}) if keys %{$elements->{id}};
215 print_listing_section ({
216 id => 'terms', label => 'Terms', heading => 'Terms',
217 }, $input, $elements->{term}) if keys %{$elements->{term}};
218 print_listing_section ({
219 id => 'classes', label => 'Classes', heading => 'Classes',
220 }, $input, $elements->{class}) if keys %{$elements->{class}};
221 print_uri_section ($input, $elements->{uri}) if keys %{$elements->{uri}};
222 print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
223 } elsif (defined $cssom) {
224 print_structure_dump_cssom_section ($input, $cssom);
225 ## TODO: CSSOM validation
226 add_error ('structure', {level => 'u'} => $result);
227 } elsif (defined $manifest) {
228 print_structure_dump_manifest_section ($input, $manifest);
229 print_structure_error_manifest_section ($input, $manifest, $result);
230 }
231
232 my $id_prefix = 0;
233 for my $subinput (@subdoc) {
234 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
235 $subinput->{nested} = 1;
236 $subinput->{base_uri} = $subinput->{container_node}->base_uri
237 unless defined $subinput->{base_uri};
238 my $ebaseuri = htescape ($subinput->{base_uri});
239 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
240 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
241 <h2>Subdocument #$id_prefix</h2>
242
243 <dl>
244 <dt>Internet Media Type</dt>
245 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
246 <dt>Container Node</dt>
247 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
248 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
249 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
250 </dl>];
251
252 $subinput->{id_prefix} .= '-';
253 check_and_print ($subinput => $result);
254
255 print STDOUT qq[</div>];
256 }
257 } # check_and_print
258
259 sub print_http_header_section ($$) {
260 my ($input, $result) = @_;
261 return unless defined $input->{header_status_code} or
262 defined $input->{header_status_text} or
263 @{$input->{header_field} or []};
264
265 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
266 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
267 <h2>HTTP Header</h2>
268
269 <p><strong>Note</strong>: Due to the limitation of the
270 network library in use, the content of this section might
271 not be the real header.</p>
272
273 <table><tbody>
274 ];
275
276 if (defined $input->{header_status_code}) {
277 print STDOUT qq[<tr><th scope="row">Status code</th>];
278 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
279 }
280 if (defined $input->{header_status_text}) {
281 print STDOUT qq[<tr><th scope="row">Status text</th>];
282 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
283 }
284
285 for (@{$input->{header_field}}) {
286 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
287 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
288 }
289
290 print STDOUT qq[</tbody></table></div>];
291 } # print_http_header_section
292
293 sub print_syntax_error_html_section ($$) {
294 my ($input, $result) = @_;
295
296 require Encode;
297 require Whatpm::HTML;
298
299 print STDOUT qq[
300 <div id="$input->{id_prefix}parse-errors" class="section">
301 <h2>Parse Errors</h2>
302
303 <dl id="$input->{id_prefix}parse-errors-list">];
304 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
305
306 my $onerror = sub {
307 my (%opt) = @_;
308 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
309 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
310 qq[</dt>];
311 $type =~ tr/ /-/;
312 $type =~ s/\|/%7C/g;
313 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
314 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
315 print STDOUT qq[$msg</dd>\n];
316
317 add_error ('syntax', \%opt => $result);
318 };
319
320 my $doc = $dom->create_document;
321 my $el;
322 my $inner_html_element = $input->{inner_html_element};
323 if (defined $inner_html_element and length $inner_html_element) {
324 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
325 my $time1 = time;
326 my $t = \($input->{s});
327 unless ($input->{is_char_string}) {
328 $t = \(Encode::decode ($input->{charset}, $$t));
329 }
330 $time{decode} = time - $time1;
331
332 $el = $doc->create_element_ns
333 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
334 $time1 = time;
335 Whatpm::HTML->set_inner_html ($el, $$t, $onerror);
336 $time{parse} = time - $time1;
337 } else {
338 my $time1 = time;
339 if ($input->{is_char_string}) {
340 Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror);
341 } else {
342 Whatpm::HTML->parse_byte_string
343 ($input->{charset}, $input->{s} => $doc, $onerror);
344 }
345 $time{parse_html} = time - $time1;
346 }
347 $doc->manakai_charset ($input->{official_charset})
348 if defined $input->{official_charset};
349
350 print STDOUT qq[</dl></div>];
351
352 return ($doc, $el);
353 } # print_syntax_error_html_section
354
355 sub print_syntax_error_xml_section ($$) {
356 my ($input, $result) = @_;
357
358 require Message::DOM::XMLParserTemp;
359
360 print STDOUT qq[
361 <div id="$input->{id_prefix}parse-errors" class="section">
362 <h2>Parse Errors</h2>
363
364 <dl id="$input->{id_prefix}parse-errors-list">];
365 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
366
367 my $onerror = sub {
368 my $err = shift;
369 my $line = $err->location->line_number;
370 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
371 print STDOUT $err->location->column_number, "</dt><dd>";
372 print STDOUT htescape $err->text, "</dd>\n";
373
374 add_error ('syntax', {type => $err->text,
375 level => [
376 $err->SEVERITY_FATAL_ERROR => 'm',
377 $err->SEVERITY_ERROR => 'm',
378 $err->SEVERITY_WARNING => 's',
379 ]->[$err->severity]} => $result);
380
381 return 1;
382 };
383
384 my $t = \($input->{s});
385 if ($input->{is_char_string}) {
386 require Encode;
387 $t = \(Encode::encode ('utf8', $$t));
388 $input->{charset} = 'utf-8';
389 }
390
391 my $time1 = time;
392 open my $fh, '<', $t;
393 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
394 ($fh => $dom, $onerror, charset => $input->{charset});
395 $time{parse_xml} = time - $time1;
396 $doc->manakai_charset ($input->{official_charset})
397 if defined $input->{official_charset};
398
399 print STDOUT qq[</dl></div>];
400
401 return ($doc, undef);
402 } # print_syntax_error_xml_section
403
404 sub get_css_parser () {
405 our $CSSParser;
406 return $CSSParser if $CSSParser;
407
408 require Whatpm::CSS::Parser;
409 my $p = Whatpm::CSS::Parser->new;
410
411 $p->{prop}->{$_} = 1 for qw/
412 alignment-baseline
413 background background-attachment background-color background-image
414 background-position background-position-x background-position-y
415 background-repeat border border-bottom border-bottom-color
416 border-bottom-style border-bottom-width border-collapse border-color
417 border-left border-left-color
418 border-left-style border-left-width border-right border-right-color
419 border-right-style border-right-width
420 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
421 border-style border-top border-top-color border-top-style border-top-width
422 border-width bottom
423 caption-side clear clip color content counter-increment counter-reset
424 cursor direction display dominant-baseline empty-cells float font
425 font-family font-size font-size-adjust font-stretch
426 font-style font-variant font-weight height left
427 letter-spacing line-height
428 list-style list-style-image list-style-position list-style-type
429 margin margin-bottom margin-left margin-right margin-top marker-offset
430 marks max-height max-width min-height min-width opacity -moz-opacity
431 orphans outline outline-color outline-style outline-width overflow
432 overflow-x overflow-y
433 padding padding-bottom padding-left padding-right padding-top
434 page page-break-after page-break-before page-break-inside
435 position quotes right size table-layout
436 text-align text-anchor text-decoration text-indent text-transform
437 top unicode-bidi vertical-align visibility white-space width widows
438 word-spacing writing-mode z-index
439 /;
440 $p->{prop_value}->{display}->{$_} = 1 for qw/
441 block clip inline inline-block inline-table list-item none
442 table table-caption table-cell table-column table-column-group
443 table-header-group table-footer-group table-row table-row-group
444 compact marker
445 /;
446 $p->{prop_value}->{position}->{$_} = 1 for qw/
447 absolute fixed relative static
448 /;
449 $p->{prop_value}->{float}->{$_} = 1 for qw/
450 left right none
451 /;
452 $p->{prop_value}->{clear}->{$_} = 1 for qw/
453 left right none both
454 /;
455 $p->{prop_value}->{direction}->{ltr} = 1;
456 $p->{prop_value}->{direction}->{rtl} = 1;
457 $p->{prop_value}->{marks}->{crop} = 1;
458 $p->{prop_value}->{marks}->{cross} = 1;
459 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
460 normal bidi-override embed
461 /;
462 for my $prop_name (qw/overflow overflow-x overflow-y/) {
463 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
464 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
465 /;
466 }
467 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
468 visible hidden collapse
469 /;
470 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
471 disc circle square decimal decimal-leading-zero
472 lower-roman upper-roman lower-greek lower-latin
473 upper-latin armenian georgian lower-alpha upper-alpha none
474 hebrew cjk-ideographic hiragana katakana hiragana-iroha
475 katakana-iroha
476 /;
477 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
478 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
479 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
480 auto always avoid left right
481 /;
482 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
483 auto always avoid left right
484 /;
485 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
486 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
487 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
488 repeat repeat-x repeat-y no-repeat
489 /;
490 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
491 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
492 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
493 xx-small x-small small medium large x-large xx-large
494 -manakai-xxx-large -webkit-xxx-large
495 larger smaller
496 /;
497 $p->{prop_value}->{'font-style'}->{normal} = 1;
498 $p->{prop_value}->{'font-style'}->{italic} = 1;
499 $p->{prop_value}->{'font-style'}->{oblique} = 1;
500 $p->{prop_value}->{'font-variant'}->{normal} = 1;
501 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
502 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
503 qw/normal wider narrower ultra-condensed extra-condensed
504 condensed semi-condensed semi-expanded expanded
505 extra-expanded ultra-expanded/;
506 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
507 left right center justify begin end
508 /;
509 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
510 capitalize uppercase lowercase none
511 /;
512 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
513 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
514 /;
515 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
516 lr rl tb lr-tb rl-tb tb-rl
517 /;
518 $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
519 start middle end
520 /;
521 $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
522 auto use-script no-change reset-size ideographic alphabetic
523 hanging mathematical central middle text-after-edge text-before-edge
524 /;
525 $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
526 auto baseline before-edge text-before-edge middle central
527 after-edge text-after-edge ideographic alphabetic hanging
528 mathematical
529 /;
530 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
531 none blink underline overline line-through
532 /;
533 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
534 top bottom left right
535 /;
536 $p->{prop_value}->{'table-layout'}->{auto} = 1;
537 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
538 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
539 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
540 $p->{prop_value}->{'empty-cells'}->{show} = 1;
541 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
542 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
543 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
544 se-resize sw-resize s-resize w-resize text wait help progress
545 /;
546 for my $prop (qw/border-top-style border-left-style
547 border-bottom-style border-right-style outline-style/) {
548 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
549 none hidden dotted dashed solid double groove ridge inset outset
550 /;
551 }
552 for my $prop (qw/color background-color
553 border-bottom-color border-left-color border-right-color
554 border-top-color border-color/) {
555 $p->{prop_value}->{$prop}->{transparent} = 1;
556 $p->{prop_value}->{$prop}->{flavor} = 1;
557 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
558 }
559 $p->{prop_value}->{'outline-color'}->{invert} = 1;
560 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
561 $p->{pseudo_class}->{$_} = 1 for qw/
562 active checked disabled empty enabled first-child first-of-type
563 focus hover indeterminate last-child last-of-type link only-child
564 only-of-type root target visited
565 lang nth-child nth-last-child nth-of-type nth-last-of-type not
566 -manakai-contains -manakai-current
567 /;
568 $p->{pseudo_element}->{$_} = 1 for qw/
569 after before first-letter first-line
570 /;
571
572 return $CSSParser = $p;
573 } # get_css_parser
574
575 sub print_syntax_error_css_section ($$) {
576 my ($input, $result) = @_;
577
578 print STDOUT qq[
579 <div id="$input->{id_prefix}parse-errors" class="section">
580 <h2>Parse Errors</h2>
581
582 <dl id="$input->{id_prefix}parse-errors-list">];
583 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
584
585 my $p = get_css_parser ();
586 $p->init;
587 $p->{onerror} = sub {
588 my (%opt) = @_;
589 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
590 if ($opt{token}) {
591 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
592 } else {
593 print STDOUT qq[<dt class="$cls">Unknown location];
594 }
595 if (defined $opt{value}) {
596 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
597 } elsif (defined $opt{token}) {
598 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
599 }
600 $type =~ tr/ /-/;
601 $type =~ s/\|/%7C/g;
602 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
603 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
604 print STDOUT qq[$msg</dd>\n];
605
606 add_error ('syntax', \%opt => $result);
607 };
608 $p->{href} = $input->{uri};
609 $p->{base_uri} = $input->{base_uri};
610
611 # if ($parse_mode eq 'q') {
612 # $p->{unitless_px} = 1;
613 # $p->{hashless_color} = 1;
614 # }
615
616 ## TODO: Make $input->{s} a ref.
617
618 my $s = \$input->{s};
619 my $charset;
620 unless ($input->{is_char_string}) {
621 require Encode;
622 if (defined $input->{charset}) {## TODO: IANA->Perl
623 $charset = $input->{charset};
624 $s = \(Encode::decode ($input->{charset}, $$s));
625 } else {
626 ## TODO: charset detection
627 $s = \(Encode::decode ($charset = 'utf-8', $$s));
628 }
629 }
630
631 my $cssom = $p->parse_char_string ($$s);
632 $cssom->manakai_input_encoding ($charset) if defined $charset;
633
634 print STDOUT qq[</dl></div>];
635
636 return $cssom;
637 } # print_syntax_error_css_section
638
639 sub print_syntax_error_manifest_section ($$) {
640 my ($input, $result) = @_;
641
642 require Whatpm::CacheManifest;
643
644 print STDOUT qq[
645 <div id="$input->{id_prefix}parse-errors" class="section">
646 <h2>Parse Errors</h2>
647
648 <dl id="$input->{id_prefix}parse-errors-list">];
649 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
650
651 my $onerror = sub {
652 my (%opt) = @_;
653 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
654 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
655 qq[</dt>];
656 $type =~ tr/ /-/;
657 $type =~ s/\|/%7C/g;
658 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
659 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
660 print STDOUT qq[$msg</dd>\n];
661
662 add_error ('syntax', \%opt => $result);
663 };
664
665 my $m = $input->{is_char_string} ? 'parse_char_string' : 'parse_byte_string';
666 my $time1 = time;
667 my $manifest = Whatpm::CacheManifest->$m
668 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
669 $time{parse_manifest} = time - $time1;
670
671 print STDOUT qq[</dl></div>];
672
673 return $manifest;
674 } # print_syntax_error_manifest_section
675
676 sub print_source_string_section ($$$) {
677 my $input = shift;
678 my $s;
679 unless ($input->{is_char_string}) {
680 require Encode;
681 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
682 return unless $enc;
683
684 $s = \($enc->decode (${$_[0]}));
685 } else {
686 $s = $_[0];
687 }
688
689 my $i = 1;
690 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
691 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
692 <h2>Document Source</h2>
693 <ol lang="">\n];
694 if (length $$s) {
695 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
696 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
697 "</li>\n";
698 $i++;
699 }
700 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
701 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
702 "</li>\n";
703 }
704 } else {
705 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
706 }
707 print STDOUT "</ol></div>
708 <script>
709 addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
710 </script>";
711 } # print_input_string_section
712
713 sub print_document_tree ($$) {
714 my ($input, $node) = @_;
715
716 my $r = '<ol class="xoxo">';
717
718 my @node = ($node);
719 while (@node) {
720 my $child = shift @node;
721 unless (ref $child) {
722 $r .= $child;
723 next;
724 }
725
726 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
727 my $nt = $child->node_type;
728 if ($nt == $child->ELEMENT_NODE) {
729 my $child_nsuri = $child->namespace_uri;
730 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
731 '</code>'; ## ISSUE: case
732
733 if ($child->has_attributes) {
734 $r .= '<ul class="attributes">';
735 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
736 @{$child->attributes}) {
737 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
738 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
739 }
740 $r .= '</ul>';
741 }
742
743 if ($child->has_child_nodes) {
744 $r .= '<ol class="children">';
745 unshift @node, @{$child->child_nodes}, '</ol></li>';
746 } else {
747 $r .= '</li>';
748 }
749 } elsif ($nt == $child->TEXT_NODE) {
750 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
751 } elsif ($nt == $child->CDATA_SECTION_NODE) {
752 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
753 } elsif ($nt == $child->COMMENT_NODE) {
754 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
755 } elsif ($nt == $child->DOCUMENT_NODE) {
756 $r .= qq'<li id="$node_id" class="tree-document">Document';
757 $r .= qq[<ul class="attributes">];
758 my $cp = $child->manakai_charset;
759 if (defined $cp) {
760 $r .= qq[<li><code>charset</code> parameter = <code>];
761 $r .= htescape ($cp) . qq[</code></li>];
762 }
763 $r .= qq[<li><code>inputEncoding</code> = ];
764 my $ie = $child->input_encoding;
765 if (defined $ie) {
766 $r .= qq[<code>@{[htescape ($ie)]}</code>];
767 if ($child->manakai_has_bom) {
768 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
769 }
770 } else {
771 $r .= qq[(<code>null</code>)];
772 }
773 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
774 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
775 unless ($child->manakai_is_html) {
776 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
777 if (defined $child->xml_encoding) {
778 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
779 } else {
780 $r .= qq[<li>XML encoding = (null)</li>];
781 }
782 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
783 }
784 $r .= qq[</ul>];
785 if ($child->has_child_nodes) {
786 $r .= '<ol class="children">';
787 unshift @node, @{$child->child_nodes}, '</ol></li>';
788 }
789 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
790 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
791 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
792 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
793 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
794 $r .= '</ul></li>';
795 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
796 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
797 } else {
798 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
799 }
800 }
801
802 $r .= '</ol>';
803 print STDOUT $r;
804 } # print_document_tree
805
806 sub print_structure_dump_dom_section ($$$) {
807 my ($input, $doc, $el) = @_;
808
809 print STDOUT qq[
810 <div id="$input->{id_prefix}document-tree" class="section">
811 <h2>Document Tree</h2>
812 ];
813 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
814 unless $input->{nested};
815
816 print_document_tree ($input, $el || $doc);
817
818 print STDOUT qq[</div>];
819 } # print_structure_dump_dom_section
820
821 sub print_structure_dump_cssom_section ($$) {
822 my ($input, $cssom) = @_;
823
824 print STDOUT qq[
825 <div id="$input->{id_prefix}document-tree" class="section">
826 <h2>Document Tree</h2>
827 ];
828 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
829 unless $input->{nested};
830
831 ## TODO:
832 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
833
834 print STDOUT qq[</div>];
835 } # print_structure_dump_cssom_section
836
837 sub print_structure_dump_manifest_section ($$) {
838 my ($input, $manifest) = @_;
839
840 print STDOUT qq[
841 <div id="$input->{id_prefix}dump-manifest" class="section">
842 <h2>Cache Manifest</h2>
843 ];
844 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
845 unless $input->{nested};
846
847 print STDOUT qq[<dl><dt>Explicit entries</dt>];
848 my $i = 0;
849 for my $uri (@{$manifest->[0]}) {
850 my $euri = htescape ($uri);
851 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
852 }
853
854 print STDOUT qq[<dt>Fallback entries</dt><dd>
855 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
856 <th scope=row>Fallback Entry</tr><tbody>];
857 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
858 my $euri = htescape ($uri);
859 my $euri2 = htescape ($manifest->[1]->{$uri});
860 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
861 <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
862 }
863
864 print STDOUT qq[</table><dt>Online whitelist</dt>];
865 for my $uri (@{$manifest->[2]}) {
866 my $euri = htescape ($uri);
867 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
868 }
869
870 print STDOUT qq[</dl></div>];
871 } # print_structure_dump_manifest_section
872
873 sub print_structure_error_dom_section ($$$$$) {
874 my ($input, $doc, $el, $result, $onsubdoc) = @_;
875
876 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
877 <h2>Document Errors</h2>
878
879 <dl id=document-errors-list>];
880 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
881 unless $input->{nested};
882
883 require Whatpm::ContentChecker;
884 my $onerror = sub {
885 my %opt = @_;
886 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
887 $type =~ tr/ /-/;
888 $type =~ s/\|/%7C/g;
889 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
890 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
891 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
892 print STDOUT $msg, "</dd>\n";
893 add_error ('structure', \%opt => $result);
894 };
895
896 my $elements;
897 my $time1 = time;
898 if ($el) {
899 $elements = Whatpm::ContentChecker->check_element
900 ($el, $onerror, $onsubdoc);
901 } else {
902 $elements = Whatpm::ContentChecker->check_document
903 ($doc, $onerror, $onsubdoc);
904 }
905 $time{check} = time - $time1;
906
907 print STDOUT qq[</dl>
908 <script>
909 addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
910 </script></div>];
911
912 return $elements;
913 } # print_structure_error_dom_section
914
915 sub print_structure_error_manifest_section ($$$) {
916 my ($input, $manifest, $result) = @_;
917
918 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
919 <h2>Document Errors</h2>
920
921 <dl>];
922 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
923 unless $input->{nested};
924
925 require Whatpm::CacheManifest;
926 Whatpm::CacheManifest->check_manifest ($manifest, sub {
927 my %opt = @_;
928 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
929 $type =~ tr/ /-/;
930 $type =~ s/\|/%7C/g;
931 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
932 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
933 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
934 add_error ('structure', \%opt => $result);
935 });
936
937 print STDOUT qq[</div>];
938 } # print_structure_error_manifest_section
939
940 sub print_table_section ($$) {
941 my ($input, $tables) = @_;
942
943 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
944 unless $input->{nested};
945 print STDOUT qq[
946 <div id="$input->{id_prefix}tables" class="section">
947 <h2>Tables</h2>
948
949 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
950 <script src="../table-script.js" type="text/javascript"></script>
951 <noscript>
952 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
953 </noscript>
954 ];
955
956 require JSON;
957
958 my $i = 0;
959 for my $table_el (@$tables) {
960 $i++;
961 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
962 get_node_link ($input, $table_el) . q[</h3>];
963
964 ## TODO: Make |ContentChecker| return |form_table| result
965 ## so that this script don't have to run the algorithm twice.
966 my $table = Whatpm::HTMLTable->form_table ($table_el);
967
968 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
969 next unless $_;
970 delete $_->{element};
971 }
972
973 for (@{$table->{row_group}}) {
974 next unless $_;
975 next unless $_->{element};
976 $_->{type} = $_->{element}->manakai_local_name;
977 delete $_->{element};
978 }
979
980 for (@{$table->{cell}}) {
981 next unless $_;
982 for (@{$_}) {
983 next unless $_;
984 for (@$_) {
985 $_->{id} = refaddr $_->{element} if defined $_->{element};
986 delete $_->{element};
987 $_->{is_header} = $_->{is_header} ? 1 : 0;
988 }
989 }
990 }
991
992 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
993 print STDOUT JSON::objToJson ($table);
994 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
995 print STDOUT qq[, '$input->{id_prefix}');</script>];
996 }
997
998 print STDOUT qq[</div>];
999 } # print_table_section
1000
1001 sub print_listing_section ($$$) {
1002 my ($opt, $input, $ids) = @_;
1003
1004 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
1005 unless $input->{nested};
1006 print STDOUT qq[
1007 <div id="$input->{id_prefix}$opt->{id}" class="section">
1008 <h2>$opt->{heading}</h2>
1009
1010 <dl>
1011 ];
1012 for my $id (sort {$a cmp $b} keys %$ids) {
1013 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
1014 for (@{$ids->{$id}}) {
1015 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
1016 }
1017 }
1018 print STDOUT qq[</dl></div>];
1019 } # print_listing_section
1020
1021 sub print_uri_section ($$$) {
1022 my ($input, $uris) = @_;
1023
1024 ## NOTE: URIs contained in the DOM (i.e. in HTML or XML documents),
1025 ## except for those in RDF triples.
1026 ## TODO: URIs in CSS
1027
1028 push @nav, ['#' . $input->{id_prefix} . 'uris' => 'URIs']
1029 unless $input->{nested};
1030 print STDOUT qq[
1031 <div id="$input->{id_prefix}uris" class="section">
1032 <h2>URIs</h2>
1033
1034 <dl>];
1035 for my $uri (sort {$a cmp $b} keys %$uris) {
1036 my $euri = htescape ($uri);
1037 print STDOUT qq[<dt><code class=uri>&lt;<a href="$euri">$euri</a>></code>];
1038 my $eccuri = htescape (get_cc_uri ($uri));
1039 print STDOUT qq[<dd><a href="$eccuri">Check conformance of this document</a>];
1040 print STDOUT qq[<dd>Found at: <ul>];
1041 for my $entry (@{$uris->{$uri}}) {
1042 print STDOUT qq[<li>], get_node_link ($input, $entry->{node});
1043 if (keys %{$entry->{type} or {}}) {
1044 print STDOUT ' (';
1045 print STDOUT join ', ', map {
1046 {
1047 hyperlink => 'Hyperlink',
1048 resource => 'Link to an external resource',
1049 namespace => 'Namespace URI',
1050 cite => 'Citation or link to a long description',
1051 embedded => 'Link to an embedded content',
1052 base => 'Base URI',
1053 action => 'Submission URI',
1054 }->{$_}
1055 or
1056 htescape ($_)
1057 } keys %{$entry->{type}};
1058 print STDOUT ')';
1059 }
1060 }
1061 print STDOUT qq[</ul>];
1062 }
1063 print STDOUT qq[</dl></div>];
1064 } # print_uri_section
1065
1066 sub print_rdf_section ($$$) {
1067 my ($input, $rdfs) = @_;
1068
1069 push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
1070 unless $input->{nested};
1071 print STDOUT qq[
1072 <div id="$input->{id_prefix}rdf" class="section">
1073 <h2>RDF Triples</h2>
1074
1075 <dl>];
1076 my $i = 0;
1077 for my $rdf (@$rdfs) {
1078 print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
1079 print STDOUT get_node_link ($input, $rdf->[0]);
1080 print STDOUT qq[<dd><dl>];
1081 for my $triple (@{$rdf->[1]}) {
1082 print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
1083 print STDOUT get_rdf_resource_html ($triple->[1]);
1084 print STDOUT ' ';
1085 print STDOUT get_rdf_resource_html ($triple->[2]);
1086 print STDOUT ' ';
1087 print STDOUT get_rdf_resource_html ($triple->[3]);
1088 }
1089 print STDOUT qq[</dl>];
1090 }
1091 print STDOUT qq[</dl></div>];
1092 } # print_rdf_section
1093
1094 sub get_rdf_resource_html ($) {
1095 my $resource = shift;
1096 if (defined $resource->{uri}) {
1097 my $euri = htescape ($resource->{uri});
1098 return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1099 '</a>></code>';
1100 } elsif (defined $resource->{bnodeid}) {
1101 return htescape ('_:' . $resource->{bnodeid});
1102 } elsif ($resource->{nodes}) {
1103 return '(rdf:XMLLiteral)';
1104 } elsif (defined $resource->{value}) {
1105 my $elang = htescape (defined $resource->{language}
1106 ? $resource->{language} : '');
1107 my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
1108 if (defined $resource->{datatype}) {
1109 my $euri = htescape ($resource->{datatype});
1110 $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1111 '</a>></code>';
1112 } elsif (length $resource->{language}) {
1113 $r .= '@' . htescape ($resource->{language});
1114 }
1115 return $r;
1116 } else {
1117 return '??';
1118 }
1119 } # get_rdf_resource_html
1120
1121 sub print_result_section ($) {
1122 my $result = shift;
1123
1124 print STDOUT qq[
1125 <div id="result-summary" class="section">
1126 <h2>Result</h2>];
1127
1128 if ($result->{unsupported} and $result->{conforming_max}) {
1129 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1130 checker cannot decide whether the document is conforming or
1131 not, since the document contains one or more unsupported
1132 features. The document might or might not be conforming.</p>];
1133 } elsif ($result->{conforming_min}) {
1134 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1135 found in this document.</p>];
1136 } elsif ($result->{conforming_max}) {
1137 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1138 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1139 it might be conforming.</p>];
1140 } else {
1141 print STDOUT qq[<p class=FAIL id=result-para>This document is
1142 <strong><em>non</em>-conforming</strong>.</p>];
1143 }
1144
1145 print STDOUT qq[<table>
1146 <colgroup><col><colgroup><col><col><col><colgroup><col>
1147 <thead>
1148 <tr><th scope=col></th>
1149 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1150 Errors</a></th>
1151 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1152 Errors</a></th>
1153 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1154 <th scope=col>Score</th></tr></thead><tbody>];
1155
1156 my $must_error = 0;
1157 my $should_error = 0;
1158 my $warning = 0;
1159 my $score_min = 0;
1160 my $score_max = 0;
1161 my $score_base = 20;
1162 my $score_unit = $score_base / 100;
1163 for (
1164 [Transfer => 'transfer', ''],
1165 [Character => 'char', ''],
1166 [Syntax => 'syntax', '#parse-errors'],
1167 [Structure => 'structure', '#document-errors'],
1168 ) {
1169 $must_error += ($result->{$_->[1]}->{must} += 0);
1170 $should_error += ($result->{$_->[1]}->{should} += 0);
1171 $warning += ($result->{$_->[1]}->{warning} += 0);
1172 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1173 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1174
1175 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1176 my $label = $_->[0];
1177 if ($result->{$_->[1]}->{must} or
1178 $result->{$_->[1]}->{should} or
1179 $result->{$_->[1]}->{warning} or
1180 $result->{$_->[1]}->{unsupported}) {
1181 $label = qq[<a href="$_->[2]">$label</a>];
1182 }
1183
1184 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1185 if ($uncertain) {
1186 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1187 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1188 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1189 } else {
1190 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1191 }
1192 }
1193
1194 $score_max += $score_base;
1195
1196 print STDOUT qq[
1197 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1198 </tbody>
1199 <tfoot><tr class=uncertain><th scope=row>Total</th>
1200 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1201 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1202 <td>$warning?</td>
1203 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1204 </table>
1205
1206 <p><strong>Important</strong>: This conformance checking service
1207 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1208 </div>];
1209 push @nav, ['#result-summary' => 'Result'];
1210 } # print_result_section
1211
1212 sub print_result_unknown_type_section ($$) {
1213 my ($input, $result) = @_;
1214
1215 my $euri = htescape ($input->{uri});
1216 print STDOUT qq[
1217 <div id="$input->{id_prefix}parse-errors" class="section">
1218 <h2>Errors</h2>
1219
1220 <dl>
1221 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1222 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1223 supported</a></strong>:
1224 Media type
1225 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1226 is not supported.</dd>
1227 </dl>
1228 </div>
1229 ];
1230 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1231 unless $input->{nested};
1232 add_error (char => {level => 'u'} => $result);
1233 add_error (syntax => {level => 'u'} => $result);
1234 add_error (structure => {level => 'u'} => $result);
1235 } # print_result_unknown_type_section
1236
1237 sub print_result_input_error_section ($) {
1238 my $input = shift;
1239 print STDOUT qq[<div class="section" id="result-summary">
1240 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1241 </div>];
1242 push @nav, ['#result-summary' => 'Result'];
1243 } # print_result_input_error_section
1244
1245 sub get_error_label ($$) {
1246 my ($input, $err) = @_;
1247
1248 my $r = '';
1249
1250 my $line;
1251 my $column;
1252
1253 if (defined $err->{node}) {
1254 $line = $err->{node}->get_user_data ('manakai_source_line');
1255 if (defined $line) {
1256 $column = $err->{node}->get_user_data ('manakai_source_column');
1257 } else {
1258 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1259 my $owner = $err->{node}->owner_element;
1260 $line = $owner->get_user_data ('manakai_source_line');
1261 $column = $owner->get_user_data ('manakai_source_column');
1262 } else {
1263 my $parent = $err->{node}->parent_node;
1264 if ($parent) {
1265 $line = $parent->get_user_data ('manakai_source_line');
1266 $column = $parent->get_user_data ('manakai_source_column');
1267 }
1268 }
1269 }
1270 }
1271 unless (defined $line) {
1272 if (defined $err->{token} and defined $err->{token}->{line}) {
1273 $line = $err->{token}->{line};
1274 $column = $err->{token}->{column};
1275 } elsif (defined $err->{line}) {
1276 $line = $err->{line};
1277 $column = $err->{column};
1278 }
1279 }
1280
1281 if (defined $line) {
1282 if (defined $column and $column > 0) {
1283 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1284 } else {
1285 $line = $line - 1 || 1;
1286 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1287 }
1288 }
1289
1290 if (defined $err->{node}) {
1291 $r .= ' ' if length $r;
1292 $r .= get_node_link ($input, $err->{node});
1293 }
1294
1295 if (defined $err->{index}) {
1296 if (length $r) {
1297 $r .= ', Index ' . (0+$err->{index});
1298 } else {
1299 $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1300 . (0+$err->{index}) . '</a>';
1301 }
1302 }
1303
1304 if (defined $err->{value}) {
1305 $r .= ' ' if length $r;
1306 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1307 }
1308
1309 return $r;
1310 } # get_error_label
1311
1312 sub get_error_level_label ($) {
1313 my $err = shift;
1314
1315 my $r = '';
1316
1317 if (not defined $err->{level} or $err->{level} eq 'm') {
1318 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1319 error</a></strong>: ];
1320 } elsif ($err->{level} eq 's') {
1321 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1322 error</a></strong>: ];
1323 } elsif ($err->{level} eq 'w') {
1324 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1325 ];
1326 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1327 $r = qq[<strong><a href="../error-description#level-u">Not
1328 supported</a></strong>: ];
1329 } elsif ($err->{level} eq 'i') {
1330 $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1331 } else {
1332 my $elevel = htescape ($err->{level});
1333 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1334 ];
1335 }
1336
1337 return $r;
1338 } # get_error_level_label
1339
1340 sub get_node_path ($) {
1341 my $node = shift;
1342 my @r;
1343 while (defined $node) {
1344 my $rs;
1345 if ($node->node_type == 1) {
1346 $rs = $node->node_name;
1347 $node = $node->parent_node;
1348 } elsif ($node->node_type == 2) {
1349 $rs = '@' . $node->node_name;
1350 $node = $node->owner_element;
1351 } elsif ($node->node_type == 3) {
1352 $rs = '"' . $node->data . '"';
1353 $node = $node->parent_node;
1354 } elsif ($node->node_type == 9) {
1355 @r = ('') unless @r;
1356 $rs = '';
1357 $node = $node->parent_node;
1358 } else {
1359 $rs = '#' . $node->node_type;
1360 $node = $node->parent_node;
1361 }
1362 unshift @r, $rs;
1363 }
1364 return join '/', @r;
1365 } # get_node_path
1366
1367 sub get_node_link ($$) {
1368 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1369 htescape (get_node_path ($_[1])) . qq[</a>];
1370 } # get_node_link
1371
1372 {
1373 my $Msg = {};
1374
1375 sub load_text_catalog ($) {
1376 my $lang = shift; # MUST be a canonical lang name
1377 open my $file, '<:utf8', "cc-msg.$lang.txt"
1378 or die "$0: cc-msg.$lang.txt: $!";
1379 while (<$file>) {
1380 if (s/^([^;]+);([^;]*);//) {
1381 my ($type, $cls, $msg) = ($1, $2, $_);
1382 $msg =~ tr/\x0D\x0A//d;
1383 $Msg->{$type} = [$cls, $msg];
1384 }
1385 }
1386 } # load_text_catalog
1387
1388 sub get_text ($) {
1389 my ($type, $level, $node) = @_;
1390 $type = $level . ':' . $type if defined $level;
1391 $level = 'm' unless defined $level;
1392 my @arg;
1393 {
1394 if (defined $Msg->{$type}) {
1395 my $msg = $Msg->{$type}->[1];
1396 $msg =~ s{<var>\$([0-9]+)</var>}{
1397 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1398 }ge;
1399 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1400 UNIVERSAL::can ($node, 'get_attribute_ns')
1401 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1402 }ge;
1403 $msg =~ s{<var>{\@}</var>}{
1404 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1405 }ge;
1406 $msg =~ s{<var>{local-name}</var>}{
1407 UNIVERSAL::can ($node, 'manakai_local_name')
1408 ? htescape ($node->manakai_local_name) : ''
1409 }ge;
1410 $msg =~ s{<var>{element-local-name}</var>}{
1411 (UNIVERSAL::can ($node, 'owner_element') and
1412 $node->owner_element)
1413 ? htescape ($node->owner_element->manakai_local_name)
1414 : ''
1415 }ge;
1416 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1417 } elsif ($type =~ s/:([^:]*)$//) {
1418 unshift @arg, $1;
1419 redo;
1420 }
1421 }
1422 return ($type, 'level-'.$level, htescape ($_[0]));
1423 } # get_text
1424
1425 }
1426
1427 sub encode_uri_component ($) {
1428 require Encode;
1429 my $s = Encode::encode ('utf8', shift);
1430 $s =~ s/([^0-9A-Za-z_.~-])/sprintf '%%%02X', ord $1/ge;
1431 return $s;
1432 } # encode_uri_component
1433
1434 sub get_cc_uri ($) {
1435 return './?uri=' . encode_uri_component ($_[0]);
1436 } # get_cc_uri
1437
1438 sub get_input_document ($$) {
1439 my ($http, $dom) = @_;
1440
1441 my $request_uri = $http->get_parameter ('uri');
1442 my $r = {};
1443 if (defined $request_uri and length $request_uri) {
1444 my $uri = $dom->create_uri_reference ($request_uri);
1445 unless ({
1446 http => 1,
1447 }->{lc $uri->uri_scheme}) {
1448 return {uri => $request_uri, request_uri => $request_uri,
1449 error_status_text => 'URI scheme not allowed'};
1450 }
1451
1452 require Message::Util::HostPermit;
1453 my $host_permit = new Message::Util::HostPermit;
1454 $host_permit->add_rule (<<EOH);
1455 Allow host=suika port=80
1456 Deny host=suika
1457 Allow host=suika.fam.cx port=80
1458 Deny host=suika.fam.cx
1459 Deny host=localhost
1460 Deny host=*.localdomain
1461 Deny ipv4=0.0.0.0/8
1462 Deny ipv4=10.0.0.0/8
1463 Deny ipv4=127.0.0.0/8
1464 Deny ipv4=169.254.0.0/16
1465 Deny ipv4=172.0.0.0/11
1466 Deny ipv4=192.0.2.0/24
1467 Deny ipv4=192.88.99.0/24
1468 Deny ipv4=192.168.0.0/16
1469 Deny ipv4=198.18.0.0/15
1470 Deny ipv4=224.0.0.0/4
1471 Deny ipv4=255.255.255.255/32
1472 Deny ipv6=0::0/0
1473 Allow host=*
1474 EOH
1475 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1476 return {uri => $request_uri, request_uri => $request_uri,
1477 error_status_text => 'Connection to the host is forbidden'};
1478 }
1479
1480 require LWP::UserAgent;
1481 my $ua = WDCC::LWPUA->new;
1482 $ua->{wdcc_dom} = $dom;
1483 $ua->{wdcc_host_permit} = $host_permit;
1484 $ua->agent ('Mozilla'); ## TODO: for now.
1485 $ua->parse_head (0);
1486 $ua->protocols_allowed ([qw/http/]);
1487 $ua->max_size (1000_000);
1488 my $req = HTTP::Request->new (GET => $request_uri);
1489 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1490 my $res = $ua->request ($req);
1491 ## TODO: 401 sets |is_success| true.
1492 if ($res->is_success or $http->get_parameter ('error-page')) {
1493 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1494 $r->{uri} = $res->request->uri;
1495 $r->{request_uri} = $request_uri;
1496
1497 ## TODO: More strict parsing...
1498 my $ct = $res->header ('Content-Type');
1499 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1500 $r->{charset} = lc $1;
1501 $r->{charset} =~ tr/\\//d;
1502 $r->{official_charset} = $r->{charset};
1503 }
1504
1505 my $input_charset = $http->get_parameter ('charset');
1506 if (defined $input_charset and length $input_charset) {
1507 $r->{charset_overridden}
1508 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1509 $r->{charset} = $input_charset;
1510 }
1511
1512 ## TODO: Support for HTTP Content-Encoding
1513
1514 $r->{s} = ''.$res->content;
1515
1516 require Whatpm::ContentType;
1517 ($r->{official_type}, $r->{media_type})
1518 = Whatpm::ContentType->get_sniffed_type
1519 (get_file_head => sub {
1520 return substr $r->{s}, 0, shift;
1521 },
1522 http_content_type_byte => $ct,
1523 has_http_content_encoding =>
1524 defined $res->header ('Content-Encoding'),
1525 supported_image_types => {});
1526 } else {
1527 $r->{uri} = $res->request->uri;
1528 $r->{request_uri} = $request_uri;
1529 $r->{error_status_text} = $res->status_line;
1530 }
1531
1532 $r->{header_field} = [];
1533 $res->scan (sub {
1534 push @{$r->{header_field}}, [$_[0], $_[1]];
1535 });
1536 $r->{header_status_code} = $res->code;
1537 $r->{header_status_text} = $res->message;
1538 } else {
1539 $r->{s} = ''.$http->get_parameter ('s');
1540 $r->{uri} = q<thismessage:/>;
1541 $r->{request_uri} = q<thismessage:/>;
1542 $r->{base_uri} = q<thismessage:/>;
1543 $r->{charset} = ''.$http->get_parameter ('_charset_');
1544 $r->{charset} =~ s/\s+//g;
1545 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1546 $r->{official_charset} = $r->{charset};
1547 $r->{header_field} = [];
1548
1549 require Whatpm::ContentType;
1550 ($r->{official_type}, $r->{media_type})
1551 = Whatpm::ContentType->get_sniffed_type
1552 (get_file_head => sub {
1553 return substr $r->{s}, 0, shift;
1554 },
1555 http_content_type_byte => undef,
1556 has_http_content_encoding => 0,
1557 supported_image_types => {});
1558 }
1559
1560 my $input_format = $http->get_parameter ('i');
1561 if (defined $input_format and length $input_format) {
1562 $r->{media_type_overridden}
1563 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1564 $r->{media_type} = $input_format;
1565 }
1566 if (defined $r->{s} and not defined $r->{media_type}) {
1567 $r->{media_type} = 'text/html';
1568 $r->{media_type_overridden} = 1;
1569 }
1570
1571 if ($r->{media_type} eq 'text/xml') {
1572 unless (defined $r->{charset}) {
1573 $r->{charset} = 'us-ascii';
1574 $r->{official_charset} = $r->{charset};
1575 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1576 $r->{charset_overridden} = 0;
1577 }
1578 }
1579
1580 if (length $r->{s} > 1000_000) {
1581 $r->{error_status_text} = 'Entity-body too large';
1582 delete $r->{s};
1583 return $r;
1584 }
1585
1586 $r->{inner_html_element} = $http->get_parameter ('e');
1587
1588 return $r;
1589 } # get_input_document
1590
1591 package WDCC::LWPUA;
1592 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1593
1594 sub redirect_ok {
1595 my $ua = shift;
1596 unless ($ua->SUPER::redirect_ok (@_)) {
1597 return 0;
1598 }
1599
1600 my $uris = $_[1]->header ('Location');
1601 return 0 unless $uris;
1602 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1603 unless ({
1604 http => 1,
1605 }->{lc $uri->uri_scheme}) {
1606 return 0;
1607 }
1608 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1609 return 0;
1610 }
1611 return 1;
1612 } # redirect_ok
1613
1614 =head1 AUTHOR
1615
1616 Wakaba <w@suika.fam.cx>.
1617
1618 =head1 LICENSE
1619
1620 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1621
1622 This library is free software; you can redistribute it
1623 and/or modify it under the same terms as Perl itself.
1624
1625 =cut
1626
1627 ## $Date: 2008/03/21 11:17:00 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24