/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.51 - (show annotations) (download)
Sun May 18 03:47:56 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.50: +28 -10 lines
++ ChangeLog	18 May 2008 03:47:40 -0000
2008-05-18  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi (print_source_string_section): Use new Message::Charset::Info
	interface to decode source code, otherwise the Perl native Encode
	module might decode the source code into different character
	string with the Info's.

2008-05-10  Wakaba  <wakaba@suika.fam.cx>

	* standards.en.html (requirements): Remove a requirement
	for an HTML element's allowed context (it is covered by HTML5
	spec since r1583).

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89
90 <script src="../cc-script.js"></script>
91 ];
92
93 $input->{id_prefix} = '';
94 #$input->{nested} = 0;
95 my $result = {conforming_min => 1, conforming_max => 1};
96 check_and_print ($input => $result);
97 print_result_section ($result);
98 } else {
99 print STDOUT qq[</dl></div>];
100 print_result_input_error_section ($input);
101 }
102
103 print STDOUT qq[
104 <ul class="navigation" id="nav-items">
105 ];
106 for (@nav) {
107 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108 }
109 print STDOUT qq[
110 </ul>
111 </body>
112 </html>
113 ];
114
115 for (qw/decode parse parse_html parse_xml parse_manifest
116 check check_manifest/) {
117 next unless defined $time{$_};
118 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119 print $file $char_length, "\t", $time{$_}, "\n";
120 }
121
122 exit;
123 }
124
125 sub add_error ($$$) {
126 my ($layer, $err, $result) = @_;
127 if (defined $err->{level}) {
128 if ($err->{level} eq 's') {
129 $result->{$layer}->{should}++;
130 $result->{$layer}->{score_min} -= 2;
131 $result->{conforming_min} = 0;
132 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133 $result->{$layer}->{warning}++;
134 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 $result->{$layer}->{unsupported}++;
136 $result->{unsupported} = 1;
137 } elsif ($err->{level} eq 'i') {
138 #
139 } else {
140 $result->{$layer}->{must}++;
141 $result->{$layer}->{score_max} -= 2;
142 $result->{$layer}->{score_min} -= 2;
143 $result->{conforming_min} = 0;
144 $result->{conforming_max} = 0;
145 }
146 } else {
147 $result->{$layer}->{must}++;
148 $result->{$layer}->{score_max} -= 2;
149 $result->{$layer}->{score_min} -= 2;
150 $result->{conforming_min} = 0;
151 $result->{conforming_max} = 0;
152 }
153 } # add_error
154
155 sub check_and_print ($$) {
156 my ($input, $result) = @_;
157
158 print_http_header_section ($input, $result);
159
160 my $doc;
161 my $el;
162 my $cssom;
163 my $manifest;
164 my @subdoc;
165
166 if ($input->{media_type} eq 'text/html') {
167 ($doc, $el) = print_syntax_error_html_section ($input, $result);
168 print_source_string_section
169 ($input,
170 \($input->{s}),
171 $input->{charset} || $doc->input_encoding);
172 } elsif ({
173 'text/xml' => 1,
174 'application/atom+xml' => 1,
175 'application/rss+xml' => 1,
176 'image/svg+xml' => 1,
177 'application/xhtml+xml' => 1,
178 'application/xml' => 1,
179 ## TODO: Should we make all XML MIME Types fall
180 ## into this category?
181
182 'application/rdf+xml' => 1, ## NOTE: This type has different model.
183 }->{$input->{media_type}}) {
184 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
185 print_source_string_section ($input,
186 \($input->{s}),
187 $doc->input_encoding);
188 } elsif ($input->{media_type} eq 'text/css') {
189 $cssom = print_syntax_error_css_section ($input, $result);
190 print_source_string_section
191 ($input, \($input->{s}),
192 $cssom->manakai_input_encoding);
193 } elsif ($input->{media_type} eq 'text/cache-manifest') {
194 ## TODO: MUST be text/cache-manifest
195 $manifest = print_syntax_error_manifest_section ($input, $result);
196 print_source_string_section ($input, \($input->{s}),
197 'utf-8');
198 } else {
199 ## TODO: Change HTTP status code??
200 print_result_unknown_type_section ($input, $result);
201 }
202
203 if (defined $doc or defined $el) {
204 $doc->document_uri ($input->{uri});
205 $doc->manakai_entity_base_uri ($input->{base_uri});
206 print_structure_dump_dom_section ($input, $doc, $el);
207 my $elements = print_structure_error_dom_section
208 ($input, $doc, $el, $result, sub {
209 push @subdoc, shift;
210 });
211 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
212 print_listing_section ({
213 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
214 }, $input, $elements->{id}) if keys %{$elements->{id}};
215 print_listing_section ({
216 id => 'terms', label => 'Terms', heading => 'Terms',
217 }, $input, $elements->{term}) if keys %{$elements->{term}};
218 print_listing_section ({
219 id => 'classes', label => 'Classes', heading => 'Classes',
220 }, $input, $elements->{class}) if keys %{$elements->{class}};
221 print_uri_section ($input, $elements->{uri}) if keys %{$elements->{uri}};
222 print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
223 } elsif (defined $cssom) {
224 print_structure_dump_cssom_section ($input, $cssom);
225 ## TODO: CSSOM validation
226 add_error ('structure', {level => 'u'} => $result);
227 } elsif (defined $manifest) {
228 print_structure_dump_manifest_section ($input, $manifest);
229 print_structure_error_manifest_section ($input, $manifest, $result);
230 }
231
232 my $id_prefix = 0;
233 for my $subinput (@subdoc) {
234 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
235 $subinput->{nested} = 1;
236 $subinput->{base_uri} = $subinput->{container_node}->base_uri
237 unless defined $subinput->{base_uri};
238 my $ebaseuri = htescape ($subinput->{base_uri});
239 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
240 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
241 <h2>Subdocument #$id_prefix</h2>
242
243 <dl>
244 <dt>Internet Media Type</dt>
245 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
246 <dt>Container Node</dt>
247 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
248 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
249 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
250 </dl>];
251
252 $subinput->{id_prefix} .= '-';
253 check_and_print ($subinput => $result);
254
255 print STDOUT qq[</div>];
256 }
257 } # check_and_print
258
259 sub print_http_header_section ($$) {
260 my ($input, $result) = @_;
261 return unless defined $input->{header_status_code} or
262 defined $input->{header_status_text} or
263 @{$input->{header_field} or []};
264
265 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
266 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
267 <h2>HTTP Header</h2>
268
269 <p><strong>Note</strong>: Due to the limitation of the
270 network library in use, the content of this section might
271 not be the real header.</p>
272
273 <table><tbody>
274 ];
275
276 if (defined $input->{header_status_code}) {
277 print STDOUT qq[<tr><th scope="row">Status code</th>];
278 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
279 }
280 if (defined $input->{header_status_text}) {
281 print STDOUT qq[<tr><th scope="row">Status text</th>];
282 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
283 }
284
285 for (@{$input->{header_field}}) {
286 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
287 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
288 }
289
290 print STDOUT qq[</tbody></table></div>];
291 } # print_http_header_section
292
293 sub print_syntax_error_html_section ($$) {
294 my ($input, $result) = @_;
295
296 require Encode;
297 require Whatpm::HTML;
298
299 print STDOUT qq[
300 <div id="$input->{id_prefix}parse-errors" class="section">
301 <h2>Parse Errors</h2>
302
303 <dl id="$input->{id_prefix}parse-errors-list">];
304 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
305
306 my $onerror = sub {
307 my (%opt) = @_;
308 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
309 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
310 qq[</dt>];
311 $type =~ tr/ /-/;
312 $type =~ s/\|/%7C/g;
313 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
314 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
315 print STDOUT qq[$msg</dd>\n];
316
317 add_error ('syntax', \%opt => $result);
318 };
319
320 my $doc = $dom->create_document;
321 my $el;
322 my $inner_html_element = $input->{inner_html_element};
323 if (defined $inner_html_element and length $inner_html_element) {
324 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
325 my $time1 = time;
326 my $t = \($input->{s});
327 unless ($input->{is_char_string}) {
328 $t = \(Encode::decode ($input->{charset}, $$t));
329 }
330 $time{decode} = time - $time1;
331
332 $el = $doc->create_element_ns
333 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
334 $time1 = time;
335 Whatpm::HTML->set_inner_html ($el, $$t, $onerror);
336 $time{parse} = time - $time1;
337 } else {
338 my $time1 = time;
339 if ($input->{is_char_string}) {
340 Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror);
341 } else {
342 Whatpm::HTML->parse_byte_string
343 ($input->{charset}, $input->{s} => $doc, $onerror);
344 }
345 $time{parse_html} = time - $time1;
346 }
347 $doc->manakai_charset ($input->{official_charset})
348 if defined $input->{official_charset};
349
350 print STDOUT qq[</dl></div>];
351
352 return ($doc, $el);
353 } # print_syntax_error_html_section
354
355 sub print_syntax_error_xml_section ($$) {
356 my ($input, $result) = @_;
357
358 require Message::DOM::XMLParserTemp;
359
360 print STDOUT qq[
361 <div id="$input->{id_prefix}parse-errors" class="section">
362 <h2>Parse Errors</h2>
363
364 <dl id="$input->{id_prefix}parse-errors-list">];
365 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
366
367 my $onerror = sub {
368 my $err = shift;
369 my $line = $err->location->line_number;
370 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
371 print STDOUT $err->location->column_number, "</dt><dd>";
372 print STDOUT htescape $err->text, "</dd>\n";
373
374 add_error ('syntax', {type => $err->text,
375 level => [
376 $err->SEVERITY_FATAL_ERROR => 'm',
377 $err->SEVERITY_ERROR => 'm',
378 $err->SEVERITY_WARNING => 's',
379 ]->[$err->severity]} => $result);
380
381 return 1;
382 };
383
384 my $t = \($input->{s});
385 if ($input->{is_char_string}) {
386 require Encode;
387 $t = \(Encode::encode ('utf8', $$t));
388 $input->{charset} = 'utf-8';
389 }
390
391 my $time1 = time;
392 open my $fh, '<', $t;
393 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
394 ($fh => $dom, $onerror, charset => $input->{charset});
395 $time{parse_xml} = time - $time1;
396 $doc->manakai_charset ($input->{official_charset})
397 if defined $input->{official_charset};
398
399 print STDOUT qq[</dl></div>];
400
401 return ($doc, undef);
402 } # print_syntax_error_xml_section
403
404 sub get_css_parser () {
405 our $CSSParser;
406 return $CSSParser if $CSSParser;
407
408 require Whatpm::CSS::Parser;
409 my $p = Whatpm::CSS::Parser->new;
410
411 $p->{prop}->{$_} = 1 for qw/
412 alignment-baseline
413 background background-attachment background-color background-image
414 background-position background-position-x background-position-y
415 background-repeat border border-bottom border-bottom-color
416 border-bottom-style border-bottom-width border-collapse border-color
417 border-left border-left-color
418 border-left-style border-left-width border-right border-right-color
419 border-right-style border-right-width
420 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
421 border-style border-top border-top-color border-top-style border-top-width
422 border-width bottom
423 caption-side clear clip color content counter-increment counter-reset
424 cursor direction display dominant-baseline empty-cells float font
425 font-family font-size font-size-adjust font-stretch
426 font-style font-variant font-weight height left
427 letter-spacing line-height
428 list-style list-style-image list-style-position list-style-type
429 margin margin-bottom margin-left margin-right margin-top marker-offset
430 marks max-height max-width min-height min-width opacity -moz-opacity
431 orphans outline outline-color outline-style outline-width overflow
432 overflow-x overflow-y
433 padding padding-bottom padding-left padding-right padding-top
434 page page-break-after page-break-before page-break-inside
435 position quotes right size table-layout
436 text-align text-anchor text-decoration text-indent text-transform
437 top unicode-bidi vertical-align visibility white-space width widows
438 word-spacing writing-mode z-index
439 /;
440 $p->{prop_value}->{display}->{$_} = 1 for qw/
441 block clip inline inline-block inline-table list-item none
442 table table-caption table-cell table-column table-column-group
443 table-header-group table-footer-group table-row table-row-group
444 compact marker
445 /;
446 $p->{prop_value}->{position}->{$_} = 1 for qw/
447 absolute fixed relative static
448 /;
449 $p->{prop_value}->{float}->{$_} = 1 for qw/
450 left right none
451 /;
452 $p->{prop_value}->{clear}->{$_} = 1 for qw/
453 left right none both
454 /;
455 $p->{prop_value}->{direction}->{ltr} = 1;
456 $p->{prop_value}->{direction}->{rtl} = 1;
457 $p->{prop_value}->{marks}->{crop} = 1;
458 $p->{prop_value}->{marks}->{cross} = 1;
459 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
460 normal bidi-override embed
461 /;
462 for my $prop_name (qw/overflow overflow-x overflow-y/) {
463 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
464 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
465 /;
466 }
467 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
468 visible hidden collapse
469 /;
470 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
471 disc circle square decimal decimal-leading-zero
472 lower-roman upper-roman lower-greek lower-latin
473 upper-latin armenian georgian lower-alpha upper-alpha none
474 hebrew cjk-ideographic hiragana katakana hiragana-iroha
475 katakana-iroha
476 /;
477 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
478 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
479 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
480 auto always avoid left right
481 /;
482 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
483 auto always avoid left right
484 /;
485 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
486 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
487 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
488 repeat repeat-x repeat-y no-repeat
489 /;
490 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
491 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
492 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
493 xx-small x-small small medium large x-large xx-large
494 -manakai-xxx-large -webkit-xxx-large
495 larger smaller
496 /;
497 $p->{prop_value}->{'font-style'}->{normal} = 1;
498 $p->{prop_value}->{'font-style'}->{italic} = 1;
499 $p->{prop_value}->{'font-style'}->{oblique} = 1;
500 $p->{prop_value}->{'font-variant'}->{normal} = 1;
501 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
502 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
503 qw/normal wider narrower ultra-condensed extra-condensed
504 condensed semi-condensed semi-expanded expanded
505 extra-expanded ultra-expanded/;
506 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
507 left right center justify begin end
508 /;
509 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
510 capitalize uppercase lowercase none
511 /;
512 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
513 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
514 /;
515 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
516 lr rl tb lr-tb rl-tb tb-rl
517 /;
518 $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
519 start middle end
520 /;
521 $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
522 auto use-script no-change reset-size ideographic alphabetic
523 hanging mathematical central middle text-after-edge text-before-edge
524 /;
525 $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
526 auto baseline before-edge text-before-edge middle central
527 after-edge text-after-edge ideographic alphabetic hanging
528 mathematical
529 /;
530 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
531 none blink underline overline line-through
532 /;
533 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
534 top bottom left right
535 /;
536 $p->{prop_value}->{'table-layout'}->{auto} = 1;
537 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
538 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
539 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
540 $p->{prop_value}->{'empty-cells'}->{show} = 1;
541 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
542 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
543 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
544 se-resize sw-resize s-resize w-resize text wait help progress
545 /;
546 for my $prop (qw/border-top-style border-left-style
547 border-bottom-style border-right-style outline-style/) {
548 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
549 none hidden dotted dashed solid double groove ridge inset outset
550 /;
551 }
552 for my $prop (qw/color background-color
553 border-bottom-color border-left-color border-right-color
554 border-top-color border-color/) {
555 $p->{prop_value}->{$prop}->{transparent} = 1;
556 $p->{prop_value}->{$prop}->{flavor} = 1;
557 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
558 }
559 $p->{prop_value}->{'outline-color'}->{invert} = 1;
560 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
561 $p->{pseudo_class}->{$_} = 1 for qw/
562 active checked disabled empty enabled first-child first-of-type
563 focus hover indeterminate last-child last-of-type link only-child
564 only-of-type root target visited
565 lang nth-child nth-last-child nth-of-type nth-last-of-type not
566 -manakai-contains -manakai-current
567 /;
568 $p->{pseudo_element}->{$_} = 1 for qw/
569 after before first-letter first-line
570 /;
571
572 return $CSSParser = $p;
573 } # get_css_parser
574
575 sub print_syntax_error_css_section ($$) {
576 my ($input, $result) = @_;
577
578 print STDOUT qq[
579 <div id="$input->{id_prefix}parse-errors" class="section">
580 <h2>Parse Errors</h2>
581
582 <dl id="$input->{id_prefix}parse-errors-list">];
583 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
584
585 my $p = get_css_parser ();
586 $p->init;
587 $p->{onerror} = sub {
588 my (%opt) = @_;
589 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
590 if ($opt{token}) {
591 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
592 } else {
593 print STDOUT qq[<dt class="$cls">Unknown location];
594 }
595 if (defined $opt{value}) {
596 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
597 } elsif (defined $opt{token}) {
598 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
599 }
600 $type =~ tr/ /-/;
601 $type =~ s/\|/%7C/g;
602 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
603 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
604 print STDOUT qq[$msg</dd>\n];
605
606 add_error ('syntax', \%opt => $result);
607 };
608 $p->{href} = $input->{uri};
609 $p->{base_uri} = $input->{base_uri};
610
611 # if ($parse_mode eq 'q') {
612 # $p->{unitless_px} = 1;
613 # $p->{hashless_color} = 1;
614 # }
615
616 ## TODO: Make $input->{s} a ref.
617
618 my $s = \$input->{s};
619 my $charset;
620 unless ($input->{is_char_string}) {
621 require Encode;
622 if (defined $input->{charset}) {## TODO: IANA->Perl
623 $charset = $input->{charset};
624 $s = \(Encode::decode ($input->{charset}, $$s));
625 } else {
626 ## TODO: charset detection
627 $s = \(Encode::decode ($charset = 'utf-8', $$s));
628 }
629 }
630
631 my $cssom = $p->parse_char_string ($$s);
632 $cssom->manakai_input_encoding ($charset) if defined $charset;
633
634 print STDOUT qq[</dl></div>];
635
636 return $cssom;
637 } # print_syntax_error_css_section
638
639 sub print_syntax_error_manifest_section ($$) {
640 my ($input, $result) = @_;
641
642 require Whatpm::CacheManifest;
643
644 print STDOUT qq[
645 <div id="$input->{id_prefix}parse-errors" class="section">
646 <h2>Parse Errors</h2>
647
648 <dl id="$input->{id_prefix}parse-errors-list">];
649 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
650
651 my $onerror = sub {
652 my (%opt) = @_;
653 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
654 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
655 qq[</dt>];
656 $type =~ tr/ /-/;
657 $type =~ s/\|/%7C/g;
658 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
659 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
660 print STDOUT qq[$msg</dd>\n];
661
662 add_error ('syntax', \%opt => $result);
663 };
664
665 my $m = $input->{is_char_string} ? 'parse_char_string' : 'parse_byte_string';
666 my $time1 = time;
667 my $manifest = Whatpm::CacheManifest->$m
668 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
669 $time{parse_manifest} = time - $time1;
670
671 print STDOUT qq[</dl></div>];
672
673 return $manifest;
674 } # print_syntax_error_manifest_section
675
676 sub print_source_string_section ($$$) {
677 my $input = shift;
678 my $s;
679 unless ($input->{is_char_string}) {
680 open my $byte_stream, '<', $_[0];
681 require Message::Charset::Info;
682 my $charset = Message::Charset::Info->get_by_iana_name ($_[1]);
683 my ($char_stream, $e_status) = $charset->get_decode_handle
684 ($byte_stream, allow_error_reporting => 1, allow_fallback => 1);
685 return unless $char_stream;
686
687 $char_stream->onerror (sub {
688 my (undef, $type, %opt) = @_;
689 if ($opt{octets}) {
690 ${$opt{octets}} = "\x{FFFD}";
691 }
692 });
693
694 my $t = '';
695 while (1) {
696 my $c = $char_stream->getc;
697 last unless defined $c;
698 $t .= $c;
699 }
700 $s = \$t;
701 ## TODO: Output for each line, don't concat all of lines.
702 } else {
703 $s = $_[0];
704 }
705
706 my $i = 1;
707 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
708 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
709 <h2>Document Source</h2>
710 <ol lang="">\n];
711 if (length $$s) {
712 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
713 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
714 "</li>\n";
715 $i++;
716 }
717 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
718 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
719 "</li>\n";
720 }
721 } else {
722 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
723 }
724 print STDOUT "</ol></div>
725 <script>
726 addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
727 </script>";
728 } # print_input_string_section
729
730 sub print_document_tree ($$) {
731 my ($input, $node) = @_;
732
733 my $r = '<ol class="xoxo">';
734
735 my @node = ($node);
736 while (@node) {
737 my $child = shift @node;
738 unless (ref $child) {
739 $r .= $child;
740 next;
741 }
742
743 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
744 my $nt = $child->node_type;
745 if ($nt == $child->ELEMENT_NODE) {
746 my $child_nsuri = $child->namespace_uri;
747 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
748 '</code>'; ## ISSUE: case
749
750 if ($child->has_attributes) {
751 $r .= '<ul class="attributes">';
752 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
753 @{$child->attributes}) {
754 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
755 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
756 }
757 $r .= '</ul>';
758 }
759
760 if ($child->has_child_nodes) {
761 $r .= '<ol class="children">';
762 unshift @node, @{$child->child_nodes}, '</ol></li>';
763 } else {
764 $r .= '</li>';
765 }
766 } elsif ($nt == $child->TEXT_NODE) {
767 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
768 } elsif ($nt == $child->CDATA_SECTION_NODE) {
769 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
770 } elsif ($nt == $child->COMMENT_NODE) {
771 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
772 } elsif ($nt == $child->DOCUMENT_NODE) {
773 $r .= qq'<li id="$node_id" class="tree-document">Document';
774 $r .= qq[<ul class="attributes">];
775 my $cp = $child->manakai_charset;
776 if (defined $cp) {
777 $r .= qq[<li><code>charset</code> parameter = <code>];
778 $r .= htescape ($cp) . qq[</code></li>];
779 }
780 $r .= qq[<li><code>inputEncoding</code> = ];
781 my $ie = $child->input_encoding;
782 if (defined $ie) {
783 $r .= qq[<code>@{[htescape ($ie)]}</code>];
784 if ($child->manakai_has_bom) {
785 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
786 }
787 } else {
788 $r .= qq[(<code>null</code>)];
789 }
790 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
791 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
792 unless ($child->manakai_is_html) {
793 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
794 if (defined $child->xml_encoding) {
795 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
796 } else {
797 $r .= qq[<li>XML encoding = (null)</li>];
798 }
799 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
800 }
801 $r .= qq[</ul>];
802 if ($child->has_child_nodes) {
803 $r .= '<ol class="children">';
804 unshift @node, @{$child->child_nodes}, '</ol></li>';
805 }
806 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
807 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
808 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
809 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
810 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
811 $r .= '</ul></li>';
812 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
813 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
814 } else {
815 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
816 }
817 }
818
819 $r .= '</ol>';
820 print STDOUT $r;
821 } # print_document_tree
822
823 sub print_structure_dump_dom_section ($$$) {
824 my ($input, $doc, $el) = @_;
825
826 print STDOUT qq[
827 <div id="$input->{id_prefix}document-tree" class="section">
828 <h2>Document Tree</h2>
829 ];
830 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
831 unless $input->{nested};
832
833 print_document_tree ($input, $el || $doc);
834
835 print STDOUT qq[</div>];
836 } # print_structure_dump_dom_section
837
838 sub print_structure_dump_cssom_section ($$) {
839 my ($input, $cssom) = @_;
840
841 print STDOUT qq[
842 <div id="$input->{id_prefix}document-tree" class="section">
843 <h2>Document Tree</h2>
844 ];
845 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
846 unless $input->{nested};
847
848 ## TODO:
849 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
850
851 print STDOUT qq[</div>];
852 } # print_structure_dump_cssom_section
853
854 sub print_structure_dump_manifest_section ($$) {
855 my ($input, $manifest) = @_;
856
857 print STDOUT qq[
858 <div id="$input->{id_prefix}dump-manifest" class="section">
859 <h2>Cache Manifest</h2>
860 ];
861 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
862 unless $input->{nested};
863
864 print STDOUT qq[<dl><dt>Explicit entries</dt>];
865 my $i = 0;
866 for my $uri (@{$manifest->[0]}) {
867 my $euri = htescape ($uri);
868 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
869 }
870
871 print STDOUT qq[<dt>Fallback entries</dt><dd>
872 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
873 <th scope=row>Fallback Entry</tr><tbody>];
874 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
875 my $euri = htescape ($uri);
876 my $euri2 = htescape ($manifest->[1]->{$uri});
877 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
878 <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
879 }
880
881 print STDOUT qq[</table><dt>Online whitelist</dt>];
882 for my $uri (@{$manifest->[2]}) {
883 my $euri = htescape ($uri);
884 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
885 }
886
887 print STDOUT qq[</dl></div>];
888 } # print_structure_dump_manifest_section
889
890 sub print_structure_error_dom_section ($$$$$) {
891 my ($input, $doc, $el, $result, $onsubdoc) = @_;
892
893 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
894 <h2>Document Errors</h2>
895
896 <dl id=document-errors-list>];
897 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
898 unless $input->{nested};
899
900 require Whatpm::ContentChecker;
901 my $onerror = sub {
902 my %opt = @_;
903 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
904 $type =~ tr/ /-/;
905 $type =~ s/\|/%7C/g;
906 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
907 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
908 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
909 print STDOUT $msg, "</dd>\n";
910 add_error ('structure', \%opt => $result);
911 };
912
913 my $elements;
914 my $time1 = time;
915 if ($el) {
916 $elements = Whatpm::ContentChecker->check_element
917 ($el, $onerror, $onsubdoc);
918 } else {
919 $elements = Whatpm::ContentChecker->check_document
920 ($doc, $onerror, $onsubdoc);
921 }
922 $time{check} = time - $time1;
923
924 print STDOUT qq[</dl>
925 <script>
926 addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
927 </script></div>];
928
929 return $elements;
930 } # print_structure_error_dom_section
931
932 sub print_structure_error_manifest_section ($$$) {
933 my ($input, $manifest, $result) = @_;
934
935 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
936 <h2>Document Errors</h2>
937
938 <dl>];
939 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
940 unless $input->{nested};
941
942 require Whatpm::CacheManifest;
943 Whatpm::CacheManifest->check_manifest ($manifest, sub {
944 my %opt = @_;
945 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
946 $type =~ tr/ /-/;
947 $type =~ s/\|/%7C/g;
948 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
949 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
950 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
951 add_error ('structure', \%opt => $result);
952 });
953
954 print STDOUT qq[</div>];
955 } # print_structure_error_manifest_section
956
957 sub print_table_section ($$) {
958 my ($input, $tables) = @_;
959
960 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
961 unless $input->{nested};
962 print STDOUT qq[
963 <div id="$input->{id_prefix}tables" class="section">
964 <h2>Tables</h2>
965
966 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
967 <script src="../table-script.js" type="text/javascript"></script>
968 <noscript>
969 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
970 </noscript>
971 ];
972
973 require JSON;
974
975 my $i = 0;
976 for my $table (@$tables) {
977 $i++;
978 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
979 get_node_link ($input, $table->{element}) . q[</h3>];
980
981 delete $table->{element};
982
983 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption},
984 @{$table->{row}}) {
985 next unless $_;
986 delete $_->{element};
987 }
988
989 for (@{$table->{row_group}}) {
990 next unless $_;
991 next unless $_->{element};
992 $_->{type} = $_->{element}->manakai_local_name;
993 delete $_->{element};
994 }
995
996 for (@{$table->{cell}}) {
997 next unless $_;
998 for (@{$_}) {
999 next unless $_;
1000 for (@$_) {
1001 $_->{id} = refaddr $_->{element} if defined $_->{element};
1002 delete $_->{element};
1003 $_->{is_header} = $_->{is_header} ? 1 : 0;
1004 }
1005 }
1006 }
1007
1008 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
1009 print STDOUT JSON::objToJson ($table);
1010 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
1011 print STDOUT qq[, '$input->{id_prefix}');</script>];
1012 }
1013
1014 print STDOUT qq[</div>];
1015 } # print_table_section
1016
1017 sub print_listing_section ($$$) {
1018 my ($opt, $input, $ids) = @_;
1019
1020 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
1021 unless $input->{nested};
1022 print STDOUT qq[
1023 <div id="$input->{id_prefix}$opt->{id}" class="section">
1024 <h2>$opt->{heading}</h2>
1025
1026 <dl>
1027 ];
1028 for my $id (sort {$a cmp $b} keys %$ids) {
1029 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
1030 for (@{$ids->{$id}}) {
1031 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
1032 }
1033 }
1034 print STDOUT qq[</dl></div>];
1035 } # print_listing_section
1036
1037 sub print_uri_section ($$$) {
1038 my ($input, $uris) = @_;
1039
1040 ## NOTE: URIs contained in the DOM (i.e. in HTML or XML documents),
1041 ## except for those in RDF triples.
1042 ## TODO: URIs in CSS
1043
1044 push @nav, ['#' . $input->{id_prefix} . 'uris' => 'URIs']
1045 unless $input->{nested};
1046 print STDOUT qq[
1047 <div id="$input->{id_prefix}uris" class="section">
1048 <h2>URIs</h2>
1049
1050 <dl>];
1051 for my $uri (sort {$a cmp $b} keys %$uris) {
1052 my $euri = htescape ($uri);
1053 print STDOUT qq[<dt><code class=uri>&lt;<a href="$euri">$euri</a>></code>];
1054 my $eccuri = htescape (get_cc_uri ($uri));
1055 print STDOUT qq[<dd><a href="$eccuri">Check conformance of this document</a>];
1056 print STDOUT qq[<dd>Found at: <ul>];
1057 for my $entry (@{$uris->{$uri}}) {
1058 print STDOUT qq[<li>], get_node_link ($input, $entry->{node});
1059 if (keys %{$entry->{type} or {}}) {
1060 print STDOUT ' (';
1061 print STDOUT join ', ', map {
1062 {
1063 hyperlink => 'Hyperlink',
1064 resource => 'Link to an external resource',
1065 namespace => 'Namespace URI',
1066 cite => 'Citation or link to a long description',
1067 embedded => 'Link to an embedded content',
1068 base => 'Base URI',
1069 action => 'Submission URI',
1070 }->{$_}
1071 or
1072 htescape ($_)
1073 } keys %{$entry->{type}};
1074 print STDOUT ')';
1075 }
1076 }
1077 print STDOUT qq[</ul>];
1078 }
1079 print STDOUT qq[</dl></div>];
1080 } # print_uri_section
1081
1082 sub print_rdf_section ($$$) {
1083 my ($input, $rdfs) = @_;
1084
1085 push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
1086 unless $input->{nested};
1087 print STDOUT qq[
1088 <div id="$input->{id_prefix}rdf" class="section">
1089 <h2>RDF Triples</h2>
1090
1091 <dl>];
1092 my $i = 0;
1093 for my $rdf (@$rdfs) {
1094 print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
1095 print STDOUT get_node_link ($input, $rdf->[0]);
1096 print STDOUT qq[<dd><dl>];
1097 for my $triple (@{$rdf->[1]}) {
1098 print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
1099 print STDOUT get_rdf_resource_html ($triple->[1]);
1100 print STDOUT ' ';
1101 print STDOUT get_rdf_resource_html ($triple->[2]);
1102 print STDOUT ' ';
1103 print STDOUT get_rdf_resource_html ($triple->[3]);
1104 }
1105 print STDOUT qq[</dl>];
1106 }
1107 print STDOUT qq[</dl></div>];
1108 } # print_rdf_section
1109
1110 sub get_rdf_resource_html ($) {
1111 my $resource = shift;
1112 if (defined $resource->{uri}) {
1113 my $euri = htescape ($resource->{uri});
1114 return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1115 '</a>></code>';
1116 } elsif (defined $resource->{bnodeid}) {
1117 return htescape ('_:' . $resource->{bnodeid});
1118 } elsif ($resource->{nodes}) {
1119 return '(rdf:XMLLiteral)';
1120 } elsif (defined $resource->{value}) {
1121 my $elang = htescape (defined $resource->{language}
1122 ? $resource->{language} : '');
1123 my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
1124 if (defined $resource->{datatype}) {
1125 my $euri = htescape ($resource->{datatype});
1126 $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1127 '</a>></code>';
1128 } elsif (length $resource->{language}) {
1129 $r .= '@' . htescape ($resource->{language});
1130 }
1131 return $r;
1132 } else {
1133 return '??';
1134 }
1135 } # get_rdf_resource_html
1136
1137 sub print_result_section ($) {
1138 my $result = shift;
1139
1140 print STDOUT qq[
1141 <div id="result-summary" class="section">
1142 <h2>Result</h2>];
1143
1144 if ($result->{unsupported} and $result->{conforming_max}) {
1145 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1146 checker cannot decide whether the document is conforming or
1147 not, since the document contains one or more unsupported
1148 features. The document might or might not be conforming.</p>];
1149 } elsif ($result->{conforming_min}) {
1150 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1151 found in this document.</p>];
1152 } elsif ($result->{conforming_max}) {
1153 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1154 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1155 it might be conforming.</p>];
1156 } else {
1157 print STDOUT qq[<p class=FAIL id=result-para>This document is
1158 <strong><em>non</em>-conforming</strong>.</p>];
1159 }
1160
1161 print STDOUT qq[<table>
1162 <colgroup><col><colgroup><col><col><col><colgroup><col>
1163 <thead>
1164 <tr><th scope=col></th>
1165 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1166 Errors</a></th>
1167 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1168 Errors</a></th>
1169 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1170 <th scope=col>Score</th></tr></thead><tbody>];
1171
1172 my $must_error = 0;
1173 my $should_error = 0;
1174 my $warning = 0;
1175 my $score_min = 0;
1176 my $score_max = 0;
1177 my $score_base = 20;
1178 my $score_unit = $score_base / 100;
1179 for (
1180 [Transfer => 'transfer', ''],
1181 [Character => 'char', ''],
1182 [Syntax => 'syntax', '#parse-errors'],
1183 [Structure => 'structure', '#document-errors'],
1184 ) {
1185 $must_error += ($result->{$_->[1]}->{must} += 0);
1186 $should_error += ($result->{$_->[1]}->{should} += 0);
1187 $warning += ($result->{$_->[1]}->{warning} += 0);
1188 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1189 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1190
1191 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1192 my $label = $_->[0];
1193 if ($result->{$_->[1]}->{must} or
1194 $result->{$_->[1]}->{should} or
1195 $result->{$_->[1]}->{warning} or
1196 $result->{$_->[1]}->{unsupported}) {
1197 $label = qq[<a href="$_->[2]">$label</a>];
1198 }
1199
1200 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1201 if ($uncertain) {
1202 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}];
1203 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1204 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}];
1205 } else {
1206 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}];
1207 }
1208 print qq[ / 20];
1209 }
1210
1211 $score_max += $score_base;
1212
1213 print STDOUT qq[
1214 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base / 20
1215 </tbody>
1216 <tfoot><tr class=uncertain><th scope=row>Total</th>
1217 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1218 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1219 <td>$warning?</td>
1220 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong> / 100
1221 </table>
1222
1223 <p><strong>Important</strong>: This conformance checking service
1224 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1225 </div>];
1226 push @nav, ['#result-summary' => 'Result'];
1227 } # print_result_section
1228
1229 sub print_result_unknown_type_section ($$) {
1230 my ($input, $result) = @_;
1231
1232 my $euri = htescape ($input->{uri});
1233 print STDOUT qq[
1234 <div id="$input->{id_prefix}parse-errors" class="section">
1235 <h2>Errors</h2>
1236
1237 <dl>
1238 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1239 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1240 supported</a></strong>:
1241 Media type
1242 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1243 is not supported.</dd>
1244 </dl>
1245 </div>
1246 ];
1247 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1248 unless $input->{nested};
1249 add_error (char => {level => 'u'} => $result);
1250 add_error (syntax => {level => 'u'} => $result);
1251 add_error (structure => {level => 'u'} => $result);
1252 } # print_result_unknown_type_section
1253
1254 sub print_result_input_error_section ($) {
1255 my $input = shift;
1256 print STDOUT qq[<div class="section" id="result-summary">
1257 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1258 </div>];
1259 push @nav, ['#result-summary' => 'Result'];
1260 } # print_result_input_error_section
1261
1262 sub get_error_label ($$) {
1263 my ($input, $err) = @_;
1264
1265 my $r = '';
1266
1267 my $line;
1268 my $column;
1269
1270 if (defined $err->{node}) {
1271 $line = $err->{node}->get_user_data ('manakai_source_line');
1272 if (defined $line) {
1273 $column = $err->{node}->get_user_data ('manakai_source_column');
1274 } else {
1275 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1276 my $owner = $err->{node}->owner_element;
1277 $line = $owner->get_user_data ('manakai_source_line');
1278 $column = $owner->get_user_data ('manakai_source_column');
1279 } else {
1280 my $parent = $err->{node}->parent_node;
1281 if ($parent) {
1282 $line = $parent->get_user_data ('manakai_source_line');
1283 $column = $parent->get_user_data ('manakai_source_column');
1284 }
1285 }
1286 }
1287 }
1288 unless (defined $line) {
1289 if (defined $err->{token} and defined $err->{token}->{line}) {
1290 $line = $err->{token}->{line};
1291 $column = $err->{token}->{column};
1292 } elsif (defined $err->{line}) {
1293 $line = $err->{line};
1294 $column = $err->{column};
1295 }
1296 }
1297
1298 if (defined $line) {
1299 if (defined $column and $column > 0) {
1300 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1301 } else {
1302 $line = $line - 1 || 1;
1303 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1304 }
1305 }
1306
1307 if (defined $err->{node}) {
1308 $r .= ' ' if length $r;
1309 $r .= get_node_link ($input, $err->{node});
1310 }
1311
1312 if (defined $err->{index}) {
1313 if (length $r) {
1314 $r .= ', Index ' . (0+$err->{index});
1315 } else {
1316 $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1317 . (0+$err->{index}) . '</a>';
1318 }
1319 }
1320
1321 if (defined $err->{value}) {
1322 $r .= ' ' if length $r;
1323 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1324 }
1325
1326 return $r;
1327 } # get_error_label
1328
1329 sub get_error_level_label ($) {
1330 my $err = shift;
1331
1332 my $r = '';
1333
1334 if (not defined $err->{level} or $err->{level} eq 'm') {
1335 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1336 error</a></strong>: ];
1337 } elsif ($err->{level} eq 's') {
1338 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1339 error</a></strong>: ];
1340 } elsif ($err->{level} eq 'w') {
1341 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1342 ];
1343 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1344 $r = qq[<strong><a href="../error-description#level-u">Not
1345 supported</a></strong>: ];
1346 } elsif ($err->{level} eq 'i') {
1347 $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1348 } else {
1349 my $elevel = htescape ($err->{level});
1350 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1351 ];
1352 }
1353
1354 return $r;
1355 } # get_error_level_label
1356
1357 sub get_node_path ($) {
1358 my $node = shift;
1359 my @r;
1360 while (defined $node) {
1361 my $rs;
1362 if ($node->node_type == 1) {
1363 $rs = $node->node_name;
1364 $node = $node->parent_node;
1365 } elsif ($node->node_type == 2) {
1366 $rs = '@' . $node->node_name;
1367 $node = $node->owner_element;
1368 } elsif ($node->node_type == 3) {
1369 $rs = '"' . $node->data . '"';
1370 $node = $node->parent_node;
1371 } elsif ($node->node_type == 9) {
1372 @r = ('') unless @r;
1373 $rs = '';
1374 $node = $node->parent_node;
1375 } else {
1376 $rs = '#' . $node->node_type;
1377 $node = $node->parent_node;
1378 }
1379 unshift @r, $rs;
1380 }
1381 return join '/', @r;
1382 } # get_node_path
1383
1384 sub get_node_link ($$) {
1385 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1386 htescape (get_node_path ($_[1])) . qq[</a>];
1387 } # get_node_link
1388
1389 {
1390 my $Msg = {};
1391
1392 sub load_text_catalog ($) {
1393 my $lang = shift; # MUST be a canonical lang name
1394 open my $file, '<:utf8', "cc-msg.$lang.txt"
1395 or die "$0: cc-msg.$lang.txt: $!";
1396 while (<$file>) {
1397 if (s/^([^;]+);([^;]*);//) {
1398 my ($type, $cls, $msg) = ($1, $2, $_);
1399 $msg =~ tr/\x0D\x0A//d;
1400 $Msg->{$type} = [$cls, $msg];
1401 }
1402 }
1403 } # load_text_catalog
1404
1405 sub get_text ($) {
1406 my ($type, $level, $node) = @_;
1407 $type = $level . ':' . $type if defined $level;
1408 $level = 'm' unless defined $level;
1409 my @arg;
1410 {
1411 if (defined $Msg->{$type}) {
1412 my $msg = $Msg->{$type}->[1];
1413 $msg =~ s{<var>\$([0-9]+)</var>}{
1414 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1415 }ge;
1416 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1417 UNIVERSAL::can ($node, 'get_attribute_ns')
1418 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1419 }ge;
1420 $msg =~ s{<var>{\@}</var>}{
1421 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1422 }ge;
1423 $msg =~ s{<var>{local-name}</var>}{
1424 UNIVERSAL::can ($node, 'manakai_local_name')
1425 ? htescape ($node->manakai_local_name) : ''
1426 }ge;
1427 $msg =~ s{<var>{element-local-name}</var>}{
1428 (UNIVERSAL::can ($node, 'owner_element') and
1429 $node->owner_element)
1430 ? htescape ($node->owner_element->manakai_local_name)
1431 : ''
1432 }ge;
1433 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1434 } elsif ($type =~ s/:([^:]*)$//) {
1435 unshift @arg, $1;
1436 redo;
1437 }
1438 }
1439 return ($type, 'level-'.$level, htescape ($_[0]));
1440 } # get_text
1441
1442 }
1443
1444 sub encode_uri_component ($) {
1445 require Encode;
1446 my $s = Encode::encode ('utf8', shift);
1447 $s =~ s/([^0-9A-Za-z_.~-])/sprintf '%%%02X', ord $1/ge;
1448 return $s;
1449 } # encode_uri_component
1450
1451 sub get_cc_uri ($) {
1452 return './?uri=' . encode_uri_component ($_[0]);
1453 } # get_cc_uri
1454
1455 sub get_input_document ($$) {
1456 my ($http, $dom) = @_;
1457
1458 my $request_uri = $http->get_parameter ('uri');
1459 my $r = {};
1460 if (defined $request_uri and length $request_uri) {
1461 my $uri = $dom->create_uri_reference ($request_uri);
1462 unless ({
1463 http => 1,
1464 }->{lc $uri->uri_scheme}) {
1465 return {uri => $request_uri, request_uri => $request_uri,
1466 error_status_text => 'URI scheme not allowed'};
1467 }
1468
1469 require Message::Util::HostPermit;
1470 my $host_permit = new Message::Util::HostPermit;
1471 $host_permit->add_rule (<<EOH);
1472 Allow host=suika port=80
1473 Deny host=suika
1474 Allow host=suika.fam.cx port=80
1475 Deny host=suika.fam.cx
1476 Deny host=localhost
1477 Deny host=*.localdomain
1478 Deny ipv4=0.0.0.0/8
1479 Deny ipv4=10.0.0.0/8
1480 Deny ipv4=127.0.0.0/8
1481 Deny ipv4=169.254.0.0/16
1482 Deny ipv4=172.0.0.0/11
1483 Deny ipv4=192.0.2.0/24
1484 Deny ipv4=192.88.99.0/24
1485 Deny ipv4=192.168.0.0/16
1486 Deny ipv4=198.18.0.0/15
1487 Deny ipv4=224.0.0.0/4
1488 Deny ipv4=255.255.255.255/32
1489 Deny ipv6=0::0/0
1490 Allow host=*
1491 EOH
1492 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1493 return {uri => $request_uri, request_uri => $request_uri,
1494 error_status_text => 'Connection to the host is forbidden'};
1495 }
1496
1497 require LWP::UserAgent;
1498 my $ua = WDCC::LWPUA->new;
1499 $ua->{wdcc_dom} = $dom;
1500 $ua->{wdcc_host_permit} = $host_permit;
1501 $ua->agent ('Mozilla'); ## TODO: for now.
1502 $ua->parse_head (0);
1503 $ua->protocols_allowed ([qw/http/]);
1504 $ua->max_size (1000_000);
1505 my $req = HTTP::Request->new (GET => $request_uri);
1506 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1507 my $res = $ua->request ($req);
1508 ## TODO: 401 sets |is_success| true.
1509 if ($res->is_success or $http->get_parameter ('error-page')) {
1510 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1511 $r->{uri} = $res->request->uri;
1512 $r->{request_uri} = $request_uri;
1513
1514 ## TODO: More strict parsing...
1515 my $ct = $res->header ('Content-Type');
1516 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1517 $r->{charset} = lc $1;
1518 $r->{charset} =~ tr/\\//d;
1519 $r->{official_charset} = $r->{charset};
1520 }
1521
1522 my $input_charset = $http->get_parameter ('charset');
1523 if (defined $input_charset and length $input_charset) {
1524 $r->{charset_overridden}
1525 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1526 $r->{charset} = $input_charset;
1527 }
1528
1529 ## TODO: Support for HTTP Content-Encoding
1530
1531 $r->{s} = ''.$res->content;
1532
1533 require Whatpm::ContentType;
1534 ($r->{official_type}, $r->{media_type})
1535 = Whatpm::ContentType->get_sniffed_type
1536 (get_file_head => sub {
1537 return substr $r->{s}, 0, shift;
1538 },
1539 http_content_type_byte => $ct,
1540 has_http_content_encoding =>
1541 defined $res->header ('Content-Encoding'),
1542 supported_image_types => {});
1543 } else {
1544 $r->{uri} = $res->request->uri;
1545 $r->{request_uri} = $request_uri;
1546 $r->{error_status_text} = $res->status_line;
1547 }
1548
1549 $r->{header_field} = [];
1550 $res->scan (sub {
1551 push @{$r->{header_field}}, [$_[0], $_[1]];
1552 });
1553 $r->{header_status_code} = $res->code;
1554 $r->{header_status_text} = $res->message;
1555 } else {
1556 $r->{s} = ''.$http->get_parameter ('s');
1557 $r->{uri} = q<thismessage:/>;
1558 $r->{request_uri} = q<thismessage:/>;
1559 $r->{base_uri} = q<thismessage:/>;
1560 $r->{charset} = ''.$http->get_parameter ('_charset_');
1561 $r->{charset} =~ s/\s+//g;
1562 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1563 $r->{official_charset} = $r->{charset};
1564 $r->{header_field} = [];
1565
1566 require Whatpm::ContentType;
1567 ($r->{official_type}, $r->{media_type})
1568 = Whatpm::ContentType->get_sniffed_type
1569 (get_file_head => sub {
1570 return substr $r->{s}, 0, shift;
1571 },
1572 http_content_type_byte => undef,
1573 has_http_content_encoding => 0,
1574 supported_image_types => {});
1575 }
1576
1577 my $input_format = $http->get_parameter ('i');
1578 if (defined $input_format and length $input_format) {
1579 $r->{media_type_overridden}
1580 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1581 $r->{media_type} = $input_format;
1582 }
1583 if (defined $r->{s} and not defined $r->{media_type}) {
1584 $r->{media_type} = 'text/html';
1585 $r->{media_type_overridden} = 1;
1586 }
1587
1588 if ($r->{media_type} eq 'text/xml') {
1589 unless (defined $r->{charset}) {
1590 $r->{charset} = 'us-ascii';
1591 $r->{official_charset} = $r->{charset};
1592 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1593 $r->{charset_overridden} = 0;
1594 }
1595 }
1596
1597 if (length $r->{s} > 1000_000) {
1598 $r->{error_status_text} = 'Entity-body too large';
1599 delete $r->{s};
1600 return $r;
1601 }
1602
1603 $r->{inner_html_element} = $http->get_parameter ('e');
1604
1605 return $r;
1606 } # get_input_document
1607
1608 package WDCC::LWPUA;
1609 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1610
1611 sub redirect_ok {
1612 my $ua = shift;
1613 unless ($ua->SUPER::redirect_ok (@_)) {
1614 return 0;
1615 }
1616
1617 my $uris = $_[1]->header ('Location');
1618 return 0 unless $uris;
1619 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1620 unless ({
1621 http => 1,
1622 }->{lc $uri->uri_scheme}) {
1623 return 0;
1624 }
1625 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1626 return 0;
1627 }
1628 return 1;
1629 } # redirect_ok
1630
1631 =head1 AUTHOR
1632
1633 Wakaba <w@suika.fam.cx>.
1634
1635 =head1 LICENSE
1636
1637 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1638
1639 This library is free software; you can redistribute it
1640 and/or modify it under the same terms as Perl itself.
1641
1642 =cut
1643
1644 ## $Date: 2008/05/06 08:47:09 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24