/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.29 - (show annotations) (download)
Fri Nov 23 12:08:32 2007 UTC (16 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.28: +4 -3 lines
Set level-* class to all errors; make should-error icon stop; make warning icon alert

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 use Message::CGI::HTTP;
24 my $http = Message::CGI::HTTP->new;
25
26 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28 exit;
29 }
30
31 binmode STDOUT, ':utf8';
32 $| = 1;
33
34 require Message::DOM::DOMImplementation;
35 my $dom = Message::DOM::DOMImplementation->new;
36
37 load_text_catalog ('en'); ## TODO: conneg
38
39 my @nav;
40 print STDOUT qq[Content-Type: text/html; charset=utf-8
41
42 <!DOCTYPE html>
43 <html lang="en">
44 <head>
45 <title>Web Document Conformance Checker (BETA)</title>
46 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 </head>
48 <body>
49 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50 (<em>beta</em>)</h1>
51 ];
52
53 $| = 0;
54 my $input = get_input_document ($http, $dom);
55 my $char_length = 0;
56 my %time;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89 ];
90
91 my $result = {conforming_min => 1, conforming_max => 1};
92 print_http_header_section ($input, $result);
93
94 my $doc;
95 my $el;
96 my $manifest;
97
98 if ($input->{media_type} eq 'text/html') {
99 ($doc, $el) = print_syntax_error_html_section ($input, $result);
100 print_source_string_section
101 (\($input->{s}), $input->{charset} || $doc->input_encoding);
102 } elsif ({
103 'text/xml' => 1,
104 'application/atom+xml' => 1,
105 'application/rss+xml' => 1,
106 'application/svg+xml' => 1,
107 'application/xhtml+xml' => 1,
108 'application/xml' => 1,
109 }->{$input->{media_type}}) {
110 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
111 print_source_string_section (\($input->{s}), $doc->input_encoding);
112 } elsif ($input->{media_type} eq 'text/cache-manifest') {
113 ## TODO: MUST be text/cache-manifest
114 $manifest = print_syntax_error_manifest_section ($input, $result);
115 print_source_string_section (\($input->{s}), 'utf-8');
116 } else {
117 ## TODO: Change HTTP status code??
118 print_result_unknown_type_section ($input, $result);
119 }
120
121 if (defined $doc or defined $el) {
122 print_structure_dump_dom_section ($doc, $el);
123 my $elements = print_structure_error_dom_section ($doc, $el, $result);
124 print_table_section ($elements->{table}) if @{$elements->{table}};
125 print_id_section ($elements->{id}) if keys %{$elements->{id}};
126 print_term_section ($elements->{term}) if keys %{$elements->{term}};
127 print_class_section ($elements->{class}) if keys %{$elements->{class}};
128 } elsif (defined $manifest) {
129 print_structure_dump_manifest_section ($manifest);
130 print_structure_error_manifest_section ($manifest, $result);
131 }
132
133 print_result_section ($result);
134 } else {
135 print STDOUT qq[</dl></div>];
136 print_result_input_error_section ($input);
137 }
138
139 print STDOUT qq[
140 <ul class="navigation" id="nav-items">
141 ];
142 for (@nav) {
143 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
144 }
145 print STDOUT qq[
146 </ul>
147 </body>
148 </html>
149 ];
150
151 for (qw/decode parse parse_html parse_xml parse_manifest
152 check check_manifest/) {
153 next unless defined $time{$_};
154 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
155 print $file $char_length, "\t", $time{$_}, "\n";
156 }
157
158 exit;
159
160 sub add_error ($$$) {
161 my ($layer, $err, $result) = @_;
162 if (defined $err->{level}) {
163 if ($err->{level} eq 's') {
164 $result->{$layer}->{should}++;
165 $result->{$layer}->{score_min} -= 2;
166 $result->{conforming_min} = 0;
167 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
168 $result->{$layer}->{warning}++;
169 } elsif ($err->{level} eq 'unsupported') {
170 $result->{$layer}->{unsupported}++;
171 $result->{unsupported} = 1;
172 } else {
173 $result->{$layer}->{must}++;
174 $result->{$layer}->{score_max} -= 2;
175 $result->{$layer}->{score_min} -= 2;
176 $result->{conforming_min} = 0;
177 $result->{conforming_max} = 0;
178 }
179 } else {
180 $result->{$layer}->{must}++;
181 $result->{$layer}->{score_max} -= 2;
182 $result->{$layer}->{score_min} -= 2;
183 $result->{conforming_min} = 0;
184 $result->{conforming_max} = 0;
185 }
186 } # add_error
187
188 sub print_http_header_section ($$) {
189 my ($input, $result) = @_;
190 return unless defined $input->{header_status_code} or
191 defined $input->{header_status_text} or
192 @{$input->{header_field}};
193
194 push @nav, ['#source-header' => 'HTTP Header'];
195 print STDOUT qq[<div id="source-header" class="section">
196 <h2>HTTP Header</h2>
197
198 <p><strong>Note</strong>: Due to the limitation of the
199 network library in use, the content of this section might
200 not be the real header.</p>
201
202 <table><tbody>
203 ];
204
205 if (defined $input->{header_status_code}) {
206 print STDOUT qq[<tr><th scope="row">Status code</th>];
207 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
208 }
209 if (defined $input->{header_status_text}) {
210 print STDOUT qq[<tr><th scope="row">Status text</th>];
211 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
212 }
213
214 for (@{$input->{header_field}}) {
215 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
216 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
217 }
218
219 print STDOUT qq[</tbody></table></div>];
220 } # print_http_header_section
221
222 sub print_syntax_error_html_section ($$) {
223 my ($input, $result) = @_;
224
225 require Encode;
226 require Whatpm::HTML;
227
228 print STDOUT qq[
229 <div id="parse-errors" class="section">
230 <h2>Parse Errors</h2>
231
232 <dl>];
233 push @nav, ['#parse-errors' => 'Parse Error'];
234
235 my $onerror = sub {
236 my (%opt) = @_;
237 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
238 if ($opt{column} > 0) {
239 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
240 } else {
241 $opt{line} = $opt{line} - 1 || 1;
242 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
243 }
244 $type =~ tr/ /-/;
245 $type =~ s/\|/%7C/g;
246 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
247 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
248 print STDOUT qq[$msg</dd>\n];
249
250 add_error ('syntax', \%opt => $result);
251 };
252
253 my $doc = $dom->create_document;
254 my $el;
255 my $inner_html_element = $http->get_parameter ('e');
256 if (defined $inner_html_element and length $inner_html_element) {
257 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
258 my $time1 = time;
259 my $t = Encode::decode ($input->{charset}, $input->{s});
260 $time{decode} = time - $time1;
261
262 $el = $doc->create_element_ns
263 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
264 $time1 = time;
265 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
266 $time{parse} = time - $time1;
267 } else {
268 my $time1 = time;
269 Whatpm::HTML->parse_byte_string
270 ($input->{charset}, $input->{s} => $doc, $onerror);
271 $time{parse_html} = time - $time1;
272 }
273 $doc->manakai_charset ($input->{official_charset})
274 if defined $input->{official_charset};
275
276 print STDOUT qq[</dl></div>];
277
278 return ($doc, $el);
279 } # print_syntax_error_html_section
280
281 sub print_syntax_error_xml_section ($$) {
282 my ($input, $result) = @_;
283
284 require Message::DOM::XMLParserTemp;
285
286 print STDOUT qq[
287 <div id="parse-errors" class="section">
288 <h2>Parse Errors</h2>
289
290 <dl>];
291 push @nav, ['#parse-errors' => 'Parse Error'];
292
293 my $onerror = sub {
294 my $err = shift;
295 my $line = $err->location->line_number;
296 print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
297 print STDOUT $err->location->column_number, "</dt><dd>";
298 print STDOUT htescape $err->text, "</dd>\n";
299
300 add_error ('syntax', {type => $err->text,
301 level => [
302 $err->SEVERITY_FATAL_ERROR => 'm',
303 $err->SEVERITY_ERROR => 'm',
304 $err->SEVERITY_WARNING => 's',
305 ]->[$err->severity]} => $result);
306
307 return 1;
308 };
309
310 my $time1 = time;
311 open my $fh, '<', \($input->{s});
312 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
313 ($fh => $dom, $onerror, charset => $input->{charset});
314 $time{parse_xml} = time - $time1;
315 $doc->manakai_charset ($input->{official_charset})
316 if defined $input->{official_charset};
317
318 print STDOUT qq[</dl></div>];
319
320 return ($doc, undef);
321 } # print_syntax_error_xml_section
322
323 sub print_syntax_error_manifest_section ($$) {
324 my ($input, $result) = @_;
325
326 require Whatpm::CacheManifest;
327
328 print STDOUT qq[
329 <div id="parse-errors" class="section">
330 <h2>Parse Errors</h2>
331
332 <dl>];
333 push @nav, ['#parse-errors' => 'Parse Error'];
334
335 my $onerror = sub {
336 my (%opt) = @_;
337 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
338 print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
339 $type =~ tr/ /-/;
340 $type =~ s/\|/%7C/g;
341 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
342 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
343 print STDOUT qq[$msg</dd>\n];
344
345 add_error ('syntax', \%opt => $result);
346 };
347
348 my $time1 = time;
349 my $manifest = Whatpm::CacheManifest->parse_byte_string
350 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
351 $time{parse_manifest} = time - $time1;
352
353 print STDOUT qq[</dl></div>];
354
355 return $manifest;
356 } # print_syntax_error_manifest_section
357
358 sub print_source_string_section ($$) {
359 require Encode;
360 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
361 return unless $enc;
362
363 my $s = \($enc->decode (${$_[0]}));
364 my $i = 1;
365 push @nav, ['#source-string' => 'Source'];
366 print STDOUT qq[<div id="source-string" class="section">
367 <h2>Document Source</h2>
368 <ol lang="">\n];
369 if (length $$s) {
370 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
371 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
372 $i++;
373 }
374 if ($$s =~ /\G([^\x0A]+)/gc) {
375 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
376 }
377 } else {
378 print STDOUT q[<li id="line-1"></li>];
379 }
380 print STDOUT "</ol></div>";
381 } # print_input_string_section
382
383 sub print_document_tree ($) {
384 my $node = shift;
385 my $r = '<ol class="xoxo">';
386
387 my @node = ($node);
388 while (@node) {
389 my $child = shift @node;
390 unless (ref $child) {
391 $r .= $child;
392 next;
393 }
394
395 my $node_id = 'node-'.refaddr $child;
396 my $nt = $child->node_type;
397 if ($nt == $child->ELEMENT_NODE) {
398 my $child_nsuri = $child->namespace_uri;
399 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
400 '</code>'; ## ISSUE: case
401
402 if ($child->has_attributes) {
403 $r .= '<ul class="attributes">';
404 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
405 @{$child->attributes}) {
406 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
407 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
408 }
409 $r .= '</ul>';
410 }
411
412 if ($child->has_child_nodes) {
413 $r .= '<ol class="children">';
414 unshift @node, @{$child->child_nodes}, '</ol></li>';
415 } else {
416 $r .= '</li>';
417 }
418 } elsif ($nt == $child->TEXT_NODE) {
419 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
420 } elsif ($nt == $child->CDATA_SECTION_NODE) {
421 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
422 } elsif ($nt == $child->COMMENT_NODE) {
423 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
424 } elsif ($nt == $child->DOCUMENT_NODE) {
425 $r .= qq'<li id="$node_id" class="tree-document">Document';
426 $r .= qq[<ul class="attributes">];
427 my $cp = $child->manakai_charset;
428 if (defined $cp) {
429 $r .= qq[<li><code>charset</code> parameter = <code>];
430 $r .= htescape ($cp) . qq[</code></li>];
431 }
432 $r .= qq[<li><code>inputEncoding</code> = ];
433 my $ie = $child->input_encoding;
434 if (defined $ie) {
435 $r .= qq[<code>@{[htescape ($ie)]}</code>];
436 if ($child->manakai_has_bom) {
437 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
438 }
439 } else {
440 $r .= qq[(<code>null</code>)];
441 }
442 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
443 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
444 unless ($child->manakai_is_html) {
445 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
446 if (defined $child->xml_encoding) {
447 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
448 } else {
449 $r .= qq[<li>XML encoding = (null)</li>];
450 }
451 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
452 }
453 $r .= qq[</ul>];
454 if ($child->has_child_nodes) {
455 $r .= '<ol class="children">';
456 unshift @node, @{$child->child_nodes}, '</ol></li>';
457 }
458 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
459 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
460 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
461 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
462 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
463 $r .= '</ul></li>';
464 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
465 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
466 } else {
467 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
468 }
469 }
470
471 $r .= '</ol>';
472 print STDOUT $r;
473 } # print_document_tree
474
475 sub print_structure_dump_dom_section ($$) {
476 my ($doc, $el) = @_;
477
478 print STDOUT qq[
479 <div id="document-tree" class="section">
480 <h2>Document Tree</h2>
481 ];
482 push @nav, ['#document-tree' => 'Tree'];
483
484 print_document_tree ($el || $doc);
485
486 print STDOUT qq[</div>];
487 } # print_structure_dump_dom_section
488
489 sub print_structure_dump_manifest_section ($) {
490 my $manifest = shift;
491
492 print STDOUT qq[
493 <div id="dump-manifest" class="section">
494 <h2>Cache Manifest</h2>
495 ];
496 push @nav, ['#dump-manifest' => 'Caceh Manifest'];
497
498 print STDOUT qq[<dl><dt>Explicit entries</dt>];
499 for my $uri (@{$manifest->[0]}) {
500 my $euri = htescape ($uri);
501 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
502 }
503
504 print STDOUT qq[<dt>Fallback entries</dt><dd>
505 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
506 <th scope=row>Fallback Entry</tr><tbody>];
507 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
508 my $euri = htescape ($uri);
509 my $euri2 = htescape ($manifest->[1]->{$uri});
510 print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
511 <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
512 }
513
514 print STDOUT qq[</table><dt>Online whitelist</dt>];
515 for my $uri (@{$manifest->[2]}) {
516 my $euri = htescape ($uri);
517 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
518 }
519
520 print STDOUT qq[</dl></div>];
521 } # print_structure_dump_manifest_section
522
523 sub print_structure_error_dom_section ($$$) {
524 my ($doc, $el, $result) = @_;
525
526 print STDOUT qq[<div id="document-errors" class="section">
527 <h2>Document Errors</h2>
528
529 <dl>];
530 push @nav, ['#document-errors' => 'Document Error'];
531
532 require Whatpm::ContentChecker;
533 my $onerror = sub {
534 my %opt = @_;
535 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
536 $type =~ tr/ /-/;
537 $type =~ s/\|/%7C/g;
538 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
539 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
540 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
541 print STDOUT $msg, "</dd>\n";
542 add_error ('structure', \%opt => $result);
543 };
544
545 my $elements;
546 my $time1 = time;
547 if ($el) {
548 $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
549 } else {
550 $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
551 }
552 $time{check} = time - $time1;
553
554 print STDOUT qq[</dl></div>];
555
556 return $elements;
557 } # print_structure_error_dom_section
558
559 sub print_structure_error_manifest_section ($$$) {
560 my ($manifest, $result) = @_;
561
562 print STDOUT qq[<div id="document-errors" class="section">
563 <h2>Document Errors</h2>
564
565 <dl>];
566 push @nav, ['#document-errors' => 'Document Error'];
567
568 require Whatpm::CacheManifest;
569 Whatpm::CacheManifest->check_manifest ($manifest, sub {
570 my %opt = @_;
571 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
572 $type =~ tr/ /-/;
573 $type =~ s/\|/%7C/g;
574 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
575 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
576 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
577 add_error ('structure', \%opt => $result);
578 });
579
580 print STDOUT qq[</div>];
581 } # print_structure_error_manifest_section
582
583 sub print_table_section ($) {
584 my $tables = shift;
585
586 push @nav, ['#tables' => 'Tables'];
587 print STDOUT qq[
588 <div id="tables" class="section">
589 <h2>Tables</h2>
590
591 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
592 <script src="../table-script.js" type="text/javascript"></script>
593 <noscript>
594 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
595 </noscript>
596 ];
597
598 require JSON;
599
600 my $i = 0;
601 for my $table_el (@$tables) {
602 $i++;
603 print STDOUT qq[<div class="section" id="table-$i"><h3>] .
604 get_node_link ($table_el) . q[</h3>];
605
606 ## TODO: Make |ContentChecker| return |form_table| result
607 ## so that this script don't have to run the algorithm twice.
608 my $table = Whatpm::HTMLTable->form_table ($table_el);
609
610 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
611 next unless $_;
612 delete $_->{element};
613 }
614
615 for (@{$table->{row_group}}) {
616 next unless $_;
617 next unless $_->{element};
618 $_->{type} = $_->{element}->manakai_local_name;
619 delete $_->{element};
620 }
621
622 for (@{$table->{cell}}) {
623 next unless $_;
624 for (@{$_}) {
625 next unless $_;
626 for (@$_) {
627 $_->{id} = refaddr $_->{element} if defined $_->{element};
628 delete $_->{element};
629 $_->{is_header} = $_->{is_header} ? 1 : 0;
630 }
631 }
632 }
633
634 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
635 print STDOUT JSON::objToJson ($table);
636 print STDOUT qq[, document.getElementById ('table-$i'));</script>];
637 }
638
639 print STDOUT qq[</div>];
640 } # print_table_section
641
642 sub print_id_section ($) {
643 my $ids = shift;
644
645 push @nav, ['#identifiers' => 'IDs'];
646 print STDOUT qq[
647 <div id="identifiers" class="section">
648 <h2>Identifiers</h2>
649
650 <dl>
651 ];
652 for my $id (sort {$a cmp $b} keys %$ids) {
653 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
654 for (@{$ids->{$id}}) {
655 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
656 }
657 }
658 print STDOUT qq[</dl></div>];
659 } # print_id_section
660
661 sub print_term_section ($) {
662 my $terms = shift;
663
664 push @nav, ['#terms' => 'Terms'];
665 print STDOUT qq[
666 <div id="terms" class="section">
667 <h2>Terms</h2>
668
669 <dl>
670 ];
671 for my $term (sort {$a cmp $b} keys %$terms) {
672 print STDOUT qq[<dt>@{[htescape $term]}</dt>];
673 for (@{$terms->{$term}}) {
674 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
675 }
676 }
677 print STDOUT qq[</dl></div>];
678 } # print_term_section
679
680 sub print_class_section ($) {
681 my $classes = shift;
682
683 push @nav, ['#classes' => 'Classes'];
684 print STDOUT qq[
685 <div id="classes" class="section">
686 <h2>Classes</h2>
687
688 <dl>
689 ];
690 for my $class (sort {$a cmp $b} keys %$classes) {
691 print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
692 for (@{$classes->{$class}}) {
693 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
694 }
695 }
696 print STDOUT qq[</dl></div>];
697 } # print_class_section
698
699 sub print_result_section ($) {
700 my $result = shift;
701
702 print STDOUT qq[
703 <div id="result-summary" class="section">
704 <h2>Result</h2>];
705
706 if ($result->{unsupported} and $result->{conforming_max}) {
707 print STDOUT qq[<p class=uncertain id=result-para>The conformance
708 checker cannot decide whether the document is conforming or
709 not, since the document contains one or more unsupported
710 features. The document might or might not be conforming.</p>];
711 } elsif ($result->{conforming_min}) {
712 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
713 found in this document.</p>];
714 } elsif ($result->{conforming_max}) {
715 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
716 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
717 it might be conforming.</p>];
718 } else {
719 print STDOUT qq[<p class=FAIL id=result-para>This document is
720 <strong><em>non</em>-conforming</strong>.</p>];
721 }
722
723 print STDOUT qq[<table>
724 <colgroup><col><colgroup><col><col><col><colgroup><col>
725 <thead>
726 <tr><th scope=col></th>
727 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
728 Errors</a></th>
729 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
730 Errors</a></th>
731 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
732 <th scope=col>Score</th></tr></thead><tbody>];
733
734 my $must_error = 0;
735 my $should_error = 0;
736 my $warning = 0;
737 my $score_min = 0;
738 my $score_max = 0;
739 my $score_base = 20;
740 my $score_unit = $score_base / 100;
741 for (
742 [Transfer => 'transfer', ''],
743 [Character => 'char', ''],
744 [Syntax => 'syntax', '#parse-errors'],
745 [Structure => 'structure', '#document-errors'],
746 ) {
747 $must_error += ($result->{$_->[1]}->{must} += 0);
748 $should_error += ($result->{$_->[1]}->{should} += 0);
749 $warning += ($result->{$_->[1]}->{warning} += 0);
750 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
751 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
752
753 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
754 my $label = $_->[0];
755 if ($result->{$_->[1]}->{must} or
756 $result->{$_->[1]}->{should} or
757 $result->{$_->[1]}->{warning} or
758 $result->{$_->[1]}->{unsupported}) {
759 $label = qq[<a href="$_->[2]">$label</a>];
760 }
761
762 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
763 if ($uncertain) {
764 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
765 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
766 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
767 } else {
768 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
769 }
770 }
771
772 $score_max += $score_base;
773
774 print STDOUT qq[
775 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
776 </tbody>
777 <tfoot><tr class=uncertain><th scope=row>Total</th>
778 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
779 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
780 <td>$warning?</td>
781 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
782 </table>
783
784 <p><strong>Important</strong>: This conformance checking service
785 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
786 </div>];
787 push @nav, ['#result-summary' => 'Result'];
788 } # print_result_section
789
790 sub print_result_unknown_type_section ($$) {
791 my ($input, $result) = @_;
792
793 my $euri = htescape ($input->{uri});
794 print STDOUT qq[
795 <div id="parse-errors" class="section">
796 <h2>Errors</h2>
797
798 <dl>
799 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
800 <dd class=unsupported><strong><a href="../error-description#level-u">Not
801 supported</a></strong>:
802 Media type
803 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
804 is not supported.</dd>
805 </dl>
806 </div>
807 ];
808 push @nav, ['#parse-errors' => 'Errors'];
809 add_error (char => {level => 'unsupported'} => $result);
810 add_error (syntax => {level => 'unsupported'} => $result);
811 add_error (structure => {level => 'unsupported'} => $result);
812 } # print_result_unknown_type_section
813
814 sub print_result_input_error_section ($) {
815 my $input = shift;
816 print STDOUT qq[<div class="section" id="result-summary">
817 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
818 </div>];
819 push @nav, ['#result-summary' => 'Result'];
820 } # print_Result_input_error_section
821
822 sub get_error_label ($) {
823 my $err = shift;
824
825 my $r = '';
826
827 if (defined $err->{line}) {
828 if ($err->{column} > 0) {
829 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
830 } else {
831 $err->{line} = $err->{line} - 1 || 1;
832 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
833 }
834 }
835
836 if (defined $err->{node}) {
837 $r .= ' ' if length $r;
838 $r = get_node_link ($err->{node});
839 }
840
841 if (defined $err->{index}) {
842 $r .= ' ' if length $r;
843 $r .= 'Index ' . (0+$err->{index});
844 }
845
846 if (defined $err->{value}) {
847 $r .= ' ' if length $r;
848 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
849 }
850
851 return $r;
852 } # get_error_label
853
854 sub get_error_level_label ($) {
855 my $err = shift;
856
857 my $r = '';
858
859 if (not defined $err->{level} or $err->{level} eq 'm') {
860 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
861 error</a></strong>: ];
862 } elsif ($err->{level} eq 's') {
863 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
864 error</a></strong>: ];
865 } elsif ($err->{level} eq 'w') {
866 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
867 ];
868 } elsif ($err->{level} eq 'unsupported') {
869 $r = qq[<strong><a href="../error-description#level-u">Not
870 supported</a></strong>: ];
871 } else {
872 my $elevel = htescape ($err->{level});
873 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
874 ];
875 }
876
877 return $r;
878 } # get_error_level_label
879
880 sub get_node_path ($) {
881 my $node = shift;
882 my @r;
883 while (defined $node) {
884 my $rs;
885 if ($node->node_type == 1) {
886 $rs = $node->manakai_local_name;
887 $node = $node->parent_node;
888 } elsif ($node->node_type == 2) {
889 $rs = '@' . $node->manakai_local_name;
890 $node = $node->owner_element;
891 } elsif ($node->node_type == 3) {
892 $rs = '"' . $node->data . '"';
893 $node = $node->parent_node;
894 } elsif ($node->node_type == 9) {
895 @r = ('') unless @r;
896 $rs = '';
897 $node = $node->parent_node;
898 } else {
899 $rs = '#' . $node->node_type;
900 $node = $node->parent_node;
901 }
902 unshift @r, $rs;
903 }
904 return join '/', @r;
905 } # get_node_path
906
907 sub get_node_link ($) {
908 return qq[<a href="#node-@{[refaddr $_[0]]}">] .
909 htescape (get_node_path ($_[0])) . qq[</a>];
910 } # get_node_link
911
912 {
913 my $Msg = {};
914
915 sub load_text_catalog ($) {
916 my $lang = shift; # MUST be a canonical lang name
917 open my $file, '<:utf8', "cc-msg.$lang.txt"
918 or die "$0: cc-msg.$lang.txt: $!";
919 while (<$file>) {
920 if (s/^([^;]+);([^;]*);//) {
921 my ($type, $cls, $msg) = ($1, $2, $_);
922 $msg =~ tr/\x0D\x0A//d;
923 $Msg->{$type} = [$cls, $msg];
924 }
925 }
926 } # load_text_catalog
927
928 sub get_text ($) {
929 my ($type, $level, $node) = @_;
930 $type = $level . ':' . $type if defined $level;
931 $level = 'm' unless defined $level;
932 my @arg;
933 {
934 if (defined $Msg->{$type}) {
935 my $msg = $Msg->{$type}->[1];
936 $msg =~ s{<var>\$([0-9]+)</var>}{
937 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
938 }ge;
939 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
940 UNIVERSAL::can ($node, 'get_attribute_ns')
941 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
942 }ge;
943 $msg =~ s{<var>{\@}</var>}{
944 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
945 }ge;
946 $msg =~ s{<var>{local-name}</var>}{
947 UNIVERSAL::can ($node, 'manakai_local_name')
948 ? htescape ($node->manakai_local_name) : ''
949 }ge;
950 $msg =~ s{<var>{element-local-name}</var>}{
951 (UNIVERSAL::can ($node, 'owner_element') and
952 $node->owner_element)
953 ? htescape ($node->owner_element->manakai_local_name)
954 : ''
955 }ge;
956 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
957 } elsif ($type =~ s/:([^:]*)$//) {
958 unshift @arg, $1;
959 redo;
960 }
961 }
962 return ($type, 'level-'.$level, htescape ($_[0]));
963 } # get_text
964
965 }
966
967 sub get_input_document ($$) {
968 my ($http, $dom) = @_;
969
970 my $request_uri = $http->get_parameter ('uri');
971 my $r = {};
972 if (defined $request_uri and length $request_uri) {
973 my $uri = $dom->create_uri_reference ($request_uri);
974 unless ({
975 http => 1,
976 }->{lc $uri->uri_scheme}) {
977 return {uri => $request_uri, request_uri => $request_uri,
978 error_status_text => 'URI scheme not allowed'};
979 }
980
981 require Message::Util::HostPermit;
982 my $host_permit = new Message::Util::HostPermit;
983 $host_permit->add_rule (<<EOH);
984 Allow host=suika port=80
985 Deny host=suika
986 Allow host=suika.fam.cx port=80
987 Deny host=suika.fam.cx
988 Deny host=localhost
989 Deny host=*.localdomain
990 Deny ipv4=0.0.0.0/8
991 Deny ipv4=10.0.0.0/8
992 Deny ipv4=127.0.0.0/8
993 Deny ipv4=169.254.0.0/16
994 Deny ipv4=172.0.0.0/11
995 Deny ipv4=192.0.2.0/24
996 Deny ipv4=192.88.99.0/24
997 Deny ipv4=192.168.0.0/16
998 Deny ipv4=198.18.0.0/15
999 Deny ipv4=224.0.0.0/4
1000 Deny ipv4=255.255.255.255/32
1001 Deny ipv6=0::0/0
1002 Allow host=*
1003 EOH
1004 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1005 return {uri => $request_uri, request_uri => $request_uri,
1006 error_status_text => 'Connection to the host is forbidden'};
1007 }
1008
1009 require LWP::UserAgent;
1010 my $ua = WDCC::LWPUA->new;
1011 $ua->{wdcc_dom} = $dom;
1012 $ua->{wdcc_host_permit} = $host_permit;
1013 $ua->agent ('Mozilla'); ## TODO: for now.
1014 $ua->parse_head (0);
1015 $ua->protocols_allowed ([qw/http/]);
1016 $ua->max_size (1000_000);
1017 my $req = HTTP::Request->new (GET => $request_uri);
1018 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1019 my $res = $ua->request ($req);
1020 ## TODO: 401 sets |is_success| true.
1021 if ($res->is_success or $http->get_parameter ('error-page')) {
1022 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1023 $r->{uri} = $res->request->uri;
1024 $r->{request_uri} = $request_uri;
1025
1026 ## TODO: More strict parsing...
1027 my $ct = $res->header ('Content-Type');
1028 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1029 $r->{charset} = lc $1;
1030 $r->{charset} =~ tr/\\//d;
1031 $r->{official_charset} = $r->{charset};
1032 }
1033
1034 my $input_charset = $http->get_parameter ('charset');
1035 if (defined $input_charset and length $input_charset) {
1036 $r->{charset_overridden}
1037 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1038 $r->{charset} = $input_charset;
1039 }
1040
1041 ## TODO: Support for HTTP Content-Encoding
1042
1043 $r->{s} = ''.$res->content;
1044
1045 require Whatpm::ContentType;
1046 ($r->{official_type}, $r->{media_type})
1047 = Whatpm::ContentType->get_sniffed_type
1048 (get_file_head => sub {
1049 return substr $r->{s}, 0, shift;
1050 },
1051 http_content_type_byte => $ct,
1052 has_http_content_encoding =>
1053 defined $res->header ('Content-Encoding'),
1054 supported_image_types => {});
1055 } else {
1056 $r->{uri} = $res->request->uri;
1057 $r->{request_uri} = $request_uri;
1058 $r->{error_status_text} = $res->status_line;
1059 }
1060
1061 $r->{header_field} = [];
1062 $res->scan (sub {
1063 push @{$r->{header_field}}, [$_[0], $_[1]];
1064 });
1065 $r->{header_status_code} = $res->code;
1066 $r->{header_status_text} = $res->message;
1067 } else {
1068 $r->{s} = ''.$http->get_parameter ('s');
1069 $r->{uri} = q<thismessage:/>;
1070 $r->{request_uri} = q<thismessage:/>;
1071 $r->{base_uri} = q<thismessage:/>;
1072 $r->{charset} = ''.$http->get_parameter ('_charset_');
1073 $r->{charset} =~ s/\s+//g;
1074 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1075 $r->{official_charset} = $r->{charset};
1076 $r->{header_field} = [];
1077
1078 require Whatpm::ContentType;
1079 ($r->{official_type}, $r->{media_type})
1080 = Whatpm::ContentType->get_sniffed_type
1081 (get_file_head => sub {
1082 return substr $r->{s}, 0, shift;
1083 },
1084 http_content_type_byte => undef,
1085 has_http_content_encoding => 0,
1086 supported_image_types => {});
1087 }
1088
1089 my $input_format = $http->get_parameter ('i');
1090 if (defined $input_format and length $input_format) {
1091 $r->{media_type_overridden}
1092 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1093 $r->{media_type} = $input_format;
1094 }
1095 if (defined $r->{s} and not defined $r->{media_type}) {
1096 $r->{media_type} = 'text/html';
1097 $r->{media_type_overridden} = 1;
1098 }
1099
1100 if ($r->{media_type} eq 'text/xml') {
1101 unless (defined $r->{charset}) {
1102 $r->{charset} = 'us-ascii';
1103 $r->{official_charset} = $r->{charset};
1104 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1105 $r->{charset_overridden} = 0;
1106 }
1107 }
1108
1109 if (length $r->{s} > 1000_000) {
1110 $r->{error_status_text} = 'Entity-body too large';
1111 delete $r->{s};
1112 return $r;
1113 }
1114
1115 return $r;
1116 } # get_input_document
1117
1118 package WDCC::LWPUA;
1119 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1120
1121 sub redirect_ok {
1122 my $ua = shift;
1123 unless ($ua->SUPER::redirect_ok (@_)) {
1124 return 0;
1125 }
1126
1127 my $uris = $_[1]->header ('Location');
1128 return 0 unless $uris;
1129 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1130 unless ({
1131 http => 1,
1132 }->{lc $uri->uri_scheme}) {
1133 return 0;
1134 }
1135 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1136 return 0;
1137 }
1138 return 1;
1139 } # redirect_ok
1140
1141 =head1 AUTHOR
1142
1143 Wakaba <w@suika.fam.cx>.
1144
1145 =head1 LICENSE
1146
1147 Copyright 2007 Wakaba <w@suika.fam.cx>
1148
1149 This library is free software; you can redistribute it
1150 and/or modify it under the same terms as Perl itself.
1151
1152 =cut
1153
1154 ## $Date: 2007/11/23 06:36:19 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24