/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.28 - (show annotations) (download)
Fri Nov 23 06:36:19 2007 UTC (16 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.27: +2 -1 lines
++ ChangeLog	23 Nov 2007 06:36:14 -0000
2007-11-23  Wakaba  <wakaba@suika.fam.cx>

	* error-description-source.xml: New error descriptions.

	* cc.cgi: |Accept-Encoding: *; q=0| is a request for server to send
	a 406 page!  |identity| encoding is added.

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 use Message::CGI::HTTP;
24 my $http = Message::CGI::HTTP->new;
25
26 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28 exit;
29 }
30
31 binmode STDOUT, ':utf8';
32 $| = 1;
33
34 require Message::DOM::DOMImplementation;
35 my $dom = Message::DOM::DOMImplementation->new;
36
37 load_text_catalog ('en'); ## TODO: conneg
38
39 my @nav;
40 print STDOUT qq[Content-Type: text/html; charset=utf-8
41
42 <!DOCTYPE html>
43 <html lang="en">
44 <head>
45 <title>Web Document Conformance Checker (BETA)</title>
46 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 </head>
48 <body>
49 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50 (<em>beta</em>)</h1>
51 ];
52
53 $| = 0;
54 my $input = get_input_document ($http, $dom);
55 my $char_length = 0;
56 my %time;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89 ];
90
91 my $result = {conforming_min => 1, conforming_max => 1};
92 print_http_header_section ($input, $result);
93
94 my $doc;
95 my $el;
96 my $manifest;
97
98 if ($input->{media_type} eq 'text/html') {
99 ($doc, $el) = print_syntax_error_html_section ($input, $result);
100 print_source_string_section
101 (\($input->{s}), $input->{charset} || $doc->input_encoding);
102 } elsif ({
103 'text/xml' => 1,
104 'application/atom+xml' => 1,
105 'application/rss+xml' => 1,
106 'application/svg+xml' => 1,
107 'application/xhtml+xml' => 1,
108 'application/xml' => 1,
109 }->{$input->{media_type}}) {
110 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
111 print_source_string_section (\($input->{s}), $doc->input_encoding);
112 } elsif ($input->{media_type} eq 'text/cache-manifest') {
113 ## TODO: MUST be text/cache-manifest
114 $manifest = print_syntax_error_manifest_section ($input, $result);
115 print_source_string_section (\($input->{s}), 'utf-8');
116 } else {
117 ## TODO: Change HTTP status code??
118 print_result_unknown_type_section ($input, $result);
119 }
120
121 if (defined $doc or defined $el) {
122 print_structure_dump_dom_section ($doc, $el);
123 my $elements = print_structure_error_dom_section ($doc, $el, $result);
124 print_table_section ($elements->{table}) if @{$elements->{table}};
125 print_id_section ($elements->{id}) if keys %{$elements->{id}};
126 print_term_section ($elements->{term}) if keys %{$elements->{term}};
127 print_class_section ($elements->{class}) if keys %{$elements->{class}};
128 } elsif (defined $manifest) {
129 print_structure_dump_manifest_section ($manifest);
130 print_structure_error_manifest_section ($manifest, $result);
131 }
132
133 print_result_section ($result);
134 } else {
135 print STDOUT qq[</dl></div>];
136 print_result_input_error_section ($input);
137 }
138
139 print STDOUT qq[
140 <ul class="navigation" id="nav-items">
141 ];
142 for (@nav) {
143 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
144 }
145 print STDOUT qq[
146 </ul>
147 </body>
148 </html>
149 ];
150
151 for (qw/decode parse parse_html parse_xml parse_manifest
152 check check_manifest/) {
153 next unless defined $time{$_};
154 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
155 print $file $char_length, "\t", $time{$_}, "\n";
156 }
157
158 exit;
159
160 sub add_error ($$$) {
161 my ($layer, $err, $result) = @_;
162 if (defined $err->{level}) {
163 if ($err->{level} eq 's') {
164 $result->{$layer}->{should}++;
165 $result->{$layer}->{score_min} -= 2;
166 $result->{conforming_min} = 0;
167 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
168 $result->{$layer}->{warning}++;
169 } elsif ($err->{level} eq 'unsupported') {
170 $result->{$layer}->{unsupported}++;
171 $result->{unsupported} = 1;
172 } else {
173 $result->{$layer}->{must}++;
174 $result->{$layer}->{score_max} -= 2;
175 $result->{$layer}->{score_min} -= 2;
176 $result->{conforming_min} = 0;
177 $result->{conforming_max} = 0;
178 }
179 } else {
180 $result->{$layer}->{must}++;
181 $result->{$layer}->{score_max} -= 2;
182 $result->{$layer}->{score_min} -= 2;
183 $result->{conforming_min} = 0;
184 $result->{conforming_max} = 0;
185 }
186 } # add_error
187
188 sub print_http_header_section ($$) {
189 my ($input, $result) = @_;
190 return unless defined $input->{header_status_code} or
191 defined $input->{header_status_text} or
192 @{$input->{header_field}};
193
194 push @nav, ['#source-header' => 'HTTP Header'];
195 print STDOUT qq[<div id="source-header" class="section">
196 <h2>HTTP Header</h2>
197
198 <p><strong>Note</strong>: Due to the limitation of the
199 network library in use, the content of this section might
200 not be the real header.</p>
201
202 <table><tbody>
203 ];
204
205 if (defined $input->{header_status_code}) {
206 print STDOUT qq[<tr><th scope="row">Status code</th>];
207 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
208 }
209 if (defined $input->{header_status_text}) {
210 print STDOUT qq[<tr><th scope="row">Status text</th>];
211 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
212 }
213
214 for (@{$input->{header_field}}) {
215 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
216 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
217 }
218
219 print STDOUT qq[</tbody></table></div>];
220 } # print_http_header_section
221
222 sub print_syntax_error_html_section ($$) {
223 my ($input, $result) = @_;
224
225 require Encode;
226 require Whatpm::HTML;
227
228 print STDOUT qq[
229 <div id="parse-errors" class="section">
230 <h2>Parse Errors</h2>
231
232 <dl>];
233 push @nav, ['#parse-errors' => 'Parse Error'];
234
235 my $onerror = sub {
236 my (%opt) = @_;
237 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
238 if ($opt{column} > 0) {
239 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
240 } else {
241 $opt{line} = $opt{line} - 1 || 1;
242 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
243 }
244 $type =~ tr/ /-/;
245 $type =~ s/\|/%7C/g;
246 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
247 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
248 print STDOUT qq[$msg</dd>\n];
249
250 add_error ('syntax', \%opt => $result);
251 };
252
253 my $doc = $dom->create_document;
254 my $el;
255 my $inner_html_element = $http->get_parameter ('e');
256 if (defined $inner_html_element and length $inner_html_element) {
257 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
258 my $time1 = time;
259 my $t = Encode::decode ($input->{charset}, $input->{s});
260 $time{decode} = time - $time1;
261
262 $el = $doc->create_element_ns
263 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
264 $time1 = time;
265 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
266 $time{parse} = time - $time1;
267 } else {
268 my $time1 = time;
269 Whatpm::HTML->parse_byte_string
270 ($input->{charset}, $input->{s} => $doc, $onerror);
271 $time{parse_html} = time - $time1;
272 }
273 $doc->manakai_charset ($input->{official_charset})
274 if defined $input->{official_charset};
275
276 print STDOUT qq[</dl></div>];
277
278 return ($doc, $el);
279 } # print_syntax_error_html_section
280
281 sub print_syntax_error_xml_section ($$) {
282 my ($input, $result) = @_;
283
284 require Message::DOM::XMLParserTemp;
285
286 print STDOUT qq[
287 <div id="parse-errors" class="section">
288 <h2>Parse Errors</h2>
289
290 <dl>];
291 push @nav, ['#parse-errors' => 'Parse Error'];
292
293 my $onerror = sub {
294 my $err = shift;
295 my $line = $err->location->line_number;
296 print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
297 print STDOUT $err->location->column_number, "</dt><dd>";
298 print STDOUT htescape $err->text, "</dd>\n";
299
300 add_error ('syntax', {type => $err->text,
301 level => [
302 $err->SEVERITY_FATAL_ERROR => 'm',
303 $err->SEVERITY_ERROR => 'm',
304 $err->SEVERITY_WARNING => 's',
305 ]->[$err->severity]} => $result);
306
307 return 1;
308 };
309
310 my $time1 = time;
311 open my $fh, '<', \($input->{s});
312 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
313 ($fh => $dom, $onerror, charset => $input->{charset});
314 $time{parse_xml} = time - $time1;
315 $doc->manakai_charset ($input->{official_charset})
316 if defined $input->{official_charset};
317
318 print STDOUT qq[</dl></div>];
319
320 return ($doc, undef);
321 } # print_syntax_error_xml_section
322
323 sub print_syntax_error_manifest_section ($$) {
324 my ($input, $result) = @_;
325
326 require Whatpm::CacheManifest;
327
328 print STDOUT qq[
329 <div id="parse-errors" class="section">
330 <h2>Parse Errors</h2>
331
332 <dl>];
333 push @nav, ['#parse-errors' => 'Parse Error'];
334
335 my $onerror = sub {
336 my (%opt) = @_;
337 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
338 print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
339 $type =~ tr/ /-/;
340 $type =~ s/\|/%7C/g;
341 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
342 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
343 print STDOUT qq[$msg</dd>\n];
344
345 add_error ('syntax', \%opt => $result);
346 };
347
348 my $time1 = time;
349 my $manifest = Whatpm::CacheManifest->parse_byte_string
350 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
351 $time{parse_manifest} = time - $time1;
352
353 print STDOUT qq[</dl></div>];
354
355 return $manifest;
356 } # print_syntax_error_manifest_section
357
358 sub print_source_string_section ($$) {
359 require Encode;
360 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
361 return unless $enc;
362
363 my $s = \($enc->decode (${$_[0]}));
364 my $i = 1;
365 push @nav, ['#source-string' => 'Source'];
366 print STDOUT qq[<div id="source-string" class="section">
367 <h2>Document Source</h2>
368 <ol lang="">\n];
369 if (length $$s) {
370 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
371 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
372 $i++;
373 }
374 if ($$s =~ /\G([^\x0A]+)/gc) {
375 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
376 }
377 } else {
378 print STDOUT q[<li id="line-1"></li>];
379 }
380 print STDOUT "</ol></div>";
381 } # print_input_string_section
382
383 sub print_document_tree ($) {
384 my $node = shift;
385 my $r = '<ol class="xoxo">';
386
387 my @node = ($node);
388 while (@node) {
389 my $child = shift @node;
390 unless (ref $child) {
391 $r .= $child;
392 next;
393 }
394
395 my $node_id = 'node-'.refaddr $child;
396 my $nt = $child->node_type;
397 if ($nt == $child->ELEMENT_NODE) {
398 my $child_nsuri = $child->namespace_uri;
399 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
400 '</code>'; ## ISSUE: case
401
402 if ($child->has_attributes) {
403 $r .= '<ul class="attributes">';
404 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
405 @{$child->attributes}) {
406 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
407 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
408 }
409 $r .= '</ul>';
410 }
411
412 if ($child->has_child_nodes) {
413 $r .= '<ol class="children">';
414 unshift @node, @{$child->child_nodes}, '</ol></li>';
415 } else {
416 $r .= '</li>';
417 }
418 } elsif ($nt == $child->TEXT_NODE) {
419 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
420 } elsif ($nt == $child->CDATA_SECTION_NODE) {
421 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
422 } elsif ($nt == $child->COMMENT_NODE) {
423 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
424 } elsif ($nt == $child->DOCUMENT_NODE) {
425 $r .= qq'<li id="$node_id" class="tree-document">Document';
426 $r .= qq[<ul class="attributes">];
427 my $cp = $child->manakai_charset;
428 if (defined $cp) {
429 $r .= qq[<li><code>charset</code> parameter = <code>];
430 $r .= htescape ($cp) . qq[</code></li>];
431 }
432 $r .= qq[<li><code>inputEncoding</code> = ];
433 my $ie = $child->input_encoding;
434 if (defined $ie) {
435 $r .= qq[<code>@{[htescape ($ie)]}</code>];
436 if ($child->manakai_has_bom) {
437 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
438 }
439 } else {
440 $r .= qq[(<code>null</code>)];
441 }
442 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
443 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
444 unless ($child->manakai_is_html) {
445 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
446 if (defined $child->xml_encoding) {
447 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
448 } else {
449 $r .= qq[<li>XML encoding = (null)</li>];
450 }
451 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
452 }
453 $r .= qq[</ul>];
454 if ($child->has_child_nodes) {
455 $r .= '<ol class="children">';
456 unshift @node, @{$child->child_nodes}, '</ol></li>';
457 }
458 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
459 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
460 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
461 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
462 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
463 $r .= '</ul></li>';
464 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
465 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
466 } else {
467 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
468 }
469 }
470
471 $r .= '</ol>';
472 print STDOUT $r;
473 } # print_document_tree
474
475 sub print_structure_dump_dom_section ($$) {
476 my ($doc, $el) = @_;
477
478 print STDOUT qq[
479 <div id="document-tree" class="section">
480 <h2>Document Tree</h2>
481 ];
482 push @nav, ['#document-tree' => 'Tree'];
483
484 print_document_tree ($el || $doc);
485
486 print STDOUT qq[</div>];
487 } # print_structure_dump_dom_section
488
489 sub print_structure_dump_manifest_section ($) {
490 my $manifest = shift;
491
492 print STDOUT qq[
493 <div id="dump-manifest" class="section">
494 <h2>Cache Manifest</h2>
495 ];
496 push @nav, ['#dump-manifest' => 'Caceh Manifest'];
497
498 print STDOUT qq[<dl><dt>Explicit entries</dt>];
499 for my $uri (@{$manifest->[0]}) {
500 my $euri = htescape ($uri);
501 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
502 }
503
504 print STDOUT qq[<dt>Fallback entries</dt><dd>
505 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
506 <th scope=row>Fallback Entry</tr><tbody>];
507 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
508 my $euri = htescape ($uri);
509 my $euri2 = htescape ($manifest->[1]->{$uri});
510 print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
511 <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
512 }
513
514 print STDOUT qq[</table><dt>Online whitelist</dt>];
515 for my $uri (@{$manifest->[2]}) {
516 my $euri = htescape ($uri);
517 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
518 }
519
520 print STDOUT qq[</dl></div>];
521 } # print_structure_dump_manifest_section
522
523 sub print_structure_error_dom_section ($$$) {
524 my ($doc, $el, $result) = @_;
525
526 print STDOUT qq[<div id="document-errors" class="section">
527 <h2>Document Errors</h2>
528
529 <dl>];
530 push @nav, ['#document-errors' => 'Document Error'];
531
532 require Whatpm::ContentChecker;
533 my $onerror = sub {
534 my %opt = @_;
535 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
536 $type =~ tr/ /-/;
537 $type =~ s/\|/%7C/g;
538 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
539 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
540 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
541 print STDOUT $msg, "</dd>\n";
542 add_error ('structure', \%opt => $result);
543 };
544
545 my $elements;
546 my $time1 = time;
547 if ($el) {
548 $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
549 } else {
550 $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
551 }
552 $time{check} = time - $time1;
553
554 print STDOUT qq[</dl></div>];
555
556 return $elements;
557 } # print_structure_error_dom_section
558
559 sub print_structure_error_manifest_section ($$$) {
560 my ($manifest, $result) = @_;
561
562 print STDOUT qq[<div id="document-errors" class="section">
563 <h2>Document Errors</h2>
564
565 <dl>];
566 push @nav, ['#document-errors' => 'Document Error'];
567
568 require Whatpm::CacheManifest;
569 Whatpm::CacheManifest->check_manifest ($manifest, sub {
570 my %opt = @_;
571 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
572 $type =~ tr/ /-/;
573 $type =~ s/\|/%7C/g;
574 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
575 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
576 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
577 add_error ('structure', \%opt => $result);
578 });
579
580 print STDOUT qq[</div>];
581 } # print_structure_error_manifest_section
582
583 sub print_table_section ($) {
584 my $tables = shift;
585
586 push @nav, ['#tables' => 'Tables'];
587 print STDOUT qq[
588 <div id="tables" class="section">
589 <h2>Tables</h2>
590
591 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
592 <script src="../table-script.js" type="text/javascript"></script>
593 <noscript>
594 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
595 </noscript>
596 ];
597
598 require JSON;
599
600 my $i = 0;
601 for my $table_el (@$tables) {
602 $i++;
603 print STDOUT qq[<div class="section" id="table-$i"><h3>] .
604 get_node_link ($table_el) . q[</h3>];
605
606 ## TODO: Make |ContentChecker| return |form_table| result
607 ## so that this script don't have to run the algorithm twice.
608 my $table = Whatpm::HTMLTable->form_table ($table_el);
609
610 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
611 next unless $_;
612 delete $_->{element};
613 }
614
615 for (@{$table->{row_group}}) {
616 next unless $_;
617 next unless $_->{element};
618 $_->{type} = $_->{element}->manakai_local_name;
619 delete $_->{element};
620 }
621
622 for (@{$table->{cell}}) {
623 next unless $_;
624 for (@{$_}) {
625 next unless $_;
626 for (@$_) {
627 $_->{id} = refaddr $_->{element} if defined $_->{element};
628 delete $_->{element};
629 $_->{is_header} = $_->{is_header} ? 1 : 0;
630 }
631 }
632 }
633
634 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
635 print STDOUT JSON::objToJson ($table);
636 print STDOUT qq[, document.getElementById ('table-$i'));</script>];
637 }
638
639 print STDOUT qq[</div>];
640 } # print_table_section
641
642 sub print_id_section ($) {
643 my $ids = shift;
644
645 push @nav, ['#identifiers' => 'IDs'];
646 print STDOUT qq[
647 <div id="identifiers" class="section">
648 <h2>Identifiers</h2>
649
650 <dl>
651 ];
652 for my $id (sort {$a cmp $b} keys %$ids) {
653 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
654 for (@{$ids->{$id}}) {
655 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
656 }
657 }
658 print STDOUT qq[</dl></div>];
659 } # print_id_section
660
661 sub print_term_section ($) {
662 my $terms = shift;
663
664 push @nav, ['#terms' => 'Terms'];
665 print STDOUT qq[
666 <div id="terms" class="section">
667 <h2>Terms</h2>
668
669 <dl>
670 ];
671 for my $term (sort {$a cmp $b} keys %$terms) {
672 print STDOUT qq[<dt>@{[htescape $term]}</dt>];
673 for (@{$terms->{$term}}) {
674 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
675 }
676 }
677 print STDOUT qq[</dl></div>];
678 } # print_term_section
679
680 sub print_class_section ($) {
681 my $classes = shift;
682
683 push @nav, ['#classes' => 'Classes'];
684 print STDOUT qq[
685 <div id="classes" class="section">
686 <h2>Classes</h2>
687
688 <dl>
689 ];
690 for my $class (sort {$a cmp $b} keys %$classes) {
691 print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
692 for (@{$classes->{$class}}) {
693 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
694 }
695 }
696 print STDOUT qq[</dl></div>];
697 } # print_class_section
698
699 sub print_result_section ($) {
700 my $result = shift;
701
702 print STDOUT qq[
703 <div id="result-summary" class="section">
704 <h2>Result</h2>];
705
706 if ($result->{unsupported} and $result->{conforming_max}) {
707 print STDOUT qq[<p class=uncertain id=result-para>The conformance
708 checker cannot decide whether the document is conforming or
709 not, since the document contains one or more unsupported
710 features. The document might or might not be conforming.</p>];
711 } elsif ($result->{conforming_min}) {
712 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
713 found in this document.</p>];
714 } elsif ($result->{conforming_max}) {
715 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
716 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
717 it might be conforming.</p>];
718 } else {
719 print STDOUT qq[<p class=FAIL id=result-para>This document is
720 <strong><em>non</em>-conforming</strong>.</p>];
721 }
722
723 print STDOUT qq[<table>
724 <colgroup><col><colgroup><col><col><col><colgroup><col>
725 <thead>
726 <tr><th scope=col></th>
727 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
728 Errors</a></th>
729 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
730 Errors</a></th>
731 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
732 <th scope=col>Score</th></tr></thead><tbody>];
733
734 my $must_error = 0;
735 my $should_error = 0;
736 my $warning = 0;
737 my $score_min = 0;
738 my $score_max = 0;
739 my $score_base = 20;
740 my $score_unit = $score_base / 100;
741 for (
742 [Transfer => 'transfer', ''],
743 [Character => 'char', ''],
744 [Syntax => 'syntax', '#parse-errors'],
745 [Structure => 'structure', '#document-errors'],
746 ) {
747 $must_error += ($result->{$_->[1]}->{must} += 0);
748 $should_error += ($result->{$_->[1]}->{should} += 0);
749 $warning += ($result->{$_->[1]}->{warning} += 0);
750 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
751 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
752
753 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
754 my $label = $_->[0];
755 if ($result->{$_->[1]}->{must} or
756 $result->{$_->[1]}->{should} or
757 $result->{$_->[1]}->{warning} or
758 $result->{$_->[1]}->{unsupported}) {
759 $label = qq[<a href="$_->[2]">$label</a>];
760 }
761
762 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
763 if ($uncertain) {
764 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
765 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
766 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
767 } else {
768 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
769 }
770 }
771
772 $score_max += $score_base;
773
774 print STDOUT qq[
775 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
776 </tbody>
777 <tfoot><tr class=uncertain><th scope=row>Total</th>
778 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
779 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
780 <td>$warning?</td>
781 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
782 </table>
783
784 <p><strong>Important</strong>: This conformance checking service
785 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
786 </div>];
787 push @nav, ['#result-summary' => 'Result'];
788 } # print_result_section
789
790 sub print_result_unknown_type_section ($$) {
791 my ($input, $result) = @_;
792
793 my $euri = htescape ($input->{uri});
794 print STDOUT qq[
795 <div id="parse-errors" class="section">
796 <h2>Errors</h2>
797
798 <dl>
799 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
800 <dd class=unsupported><strong><a href="../error-description#level-u">Not
801 supported</a></strong>:
802 Media type
803 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
804 is not supported.</dd>
805 </dl>
806 </div>
807 ];
808 push @nav, ['#parse-errors' => 'Errors'];
809 add_error (char => {level => 'unsupported'} => $result);
810 add_error (syntax => {level => 'unsupported'} => $result);
811 add_error (structure => {level => 'unsupported'} => $result);
812 } # print_result_unknown_type_section
813
814 sub print_result_input_error_section ($) {
815 my $input = shift;
816 print STDOUT qq[<div class="section" id="result-summary">
817 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
818 </div>];
819 push @nav, ['#result-summary' => 'Result'];
820 } # print_Result_input_error_section
821
822 sub get_error_label ($) {
823 my $err = shift;
824
825 my $r = '';
826
827 if (defined $err->{line}) {
828 if ($err->{column} > 0) {
829 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
830 } else {
831 $err->{line} = $err->{line} - 1 || 1;
832 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
833 }
834 }
835
836 if (defined $err->{node}) {
837 $r .= ' ' if length $r;
838 $r = get_node_link ($err->{node});
839 }
840
841 if (defined $err->{index}) {
842 $r .= ' ' if length $r;
843 $r .= 'Index ' . (0+$err->{index});
844 }
845
846 if (defined $err->{value}) {
847 $r .= ' ' if length $r;
848 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
849 }
850
851 return $r;
852 } # get_error_label
853
854 sub get_error_level_label ($) {
855 my $err = shift;
856
857 my $r = '';
858
859 if (not defined $err->{level} or $err->{level} eq 'm') {
860 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
861 error</a></strong>: ];
862 } elsif ($err->{level} eq 's') {
863 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
864 error</a></strong>: ];
865 } elsif ($err->{level} eq 'w') {
866 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
867 ];
868 } elsif ($err->{level} eq 'unsupported') {
869 $r = qq[<strong><a href="../error-description#level-u">Not
870 supported</a></strong>: ];
871 } else {
872 my $elevel = htescape ($err->{level});
873 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
874 ];
875 }
876
877 return $r;
878 } # get_error_level_label
879
880 sub get_node_path ($) {
881 my $node = shift;
882 my @r;
883 while (defined $node) {
884 my $rs;
885 if ($node->node_type == 1) {
886 $rs = $node->manakai_local_name;
887 $node = $node->parent_node;
888 } elsif ($node->node_type == 2) {
889 $rs = '@' . $node->manakai_local_name;
890 $node = $node->owner_element;
891 } elsif ($node->node_type == 3) {
892 $rs = '"' . $node->data . '"';
893 $node = $node->parent_node;
894 } elsif ($node->node_type == 9) {
895 @r = ('') unless @r;
896 $rs = '';
897 $node = $node->parent_node;
898 } else {
899 $rs = '#' . $node->node_type;
900 $node = $node->parent_node;
901 }
902 unshift @r, $rs;
903 }
904 return join '/', @r;
905 } # get_node_path
906
907 sub get_node_link ($) {
908 return qq[<a href="#node-@{[refaddr $_[0]]}">] .
909 htescape (get_node_path ($_[0])) . qq[</a>];
910 } # get_node_link
911
912 {
913 my $Msg = {};
914
915 sub load_text_catalog ($) {
916 my $lang = shift; # MUST be a canonical lang name
917 open my $file, '<:utf8', "cc-msg.$lang.txt"
918 or die "$0: cc-msg.$lang.txt: $!";
919 while (<$file>) {
920 if (s/^([^;]+);([^;]*);//) {
921 my ($type, $cls, $msg) = ($1, $2, $_);
922 $msg =~ tr/\x0D\x0A//d;
923 $Msg->{$type} = [$cls, $msg];
924 }
925 }
926 } # load_text_catalog
927
928 sub get_text ($) {
929 my ($type, $level, $node) = @_;
930 $type = $level . ':' . $type if defined $level;
931 my @arg;
932 {
933 if (defined $Msg->{$type}) {
934 my $msg = $Msg->{$type}->[1];
935 $msg =~ s{<var>\$([0-9]+)</var>}{
936 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
937 }ge;
938 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
939 UNIVERSAL::can ($node, 'get_attribute_ns')
940 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
941 }ge;
942 $msg =~ s{<var>{\@}</var>}{
943 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
944 }ge;
945 $msg =~ s{<var>{local-name}</var>}{
946 UNIVERSAL::can ($node, 'manakai_local_name')
947 ? htescape ($node->manakai_local_name) : ''
948 }ge;
949 $msg =~ s{<var>{element-local-name}</var>}{
950 (UNIVERSAL::can ($node, 'owner_element') and
951 $node->owner_element)
952 ? htescape ($node->owner_element->manakai_local_name)
953 : ''
954 }ge;
955 return ($type, $Msg->{$type}->[0], $msg);
956 } elsif ($type =~ s/:([^:]*)$//) {
957 unshift @arg, $1;
958 redo;
959 }
960 }
961 return ($type, '', htescape ($_[0]));
962 } # get_text
963
964 }
965
966 sub get_input_document ($$) {
967 my ($http, $dom) = @_;
968
969 my $request_uri = $http->get_parameter ('uri');
970 my $r = {};
971 if (defined $request_uri and length $request_uri) {
972 my $uri = $dom->create_uri_reference ($request_uri);
973 unless ({
974 http => 1,
975 }->{lc $uri->uri_scheme}) {
976 return {uri => $request_uri, request_uri => $request_uri,
977 error_status_text => 'URI scheme not allowed'};
978 }
979
980 require Message::Util::HostPermit;
981 my $host_permit = new Message::Util::HostPermit;
982 $host_permit->add_rule (<<EOH);
983 Allow host=suika port=80
984 Deny host=suika
985 Allow host=suika.fam.cx port=80
986 Deny host=suika.fam.cx
987 Deny host=localhost
988 Deny host=*.localdomain
989 Deny ipv4=0.0.0.0/8
990 Deny ipv4=10.0.0.0/8
991 Deny ipv4=127.0.0.0/8
992 Deny ipv4=169.254.0.0/16
993 Deny ipv4=172.0.0.0/11
994 Deny ipv4=192.0.2.0/24
995 Deny ipv4=192.88.99.0/24
996 Deny ipv4=192.168.0.0/16
997 Deny ipv4=198.18.0.0/15
998 Deny ipv4=224.0.0.0/4
999 Deny ipv4=255.255.255.255/32
1000 Deny ipv6=0::0/0
1001 Allow host=*
1002 EOH
1003 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1004 return {uri => $request_uri, request_uri => $request_uri,
1005 error_status_text => 'Connection to the host is forbidden'};
1006 }
1007
1008 require LWP::UserAgent;
1009 my $ua = WDCC::LWPUA->new;
1010 $ua->{wdcc_dom} = $dom;
1011 $ua->{wdcc_host_permit} = $host_permit;
1012 $ua->agent ('Mozilla'); ## TODO: for now.
1013 $ua->parse_head (0);
1014 $ua->protocols_allowed ([qw/http/]);
1015 $ua->max_size (1000_000);
1016 my $req = HTTP::Request->new (GET => $request_uri);
1017 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1018 my $res = $ua->request ($req);
1019 ## TODO: 401 sets |is_success| true.
1020 if ($res->is_success or $http->get_parameter ('error-page')) {
1021 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1022 $r->{uri} = $res->request->uri;
1023 $r->{request_uri} = $request_uri;
1024
1025 ## TODO: More strict parsing...
1026 my $ct = $res->header ('Content-Type');
1027 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1028 $r->{charset} = lc $1;
1029 $r->{charset} =~ tr/\\//d;
1030 $r->{official_charset} = $r->{charset};
1031 }
1032
1033 my $input_charset = $http->get_parameter ('charset');
1034 if (defined $input_charset and length $input_charset) {
1035 $r->{charset_overridden}
1036 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1037 $r->{charset} = $input_charset;
1038 }
1039
1040 ## TODO: Support for HTTP Content-Encoding
1041
1042 $r->{s} = ''.$res->content;
1043
1044 require Whatpm::ContentType;
1045 ($r->{official_type}, $r->{media_type})
1046 = Whatpm::ContentType->get_sniffed_type
1047 (get_file_head => sub {
1048 return substr $r->{s}, 0, shift;
1049 },
1050 http_content_type_byte => $ct,
1051 has_http_content_encoding =>
1052 defined $res->header ('Content-Encoding'),
1053 supported_image_types => {});
1054 } else {
1055 $r->{uri} = $res->request->uri;
1056 $r->{request_uri} = $request_uri;
1057 $r->{error_status_text} = $res->status_line;
1058 }
1059
1060 $r->{header_field} = [];
1061 $res->scan (sub {
1062 push @{$r->{header_field}}, [$_[0], $_[1]];
1063 });
1064 $r->{header_status_code} = $res->code;
1065 $r->{header_status_text} = $res->message;
1066 } else {
1067 $r->{s} = ''.$http->get_parameter ('s');
1068 $r->{uri} = q<thismessage:/>;
1069 $r->{request_uri} = q<thismessage:/>;
1070 $r->{base_uri} = q<thismessage:/>;
1071 $r->{charset} = ''.$http->get_parameter ('_charset_');
1072 $r->{charset} =~ s/\s+//g;
1073 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1074 $r->{official_charset} = $r->{charset};
1075 $r->{header_field} = [];
1076
1077 require Whatpm::ContentType;
1078 ($r->{official_type}, $r->{media_type})
1079 = Whatpm::ContentType->get_sniffed_type
1080 (get_file_head => sub {
1081 return substr $r->{s}, 0, shift;
1082 },
1083 http_content_type_byte => undef,
1084 has_http_content_encoding => 0,
1085 supported_image_types => {});
1086 }
1087
1088 my $input_format = $http->get_parameter ('i');
1089 if (defined $input_format and length $input_format) {
1090 $r->{media_type_overridden}
1091 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1092 $r->{media_type} = $input_format;
1093 }
1094 if (defined $r->{s} and not defined $r->{media_type}) {
1095 $r->{media_type} = 'text/html';
1096 $r->{media_type_overridden} = 1;
1097 }
1098
1099 if ($r->{media_type} eq 'text/xml') {
1100 unless (defined $r->{charset}) {
1101 $r->{charset} = 'us-ascii';
1102 $r->{official_charset} = $r->{charset};
1103 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1104 $r->{charset_overridden} = 0;
1105 }
1106 }
1107
1108 if (length $r->{s} > 1000_000) {
1109 $r->{error_status_text} = 'Entity-body too large';
1110 delete $r->{s};
1111 return $r;
1112 }
1113
1114 return $r;
1115 } # get_input_document
1116
1117 package WDCC::LWPUA;
1118 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1119
1120 sub redirect_ok {
1121 my $ua = shift;
1122 unless ($ua->SUPER::redirect_ok (@_)) {
1123 return 0;
1124 }
1125
1126 my $uris = $_[1]->header ('Location');
1127 return 0 unless $uris;
1128 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1129 unless ({
1130 http => 1,
1131 }->{lc $uri->uri_scheme}) {
1132 return 0;
1133 }
1134 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1135 return 0;
1136 }
1137 return 1;
1138 } # redirect_ok
1139
1140 =head1 AUTHOR
1141
1142 Wakaba <w@suika.fam.cx>.
1143
1144 =head1 LICENSE
1145
1146 Copyright 2007 Wakaba <w@suika.fam.cx>
1147
1148 This library is free software; you can redistribute it
1149 and/or modify it under the same terms as Perl itself.
1150
1151 =cut
1152
1153 ## $Date: 2007/11/19 12:20:14 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24