/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.22 - (show annotations) (download)
Sun Nov 4 09:15:02 2007 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.21: +142 -10 lines
++ ChangeLog	4 Nov 2007 09:14:24 -0000
2007-11-04  Wakaba  <wakaba@suika.fam.cx>

	* cc-interface.en.html (i): |text/cache-manifest| is added.

	* cc-style.css: New rules for manifest dump.

	* cc.cgi: Support for |text/cache-manifest|.

	* error-description-soruce.en.xml (#cache-manifest-errors): New
	section.

1 #!/usr/bin/perl
2 use strict;
3
4 use lib qw[/home/httpd/html/www/markup/html/whatpm
5 /home/wakaba/work/manakai2/lib];
6 use CGI::Carp qw[fatalsToBrowser];
7 use Scalar::Util qw[refaddr];
8 use Time::HiRes qw/time/;
9
10 sub htescape ($) {
11 my $s = $_[0];
12 $s =~ s/&/&amp;/g;
13 $s =~ s/</&lt;/g;
14 $s =~ s/>/&gt;/g;
15 $s =~ s/"/&quot;/g;
16 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
17 sprintf '<var>U+%04X</var>', ord $1;
18 }ge;
19 return $s;
20 } # htescape
21
22 use Message::CGI::HTTP;
23 my $http = Message::CGI::HTTP->new;
24
25 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
26 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
27 exit;
28 }
29
30 binmode STDOUT, ':utf8';
31 $| = 1;
32
33 require Message::DOM::DOMImplementation;
34 my $dom = Message::DOM::DOMImplementation->new;
35
36 load_text_catalog ('en'); ## TODO: conneg
37
38 my @nav;
39 print STDOUT qq[Content-Type: text/html; charset=utf-8
40
41 <!DOCTYPE html>
42 <html lang="en">
43 <head>
44 <title>Web Document Conformance Checker (BETA)</title>
45 <link rel="stylesheet" href="../cc-style.css" type="text/css">
46 </head>
47 <body>
48 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
49 (<em>beta</em>)</h1>
50 ];
51
52 $| = 0;
53 my $input = get_input_document ($http, $dom);
54 my $inner_html_element = $http->get_parameter ('e');
55 my $char_length = 0;
56 my %time;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>
65 ]; # no </dl> yet
66 push @nav, ['#document-info' => 'Information'];
67
68 if (defined $input->{s}) {
69 $char_length = length $input->{s};
70
71 print STDOUT qq[
72 <dt>Base URI</dt>
73 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
74 <dt>Internet Media Type</dt>
75 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
76 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>
77 <dt>Character Encoding</dt>
78 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
79 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
80 <dt>Length</dt>
81 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
82 </dl>
83 </div>
84 ];
85
86 my $result = {conforming_min => 1, conforming_max => 1};
87 print_http_header_section ($input, $result);
88
89 my $doc;
90 my $el;
91 my $manifest;
92
93 if ($input->{media_type} eq 'text/html') {
94 ($doc, $el) = print_syntax_error_html_section ($input, $result);
95 print_source_string_section (\($input->{s}), $input->{charset});
96 } elsif ({
97 'text/xml' => 1,
98 'application/atom+xml' => 1,
99 'application/rss+xml' => 1,
100 'application/svg+xml' => 1,
101 'application/xhtml+xml' => 1,
102 'application/xml' => 1,
103 }->{$input->{media_type}}) {
104 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
105 print_source_string_section (\($input->{s}), $doc->input_encoding);
106 } elsif ($input->{media_type} eq 'text/cache-manifest') {
107 ## TODO: MUST be text/cache-manifest
108 $manifest = print_syntax_error_manifest_section ($input, $result);
109 print_source_string_section (\($input->{s}), 'utf-8');
110 } else {
111 ## TODO: Change HTTP status code??
112 print_result_unknown_type_section ($input);
113 }
114
115 if (defined $doc or defined $el) {
116 print_structure_dump_dom_section ($doc, $el);
117 my $elements = print_structure_error_dom_section ($doc, $el, $result);
118 print_table_section ($elements->{table}) if @{$elements->{table}};
119 print_id_section ($elements->{id}) if keys %{$elements->{id}};
120 print_term_section ($elements->{term}) if keys %{$elements->{term}};
121 print_class_section ($elements->{class}) if keys %{$elements->{class}};
122 } elsif (defined $manifest) {
123 print_structure_dump_manifest_section ($manifest);
124 print_structure_error_manifest_section ($manifest, $result);
125 }
126
127 print_result_section ($result);
128 } else {
129 print STDOUT qq[</dl></div>];
130 print_result_input_error_section ($input);
131 }
132
133 print STDOUT qq[
134 <ul class="navigation" id="nav-items">
135 ];
136 for (@nav) {
137 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
138 }
139 print STDOUT qq[
140 </ul>
141 </body>
142 </html>
143 ];
144
145 for (qw/decode parse parse_xml parse_manifest check check_manifest/) {
146 next unless defined $time{$_};
147 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
148 print $file $char_length, "\t", $time{$_}, "\n";
149 }
150
151 exit;
152
153 sub add_error ($$$) {
154 my ($layer, $err, $result) = @_;
155 if (defined $err->{level}) {
156 if ($err->{level} eq 's') {
157 $result->{$layer}->{should}++;
158 $result->{$layer}->{score_min} -= 2;
159 $result->{conforming_min} = 0;
160 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
161 $result->{$layer}->{warning}++;
162 } elsif ($err->{level} eq 'unsupported') {
163 $result->{$layer}->{unsupported}++;
164 $result->{unsupported} = 1;
165 } else {
166 $result->{$layer}->{must}++;
167 $result->{$layer}->{score_max} -= 2;
168 $result->{$layer}->{score_min} -= 2;
169 $result->{conforming_min} = 0;
170 $result->{conforming_max} = 0;
171 }
172 } else {
173 $result->{$layer}->{must}++;
174 $result->{$layer}->{score_max} -= 2;
175 $result->{$layer}->{score_min} -= 2;
176 $result->{conforming_min} = 0;
177 $result->{conforming_max} = 0;
178 }
179 } # add_error
180
181 sub print_http_header_section ($$) {
182 my ($input, $result) = @_;
183 return unless defined $input->{header_status_code} or
184 defined $input->{header_status_text} or
185 @{$input->{header_field}};
186
187 push @nav, ['#source-header' => 'HTTP Header'];
188 print STDOUT qq[<div id="source-header" class="section">
189 <h2>HTTP Header</h2>
190
191 <p><strong>Note</strong>: Due to the limitation of the
192 network library in use, the content of this section might
193 not be the real header.</p>
194
195 <table><tbody>
196 ];
197
198 if (defined $input->{header_status_code}) {
199 print STDOUT qq[<tr><th scope="row">Status code</th>];
200 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
201 }
202 if (defined $input->{header_status_text}) {
203 print STDOUT qq[<tr><th scope="row">Status text</th>];
204 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
205 }
206
207 for (@{$input->{header_field}}) {
208 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
209 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
210 }
211
212 print STDOUT qq[</tbody></table></div>];
213 } # print_http_header_section
214
215 sub print_syntax_error_html_section ($$) {
216 my ($input, $result) = @_;
217
218 require Encode;
219 require Whatpm::HTML;
220
221 $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
222
223 my $time1 = time;
224 my $t = Encode::decode ($input->{charset}, $input->{s});
225 $time{decode} = time - $time1;
226
227 print STDOUT qq[
228 <div id="parse-errors" class="section">
229 <h2>Parse Errors</h2>
230
231 <dl>];
232 push @nav, ['#parse-errors' => 'Parse Error'];
233
234 my $onerror = sub {
235 my (%opt) = @_;
236 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
237 if ($opt{column} > 0) {
238 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
239 } else {
240 $opt{line} = $opt{line} - 1 || 1;
241 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
242 }
243 $type =~ tr/ /-/;
244 $type =~ s/\|/%7C/g;
245 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
246 print STDOUT qq[<dd class="$cls">$msg</dd>\n];
247
248 add_error ('syntax', \%opt => $result);
249 };
250
251 my $doc = $dom->create_document;
252 my $el;
253 $time1 = time;
254 if (defined $inner_html_element and length $inner_html_element) {
255 $el = $doc->create_element_ns
256 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
257 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
258 } else {
259 Whatpm::HTML->parse_string ($t => $doc, $onerror);
260 }
261 $time{parse} = time - $time1;
262
263 print STDOUT qq[</dl></div>];
264
265 return ($doc, $el);
266 } # print_syntax_error_html_section
267
268 sub print_syntax_error_xml_section ($$) {
269 my ($input, $result) = @_;
270
271 require Message::DOM::XMLParserTemp;
272
273 print STDOUT qq[
274 <div id="parse-errors" class="section">
275 <h2>Parse Errors</h2>
276
277 <dl>];
278 push @nav, ['#parse-errors' => 'Parse Error'];
279
280 my $onerror = sub {
281 my $err = shift;
282 my $line = $err->location->line_number;
283 print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
284 print STDOUT $err->location->column_number, "</dt><dd>";
285 print STDOUT htescape $err->text, "</dd>\n";
286
287 add_error ('syntax', {type => $err->text,
288 level => [
289 $err->SEVERITY_FATAL_ERROR => 'm',
290 $err->SEVERITY_ERROR => 'm',
291 $err->SEVERITY_WARNING => 's',
292 ]->[$err->severity]} => $result);
293
294 return 1;
295 };
296
297 my $time1 = time;
298 open my $fh, '<', \($input->{s});
299 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
300 ($fh => $dom, $onerror, charset => $input->{charset});
301 $time{parse_xml} = time - $time1;
302
303 print STDOUT qq[</dl></div>];
304
305 return ($doc, undef);
306 } # print_syntax_error_xml_section
307
308 sub print_syntax_error_manifest_section ($$) {
309 my ($input, $result) = @_;
310
311 require Whatpm::CacheManifest;
312
313 print STDOUT qq[
314 <div id="parse-errors" class="section">
315 <h2>Parse Errors</h2>
316
317 <dl>];
318 push @nav, ['#parse-errors' => 'Parse Error'];
319
320 my $onerror = sub {
321 my (%opt) = @_;
322 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
323 print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
324 $type =~ tr/ /-/;
325 $type =~ s/\|/%7C/g;
326 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
327 print STDOUT qq[<dd class="$cls">$msg</dd>\n];
328
329 add_error ('syntax', \%opt => $result);
330 };
331
332 my $time1 = time;
333 my $manifest = Whatpm::CacheManifest->parse_byte_string
334 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
335 $time{parse_manifest} = time - $time1;
336
337 print STDOUT qq[</dl></div>];
338
339 return $manifest;
340 } # print_syntax_error_manifest_section
341
342 sub print_source_string_section ($$) {
343 require Encode;
344 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
345 return unless $enc;
346
347 my $s = \($enc->decode (${$_[0]}));
348 my $i = 1;
349 push @nav, ['#source-string' => 'Source'];
350 print STDOUT qq[<div id="source-string" class="section">
351 <h2>Document Source</h2>
352 <ol lang="">\n];
353 if (length $$s) {
354 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
355 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
356 $i++;
357 }
358 if ($$s =~ /\G([^\x0A]+)/gc) {
359 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
360 }
361 } else {
362 print STDOUT q[<li id="line-1"></li>];
363 }
364 print STDOUT "</ol></div>";
365 } # print_input_string_section
366
367 sub print_document_tree ($) {
368 my $node = shift;
369 my $r = '<ol class="xoxo">';
370
371 my @node = ($node);
372 while (@node) {
373 my $child = shift @node;
374 unless (ref $child) {
375 $r .= $child;
376 next;
377 }
378
379 my $node_id = 'node-'.refaddr $child;
380 my $nt = $child->node_type;
381 if ($nt == $child->ELEMENT_NODE) {
382 my $child_nsuri = $child->namespace_uri;
383 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
384 '</code>'; ## ISSUE: case
385
386 if ($child->has_attributes) {
387 $r .= '<ul class="attributes">';
388 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
389 @{$child->attributes}) {
390 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
391 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
392 }
393 $r .= '</ul>';
394 }
395
396 if ($child->has_child_nodes) {
397 $r .= '<ol class="children">';
398 unshift @node, @{$child->child_nodes}, '</ol></li>';
399 } else {
400 $r .= '</li>';
401 }
402 } elsif ($nt == $child->TEXT_NODE) {
403 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
404 } elsif ($nt == $child->CDATA_SECTION_NODE) {
405 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
406 } elsif ($nt == $child->COMMENT_NODE) {
407 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
408 } elsif ($nt == $child->DOCUMENT_NODE) {
409 $r .= qq'<li id="$node_id" class="tree-document">Document';
410 $r .= qq[<ul class="attributes">];
411 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
412 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
413 unless ($child->manakai_is_html) {
414 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
415 if (defined $child->xml_encoding) {
416 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
417 } else {
418 $r .= qq[<li>XML encoding = (null)</li>];
419 }
420 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
421 }
422 $r .= qq[</ul>];
423 if ($child->has_child_nodes) {
424 $r .= '<ol class="children">';
425 unshift @node, @{$child->child_nodes}, '</ol></li>';
426 }
427 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
428 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
429 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
430 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
431 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
432 $r .= '</ul></li>';
433 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
434 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
435 } else {
436 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
437 }
438 }
439
440 $r .= '</ol>';
441 print STDOUT $r;
442 } # print_document_tree
443
444 sub print_structure_dump_dom_section ($$) {
445 my ($doc, $el) = @_;
446
447 print STDOUT qq[
448 <div id="document-tree" class="section">
449 <h2>Document Tree</h2>
450 ];
451 push @nav, ['#document-tree' => 'Tree'];
452
453 print_document_tree ($el || $doc);
454
455 print STDOUT qq[</div>];
456 } # print_structure_dump_dom_section
457
458 sub print_structure_dump_manifest_section ($) {
459 my $manifest = shift;
460
461 print STDOUT qq[
462 <div id="dump-manifest" class="section">
463 <h2>Cache Manifest</h2>
464 ];
465 push @nav, ['#dump-manifest' => 'Caceh Manifest'];
466
467 print STDOUT qq[<dl><dt>Explicit entries</dt>];
468 for my $uri (@{$manifest->[0]}) {
469 my $euri = htescape ($uri);
470 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
471 }
472
473 print STDOUT qq[<dt>Fallback entries</dt><dd>
474 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
475 <th scope=row>Fallback Entry</tr><tbody>];
476 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
477 my $euri = htescape ($uri);
478 my $euri2 = htescape ($manifest->[1]->{$uri});
479 print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
480 <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
481 }
482
483 print STDOUT qq[</table><dt>Online whitelist</dt>];
484 for my $uri (@{$manifest->[2]}) {
485 my $euri = htescape ($uri);
486 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
487 }
488
489 print STDOUT qq[</dl></div>];
490 } # print_structure_dump_manifest_section
491
492 sub print_structure_error_dom_section ($$$) {
493 my ($doc, $el, $result) = @_;
494
495 print STDOUT qq[<div id="document-errors" class="section">
496 <h2>Document Errors</h2>
497
498 <dl>];
499 push @nav, ['#document-errors' => 'Document Error'];
500
501 require Whatpm::ContentChecker;
502 my $onerror = sub {
503 my %opt = @_;
504 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
505 $type =~ tr/ /-/;
506 $type =~ s/\|/%7C/g;
507 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
508 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
509 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
510 add_error ('structure', \%opt => $result);
511 };
512
513 my $elements;
514 my $time1 = time;
515 if ($el) {
516 $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
517 } else {
518 $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
519 }
520 $time{check} = time - $time1;
521
522 print STDOUT qq[</dl></div>];
523
524 return $elements;
525 } # print_structure_error_dom_section
526
527 sub print_structure_error_manifest_section ($$$) {
528 my ($manifest, $result) = @_;
529
530 print STDOUT qq[<div id="document-errors" class="section">
531 <h2>Document Errors</h2>
532
533 <dl>];
534 push @nav, ['#document-errors' => 'Document Error'];
535
536 require Whatpm::CacheManifest;
537 Whatpm::CacheManifest->check_manifest ($manifest, sub {
538 my %opt = @_;
539 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
540 $type =~ tr/ /-/;
541 $type =~ s/\|/%7C/g;
542 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
543 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
544 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
545 add_error ('structure', \%opt => $result);
546 });
547
548 print STDOUT qq[</div>];
549 } # print_structure_error_manifest_section
550
551 sub print_table_section ($) {
552 my $tables = shift;
553
554 push @nav, ['#tables' => 'Tables'];
555 print STDOUT qq[
556 <div id="tables" class="section">
557 <h2>Tables</h2>
558
559 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
560 <script src="../table-script.js" type="text/javascript"></script>
561 <noscript>
562 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
563 </noscript>
564 ];
565
566 require JSON;
567
568 my $i = 0;
569 for my $table_el (@$tables) {
570 $i++;
571 print STDOUT qq[<div class="section" id="table-$i"><h3>] .
572 get_node_link ($table_el) . q[</h3>];
573
574 ## TODO: Make |ContentChecker| return |form_table| result
575 ## so that this script don't have to run the algorithm twice.
576 my $table = Whatpm::HTMLTable->form_table ($table_el);
577
578 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
579 next unless $_;
580 delete $_->{element};
581 }
582
583 for (@{$table->{row_group}}) {
584 next unless $_;
585 next unless $_->{element};
586 $_->{type} = $_->{element}->manakai_local_name;
587 delete $_->{element};
588 }
589
590 for (@{$table->{cell}}) {
591 next unless $_;
592 for (@{$_}) {
593 next unless $_;
594 for (@$_) {
595 $_->{id} = refaddr $_->{element} if defined $_->{element};
596 delete $_->{element};
597 $_->{is_header} = $_->{is_header} ? 1 : 0;
598 }
599 }
600 }
601
602 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
603 print STDOUT JSON::objToJson ($table);
604 print STDOUT qq[, document.getElementById ('table-$i'));</script>];
605 }
606
607 print STDOUT qq[</div>];
608 } # print_table_section
609
610 sub print_id_section ($) {
611 my $ids = shift;
612
613 push @nav, ['#identifiers' => 'IDs'];
614 print STDOUT qq[
615 <div id="identifiers" class="section">
616 <h2>Identifiers</h2>
617
618 <dl>
619 ];
620 for my $id (sort {$a cmp $b} keys %$ids) {
621 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
622 for (@{$ids->{$id}}) {
623 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
624 }
625 }
626 print STDOUT qq[</dl></div>];
627 } # print_id_section
628
629 sub print_term_section ($) {
630 my $terms = shift;
631
632 push @nav, ['#terms' => 'Terms'];
633 print STDOUT qq[
634 <div id="terms" class="section">
635 <h2>Terms</h2>
636
637 <dl>
638 ];
639 for my $term (sort {$a cmp $b} keys %$terms) {
640 print STDOUT qq[<dt>@{[htescape $term]}</dt>];
641 for (@{$terms->{$term}}) {
642 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
643 }
644 }
645 print STDOUT qq[</dl></div>];
646 } # print_term_section
647
648 sub print_class_section ($) {
649 my $classes = shift;
650
651 push @nav, ['#classes' => 'Classes'];
652 print STDOUT qq[
653 <div id="classes" class="section">
654 <h2>Classes</h2>
655
656 <dl>
657 ];
658 for my $class (sort {$a cmp $b} keys %$classes) {
659 print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
660 for (@{$classes->{$class}}) {
661 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
662 }
663 }
664 print STDOUT qq[</dl></div>];
665 } # print_class_section
666
667 sub print_result_section ($) {
668 my $result = shift;
669
670 print STDOUT qq[
671 <div id="result-summary" class="section">
672 <h2>Result</h2>];
673
674 if ($result->{unsupported} and $result->{conforming_max}) {
675 print STDOUT qq[<p class=uncertain id=result-para>The conformance
676 checker cannot decide whether the document is conforming or
677 not, since the document contains one or more unsupported
678 features. The document might or might not be conforming.</p>];
679 } elsif ($result->{conforming_min}) {
680 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
681 found in this document.</p>];
682 } elsif ($result->{conforming_max}) {
683 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
684 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
685 it might be conforming.</p>];
686 } else {
687 print STDOUT qq[<p class=FAIL id=result-para>This document is
688 <strong><em>non</em>-conforming</strong>.</p>];
689 }
690
691 print STDOUT qq[<table>
692 <colgroup><col><colgroup><col><col><col><colgroup><col>
693 <thead>
694 <tr><th scope=col></th><th scope=col><em class=rfc2119>MUST</em>-level
695 Errors</th><th scope=col><em class=rfc2119>SHOULD</em>-level
696 Errors</th><th scope=col>Warnings</th><th scope=col>Score</th></tr>
697 </thead><tbody>];
698
699 my $must_error = 0;
700 my $should_error = 0;
701 my $warning = 0;
702 my $score_min = 0;
703 my $score_max = 0;
704 my $score_base = 20;
705 my $score_unit = $score_base / 100;
706 for (
707 [Transfer => 'transfer', ''],
708 [Character => 'char', ''],
709 [Syntax => 'syntax', '#parse-errors'],
710 [Structure => 'structure', '#document-errors'],
711 ) {
712 $must_error += ($result->{$_->[1]}->{must} += 0);
713 $should_error += ($result->{$_->[1]}->{should} += 0);
714 $warning += ($result->{$_->[1]}->{warning} += 0);
715 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
716 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
717
718 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
719 my $label = $_->[0];
720 if ($result->{$_->[1]}->{must} or
721 $result->{$_->[1]}->{should} or
722 $result->{$_->[1]}->{warning} or
723 $result->{$_->[1]}->{unsupported}) {
724 $label = qq[<a href="$_->[2]">$label</a>];
725 }
726
727 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
728 if ($uncertain) {
729 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
730 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
731 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
732 } else {
733 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
734 }
735 }
736
737 $score_max += $score_base;
738
739 print STDOUT qq[
740 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
741 </tbody>
742 <tfoot><tr class=uncertain><th scope=row>Total</th>
743 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
744 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
745 <td>$warning?</td>
746 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
747 </table>
748
749 <p><strong>Important</strong>: This conformance checking service
750 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
751 </div>];
752 push @nav, ['#result-summary' => 'Result'];
753 } # print_result_section
754
755 sub print_result_unknown_type_section ($) {
756 my $input = shift;
757
758 print STDOUT qq[
759 <div id="result-summary" class="section">
760 <p><em>Media type <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code> is not supported!</em></p>
761 </div>
762 ];
763 push @nav, ['#result-summary' => 'Result'];
764 } # print_result_unknown_type_section
765
766 sub print_result_input_error_section ($) {
767 my $input = shift;
768 print STDOUT qq[<div class="section" id="result-summary">
769 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
770 </div>];
771 push @nav, ['#result-summary' => 'Result'];
772 } # print_Result_input_error_section
773
774 sub get_error_label ($) {
775 my $err = shift;
776
777 my $r = '';
778
779 if (defined $err->{line}) {
780 if ($err->{column} > 0) {
781 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
782 } else {
783 $err->{line} = $err->{line} - 1 || 1;
784 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
785 }
786 }
787
788 if (defined $err->{node}) {
789 $r .= ' ' if length $r;
790 $r = get_node_path ($err->{node});
791 }
792
793 if (defined $err->{index}) {
794 $r .= ' ' if length $r;
795 $r .= 'Index ' . (0+$err->{index});
796 }
797
798 if (defined $err->{value}) {
799 $r .= ' ' if length $r;
800 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
801 }
802
803 return $r;
804 } # get_error_label
805
806 sub get_node_path ($) {
807 my $node = shift;
808 my @r;
809 while (defined $node) {
810 my $rs;
811 if ($node->node_type == 1) {
812 $rs = $node->manakai_local_name;
813 $node = $node->parent_node;
814 } elsif ($node->node_type == 2) {
815 $rs = '@' . $node->manakai_local_name;
816 $node = $node->owner_element;
817 } elsif ($node->node_type == 3) {
818 $rs = '"' . $node->data . '"';
819 $node = $node->parent_node;
820 } elsif ($node->node_type == 9) {
821 @r = ('') unless @r;
822 $rs = '';
823 $node = $node->parent_node;
824 } else {
825 $rs = '#' . $node->node_type;
826 $node = $node->parent_node;
827 }
828 unshift @r, $rs;
829 }
830 return join '/', @r;
831 } # get_node_path
832
833 sub get_node_link ($) {
834 return qq[<a href="#node-@{[refaddr $_[0]]}">] .
835 htescape (get_node_path ($_[0])) . qq[</a>];
836 } # get_node_link
837
838 {
839 my $Msg = {};
840
841 sub load_text_catalog ($) {
842 my $lang = shift; # MUST be a canonical lang name
843 open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
844 while (<$file>) {
845 if (s/^([^;]+);([^;]*);//) {
846 my ($type, $cls, $msg) = ($1, $2, $_);
847 $msg =~ tr/\x0D\x0A//d;
848 $Msg->{$type} = [$cls, $msg];
849 }
850 }
851 } # load_text_catalog
852
853 sub get_text ($) {
854 my ($type, $level, $node) = @_;
855 $type = $level . ':' . $type if defined $level;
856 my @arg;
857 {
858 if (defined $Msg->{$type}) {
859 my $msg = $Msg->{$type}->[1];
860 $msg =~ s{<var>\$([0-9]+)</var>}{
861 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
862 }ge;
863 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
864 UNIVERSAL::can ($node, 'get_attribute_ns')
865 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
866 }ge;
867 $msg =~ s{<var>{\@}</var>}{
868 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
869 }ge;
870 $msg =~ s{<var>{local-name}</var>}{
871 UNIVERSAL::can ($node, 'manakai_local_name')
872 ? htescape ($node->manakai_local_name) : ''
873 }ge;
874 $msg =~ s{<var>{element-local-name}</var>}{
875 (UNIVERSAL::can ($node, 'owner_element') and
876 $node->owner_element)
877 ? htescape ($node->owner_element->manakai_local_name)
878 : ''
879 }ge;
880 return ($type, $Msg->{$type}->[0], $msg);
881 } elsif ($type =~ s/:([^:]*)$//) {
882 unshift @arg, $1;
883 redo;
884 }
885 }
886 return ($type, '', htescape ($_[0]));
887 } # get_text
888
889 }
890
891 sub get_input_document ($$) {
892 my ($http, $dom) = @_;
893
894 my $request_uri = $http->get_parameter ('uri');
895 my $r = {};
896 if (defined $request_uri and length $request_uri) {
897 my $uri = $dom->create_uri_reference ($request_uri);
898 unless ({
899 http => 1,
900 }->{lc $uri->uri_scheme}) {
901 return {uri => $request_uri, request_uri => $request_uri,
902 error_status_text => 'URI scheme not allowed'};
903 }
904
905 require Message::Util::HostPermit;
906 my $host_permit = new Message::Util::HostPermit;
907 $host_permit->add_rule (<<EOH);
908 Allow host=suika port=80
909 Deny host=suika
910 Allow host=suika.fam.cx port=80
911 Deny host=suika.fam.cx
912 Deny host=localhost
913 Deny host=*.localdomain
914 Deny ipv4=0.0.0.0/8
915 Deny ipv4=10.0.0.0/8
916 Deny ipv4=127.0.0.0/8
917 Deny ipv4=169.254.0.0/16
918 Deny ipv4=172.0.0.0/11
919 Deny ipv4=192.0.2.0/24
920 Deny ipv4=192.88.99.0/24
921 Deny ipv4=192.168.0.0/16
922 Deny ipv4=198.18.0.0/15
923 Deny ipv4=224.0.0.0/4
924 Deny ipv4=255.255.255.255/32
925 Deny ipv6=0::0/0
926 Allow host=*
927 EOH
928 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
929 return {uri => $request_uri, request_uri => $request_uri,
930 error_status_text => 'Connection to the host is forbidden'};
931 }
932
933 require LWP::UserAgent;
934 my $ua = WDCC::LWPUA->new;
935 $ua->{wdcc_dom} = $dom;
936 $ua->{wdcc_host_permit} = $host_permit;
937 $ua->agent ('Mozilla'); ## TODO: for now.
938 $ua->parse_head (0);
939 $ua->protocols_allowed ([qw/http/]);
940 $ua->max_size (1000_000);
941 my $req = HTTP::Request->new (GET => $request_uri);
942 my $res = $ua->request ($req);
943 ## TODO: 401 sets |is_success| true.
944 if ($res->is_success or $http->get_parameter ('error-page')) {
945 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
946 $r->{uri} = $res->request->uri;
947 $r->{request_uri} = $request_uri;
948
949 ## TODO: More strict parsing...
950 my $ct = $res->header ('Content-Type');
951 if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {
952 $r->{media_type} = lc $1;
953 }
954 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
955 $r->{charset} = lc $1;
956 $r->{charset} =~ tr/\\//d;
957 }
958
959 my $input_charset = $http->get_parameter ('charset');
960 if (defined $input_charset and length $input_charset) {
961 $r->{charset_overridden}
962 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
963 $r->{charset} = $input_charset;
964 }
965
966 $r->{s} = ''.$res->content;
967 } else {
968 $r->{uri} = $res->request->uri;
969 $r->{request_uri} = $request_uri;
970 $r->{error_status_text} = $res->status_line;
971 }
972
973 $r->{header_field} = [];
974 $res->scan (sub {
975 push @{$r->{header_field}}, [$_[0], $_[1]];
976 });
977 $r->{header_status_code} = $res->code;
978 $r->{header_status_text} = $res->message;
979 } else {
980 $r->{s} = ''.$http->get_parameter ('s');
981 $r->{uri} = q<thismessage:/>;
982 $r->{request_uri} = q<thismessage:/>;
983 $r->{base_uri} = q<thismessage:/>;
984 $r->{charset} = ''.$http->get_parameter ('_charset_');
985 $r->{charset} =~ s/\s+//g;
986 $r->{charset} = 'utf-8' if $r->{charset} eq '';
987 $r->{header_field} = [];
988 }
989
990 my $input_format = $http->get_parameter ('i');
991 if (defined $input_format and length $input_format) {
992 $r->{media_type_overridden}
993 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
994 $r->{media_type} = $input_format;
995 }
996 if (defined $r->{s} and not defined $r->{media_type}) {
997 $r->{media_type} = 'text/html';
998 $r->{media_type_overridden} = 1;
999 }
1000
1001 if ($r->{media_type} eq 'text/xml') {
1002 unless (defined $r->{charset}) {
1003 $r->{charset} = 'us-ascii';
1004 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1005 $r->{charset_overridden} = 0;
1006 }
1007 }
1008
1009 if (length $r->{s} > 1000_000) {
1010 $r->{error_status_text} = 'Entity-body too large';
1011 delete $r->{s};
1012 return $r;
1013 }
1014
1015 return $r;
1016 } # get_input_document
1017
1018 package WDCC::LWPUA;
1019 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1020
1021 sub redirect_ok {
1022 my $ua = shift;
1023 unless ($ua->SUPER::redirect_ok (@_)) {
1024 return 0;
1025 }
1026
1027 my $uris = $_[1]->header ('Location');
1028 return 0 unless $uris;
1029 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1030 unless ({
1031 http => 1,
1032 }->{lc $uri->uri_scheme}) {
1033 return 0;
1034 }
1035 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1036 return 0;
1037 }
1038 return 1;
1039 } # redirect_ok
1040
1041 =head1 AUTHOR
1042
1043 Wakaba <w@suika.fam.cx>.
1044
1045 =head1 LICENSE
1046
1047 Copyright 2007 Wakaba <w@suika.fam.cx>
1048
1049 This library is free software; you can redistribute it
1050 and/or modify it under the same terms as Perl itself.
1051
1052 =cut
1053
1054 ## $Date: 2007/09/11 08:25:23 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24