/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.25 - (show annotations) (download)
Sun Nov 18 05:30:03 2007 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.24: +34 -8 lines
++ ChangeLog	18 Nov 2007 05:29:39 -0000
2007-11-18  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi (get_input_document): Use sniffer to determine
	media type of the entity.

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 use Message::CGI::HTTP;
24 my $http = Message::CGI::HTTP->new;
25
26 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28 exit;
29 }
30
31 binmode STDOUT, ':utf8';
32 $| = 1;
33
34 require Message::DOM::DOMImplementation;
35 my $dom = Message::DOM::DOMImplementation->new;
36
37 load_text_catalog ('en'); ## TODO: conneg
38
39 my @nav;
40 print STDOUT qq[Content-Type: text/html; charset=utf-8
41
42 <!DOCTYPE html>
43 <html lang="en">
44 <head>
45 <title>Web Document Conformance Checker (BETA)</title>
46 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 </head>
48 <body>
49 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50 (<em>beta</em>)</h1>
51 ];
52
53 $| = 0;
54 my $input = get_input_document ($http, $dom);
55 my $inner_html_element = $http->get_parameter ('e');
56 my $char_length = 0;
57 my %time;
58
59 print qq[
60 <div id="document-info" class="section">
61 <dl>
62 <dt>Request URI</dt>
63 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
64 <dt>Document URI</dt>
65 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
66 <script>
67 document.title = '<'
68 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
69 + document.title;
70 </script></dd>
71 ]; # no </dl> yet
72 push @nav, ['#document-info' => 'Information'];
73
74 if (defined $input->{s}) {
75 $char_length = length $input->{s};
76
77 print STDOUT qq[
78 <dt>Base URI</dt>
79 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
80 <dt>Internet Media Type</dt>
81 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
82 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
83 <dt>Character Encoding</dt>
84 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
85 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
86 <dt>Length</dt>
87 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
88 </dl>
89 </div>
90 ];
91
92 my $result = {conforming_min => 1, conforming_max => 1};
93 print_http_header_section ($input, $result);
94
95 my $doc;
96 my $el;
97 my $manifest;
98
99 if ($input->{media_type} eq 'text/html') {
100 ($doc, $el) = print_syntax_error_html_section ($input, $result);
101 print_source_string_section
102 (\($input->{s}), $input->{charset} || $doc->input_encoding);
103 } elsif ({
104 'text/xml' => 1,
105 'application/atom+xml' => 1,
106 'application/rss+xml' => 1,
107 'application/svg+xml' => 1,
108 'application/xhtml+xml' => 1,
109 'application/xml' => 1,
110 }->{$input->{media_type}}) {
111 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
112 print_source_string_section (\($input->{s}), $doc->input_encoding);
113 } elsif ($input->{media_type} eq 'text/cache-manifest') {
114 ## TODO: MUST be text/cache-manifest
115 $manifest = print_syntax_error_manifest_section ($input, $result);
116 print_source_string_section (\($input->{s}), 'utf-8');
117 } else {
118 ## TODO: Change HTTP status code??
119 print_result_unknown_type_section ($input, $result);
120 }
121
122 if (defined $doc or defined $el) {
123 print_structure_dump_dom_section ($doc, $el);
124 my $elements = print_structure_error_dom_section ($doc, $el, $result);
125 print_table_section ($elements->{table}) if @{$elements->{table}};
126 print_id_section ($elements->{id}) if keys %{$elements->{id}};
127 print_term_section ($elements->{term}) if keys %{$elements->{term}};
128 print_class_section ($elements->{class}) if keys %{$elements->{class}};
129 } elsif (defined $manifest) {
130 print_structure_dump_manifest_section ($manifest);
131 print_structure_error_manifest_section ($manifest, $result);
132 }
133
134 print_result_section ($result);
135 } else {
136 print STDOUT qq[</dl></div>];
137 print_result_input_error_section ($input);
138 }
139
140 print STDOUT qq[
141 <ul class="navigation" id="nav-items">
142 ];
143 for (@nav) {
144 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
145 }
146 print STDOUT qq[
147 </ul>
148 </body>
149 </html>
150 ];
151
152 for (qw/decode parse parse_html parse_xml parse_manifest
153 check check_manifest/) {
154 next unless defined $time{$_};
155 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
156 print $file $char_length, "\t", $time{$_}, "\n";
157 }
158
159 exit;
160
161 sub add_error ($$$) {
162 my ($layer, $err, $result) = @_;
163 if (defined $err->{level}) {
164 if ($err->{level} eq 's') {
165 $result->{$layer}->{should}++;
166 $result->{$layer}->{score_min} -= 2;
167 $result->{conforming_min} = 0;
168 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
169 $result->{$layer}->{warning}++;
170 } elsif ($err->{level} eq 'unsupported') {
171 $result->{$layer}->{unsupported}++;
172 $result->{unsupported} = 1;
173 } else {
174 $result->{$layer}->{must}++;
175 $result->{$layer}->{score_max} -= 2;
176 $result->{$layer}->{score_min} -= 2;
177 $result->{conforming_min} = 0;
178 $result->{conforming_max} = 0;
179 }
180 } else {
181 $result->{$layer}->{must}++;
182 $result->{$layer}->{score_max} -= 2;
183 $result->{$layer}->{score_min} -= 2;
184 $result->{conforming_min} = 0;
185 $result->{conforming_max} = 0;
186 }
187 } # add_error
188
189 sub print_http_header_section ($$) {
190 my ($input, $result) = @_;
191 return unless defined $input->{header_status_code} or
192 defined $input->{header_status_text} or
193 @{$input->{header_field}};
194
195 push @nav, ['#source-header' => 'HTTP Header'];
196 print STDOUT qq[<div id="source-header" class="section">
197 <h2>HTTP Header</h2>
198
199 <p><strong>Note</strong>: Due to the limitation of the
200 network library in use, the content of this section might
201 not be the real header.</p>
202
203 <table><tbody>
204 ];
205
206 if (defined $input->{header_status_code}) {
207 print STDOUT qq[<tr><th scope="row">Status code</th>];
208 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
209 }
210 if (defined $input->{header_status_text}) {
211 print STDOUT qq[<tr><th scope="row">Status text</th>];
212 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
213 }
214
215 for (@{$input->{header_field}}) {
216 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
217 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
218 }
219
220 print STDOUT qq[</tbody></table></div>];
221 } # print_http_header_section
222
223 sub print_syntax_error_html_section ($$) {
224 my ($input, $result) = @_;
225
226 require Encode;
227 require Whatpm::HTML;
228
229 print STDOUT qq[
230 <div id="parse-errors" class="section">
231 <h2>Parse Errors</h2>
232
233 <dl>];
234 push @nav, ['#parse-errors' => 'Parse Error'];
235
236 my $onerror = sub {
237 my (%opt) = @_;
238 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
239 if ($opt{column} > 0) {
240 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
241 } else {
242 $opt{line} = $opt{line} - 1 || 1;
243 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
244 }
245 $type =~ tr/ /-/;
246 $type =~ s/\|/%7C/g;
247 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
248 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
249 print STDOUT qq[$msg</dd>\n];
250
251 add_error ('syntax', \%opt => $result);
252 };
253
254 my $doc = $dom->create_document;
255 my $el;
256 if (defined $inner_html_element and length $inner_html_element) {
257 $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
258 my $time1 = time;
259 my $t = Encode::decode ($input->{charset}, $input->{s});
260 $time{decode} = time - $time1;
261
262 $el = $doc->create_element_ns
263 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
264 $time1 = time;
265 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
266 $time{parse} = time - $time1;
267 } else {
268 my $time1 = time;
269 Whatpm::HTML->parse_byte_string
270 ($input->{charset}, $input->{s} => $doc, $onerror);
271 $time{parse_html} = time - $time1;
272 }
273
274 print STDOUT qq[</dl></div>];
275
276 return ($doc, $el);
277 } # print_syntax_error_html_section
278
279 sub print_syntax_error_xml_section ($$) {
280 my ($input, $result) = @_;
281
282 require Message::DOM::XMLParserTemp;
283
284 print STDOUT qq[
285 <div id="parse-errors" class="section">
286 <h2>Parse Errors</h2>
287
288 <dl>];
289 push @nav, ['#parse-errors' => 'Parse Error'];
290
291 my $onerror = sub {
292 my $err = shift;
293 my $line = $err->location->line_number;
294 print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
295 print STDOUT $err->location->column_number, "</dt><dd>";
296 print STDOUT htescape $err->text, "</dd>\n";
297
298 add_error ('syntax', {type => $err->text,
299 level => [
300 $err->SEVERITY_FATAL_ERROR => 'm',
301 $err->SEVERITY_ERROR => 'm',
302 $err->SEVERITY_WARNING => 's',
303 ]->[$err->severity]} => $result);
304
305 return 1;
306 };
307
308 my $time1 = time;
309 open my $fh, '<', \($input->{s});
310 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
311 ($fh => $dom, $onerror, charset => $input->{charset});
312 $time{parse_xml} = time - $time1;
313
314 print STDOUT qq[</dl></div>];
315
316 return ($doc, undef);
317 } # print_syntax_error_xml_section
318
319 sub print_syntax_error_manifest_section ($$) {
320 my ($input, $result) = @_;
321
322 require Whatpm::CacheManifest;
323
324 print STDOUT qq[
325 <div id="parse-errors" class="section">
326 <h2>Parse Errors</h2>
327
328 <dl>];
329 push @nav, ['#parse-errors' => 'Parse Error'];
330
331 my $onerror = sub {
332 my (%opt) = @_;
333 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
334 print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
335 $type =~ tr/ /-/;
336 $type =~ s/\|/%7C/g;
337 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
338 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
339 print STDOUT qq[$msg</dd>\n];
340
341 add_error ('syntax', \%opt => $result);
342 };
343
344 my $time1 = time;
345 my $manifest = Whatpm::CacheManifest->parse_byte_string
346 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
347 $time{parse_manifest} = time - $time1;
348
349 print STDOUT qq[</dl></div>];
350
351 return $manifest;
352 } # print_syntax_error_manifest_section
353
354 sub print_source_string_section ($$) {
355 require Encode;
356 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
357 return unless $enc;
358
359 my $s = \($enc->decode (${$_[0]}));
360 my $i = 1;
361 push @nav, ['#source-string' => 'Source'];
362 print STDOUT qq[<div id="source-string" class="section">
363 <h2>Document Source</h2>
364 <ol lang="">\n];
365 if (length $$s) {
366 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
367 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
368 $i++;
369 }
370 if ($$s =~ /\G([^\x0A]+)/gc) {
371 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
372 }
373 } else {
374 print STDOUT q[<li id="line-1"></li>];
375 }
376 print STDOUT "</ol></div>";
377 } # print_input_string_section
378
379 sub print_document_tree ($) {
380 my $node = shift;
381 my $r = '<ol class="xoxo">';
382
383 my @node = ($node);
384 while (@node) {
385 my $child = shift @node;
386 unless (ref $child) {
387 $r .= $child;
388 next;
389 }
390
391 my $node_id = 'node-'.refaddr $child;
392 my $nt = $child->node_type;
393 if ($nt == $child->ELEMENT_NODE) {
394 my $child_nsuri = $child->namespace_uri;
395 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
396 '</code>'; ## ISSUE: case
397
398 if ($child->has_attributes) {
399 $r .= '<ul class="attributes">';
400 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
401 @{$child->attributes}) {
402 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
403 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
404 }
405 $r .= '</ul>';
406 }
407
408 if ($child->has_child_nodes) {
409 $r .= '<ol class="children">';
410 unshift @node, @{$child->child_nodes}, '</ol></li>';
411 } else {
412 $r .= '</li>';
413 }
414 } elsif ($nt == $child->TEXT_NODE) {
415 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
416 } elsif ($nt == $child->CDATA_SECTION_NODE) {
417 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
418 } elsif ($nt == $child->COMMENT_NODE) {
419 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
420 } elsif ($nt == $child->DOCUMENT_NODE) {
421 $r .= qq'<li id="$node_id" class="tree-document">Document';
422 $r .= qq[<ul class="attributes">];
423 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
424 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
425 unless ($child->manakai_is_html) {
426 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
427 if (defined $child->xml_encoding) {
428 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
429 } else {
430 $r .= qq[<li>XML encoding = (null)</li>];
431 }
432 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
433 }
434 $r .= qq[</ul>];
435 if ($child->has_child_nodes) {
436 $r .= '<ol class="children">';
437 unshift @node, @{$child->child_nodes}, '</ol></li>';
438 }
439 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
440 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
441 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
442 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
443 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
444 $r .= '</ul></li>';
445 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
446 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
447 } else {
448 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
449 }
450 }
451
452 $r .= '</ol>';
453 print STDOUT $r;
454 } # print_document_tree
455
456 sub print_structure_dump_dom_section ($$) {
457 my ($doc, $el) = @_;
458
459 print STDOUT qq[
460 <div id="document-tree" class="section">
461 <h2>Document Tree</h2>
462 ];
463 push @nav, ['#document-tree' => 'Tree'];
464
465 print_document_tree ($el || $doc);
466
467 print STDOUT qq[</div>];
468 } # print_structure_dump_dom_section
469
470 sub print_structure_dump_manifest_section ($) {
471 my $manifest = shift;
472
473 print STDOUT qq[
474 <div id="dump-manifest" class="section">
475 <h2>Cache Manifest</h2>
476 ];
477 push @nav, ['#dump-manifest' => 'Caceh Manifest'];
478
479 print STDOUT qq[<dl><dt>Explicit entries</dt>];
480 for my $uri (@{$manifest->[0]}) {
481 my $euri = htescape ($uri);
482 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
483 }
484
485 print STDOUT qq[<dt>Fallback entries</dt><dd>
486 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
487 <th scope=row>Fallback Entry</tr><tbody>];
488 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
489 my $euri = htescape ($uri);
490 my $euri2 = htescape ($manifest->[1]->{$uri});
491 print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
492 <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
493 }
494
495 print STDOUT qq[</table><dt>Online whitelist</dt>];
496 for my $uri (@{$manifest->[2]}) {
497 my $euri = htescape ($uri);
498 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
499 }
500
501 print STDOUT qq[</dl></div>];
502 } # print_structure_dump_manifest_section
503
504 sub print_structure_error_dom_section ($$$) {
505 my ($doc, $el, $result) = @_;
506
507 print STDOUT qq[<div id="document-errors" class="section">
508 <h2>Document Errors</h2>
509
510 <dl>];
511 push @nav, ['#document-errors' => 'Document Error'];
512
513 require Whatpm::ContentChecker;
514 my $onerror = sub {
515 my %opt = @_;
516 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
517 $type =~ tr/ /-/;
518 $type =~ s/\|/%7C/g;
519 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
520 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
521 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
522 print STDOUT $msg, "</dd>\n";
523 add_error ('structure', \%opt => $result);
524 };
525
526 my $elements;
527 my $time1 = time;
528 if ($el) {
529 $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
530 } else {
531 $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
532 }
533 $time{check} = time - $time1;
534
535 print STDOUT qq[</dl></div>];
536
537 return $elements;
538 } # print_structure_error_dom_section
539
540 sub print_structure_error_manifest_section ($$$) {
541 my ($manifest, $result) = @_;
542
543 print STDOUT qq[<div id="document-errors" class="section">
544 <h2>Document Errors</h2>
545
546 <dl>];
547 push @nav, ['#document-errors' => 'Document Error'];
548
549 require Whatpm::CacheManifest;
550 Whatpm::CacheManifest->check_manifest ($manifest, sub {
551 my %opt = @_;
552 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
553 $type =~ tr/ /-/;
554 $type =~ s/\|/%7C/g;
555 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
556 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
557 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
558 add_error ('structure', \%opt => $result);
559 });
560
561 print STDOUT qq[</div>];
562 } # print_structure_error_manifest_section
563
564 sub print_table_section ($) {
565 my $tables = shift;
566
567 push @nav, ['#tables' => 'Tables'];
568 print STDOUT qq[
569 <div id="tables" class="section">
570 <h2>Tables</h2>
571
572 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
573 <script src="../table-script.js" type="text/javascript"></script>
574 <noscript>
575 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
576 </noscript>
577 ];
578
579 require JSON;
580
581 my $i = 0;
582 for my $table_el (@$tables) {
583 $i++;
584 print STDOUT qq[<div class="section" id="table-$i"><h3>] .
585 get_node_link ($table_el) . q[</h3>];
586
587 ## TODO: Make |ContentChecker| return |form_table| result
588 ## so that this script don't have to run the algorithm twice.
589 my $table = Whatpm::HTMLTable->form_table ($table_el);
590
591 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
592 next unless $_;
593 delete $_->{element};
594 }
595
596 for (@{$table->{row_group}}) {
597 next unless $_;
598 next unless $_->{element};
599 $_->{type} = $_->{element}->manakai_local_name;
600 delete $_->{element};
601 }
602
603 for (@{$table->{cell}}) {
604 next unless $_;
605 for (@{$_}) {
606 next unless $_;
607 for (@$_) {
608 $_->{id} = refaddr $_->{element} if defined $_->{element};
609 delete $_->{element};
610 $_->{is_header} = $_->{is_header} ? 1 : 0;
611 }
612 }
613 }
614
615 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
616 print STDOUT JSON::objToJson ($table);
617 print STDOUT qq[, document.getElementById ('table-$i'));</script>];
618 }
619
620 print STDOUT qq[</div>];
621 } # print_table_section
622
623 sub print_id_section ($) {
624 my $ids = shift;
625
626 push @nav, ['#identifiers' => 'IDs'];
627 print STDOUT qq[
628 <div id="identifiers" class="section">
629 <h2>Identifiers</h2>
630
631 <dl>
632 ];
633 for my $id (sort {$a cmp $b} keys %$ids) {
634 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
635 for (@{$ids->{$id}}) {
636 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
637 }
638 }
639 print STDOUT qq[</dl></div>];
640 } # print_id_section
641
642 sub print_term_section ($) {
643 my $terms = shift;
644
645 push @nav, ['#terms' => 'Terms'];
646 print STDOUT qq[
647 <div id="terms" class="section">
648 <h2>Terms</h2>
649
650 <dl>
651 ];
652 for my $term (sort {$a cmp $b} keys %$terms) {
653 print STDOUT qq[<dt>@{[htescape $term]}</dt>];
654 for (@{$terms->{$term}}) {
655 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
656 }
657 }
658 print STDOUT qq[</dl></div>];
659 } # print_term_section
660
661 sub print_class_section ($) {
662 my $classes = shift;
663
664 push @nav, ['#classes' => 'Classes'];
665 print STDOUT qq[
666 <div id="classes" class="section">
667 <h2>Classes</h2>
668
669 <dl>
670 ];
671 for my $class (sort {$a cmp $b} keys %$classes) {
672 print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
673 for (@{$classes->{$class}}) {
674 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
675 }
676 }
677 print STDOUT qq[</dl></div>];
678 } # print_class_section
679
680 sub print_result_section ($) {
681 my $result = shift;
682
683 print STDOUT qq[
684 <div id="result-summary" class="section">
685 <h2>Result</h2>];
686
687 if ($result->{unsupported} and $result->{conforming_max}) {
688 print STDOUT qq[<p class=uncertain id=result-para>The conformance
689 checker cannot decide whether the document is conforming or
690 not, since the document contains one or more unsupported
691 features. The document might or might not be conforming.</p>];
692 } elsif ($result->{conforming_min}) {
693 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
694 found in this document.</p>];
695 } elsif ($result->{conforming_max}) {
696 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
697 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
698 it might be conforming.</p>];
699 } else {
700 print STDOUT qq[<p class=FAIL id=result-para>This document is
701 <strong><em>non</em>-conforming</strong>.</p>];
702 }
703
704 print STDOUT qq[<table>
705 <colgroup><col><colgroup><col><col><col><colgroup><col>
706 <thead>
707 <tr><th scope=col></th>
708 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
709 Errors</a></th>
710 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
711 Errors</a></th>
712 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
713 <th scope=col>Score</th></tr></thead><tbody>];
714
715 my $must_error = 0;
716 my $should_error = 0;
717 my $warning = 0;
718 my $score_min = 0;
719 my $score_max = 0;
720 my $score_base = 20;
721 my $score_unit = $score_base / 100;
722 for (
723 [Transfer => 'transfer', ''],
724 [Character => 'char', ''],
725 [Syntax => 'syntax', '#parse-errors'],
726 [Structure => 'structure', '#document-errors'],
727 ) {
728 $must_error += ($result->{$_->[1]}->{must} += 0);
729 $should_error += ($result->{$_->[1]}->{should} += 0);
730 $warning += ($result->{$_->[1]}->{warning} += 0);
731 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
732 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
733
734 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
735 my $label = $_->[0];
736 if ($result->{$_->[1]}->{must} or
737 $result->{$_->[1]}->{should} or
738 $result->{$_->[1]}->{warning} or
739 $result->{$_->[1]}->{unsupported}) {
740 $label = qq[<a href="$_->[2]">$label</a>];
741 }
742
743 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
744 if ($uncertain) {
745 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
746 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
747 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
748 } else {
749 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
750 }
751 }
752
753 $score_max += $score_base;
754
755 print STDOUT qq[
756 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
757 </tbody>
758 <tfoot><tr class=uncertain><th scope=row>Total</th>
759 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
760 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
761 <td>$warning?</td>
762 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
763 </table>
764
765 <p><strong>Important</strong>: This conformance checking service
766 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
767 </div>];
768 push @nav, ['#result-summary' => 'Result'];
769 } # print_result_section
770
771 sub print_result_unknown_type_section ($$) {
772 my ($input, $result) = @_;
773
774 my $euri = htescape ($input->{uri});
775 print STDOUT qq[
776 <div id="parse-errors" class="section">
777 <h2>Errors</h2>
778
779 <dl>
780 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
781 <dd class=unsupported><strong><a href="../error-description#level-u">Not
782 supported</a></strong>:
783 Media type
784 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
785 is not supported.</dd>
786 </dl>
787 </div>
788 ];
789 push @nav, ['#parse-errors' => 'Errors'];
790 add_error (char => {level => 'unsupported'} => $result);
791 add_error (syntax => {level => 'unsupported'} => $result);
792 add_error (structure => {level => 'unsupported'} => $result);
793 } # print_result_unknown_type_section
794
795 sub print_result_input_error_section ($) {
796 my $input = shift;
797 print STDOUT qq[<div class="section" id="result-summary">
798 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
799 </div>];
800 push @nav, ['#result-summary' => 'Result'];
801 } # print_Result_input_error_section
802
803 sub get_error_label ($) {
804 my $err = shift;
805
806 my $r = '';
807
808 if (defined $err->{line}) {
809 if ($err->{column} > 0) {
810 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
811 } else {
812 $err->{line} = $err->{line} - 1 || 1;
813 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
814 }
815 }
816
817 if (defined $err->{node}) {
818 $r .= ' ' if length $r;
819 $r = get_node_link ($err->{node});
820 }
821
822 if (defined $err->{index}) {
823 $r .= ' ' if length $r;
824 $r .= 'Index ' . (0+$err->{index});
825 }
826
827 if (defined $err->{value}) {
828 $r .= ' ' if length $r;
829 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
830 }
831
832 return $r;
833 } # get_error_label
834
835 sub get_error_level_label ($) {
836 my $err = shift;
837
838 my $r = '';
839
840 if (not defined $err->{level} or $err->{level} eq 'm') {
841 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
842 error</a></strong>: ];
843 } elsif ($err->{level} eq 's') {
844 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
845 error</a></strong>: ];
846 } elsif ($err->{level} eq 'w') {
847 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
848 ];
849 } elsif ($err->{level} eq 'unsupported') {
850 $r = qq[<strong><a href="../error-description#level-u">Not
851 supported</a></strong>: ];
852 } else {
853 my $elevel = htescape ($err->{level});
854 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
855 ];
856 }
857
858 return $r;
859 } # get_error_level_label
860
861 sub get_node_path ($) {
862 my $node = shift;
863 my @r;
864 while (defined $node) {
865 my $rs;
866 if ($node->node_type == 1) {
867 $rs = $node->manakai_local_name;
868 $node = $node->parent_node;
869 } elsif ($node->node_type == 2) {
870 $rs = '@' . $node->manakai_local_name;
871 $node = $node->owner_element;
872 } elsif ($node->node_type == 3) {
873 $rs = '"' . $node->data . '"';
874 $node = $node->parent_node;
875 } elsif ($node->node_type == 9) {
876 @r = ('') unless @r;
877 $rs = '';
878 $node = $node->parent_node;
879 } else {
880 $rs = '#' . $node->node_type;
881 $node = $node->parent_node;
882 }
883 unshift @r, $rs;
884 }
885 return join '/', @r;
886 } # get_node_path
887
888 sub get_node_link ($) {
889 return qq[<a href="#node-@{[refaddr $_[0]]}">] .
890 htescape (get_node_path ($_[0])) . qq[</a>];
891 } # get_node_link
892
893 {
894 my $Msg = {};
895
896 sub load_text_catalog ($) {
897 my $lang = shift; # MUST be a canonical lang name
898 open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
899 while (<$file>) {
900 if (s/^([^;]+);([^;]*);//) {
901 my ($type, $cls, $msg) = ($1, $2, $_);
902 $msg =~ tr/\x0D\x0A//d;
903 $Msg->{$type} = [$cls, $msg];
904 }
905 }
906 } # load_text_catalog
907
908 sub get_text ($) {
909 my ($type, $level, $node) = @_;
910 $type = $level . ':' . $type if defined $level;
911 my @arg;
912 {
913 if (defined $Msg->{$type}) {
914 my $msg = $Msg->{$type}->[1];
915 $msg =~ s{<var>\$([0-9]+)</var>}{
916 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
917 }ge;
918 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
919 UNIVERSAL::can ($node, 'get_attribute_ns')
920 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
921 }ge;
922 $msg =~ s{<var>{\@}</var>}{
923 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
924 }ge;
925 $msg =~ s{<var>{local-name}</var>}{
926 UNIVERSAL::can ($node, 'manakai_local_name')
927 ? htescape ($node->manakai_local_name) : ''
928 }ge;
929 $msg =~ s{<var>{element-local-name}</var>}{
930 (UNIVERSAL::can ($node, 'owner_element') and
931 $node->owner_element)
932 ? htescape ($node->owner_element->manakai_local_name)
933 : ''
934 }ge;
935 return ($type, $Msg->{$type}->[0], $msg);
936 } elsif ($type =~ s/:([^:]*)$//) {
937 unshift @arg, $1;
938 redo;
939 }
940 }
941 return ($type, '', htescape ($_[0]));
942 } # get_text
943
944 }
945
946 sub get_input_document ($$) {
947 my ($http, $dom) = @_;
948
949 my $request_uri = $http->get_parameter ('uri');
950 my $r = {};
951 if (defined $request_uri and length $request_uri) {
952 my $uri = $dom->create_uri_reference ($request_uri);
953 unless ({
954 http => 1,
955 }->{lc $uri->uri_scheme}) {
956 return {uri => $request_uri, request_uri => $request_uri,
957 error_status_text => 'URI scheme not allowed'};
958 }
959
960 require Message::Util::HostPermit;
961 my $host_permit = new Message::Util::HostPermit;
962 $host_permit->add_rule (<<EOH);
963 Allow host=suika port=80
964 Deny host=suika
965 Allow host=suika.fam.cx port=80
966 Deny host=suika.fam.cx
967 Deny host=localhost
968 Deny host=*.localdomain
969 Deny ipv4=0.0.0.0/8
970 Deny ipv4=10.0.0.0/8
971 Deny ipv4=127.0.0.0/8
972 Deny ipv4=169.254.0.0/16
973 Deny ipv4=172.0.0.0/11
974 Deny ipv4=192.0.2.0/24
975 Deny ipv4=192.88.99.0/24
976 Deny ipv4=192.168.0.0/16
977 Deny ipv4=198.18.0.0/15
978 Deny ipv4=224.0.0.0/4
979 Deny ipv4=255.255.255.255/32
980 Deny ipv6=0::0/0
981 Allow host=*
982 EOH
983 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
984 return {uri => $request_uri, request_uri => $request_uri,
985 error_status_text => 'Connection to the host is forbidden'};
986 }
987
988 require LWP::UserAgent;
989 my $ua = WDCC::LWPUA->new;
990 $ua->{wdcc_dom} = $dom;
991 $ua->{wdcc_host_permit} = $host_permit;
992 $ua->agent ('Mozilla'); ## TODO: for now.
993 $ua->parse_head (0);
994 $ua->protocols_allowed ([qw/http/]);
995 $ua->max_size (1000_000);
996 my $req = HTTP::Request->new (GET => $request_uri);
997 my $res = $ua->request ($req);
998 ## TODO: 401 sets |is_success| true.
999 if ($res->is_success or $http->get_parameter ('error-page')) {
1000 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1001 $r->{uri} = $res->request->uri;
1002 $r->{request_uri} = $request_uri;
1003
1004 ## TODO: More strict parsing...
1005 my $ct = $res->header ('Content-Type');
1006 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1007 $r->{charset} = lc $1;
1008 $r->{charset} =~ tr/\\//d;
1009 }
1010
1011 my $input_charset = $http->get_parameter ('charset');
1012 if (defined $input_charset and length $input_charset) {
1013 $r->{charset_overridden}
1014 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1015 $r->{charset} = $input_charset;
1016 }
1017
1018 ## TODO: Support for HTTP Content-Encoding
1019
1020 $r->{s} = ''.$res->content;
1021
1022 require Whatpm::ContentType;
1023 ($r->{official_type}, $r->{media_type})
1024 = Whatpm::ContentType->get_sniffed_type
1025 (get_file_head => sub {
1026 return substr $r->{s}, 0, shift;
1027 },
1028 http_content_type_byte => $ct,
1029 has_http_content_encoding =>
1030 defined $res->header ('Content-Encoding'),
1031 supported_image_types => {});
1032 } else {
1033 $r->{uri} = $res->request->uri;
1034 $r->{request_uri} = $request_uri;
1035 $r->{error_status_text} = $res->status_line;
1036 }
1037
1038 $r->{header_field} = [];
1039 $res->scan (sub {
1040 push @{$r->{header_field}}, [$_[0], $_[1]];
1041 });
1042 $r->{header_status_code} = $res->code;
1043 $r->{header_status_text} = $res->message;
1044 } else {
1045 $r->{s} = ''.$http->get_parameter ('s');
1046 $r->{uri} = q<thismessage:/>;
1047 $r->{request_uri} = q<thismessage:/>;
1048 $r->{base_uri} = q<thismessage:/>;
1049 $r->{charset} = ''.$http->get_parameter ('_charset_');
1050 $r->{charset} =~ s/\s+//g;
1051 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1052 $r->{header_field} = [];
1053
1054 require Whatpm::ContentType;
1055 ($r->{official_type}, $r->{media_type})
1056 = Whatpm::ContentType->get_sniffed_type
1057 (get_file_head => sub {
1058 return substr $r->{s}, 0, shift;
1059 },
1060 http_content_type_byte => undef,
1061 has_http_content_encoding => 0,
1062 supported_image_types => {});
1063 }
1064
1065 my $input_format = $http->get_parameter ('i');
1066 if (defined $input_format and length $input_format) {
1067 $r->{media_type_overridden}
1068 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1069 $r->{media_type} = $input_format;
1070 }
1071 if (defined $r->{s} and not defined $r->{media_type}) {
1072 $r->{media_type} = 'text/html';
1073 $r->{media_type_overridden} = 1;
1074 }
1075
1076 if ($r->{media_type} eq 'text/xml') {
1077 unless (defined $r->{charset}) {
1078 $r->{charset} = 'us-ascii';
1079 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1080 $r->{charset_overridden} = 0;
1081 }
1082 }
1083
1084 if (length $r->{s} > 1000_000) {
1085 $r->{error_status_text} = 'Entity-body too large';
1086 delete $r->{s};
1087 return $r;
1088 }
1089
1090 return $r;
1091 } # get_input_document
1092
1093 package WDCC::LWPUA;
1094 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1095
1096 sub redirect_ok {
1097 my $ua = shift;
1098 unless ($ua->SUPER::redirect_ok (@_)) {
1099 return 0;
1100 }
1101
1102 my $uris = $_[1]->header ('Location');
1103 return 0 unless $uris;
1104 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1105 unless ({
1106 http => 1,
1107 }->{lc $uri->uri_scheme}) {
1108 return 0;
1109 }
1110 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1111 return 0;
1112 }
1113 return 1;
1114 } # redirect_ok
1115
1116 =head1 AUTHOR
1117
1118 Wakaba <w@suika.fam.cx>.
1119
1120 =head1 LICENSE
1121
1122 Copyright 2007 Wakaba <w@suika.fam.cx>
1123
1124 This library is free software; you can redistribute it
1125 and/or modify it under the same terms as Perl itself.
1126
1127 =cut
1128
1129 ## $Date: 2007/11/11 06:57:16 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24