/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.31 - (show annotations) (download)
Sun Feb 10 02:05:30 2008 UTC (16 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.30: +47 -42 lines
++ ChangeLog	10 Feb 2008 02:05:20 -0000
2008-02-10  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi (check_and_print): Now this is a subroutine.

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 use Message::CGI::HTTP;
24 my $http = Message::CGI::HTTP->new;
25
26 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28 exit;
29 }
30
31 binmode STDOUT, ':utf8';
32 $| = 1;
33
34 require Message::DOM::DOMImplementation;
35 my $dom = Message::DOM::DOMImplementation->new;
36
37 load_text_catalog ('en'); ## TODO: conneg
38
39 my @nav;
40 print STDOUT qq[Content-Type: text/html; charset=utf-8
41
42 <!DOCTYPE html>
43 <html lang="en">
44 <head>
45 <title>Web Document Conformance Checker (BETA)</title>
46 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 </head>
48 <body>
49 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50 (<em>beta</em>)</h1>
51 ];
52
53 $| = 0;
54 my $input = get_input_document ($http, $dom);
55 my $char_length = 0;
56 my %time;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89 ];
90
91 my $result = {conforming_min => 1, conforming_max => 1};
92 check_and_print ($input => $result);
93 print_result_section ($result);
94 } else {
95 print STDOUT qq[</dl></div>];
96 print_result_input_error_section ($input);
97 }
98
99 print STDOUT qq[
100 <ul class="navigation" id="nav-items">
101 ];
102 for (@nav) {
103 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
104 }
105 print STDOUT qq[
106 </ul>
107 </body>
108 </html>
109 ];
110
111 for (qw/decode parse parse_html parse_xml parse_manifest
112 check check_manifest/) {
113 next unless defined $time{$_};
114 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
115 print $file $char_length, "\t", $time{$_}, "\n";
116 }
117
118 exit;
119
120 sub add_error ($$$) {
121 my ($layer, $err, $result) = @_;
122 if (defined $err->{level}) {
123 if ($err->{level} eq 's') {
124 $result->{$layer}->{should}++;
125 $result->{$layer}->{score_min} -= 2;
126 $result->{conforming_min} = 0;
127 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
128 $result->{$layer}->{warning}++;
129 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
130 $result->{$layer}->{unsupported}++;
131 $result->{unsupported} = 1;
132 } else {
133 $result->{$layer}->{must}++;
134 $result->{$layer}->{score_max} -= 2;
135 $result->{$layer}->{score_min} -= 2;
136 $result->{conforming_min} = 0;
137 $result->{conforming_max} = 0;
138 }
139 } else {
140 $result->{$layer}->{must}++;
141 $result->{$layer}->{score_max} -= 2;
142 $result->{$layer}->{score_min} -= 2;
143 $result->{conforming_min} = 0;
144 $result->{conforming_max} = 0;
145 }
146 } # add_error
147
148 sub check_and_print ($$) {
149 my ($input, $result) = @_;
150
151 print_http_header_section ($input, $result);
152
153 my $doc;
154 my $el;
155 my $manifest;
156
157 if ($input->{media_type} eq 'text/html') {
158 ($doc, $el) = print_syntax_error_html_section ($input, $result);
159 print_source_string_section
160 (\($input->{s}), $input->{charset} || $doc->input_encoding);
161 } elsif ({
162 'text/xml' => 1,
163 'application/atom+xml' => 1,
164 'application/rss+xml' => 1,
165 'application/svg+xml' => 1,
166 'application/xhtml+xml' => 1,
167 'application/xml' => 1,
168 }->{$input->{media_type}}) {
169 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
170 print_source_string_section (\($input->{s}), $doc->input_encoding);
171 } elsif ($input->{media_type} eq 'text/cache-manifest') {
172 ## TODO: MUST be text/cache-manifest
173 $manifest = print_syntax_error_manifest_section ($input, $result);
174 print_source_string_section (\($input->{s}), 'utf-8');
175 } else {
176 ## TODO: Change HTTP status code??
177 print_result_unknown_type_section ($input, $result);
178 }
179
180 if (defined $doc or defined $el) {
181 print_structure_dump_dom_section ($doc, $el);
182 my $elements = print_structure_error_dom_section ($doc, $el, $result);
183 print_table_section ($elements->{table}) if @{$elements->{table}};
184 print_id_section ($elements->{id}) if keys %{$elements->{id}};
185 print_term_section ($elements->{term}) if keys %{$elements->{term}};
186 print_class_section ($elements->{class}) if keys %{$elements->{class}};
187 } elsif (defined $manifest) {
188 print_structure_dump_manifest_section ($manifest);
189 print_structure_error_manifest_section ($manifest, $result);
190 }
191 } # check_and_print
192
193 sub print_http_header_section ($$) {
194 my ($input, $result) = @_;
195 return unless defined $input->{header_status_code} or
196 defined $input->{header_status_text} or
197 @{$input->{header_field}};
198
199 push @nav, ['#source-header' => 'HTTP Header'];
200 print STDOUT qq[<div id="source-header" class="section">
201 <h2>HTTP Header</h2>
202
203 <p><strong>Note</strong>: Due to the limitation of the
204 network library in use, the content of this section might
205 not be the real header.</p>
206
207 <table><tbody>
208 ];
209
210 if (defined $input->{header_status_code}) {
211 print STDOUT qq[<tr><th scope="row">Status code</th>];
212 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
213 }
214 if (defined $input->{header_status_text}) {
215 print STDOUT qq[<tr><th scope="row">Status text</th>];
216 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
217 }
218
219 for (@{$input->{header_field}}) {
220 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
221 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
222 }
223
224 print STDOUT qq[</tbody></table></div>];
225 } # print_http_header_section
226
227 sub print_syntax_error_html_section ($$) {
228 my ($input, $result) = @_;
229
230 require Encode;
231 require Whatpm::HTML;
232
233 print STDOUT qq[
234 <div id="parse-errors" class="section">
235 <h2>Parse Errors</h2>
236
237 <dl>];
238 push @nav, ['#parse-errors' => 'Parse Error'];
239
240 my $onerror = sub {
241 my (%opt) = @_;
242 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
243 if ($opt{column} > 0) {
244 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
245 } else {
246 $opt{line} = $opt{line} - 1 || 1;
247 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
248 }
249 $type =~ tr/ /-/;
250 $type =~ s/\|/%7C/g;
251 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
252 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
253 print STDOUT qq[$msg</dd>\n];
254
255 add_error ('syntax', \%opt => $result);
256 };
257
258 my $doc = $dom->create_document;
259 my $el;
260 my $inner_html_element = $http->get_parameter ('e');
261 if (defined $inner_html_element and length $inner_html_element) {
262 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
263 my $time1 = time;
264 my $t = Encode::decode ($input->{charset}, $input->{s});
265 $time{decode} = time - $time1;
266
267 $el = $doc->create_element_ns
268 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
269 $time1 = time;
270 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
271 $time{parse} = time - $time1;
272 } else {
273 my $time1 = time;
274 Whatpm::HTML->parse_byte_string
275 ($input->{charset}, $input->{s} => $doc, $onerror);
276 $time{parse_html} = time - $time1;
277 }
278 $doc->manakai_charset ($input->{official_charset})
279 if defined $input->{official_charset};
280
281 print STDOUT qq[</dl></div>];
282
283 return ($doc, $el);
284 } # print_syntax_error_html_section
285
286 sub print_syntax_error_xml_section ($$) {
287 my ($input, $result) = @_;
288
289 require Message::DOM::XMLParserTemp;
290
291 print STDOUT qq[
292 <div id="parse-errors" class="section">
293 <h2>Parse Errors</h2>
294
295 <dl>];
296 push @nav, ['#parse-errors' => 'Parse Error'];
297
298 my $onerror = sub {
299 my $err = shift;
300 my $line = $err->location->line_number;
301 print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
302 print STDOUT $err->location->column_number, "</dt><dd>";
303 print STDOUT htescape $err->text, "</dd>\n";
304
305 add_error ('syntax', {type => $err->text,
306 level => [
307 $err->SEVERITY_FATAL_ERROR => 'm',
308 $err->SEVERITY_ERROR => 'm',
309 $err->SEVERITY_WARNING => 's',
310 ]->[$err->severity]} => $result);
311
312 return 1;
313 };
314
315 my $time1 = time;
316 open my $fh, '<', \($input->{s});
317 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
318 ($fh => $dom, $onerror, charset => $input->{charset});
319 $time{parse_xml} = time - $time1;
320 $doc->manakai_charset ($input->{official_charset})
321 if defined $input->{official_charset};
322
323 print STDOUT qq[</dl></div>];
324
325 return ($doc, undef);
326 } # print_syntax_error_xml_section
327
328 sub print_syntax_error_manifest_section ($$) {
329 my ($input, $result) = @_;
330
331 require Whatpm::CacheManifest;
332
333 print STDOUT qq[
334 <div id="parse-errors" class="section">
335 <h2>Parse Errors</h2>
336
337 <dl>];
338 push @nav, ['#parse-errors' => 'Parse Error'];
339
340 my $onerror = sub {
341 my (%opt) = @_;
342 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
343 print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
344 $type =~ tr/ /-/;
345 $type =~ s/\|/%7C/g;
346 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
347 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
348 print STDOUT qq[$msg</dd>\n];
349
350 add_error ('syntax', \%opt => $result);
351 };
352
353 my $time1 = time;
354 my $manifest = Whatpm::CacheManifest->parse_byte_string
355 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
356 $time{parse_manifest} = time - $time1;
357
358 print STDOUT qq[</dl></div>];
359
360 return $manifest;
361 } # print_syntax_error_manifest_section
362
363 sub print_source_string_section ($$) {
364 require Encode;
365 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
366 return unless $enc;
367
368 my $s = \($enc->decode (${$_[0]}));
369 my $i = 1;
370 push @nav, ['#source-string' => 'Source'];
371 print STDOUT qq[<div id="source-string" class="section">
372 <h2>Document Source</h2>
373 <ol lang="">\n];
374 if (length $$s) {
375 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
376 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
377 $i++;
378 }
379 if ($$s =~ /\G([^\x0A]+)/gc) {
380 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
381 }
382 } else {
383 print STDOUT q[<li id="line-1"></li>];
384 }
385 print STDOUT "</ol></div>";
386 } # print_input_string_section
387
388 sub print_document_tree ($) {
389 my $node = shift;
390 my $r = '<ol class="xoxo">';
391
392 my @node = ($node);
393 while (@node) {
394 my $child = shift @node;
395 unless (ref $child) {
396 $r .= $child;
397 next;
398 }
399
400 my $node_id = 'node-'.refaddr $child;
401 my $nt = $child->node_type;
402 if ($nt == $child->ELEMENT_NODE) {
403 my $child_nsuri = $child->namespace_uri;
404 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
405 '</code>'; ## ISSUE: case
406
407 if ($child->has_attributes) {
408 $r .= '<ul class="attributes">';
409 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
410 @{$child->attributes}) {
411 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
412 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
413 }
414 $r .= '</ul>';
415 }
416
417 if ($child->has_child_nodes) {
418 $r .= '<ol class="children">';
419 unshift @node, @{$child->child_nodes}, '</ol></li>';
420 } else {
421 $r .= '</li>';
422 }
423 } elsif ($nt == $child->TEXT_NODE) {
424 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
425 } elsif ($nt == $child->CDATA_SECTION_NODE) {
426 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
427 } elsif ($nt == $child->COMMENT_NODE) {
428 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
429 } elsif ($nt == $child->DOCUMENT_NODE) {
430 $r .= qq'<li id="$node_id" class="tree-document">Document';
431 $r .= qq[<ul class="attributes">];
432 my $cp = $child->manakai_charset;
433 if (defined $cp) {
434 $r .= qq[<li><code>charset</code> parameter = <code>];
435 $r .= htescape ($cp) . qq[</code></li>];
436 }
437 $r .= qq[<li><code>inputEncoding</code> = ];
438 my $ie = $child->input_encoding;
439 if (defined $ie) {
440 $r .= qq[<code>@{[htescape ($ie)]}</code>];
441 if ($child->manakai_has_bom) {
442 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
443 }
444 } else {
445 $r .= qq[(<code>null</code>)];
446 }
447 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
448 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
449 unless ($child->manakai_is_html) {
450 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
451 if (defined $child->xml_encoding) {
452 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
453 } else {
454 $r .= qq[<li>XML encoding = (null)</li>];
455 }
456 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
457 }
458 $r .= qq[</ul>];
459 if ($child->has_child_nodes) {
460 $r .= '<ol class="children">';
461 unshift @node, @{$child->child_nodes}, '</ol></li>';
462 }
463 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
464 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
465 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
466 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
467 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
468 $r .= '</ul></li>';
469 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
470 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
471 } else {
472 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
473 }
474 }
475
476 $r .= '</ol>';
477 print STDOUT $r;
478 } # print_document_tree
479
480 sub print_structure_dump_dom_section ($$) {
481 my ($doc, $el) = @_;
482
483 print STDOUT qq[
484 <div id="document-tree" class="section">
485 <h2>Document Tree</h2>
486 ];
487 push @nav, ['#document-tree' => 'Tree'];
488
489 print_document_tree ($el || $doc);
490
491 print STDOUT qq[</div>];
492 } # print_structure_dump_dom_section
493
494 sub print_structure_dump_manifest_section ($) {
495 my $manifest = shift;
496
497 print STDOUT qq[
498 <div id="dump-manifest" class="section">
499 <h2>Cache Manifest</h2>
500 ];
501 push @nav, ['#dump-manifest' => 'Caceh Manifest'];
502
503 print STDOUT qq[<dl><dt>Explicit entries</dt>];
504 for my $uri (@{$manifest->[0]}) {
505 my $euri = htescape ($uri);
506 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
507 }
508
509 print STDOUT qq[<dt>Fallback entries</dt><dd>
510 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
511 <th scope=row>Fallback Entry</tr><tbody>];
512 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
513 my $euri = htescape ($uri);
514 my $euri2 = htescape ($manifest->[1]->{$uri});
515 print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
516 <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
517 }
518
519 print STDOUT qq[</table><dt>Online whitelist</dt>];
520 for my $uri (@{$manifest->[2]}) {
521 my $euri = htescape ($uri);
522 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
523 }
524
525 print STDOUT qq[</dl></div>];
526 } # print_structure_dump_manifest_section
527
528 sub print_structure_error_dom_section ($$$) {
529 my ($doc, $el, $result) = @_;
530
531 print STDOUT qq[<div id="document-errors" class="section">
532 <h2>Document Errors</h2>
533
534 <dl>];
535 push @nav, ['#document-errors' => 'Document Error'];
536
537 require Whatpm::ContentChecker;
538 my $onerror = sub {
539 my %opt = @_;
540 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
541 $type =~ tr/ /-/;
542 $type =~ s/\|/%7C/g;
543 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
544 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
545 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
546 print STDOUT $msg, "</dd>\n";
547 add_error ('structure', \%opt => $result);
548 };
549
550 my $elements;
551 my $time1 = time;
552 if ($el) {
553 $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
554 } else {
555 $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
556 }
557 $time{check} = time - $time1;
558
559 print STDOUT qq[</dl></div>];
560
561 return $elements;
562 } # print_structure_error_dom_section
563
564 sub print_structure_error_manifest_section ($$$) {
565 my ($manifest, $result) = @_;
566
567 print STDOUT qq[<div id="document-errors" class="section">
568 <h2>Document Errors</h2>
569
570 <dl>];
571 push @nav, ['#document-errors' => 'Document Error'];
572
573 require Whatpm::CacheManifest;
574 Whatpm::CacheManifest->check_manifest ($manifest, sub {
575 my %opt = @_;
576 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
577 $type =~ tr/ /-/;
578 $type =~ s/\|/%7C/g;
579 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
580 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
581 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
582 add_error ('structure', \%opt => $result);
583 });
584
585 print STDOUT qq[</div>];
586 } # print_structure_error_manifest_section
587
588 sub print_table_section ($) {
589 my $tables = shift;
590
591 push @nav, ['#tables' => 'Tables'];
592 print STDOUT qq[
593 <div id="tables" class="section">
594 <h2>Tables</h2>
595
596 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
597 <script src="../table-script.js" type="text/javascript"></script>
598 <noscript>
599 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
600 </noscript>
601 ];
602
603 require JSON;
604
605 my $i = 0;
606 for my $table_el (@$tables) {
607 $i++;
608 print STDOUT qq[<div class="section" id="table-$i"><h3>] .
609 get_node_link ($table_el) . q[</h3>];
610
611 ## TODO: Make |ContentChecker| return |form_table| result
612 ## so that this script don't have to run the algorithm twice.
613 my $table = Whatpm::HTMLTable->form_table ($table_el);
614
615 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
616 next unless $_;
617 delete $_->{element};
618 }
619
620 for (@{$table->{row_group}}) {
621 next unless $_;
622 next unless $_->{element};
623 $_->{type} = $_->{element}->manakai_local_name;
624 delete $_->{element};
625 }
626
627 for (@{$table->{cell}}) {
628 next unless $_;
629 for (@{$_}) {
630 next unless $_;
631 for (@$_) {
632 $_->{id} = refaddr $_->{element} if defined $_->{element};
633 delete $_->{element};
634 $_->{is_header} = $_->{is_header} ? 1 : 0;
635 }
636 }
637 }
638
639 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
640 print STDOUT JSON::objToJson ($table);
641 print STDOUT qq[, document.getElementById ('table-$i'));</script>];
642 }
643
644 print STDOUT qq[</div>];
645 } # print_table_section
646
647 sub print_id_section ($) {
648 my $ids = shift;
649
650 push @nav, ['#identifiers' => 'IDs'];
651 print STDOUT qq[
652 <div id="identifiers" class="section">
653 <h2>Identifiers</h2>
654
655 <dl>
656 ];
657 for my $id (sort {$a cmp $b} keys %$ids) {
658 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
659 for (@{$ids->{$id}}) {
660 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
661 }
662 }
663 print STDOUT qq[</dl></div>];
664 } # print_id_section
665
666 sub print_term_section ($) {
667 my $terms = shift;
668
669 push @nav, ['#terms' => 'Terms'];
670 print STDOUT qq[
671 <div id="terms" class="section">
672 <h2>Terms</h2>
673
674 <dl>
675 ];
676 for my $term (sort {$a cmp $b} keys %$terms) {
677 print STDOUT qq[<dt>@{[htescape $term]}</dt>];
678 for (@{$terms->{$term}}) {
679 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
680 }
681 }
682 print STDOUT qq[</dl></div>];
683 } # print_term_section
684
685 sub print_class_section ($) {
686 my $classes = shift;
687
688 push @nav, ['#classes' => 'Classes'];
689 print STDOUT qq[
690 <div id="classes" class="section">
691 <h2>Classes</h2>
692
693 <dl>
694 ];
695 for my $class (sort {$a cmp $b} keys %$classes) {
696 print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
697 for (@{$classes->{$class}}) {
698 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
699 }
700 }
701 print STDOUT qq[</dl></div>];
702 } # print_class_section
703
704 sub print_result_section ($) {
705 my $result = shift;
706
707 print STDOUT qq[
708 <div id="result-summary" class="section">
709 <h2>Result</h2>];
710
711 if ($result->{unsupported} and $result->{conforming_max}) {
712 print STDOUT qq[<p class=uncertain id=result-para>The conformance
713 checker cannot decide whether the document is conforming or
714 not, since the document contains one or more unsupported
715 features. The document might or might not be conforming.</p>];
716 } elsif ($result->{conforming_min}) {
717 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
718 found in this document.</p>];
719 } elsif ($result->{conforming_max}) {
720 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
721 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
722 it might be conforming.</p>];
723 } else {
724 print STDOUT qq[<p class=FAIL id=result-para>This document is
725 <strong><em>non</em>-conforming</strong>.</p>];
726 }
727
728 print STDOUT qq[<table>
729 <colgroup><col><colgroup><col><col><col><colgroup><col>
730 <thead>
731 <tr><th scope=col></th>
732 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
733 Errors</a></th>
734 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
735 Errors</a></th>
736 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
737 <th scope=col>Score</th></tr></thead><tbody>];
738
739 my $must_error = 0;
740 my $should_error = 0;
741 my $warning = 0;
742 my $score_min = 0;
743 my $score_max = 0;
744 my $score_base = 20;
745 my $score_unit = $score_base / 100;
746 for (
747 [Transfer => 'transfer', ''],
748 [Character => 'char', ''],
749 [Syntax => 'syntax', '#parse-errors'],
750 [Structure => 'structure', '#document-errors'],
751 ) {
752 $must_error += ($result->{$_->[1]}->{must} += 0);
753 $should_error += ($result->{$_->[1]}->{should} += 0);
754 $warning += ($result->{$_->[1]}->{warning} += 0);
755 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
756 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
757
758 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
759 my $label = $_->[0];
760 if ($result->{$_->[1]}->{must} or
761 $result->{$_->[1]}->{should} or
762 $result->{$_->[1]}->{warning} or
763 $result->{$_->[1]}->{unsupported}) {
764 $label = qq[<a href="$_->[2]">$label</a>];
765 }
766
767 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
768 if ($uncertain) {
769 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
770 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
771 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
772 } else {
773 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
774 }
775 }
776
777 $score_max += $score_base;
778
779 print STDOUT qq[
780 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
781 </tbody>
782 <tfoot><tr class=uncertain><th scope=row>Total</th>
783 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
784 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
785 <td>$warning?</td>
786 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
787 </table>
788
789 <p><strong>Important</strong>: This conformance checking service
790 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
791 </div>];
792 push @nav, ['#result-summary' => 'Result'];
793 } # print_result_section
794
795 sub print_result_unknown_type_section ($$) {
796 my ($input, $result) = @_;
797
798 my $euri = htescape ($input->{uri});
799 print STDOUT qq[
800 <div id="parse-errors" class="section">
801 <h2>Errors</h2>
802
803 <dl>
804 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
805 <dd class=unsupported><strong><a href="../error-description#level-u">Not
806 supported</a></strong>:
807 Media type
808 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
809 is not supported.</dd>
810 </dl>
811 </div>
812 ];
813 push @nav, ['#parse-errors' => 'Errors'];
814 add_error (char => {level => 'u'} => $result);
815 add_error (syntax => {level => 'u'} => $result);
816 add_error (structure => {level => 'u'} => $result);
817 } # print_result_unknown_type_section
818
819 sub print_result_input_error_section ($) {
820 my $input = shift;
821 print STDOUT qq[<div class="section" id="result-summary">
822 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
823 </div>];
824 push @nav, ['#result-summary' => 'Result'];
825 } # print_Result_input_error_section
826
827 sub get_error_label ($) {
828 my $err = shift;
829
830 my $r = '';
831
832 if (defined $err->{line}) {
833 if ($err->{column} > 0) {
834 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
835 } else {
836 $err->{line} = $err->{line} - 1 || 1;
837 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
838 }
839 }
840
841 if (defined $err->{node}) {
842 $r .= ' ' if length $r;
843 $r = get_node_link ($err->{node});
844 }
845
846 if (defined $err->{index}) {
847 $r .= ' ' if length $r;
848 $r .= 'Index ' . (0+$err->{index});
849 }
850
851 if (defined $err->{value}) {
852 $r .= ' ' if length $r;
853 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
854 }
855
856 return $r;
857 } # get_error_label
858
859 sub get_error_level_label ($) {
860 my $err = shift;
861
862 my $r = '';
863
864 if (not defined $err->{level} or $err->{level} eq 'm') {
865 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
866 error</a></strong>: ];
867 } elsif ($err->{level} eq 's') {
868 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
869 error</a></strong>: ];
870 } elsif ($err->{level} eq 'w') {
871 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
872 ];
873 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
874 $r = qq[<strong><a href="../error-description#level-u">Not
875 supported</a></strong>: ];
876 } else {
877 my $elevel = htescape ($err->{level});
878 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
879 ];
880 }
881
882 return $r;
883 } # get_error_level_label
884
885 sub get_node_path ($) {
886 my $node = shift;
887 my @r;
888 while (defined $node) {
889 my $rs;
890 if ($node->node_type == 1) {
891 $rs = $node->manakai_local_name;
892 $node = $node->parent_node;
893 } elsif ($node->node_type == 2) {
894 $rs = '@' . $node->manakai_local_name;
895 $node = $node->owner_element;
896 } elsif ($node->node_type == 3) {
897 $rs = '"' . $node->data . '"';
898 $node = $node->parent_node;
899 } elsif ($node->node_type == 9) {
900 @r = ('') unless @r;
901 $rs = '';
902 $node = $node->parent_node;
903 } else {
904 $rs = '#' . $node->node_type;
905 $node = $node->parent_node;
906 }
907 unshift @r, $rs;
908 }
909 return join '/', @r;
910 } # get_node_path
911
912 sub get_node_link ($) {
913 return qq[<a href="#node-@{[refaddr $_[0]]}">] .
914 htescape (get_node_path ($_[0])) . qq[</a>];
915 } # get_node_link
916
917 {
918 my $Msg = {};
919
920 sub load_text_catalog ($) {
921 my $lang = shift; # MUST be a canonical lang name
922 open my $file, '<:utf8', "cc-msg.$lang.txt"
923 or die "$0: cc-msg.$lang.txt: $!";
924 while (<$file>) {
925 if (s/^([^;]+);([^;]*);//) {
926 my ($type, $cls, $msg) = ($1, $2, $_);
927 $msg =~ tr/\x0D\x0A//d;
928 $Msg->{$type} = [$cls, $msg];
929 }
930 }
931 } # load_text_catalog
932
933 sub get_text ($) {
934 my ($type, $level, $node) = @_;
935 $type = $level . ':' . $type if defined $level;
936 $level = 'm' unless defined $level;
937 my @arg;
938 {
939 if (defined $Msg->{$type}) {
940 my $msg = $Msg->{$type}->[1];
941 $msg =~ s{<var>\$([0-9]+)</var>}{
942 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
943 }ge;
944 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
945 UNIVERSAL::can ($node, 'get_attribute_ns')
946 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
947 }ge;
948 $msg =~ s{<var>{\@}</var>}{
949 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
950 }ge;
951 $msg =~ s{<var>{local-name}</var>}{
952 UNIVERSAL::can ($node, 'manakai_local_name')
953 ? htescape ($node->manakai_local_name) : ''
954 }ge;
955 $msg =~ s{<var>{element-local-name}</var>}{
956 (UNIVERSAL::can ($node, 'owner_element') and
957 $node->owner_element)
958 ? htescape ($node->owner_element->manakai_local_name)
959 : ''
960 }ge;
961 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
962 } elsif ($type =~ s/:([^:]*)$//) {
963 unshift @arg, $1;
964 redo;
965 }
966 }
967 return ($type, 'level-'.$level, htescape ($_[0]));
968 } # get_text
969
970 }
971
972 sub get_input_document ($$) {
973 my ($http, $dom) = @_;
974
975 my $request_uri = $http->get_parameter ('uri');
976 my $r = {};
977 if (defined $request_uri and length $request_uri) {
978 my $uri = $dom->create_uri_reference ($request_uri);
979 unless ({
980 http => 1,
981 }->{lc $uri->uri_scheme}) {
982 return {uri => $request_uri, request_uri => $request_uri,
983 error_status_text => 'URI scheme not allowed'};
984 }
985
986 require Message::Util::HostPermit;
987 my $host_permit = new Message::Util::HostPermit;
988 $host_permit->add_rule (<<EOH);
989 Allow host=suika port=80
990 Deny host=suika
991 Allow host=suika.fam.cx port=80
992 Deny host=suika.fam.cx
993 Deny host=localhost
994 Deny host=*.localdomain
995 Deny ipv4=0.0.0.0/8
996 Deny ipv4=10.0.0.0/8
997 Deny ipv4=127.0.0.0/8
998 Deny ipv4=169.254.0.0/16
999 Deny ipv4=172.0.0.0/11
1000 Deny ipv4=192.0.2.0/24
1001 Deny ipv4=192.88.99.0/24
1002 Deny ipv4=192.168.0.0/16
1003 Deny ipv4=198.18.0.0/15
1004 Deny ipv4=224.0.0.0/4
1005 Deny ipv4=255.255.255.255/32
1006 Deny ipv6=0::0/0
1007 Allow host=*
1008 EOH
1009 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1010 return {uri => $request_uri, request_uri => $request_uri,
1011 error_status_text => 'Connection to the host is forbidden'};
1012 }
1013
1014 require LWP::UserAgent;
1015 my $ua = WDCC::LWPUA->new;
1016 $ua->{wdcc_dom} = $dom;
1017 $ua->{wdcc_host_permit} = $host_permit;
1018 $ua->agent ('Mozilla'); ## TODO: for now.
1019 $ua->parse_head (0);
1020 $ua->protocols_allowed ([qw/http/]);
1021 $ua->max_size (1000_000);
1022 my $req = HTTP::Request->new (GET => $request_uri);
1023 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1024 my $res = $ua->request ($req);
1025 ## TODO: 401 sets |is_success| true.
1026 if ($res->is_success or $http->get_parameter ('error-page')) {
1027 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1028 $r->{uri} = $res->request->uri;
1029 $r->{request_uri} = $request_uri;
1030
1031 ## TODO: More strict parsing...
1032 my $ct = $res->header ('Content-Type');
1033 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1034 $r->{charset} = lc $1;
1035 $r->{charset} =~ tr/\\//d;
1036 $r->{official_charset} = $r->{charset};
1037 }
1038
1039 my $input_charset = $http->get_parameter ('charset');
1040 if (defined $input_charset and length $input_charset) {
1041 $r->{charset_overridden}
1042 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1043 $r->{charset} = $input_charset;
1044 }
1045
1046 ## TODO: Support for HTTP Content-Encoding
1047
1048 $r->{s} = ''.$res->content;
1049
1050 require Whatpm::ContentType;
1051 ($r->{official_type}, $r->{media_type})
1052 = Whatpm::ContentType->get_sniffed_type
1053 (get_file_head => sub {
1054 return substr $r->{s}, 0, shift;
1055 },
1056 http_content_type_byte => $ct,
1057 has_http_content_encoding =>
1058 defined $res->header ('Content-Encoding'),
1059 supported_image_types => {});
1060 } else {
1061 $r->{uri} = $res->request->uri;
1062 $r->{request_uri} = $request_uri;
1063 $r->{error_status_text} = $res->status_line;
1064 }
1065
1066 $r->{header_field} = [];
1067 $res->scan (sub {
1068 push @{$r->{header_field}}, [$_[0], $_[1]];
1069 });
1070 $r->{header_status_code} = $res->code;
1071 $r->{header_status_text} = $res->message;
1072 } else {
1073 $r->{s} = ''.$http->get_parameter ('s');
1074 $r->{uri} = q<thismessage:/>;
1075 $r->{request_uri} = q<thismessage:/>;
1076 $r->{base_uri} = q<thismessage:/>;
1077 $r->{charset} = ''.$http->get_parameter ('_charset_');
1078 $r->{charset} =~ s/\s+//g;
1079 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1080 $r->{official_charset} = $r->{charset};
1081 $r->{header_field} = [];
1082
1083 require Whatpm::ContentType;
1084 ($r->{official_type}, $r->{media_type})
1085 = Whatpm::ContentType->get_sniffed_type
1086 (get_file_head => sub {
1087 return substr $r->{s}, 0, shift;
1088 },
1089 http_content_type_byte => undef,
1090 has_http_content_encoding => 0,
1091 supported_image_types => {});
1092 }
1093
1094 my $input_format = $http->get_parameter ('i');
1095 if (defined $input_format and length $input_format) {
1096 $r->{media_type_overridden}
1097 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1098 $r->{media_type} = $input_format;
1099 }
1100 if (defined $r->{s} and not defined $r->{media_type}) {
1101 $r->{media_type} = 'text/html';
1102 $r->{media_type_overridden} = 1;
1103 }
1104
1105 if ($r->{media_type} eq 'text/xml') {
1106 unless (defined $r->{charset}) {
1107 $r->{charset} = 'us-ascii';
1108 $r->{official_charset} = $r->{charset};
1109 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1110 $r->{charset_overridden} = 0;
1111 }
1112 }
1113
1114 if (length $r->{s} > 1000_000) {
1115 $r->{error_status_text} = 'Entity-body too large';
1116 delete $r->{s};
1117 return $r;
1118 }
1119
1120 return $r;
1121 } # get_input_document
1122
1123 package WDCC::LWPUA;
1124 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1125
1126 sub redirect_ok {
1127 my $ua = shift;
1128 unless ($ua->SUPER::redirect_ok (@_)) {
1129 return 0;
1130 }
1131
1132 my $uris = $_[1]->header ('Location');
1133 return 0 unless $uris;
1134 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1135 unless ({
1136 http => 1,
1137 }->{lc $uri->uri_scheme}) {
1138 return 0;
1139 }
1140 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1141 return 0;
1142 }
1143 return 1;
1144 } # redirect_ok
1145
1146 =head1 AUTHOR
1147
1148 Wakaba <w@suika.fam.cx>.
1149
1150 =head1 LICENSE
1151
1152 Copyright 2007 Wakaba <w@suika.fam.cx>
1153
1154 This library is free software; you can redistribute it
1155 and/or modify it under the same terms as Perl itself.
1156
1157 =cut
1158
1159 ## $Date: 2008/02/09 12:22:19 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24