/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.26 - (show annotations) (download)
Sun Nov 18 11:05:12 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.25: +12 -4 lines
++ ChangeLog	18 Nov 2007 11:04:51 -0000
	* cc-style.css: New rules for warnings.

	* cc-todo.en.txt: Updated.

	* cc.cgi: Default to |Windows-1252| instead of |ISO-8859-1|
	for |inner_html| with external source.  Set |manakai_charset|
	attribute if possible.
	(load_text_catalog): Interpret catalog file as UTF-8.

	* error-description-source.en.xml: New errors for character
	encodings are added.

2007-11-18  Wakaba  <wakaba@suika.fam.cx>

	* error-description-source.xml: s/charset declaration/character
	encoding declaration/g, since HTML5 spec says so.

2007-11-18  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 use Message::CGI::HTTP;
24 my $http = Message::CGI::HTTP->new;
25
26 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28 exit;
29 }
30
31 binmode STDOUT, ':utf8';
32 $| = 1;
33
34 require Message::DOM::DOMImplementation;
35 my $dom = Message::DOM::DOMImplementation->new;
36
37 load_text_catalog ('en'); ## TODO: conneg
38
39 my @nav;
40 print STDOUT qq[Content-Type: text/html; charset=utf-8
41
42 <!DOCTYPE html>
43 <html lang="en">
44 <head>
45 <title>Web Document Conformance Checker (BETA)</title>
46 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 </head>
48 <body>
49 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50 (<em>beta</em>)</h1>
51 ];
52
53 $| = 0;
54 my $input = get_input_document ($http, $dom);
55 my $char_length = 0;
56 my %time;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89 ];
90
91 my $result = {conforming_min => 1, conforming_max => 1};
92 print_http_header_section ($input, $result);
93
94 my $doc;
95 my $el;
96 my $manifest;
97
98 if ($input->{media_type} eq 'text/html') {
99 ($doc, $el) = print_syntax_error_html_section ($input, $result);
100 print_source_string_section
101 (\($input->{s}), $input->{charset} || $doc->input_encoding);
102 } elsif ({
103 'text/xml' => 1,
104 'application/atom+xml' => 1,
105 'application/rss+xml' => 1,
106 'application/svg+xml' => 1,
107 'application/xhtml+xml' => 1,
108 'application/xml' => 1,
109 }->{$input->{media_type}}) {
110 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
111 print_source_string_section (\($input->{s}), $doc->input_encoding);
112 } elsif ($input->{media_type} eq 'text/cache-manifest') {
113 ## TODO: MUST be text/cache-manifest
114 $manifest = print_syntax_error_manifest_section ($input, $result);
115 print_source_string_section (\($input->{s}), 'utf-8');
116 } else {
117 ## TODO: Change HTTP status code??
118 print_result_unknown_type_section ($input, $result);
119 }
120
121 if (defined $doc or defined $el) {
122 print_structure_dump_dom_section ($doc, $el);
123 my $elements = print_structure_error_dom_section ($doc, $el, $result);
124 print_table_section ($elements->{table}) if @{$elements->{table}};
125 print_id_section ($elements->{id}) if keys %{$elements->{id}};
126 print_term_section ($elements->{term}) if keys %{$elements->{term}};
127 print_class_section ($elements->{class}) if keys %{$elements->{class}};
128 } elsif (defined $manifest) {
129 print_structure_dump_manifest_section ($manifest);
130 print_structure_error_manifest_section ($manifest, $result);
131 }
132
133 print_result_section ($result);
134 } else {
135 print STDOUT qq[</dl></div>];
136 print_result_input_error_section ($input);
137 }
138
139 print STDOUT qq[
140 <ul class="navigation" id="nav-items">
141 ];
142 for (@nav) {
143 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
144 }
145 print STDOUT qq[
146 </ul>
147 </body>
148 </html>
149 ];
150
151 for (qw/decode parse parse_html parse_xml parse_manifest
152 check check_manifest/) {
153 next unless defined $time{$_};
154 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
155 print $file $char_length, "\t", $time{$_}, "\n";
156 }
157
158 exit;
159
160 sub add_error ($$$) {
161 my ($layer, $err, $result) = @_;
162 if (defined $err->{level}) {
163 if ($err->{level} eq 's') {
164 $result->{$layer}->{should}++;
165 $result->{$layer}->{score_min} -= 2;
166 $result->{conforming_min} = 0;
167 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
168 $result->{$layer}->{warning}++;
169 } elsif ($err->{level} eq 'unsupported') {
170 $result->{$layer}->{unsupported}++;
171 $result->{unsupported} = 1;
172 } else {
173 $result->{$layer}->{must}++;
174 $result->{$layer}->{score_max} -= 2;
175 $result->{$layer}->{score_min} -= 2;
176 $result->{conforming_min} = 0;
177 $result->{conforming_max} = 0;
178 }
179 } else {
180 $result->{$layer}->{must}++;
181 $result->{$layer}->{score_max} -= 2;
182 $result->{$layer}->{score_min} -= 2;
183 $result->{conforming_min} = 0;
184 $result->{conforming_max} = 0;
185 }
186 } # add_error
187
188 sub print_http_header_section ($$) {
189 my ($input, $result) = @_;
190 return unless defined $input->{header_status_code} or
191 defined $input->{header_status_text} or
192 @{$input->{header_field}};
193
194 push @nav, ['#source-header' => 'HTTP Header'];
195 print STDOUT qq[<div id="source-header" class="section">
196 <h2>HTTP Header</h2>
197
198 <p><strong>Note</strong>: Due to the limitation of the
199 network library in use, the content of this section might
200 not be the real header.</p>
201
202 <table><tbody>
203 ];
204
205 if (defined $input->{header_status_code}) {
206 print STDOUT qq[<tr><th scope="row">Status code</th>];
207 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
208 }
209 if (defined $input->{header_status_text}) {
210 print STDOUT qq[<tr><th scope="row">Status text</th>];
211 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
212 }
213
214 for (@{$input->{header_field}}) {
215 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
216 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
217 }
218
219 print STDOUT qq[</tbody></table></div>];
220 } # print_http_header_section
221
222 sub print_syntax_error_html_section ($$) {
223 my ($input, $result) = @_;
224
225 require Encode;
226 require Whatpm::HTML;
227
228 print STDOUT qq[
229 <div id="parse-errors" class="section">
230 <h2>Parse Errors</h2>
231
232 <dl>];
233 push @nav, ['#parse-errors' => 'Parse Error'];
234
235 my $onerror = sub {
236 my (%opt) = @_;
237 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
238 if ($opt{column} > 0) {
239 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
240 } else {
241 $opt{line} = $opt{line} - 1 || 1;
242 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
243 }
244 $type =~ tr/ /-/;
245 $type =~ s/\|/%7C/g;
246 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
247 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
248 print STDOUT qq[$msg</dd>\n];
249
250 add_error ('syntax', \%opt => $result);
251 };
252
253 my $doc = $dom->create_document;
254 my $el;
255 my $inner_html_element = $http->get_parameter ('e');
256 if (defined $inner_html_element and length $inner_html_element) {
257 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
258 my $time1 = time;
259 my $t = Encode::decode ($input->{charset}, $input->{s});
260 $time{decode} = time - $time1;
261
262 $el = $doc->create_element_ns
263 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
264 $time1 = time;
265 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
266 $time{parse} = time - $time1;
267 } else {
268 my $time1 = time;
269 Whatpm::HTML->parse_byte_string
270 ($input->{charset}, $input->{s} => $doc, $onerror);
271 $time{parse_html} = time - $time1;
272 }
273 $doc->manakai_charset ($input->{official_charset})
274 if defined $input->{official_charset};
275
276 print STDOUT qq[</dl></div>];
277
278 return ($doc, $el);
279 } # print_syntax_error_html_section
280
281 sub print_syntax_error_xml_section ($$) {
282 my ($input, $result) = @_;
283
284 require Message::DOM::XMLParserTemp;
285
286 print STDOUT qq[
287 <div id="parse-errors" class="section">
288 <h2>Parse Errors</h2>
289
290 <dl>];
291 push @nav, ['#parse-errors' => 'Parse Error'];
292
293 my $onerror = sub {
294 my $err = shift;
295 my $line = $err->location->line_number;
296 print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
297 print STDOUT $err->location->column_number, "</dt><dd>";
298 print STDOUT htescape $err->text, "</dd>\n";
299
300 add_error ('syntax', {type => $err->text,
301 level => [
302 $err->SEVERITY_FATAL_ERROR => 'm',
303 $err->SEVERITY_ERROR => 'm',
304 $err->SEVERITY_WARNING => 's',
305 ]->[$err->severity]} => $result);
306
307 return 1;
308 };
309
310 my $time1 = time;
311 open my $fh, '<', \($input->{s});
312 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
313 ($fh => $dom, $onerror, charset => $input->{charset});
314 $time{parse_xml} = time - $time1;
315 $doc->manakai_charset ($input->{official_charset})
316 if defined $input->{official_charset};
317
318 print STDOUT qq[</dl></div>];
319
320 return ($doc, undef);
321 } # print_syntax_error_xml_section
322
323 sub print_syntax_error_manifest_section ($$) {
324 my ($input, $result) = @_;
325
326 require Whatpm::CacheManifest;
327
328 print STDOUT qq[
329 <div id="parse-errors" class="section">
330 <h2>Parse Errors</h2>
331
332 <dl>];
333 push @nav, ['#parse-errors' => 'Parse Error'];
334
335 my $onerror = sub {
336 my (%opt) = @_;
337 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
338 print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
339 $type =~ tr/ /-/;
340 $type =~ s/\|/%7C/g;
341 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
342 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
343 print STDOUT qq[$msg</dd>\n];
344
345 add_error ('syntax', \%opt => $result);
346 };
347
348 my $time1 = time;
349 my $manifest = Whatpm::CacheManifest->parse_byte_string
350 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
351 $time{parse_manifest} = time - $time1;
352
353 print STDOUT qq[</dl></div>];
354
355 return $manifest;
356 } # print_syntax_error_manifest_section
357
358 sub print_source_string_section ($$) {
359 require Encode;
360 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
361 return unless $enc;
362
363 my $s = \($enc->decode (${$_[0]}));
364 my $i = 1;
365 push @nav, ['#source-string' => 'Source'];
366 print STDOUT qq[<div id="source-string" class="section">
367 <h2>Document Source</h2>
368 <ol lang="">\n];
369 if (length $$s) {
370 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
371 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
372 $i++;
373 }
374 if ($$s =~ /\G([^\x0A]+)/gc) {
375 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
376 }
377 } else {
378 print STDOUT q[<li id="line-1"></li>];
379 }
380 print STDOUT "</ol></div>";
381 } # print_input_string_section
382
383 sub print_document_tree ($) {
384 my $node = shift;
385 my $r = '<ol class="xoxo">';
386
387 my @node = ($node);
388 while (@node) {
389 my $child = shift @node;
390 unless (ref $child) {
391 $r .= $child;
392 next;
393 }
394
395 my $node_id = 'node-'.refaddr $child;
396 my $nt = $child->node_type;
397 if ($nt == $child->ELEMENT_NODE) {
398 my $child_nsuri = $child->namespace_uri;
399 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
400 '</code>'; ## ISSUE: case
401
402 if ($child->has_attributes) {
403 $r .= '<ul class="attributes">';
404 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
405 @{$child->attributes}) {
406 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
407 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
408 }
409 $r .= '</ul>';
410 }
411
412 if ($child->has_child_nodes) {
413 $r .= '<ol class="children">';
414 unshift @node, @{$child->child_nodes}, '</ol></li>';
415 } else {
416 $r .= '</li>';
417 }
418 } elsif ($nt == $child->TEXT_NODE) {
419 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
420 } elsif ($nt == $child->CDATA_SECTION_NODE) {
421 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
422 } elsif ($nt == $child->COMMENT_NODE) {
423 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
424 } elsif ($nt == $child->DOCUMENT_NODE) {
425 $r .= qq'<li id="$node_id" class="tree-document">Document';
426 $r .= qq[<ul class="attributes">];
427 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
428 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
429 unless ($child->manakai_is_html) {
430 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
431 if (defined $child->xml_encoding) {
432 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
433 } else {
434 $r .= qq[<li>XML encoding = (null)</li>];
435 }
436 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
437 }
438 $r .= qq[</ul>];
439 if ($child->has_child_nodes) {
440 $r .= '<ol class="children">';
441 unshift @node, @{$child->child_nodes}, '</ol></li>';
442 }
443 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
444 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
445 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
446 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
447 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
448 $r .= '</ul></li>';
449 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
450 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
451 } else {
452 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
453 }
454 }
455
456 $r .= '</ol>';
457 print STDOUT $r;
458 } # print_document_tree
459
460 sub print_structure_dump_dom_section ($$) {
461 my ($doc, $el) = @_;
462
463 print STDOUT qq[
464 <div id="document-tree" class="section">
465 <h2>Document Tree</h2>
466 ];
467 push @nav, ['#document-tree' => 'Tree'];
468
469 print_document_tree ($el || $doc);
470
471 print STDOUT qq[</div>];
472 } # print_structure_dump_dom_section
473
474 sub print_structure_dump_manifest_section ($) {
475 my $manifest = shift;
476
477 print STDOUT qq[
478 <div id="dump-manifest" class="section">
479 <h2>Cache Manifest</h2>
480 ];
481 push @nav, ['#dump-manifest' => 'Caceh Manifest'];
482
483 print STDOUT qq[<dl><dt>Explicit entries</dt>];
484 for my $uri (@{$manifest->[0]}) {
485 my $euri = htescape ($uri);
486 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
487 }
488
489 print STDOUT qq[<dt>Fallback entries</dt><dd>
490 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
491 <th scope=row>Fallback Entry</tr><tbody>];
492 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
493 my $euri = htescape ($uri);
494 my $euri2 = htescape ($manifest->[1]->{$uri});
495 print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
496 <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
497 }
498
499 print STDOUT qq[</table><dt>Online whitelist</dt>];
500 for my $uri (@{$manifest->[2]}) {
501 my $euri = htescape ($uri);
502 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
503 }
504
505 print STDOUT qq[</dl></div>];
506 } # print_structure_dump_manifest_section
507
508 sub print_structure_error_dom_section ($$$) {
509 my ($doc, $el, $result) = @_;
510
511 print STDOUT qq[<div id="document-errors" class="section">
512 <h2>Document Errors</h2>
513
514 <dl>];
515 push @nav, ['#document-errors' => 'Document Error'];
516
517 require Whatpm::ContentChecker;
518 my $onerror = sub {
519 my %opt = @_;
520 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
521 $type =~ tr/ /-/;
522 $type =~ s/\|/%7C/g;
523 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
524 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
525 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
526 print STDOUT $msg, "</dd>\n";
527 add_error ('structure', \%opt => $result);
528 };
529
530 my $elements;
531 my $time1 = time;
532 if ($el) {
533 $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
534 } else {
535 $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
536 }
537 $time{check} = time - $time1;
538
539 print STDOUT qq[</dl></div>];
540
541 return $elements;
542 } # print_structure_error_dom_section
543
544 sub print_structure_error_manifest_section ($$$) {
545 my ($manifest, $result) = @_;
546
547 print STDOUT qq[<div id="document-errors" class="section">
548 <h2>Document Errors</h2>
549
550 <dl>];
551 push @nav, ['#document-errors' => 'Document Error'];
552
553 require Whatpm::CacheManifest;
554 Whatpm::CacheManifest->check_manifest ($manifest, sub {
555 my %opt = @_;
556 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
557 $type =~ tr/ /-/;
558 $type =~ s/\|/%7C/g;
559 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
560 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
561 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
562 add_error ('structure', \%opt => $result);
563 });
564
565 print STDOUT qq[</div>];
566 } # print_structure_error_manifest_section
567
568 sub print_table_section ($) {
569 my $tables = shift;
570
571 push @nav, ['#tables' => 'Tables'];
572 print STDOUT qq[
573 <div id="tables" class="section">
574 <h2>Tables</h2>
575
576 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
577 <script src="../table-script.js" type="text/javascript"></script>
578 <noscript>
579 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
580 </noscript>
581 ];
582
583 require JSON;
584
585 my $i = 0;
586 for my $table_el (@$tables) {
587 $i++;
588 print STDOUT qq[<div class="section" id="table-$i"><h3>] .
589 get_node_link ($table_el) . q[</h3>];
590
591 ## TODO: Make |ContentChecker| return |form_table| result
592 ## so that this script don't have to run the algorithm twice.
593 my $table = Whatpm::HTMLTable->form_table ($table_el);
594
595 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
596 next unless $_;
597 delete $_->{element};
598 }
599
600 for (@{$table->{row_group}}) {
601 next unless $_;
602 next unless $_->{element};
603 $_->{type} = $_->{element}->manakai_local_name;
604 delete $_->{element};
605 }
606
607 for (@{$table->{cell}}) {
608 next unless $_;
609 for (@{$_}) {
610 next unless $_;
611 for (@$_) {
612 $_->{id} = refaddr $_->{element} if defined $_->{element};
613 delete $_->{element};
614 $_->{is_header} = $_->{is_header} ? 1 : 0;
615 }
616 }
617 }
618
619 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
620 print STDOUT JSON::objToJson ($table);
621 print STDOUT qq[, document.getElementById ('table-$i'));</script>];
622 }
623
624 print STDOUT qq[</div>];
625 } # print_table_section
626
627 sub print_id_section ($) {
628 my $ids = shift;
629
630 push @nav, ['#identifiers' => 'IDs'];
631 print STDOUT qq[
632 <div id="identifiers" class="section">
633 <h2>Identifiers</h2>
634
635 <dl>
636 ];
637 for my $id (sort {$a cmp $b} keys %$ids) {
638 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
639 for (@{$ids->{$id}}) {
640 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
641 }
642 }
643 print STDOUT qq[</dl></div>];
644 } # print_id_section
645
646 sub print_term_section ($) {
647 my $terms = shift;
648
649 push @nav, ['#terms' => 'Terms'];
650 print STDOUT qq[
651 <div id="terms" class="section">
652 <h2>Terms</h2>
653
654 <dl>
655 ];
656 for my $term (sort {$a cmp $b} keys %$terms) {
657 print STDOUT qq[<dt>@{[htescape $term]}</dt>];
658 for (@{$terms->{$term}}) {
659 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
660 }
661 }
662 print STDOUT qq[</dl></div>];
663 } # print_term_section
664
665 sub print_class_section ($) {
666 my $classes = shift;
667
668 push @nav, ['#classes' => 'Classes'];
669 print STDOUT qq[
670 <div id="classes" class="section">
671 <h2>Classes</h2>
672
673 <dl>
674 ];
675 for my $class (sort {$a cmp $b} keys %$classes) {
676 print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
677 for (@{$classes->{$class}}) {
678 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
679 }
680 }
681 print STDOUT qq[</dl></div>];
682 } # print_class_section
683
684 sub print_result_section ($) {
685 my $result = shift;
686
687 print STDOUT qq[
688 <div id="result-summary" class="section">
689 <h2>Result</h2>];
690
691 if ($result->{unsupported} and $result->{conforming_max}) {
692 print STDOUT qq[<p class=uncertain id=result-para>The conformance
693 checker cannot decide whether the document is conforming or
694 not, since the document contains one or more unsupported
695 features. The document might or might not be conforming.</p>];
696 } elsif ($result->{conforming_min}) {
697 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
698 found in this document.</p>];
699 } elsif ($result->{conforming_max}) {
700 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
701 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
702 it might be conforming.</p>];
703 } else {
704 print STDOUT qq[<p class=FAIL id=result-para>This document is
705 <strong><em>non</em>-conforming</strong>.</p>];
706 }
707
708 print STDOUT qq[<table>
709 <colgroup><col><colgroup><col><col><col><colgroup><col>
710 <thead>
711 <tr><th scope=col></th>
712 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
713 Errors</a></th>
714 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
715 Errors</a></th>
716 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
717 <th scope=col>Score</th></tr></thead><tbody>];
718
719 my $must_error = 0;
720 my $should_error = 0;
721 my $warning = 0;
722 my $score_min = 0;
723 my $score_max = 0;
724 my $score_base = 20;
725 my $score_unit = $score_base / 100;
726 for (
727 [Transfer => 'transfer', ''],
728 [Character => 'char', ''],
729 [Syntax => 'syntax', '#parse-errors'],
730 [Structure => 'structure', '#document-errors'],
731 ) {
732 $must_error += ($result->{$_->[1]}->{must} += 0);
733 $should_error += ($result->{$_->[1]}->{should} += 0);
734 $warning += ($result->{$_->[1]}->{warning} += 0);
735 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
736 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
737
738 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
739 my $label = $_->[0];
740 if ($result->{$_->[1]}->{must} or
741 $result->{$_->[1]}->{should} or
742 $result->{$_->[1]}->{warning} or
743 $result->{$_->[1]}->{unsupported}) {
744 $label = qq[<a href="$_->[2]">$label</a>];
745 }
746
747 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
748 if ($uncertain) {
749 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
750 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
751 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
752 } else {
753 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
754 }
755 }
756
757 $score_max += $score_base;
758
759 print STDOUT qq[
760 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
761 </tbody>
762 <tfoot><tr class=uncertain><th scope=row>Total</th>
763 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
764 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
765 <td>$warning?</td>
766 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
767 </table>
768
769 <p><strong>Important</strong>: This conformance checking service
770 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
771 </div>];
772 push @nav, ['#result-summary' => 'Result'];
773 } # print_result_section
774
775 sub print_result_unknown_type_section ($$) {
776 my ($input, $result) = @_;
777
778 my $euri = htescape ($input->{uri});
779 print STDOUT qq[
780 <div id="parse-errors" class="section">
781 <h2>Errors</h2>
782
783 <dl>
784 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
785 <dd class=unsupported><strong><a href="../error-description#level-u">Not
786 supported</a></strong>:
787 Media type
788 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
789 is not supported.</dd>
790 </dl>
791 </div>
792 ];
793 push @nav, ['#parse-errors' => 'Errors'];
794 add_error (char => {level => 'unsupported'} => $result);
795 add_error (syntax => {level => 'unsupported'} => $result);
796 add_error (structure => {level => 'unsupported'} => $result);
797 } # print_result_unknown_type_section
798
799 sub print_result_input_error_section ($) {
800 my $input = shift;
801 print STDOUT qq[<div class="section" id="result-summary">
802 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
803 </div>];
804 push @nav, ['#result-summary' => 'Result'];
805 } # print_Result_input_error_section
806
807 sub get_error_label ($) {
808 my $err = shift;
809
810 my $r = '';
811
812 if (defined $err->{line}) {
813 if ($err->{column} > 0) {
814 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
815 } else {
816 $err->{line} = $err->{line} - 1 || 1;
817 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
818 }
819 }
820
821 if (defined $err->{node}) {
822 $r .= ' ' if length $r;
823 $r = get_node_link ($err->{node});
824 }
825
826 if (defined $err->{index}) {
827 $r .= ' ' if length $r;
828 $r .= 'Index ' . (0+$err->{index});
829 }
830
831 if (defined $err->{value}) {
832 $r .= ' ' if length $r;
833 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
834 }
835
836 return $r;
837 } # get_error_label
838
839 sub get_error_level_label ($) {
840 my $err = shift;
841
842 my $r = '';
843
844 if (not defined $err->{level} or $err->{level} eq 'm') {
845 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
846 error</a></strong>: ];
847 } elsif ($err->{level} eq 's') {
848 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
849 error</a></strong>: ];
850 } elsif ($err->{level} eq 'w') {
851 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
852 ];
853 } elsif ($err->{level} eq 'unsupported') {
854 $r = qq[<strong><a href="../error-description#level-u">Not
855 supported</a></strong>: ];
856 } else {
857 my $elevel = htescape ($err->{level});
858 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
859 ];
860 }
861
862 return $r;
863 } # get_error_level_label
864
865 sub get_node_path ($) {
866 my $node = shift;
867 my @r;
868 while (defined $node) {
869 my $rs;
870 if ($node->node_type == 1) {
871 $rs = $node->manakai_local_name;
872 $node = $node->parent_node;
873 } elsif ($node->node_type == 2) {
874 $rs = '@' . $node->manakai_local_name;
875 $node = $node->owner_element;
876 } elsif ($node->node_type == 3) {
877 $rs = '"' . $node->data . '"';
878 $node = $node->parent_node;
879 } elsif ($node->node_type == 9) {
880 @r = ('') unless @r;
881 $rs = '';
882 $node = $node->parent_node;
883 } else {
884 $rs = '#' . $node->node_type;
885 $node = $node->parent_node;
886 }
887 unshift @r, $rs;
888 }
889 return join '/', @r;
890 } # get_node_path
891
892 sub get_node_link ($) {
893 return qq[<a href="#node-@{[refaddr $_[0]]}">] .
894 htescape (get_node_path ($_[0])) . qq[</a>];
895 } # get_node_link
896
897 {
898 my $Msg = {};
899
900 sub load_text_catalog ($) {
901 my $lang = shift; # MUST be a canonical lang name
902 open my $file, '<:utf8', "cc-msg.$lang.txt"
903 or die "$0: cc-msg.$lang.txt: $!";
904 while (<$file>) {
905 if (s/^([^;]+);([^;]*);//) {
906 my ($type, $cls, $msg) = ($1, $2, $_);
907 $msg =~ tr/\x0D\x0A//d;
908 $Msg->{$type} = [$cls, $msg];
909 }
910 }
911 } # load_text_catalog
912
913 sub get_text ($) {
914 my ($type, $level, $node) = @_;
915 $type = $level . ':' . $type if defined $level;
916 my @arg;
917 {
918 if (defined $Msg->{$type}) {
919 my $msg = $Msg->{$type}->[1];
920 $msg =~ s{<var>\$([0-9]+)</var>}{
921 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
922 }ge;
923 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
924 UNIVERSAL::can ($node, 'get_attribute_ns')
925 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
926 }ge;
927 $msg =~ s{<var>{\@}</var>}{
928 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
929 }ge;
930 $msg =~ s{<var>{local-name}</var>}{
931 UNIVERSAL::can ($node, 'manakai_local_name')
932 ? htescape ($node->manakai_local_name) : ''
933 }ge;
934 $msg =~ s{<var>{element-local-name}</var>}{
935 (UNIVERSAL::can ($node, 'owner_element') and
936 $node->owner_element)
937 ? htescape ($node->owner_element->manakai_local_name)
938 : ''
939 }ge;
940 return ($type, $Msg->{$type}->[0], $msg);
941 } elsif ($type =~ s/:([^:]*)$//) {
942 unshift @arg, $1;
943 redo;
944 }
945 }
946 return ($type, '', htescape ($_[0]));
947 } # get_text
948
949 }
950
951 sub get_input_document ($$) {
952 my ($http, $dom) = @_;
953
954 my $request_uri = $http->get_parameter ('uri');
955 my $r = {};
956 if (defined $request_uri and length $request_uri) {
957 my $uri = $dom->create_uri_reference ($request_uri);
958 unless ({
959 http => 1,
960 }->{lc $uri->uri_scheme}) {
961 return {uri => $request_uri, request_uri => $request_uri,
962 error_status_text => 'URI scheme not allowed'};
963 }
964
965 require Message::Util::HostPermit;
966 my $host_permit = new Message::Util::HostPermit;
967 $host_permit->add_rule (<<EOH);
968 Allow host=suika port=80
969 Deny host=suika
970 Allow host=suika.fam.cx port=80
971 Deny host=suika.fam.cx
972 Deny host=localhost
973 Deny host=*.localdomain
974 Deny ipv4=0.0.0.0/8
975 Deny ipv4=10.0.0.0/8
976 Deny ipv4=127.0.0.0/8
977 Deny ipv4=169.254.0.0/16
978 Deny ipv4=172.0.0.0/11
979 Deny ipv4=192.0.2.0/24
980 Deny ipv4=192.88.99.0/24
981 Deny ipv4=192.168.0.0/16
982 Deny ipv4=198.18.0.0/15
983 Deny ipv4=224.0.0.0/4
984 Deny ipv4=255.255.255.255/32
985 Deny ipv6=0::0/0
986 Allow host=*
987 EOH
988 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
989 return {uri => $request_uri, request_uri => $request_uri,
990 error_status_text => 'Connection to the host is forbidden'};
991 }
992
993 require LWP::UserAgent;
994 my $ua = WDCC::LWPUA->new;
995 $ua->{wdcc_dom} = $dom;
996 $ua->{wdcc_host_permit} = $host_permit;
997 $ua->agent ('Mozilla'); ## TODO: for now.
998 $ua->parse_head (0);
999 $ua->protocols_allowed ([qw/http/]);
1000 $ua->max_size (1000_000);
1001 my $req = HTTP::Request->new (GET => $request_uri);
1002 my $res = $ua->request ($req);
1003 ## TODO: 401 sets |is_success| true.
1004 if ($res->is_success or $http->get_parameter ('error-page')) {
1005 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1006 $r->{uri} = $res->request->uri;
1007 $r->{request_uri} = $request_uri;
1008
1009 ## TODO: More strict parsing...
1010 my $ct = $res->header ('Content-Type');
1011 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1012 $r->{charset} = lc $1;
1013 $r->{charset} =~ tr/\\//d;
1014 $r->{official_charset} = $r->{charset};
1015 }
1016
1017 my $input_charset = $http->get_parameter ('charset');
1018 if (defined $input_charset and length $input_charset) {
1019 $r->{charset_overridden}
1020 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1021 $r->{charset} = $input_charset;
1022 }
1023
1024 ## TODO: Support for HTTP Content-Encoding
1025
1026 $r->{s} = ''.$res->content;
1027
1028 require Whatpm::ContentType;
1029 ($r->{official_type}, $r->{media_type})
1030 = Whatpm::ContentType->get_sniffed_type
1031 (get_file_head => sub {
1032 return substr $r->{s}, 0, shift;
1033 },
1034 http_content_type_byte => $ct,
1035 has_http_content_encoding =>
1036 defined $res->header ('Content-Encoding'),
1037 supported_image_types => {});
1038 } else {
1039 $r->{uri} = $res->request->uri;
1040 $r->{request_uri} = $request_uri;
1041 $r->{error_status_text} = $res->status_line;
1042 }
1043
1044 $r->{header_field} = [];
1045 $res->scan (sub {
1046 push @{$r->{header_field}}, [$_[0], $_[1]];
1047 });
1048 $r->{header_status_code} = $res->code;
1049 $r->{header_status_text} = $res->message;
1050 } else {
1051 $r->{s} = ''.$http->get_parameter ('s');
1052 $r->{uri} = q<thismessage:/>;
1053 $r->{request_uri} = q<thismessage:/>;
1054 $r->{base_uri} = q<thismessage:/>;
1055 $r->{charset} = ''.$http->get_parameter ('_charset_');
1056 $r->{charset} =~ s/\s+//g;
1057 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1058 $r->{official_charset} = $r->{charset};
1059 $r->{header_field} = [];
1060
1061 require Whatpm::ContentType;
1062 ($r->{official_type}, $r->{media_type})
1063 = Whatpm::ContentType->get_sniffed_type
1064 (get_file_head => sub {
1065 return substr $r->{s}, 0, shift;
1066 },
1067 http_content_type_byte => undef,
1068 has_http_content_encoding => 0,
1069 supported_image_types => {});
1070 }
1071
1072 my $input_format = $http->get_parameter ('i');
1073 if (defined $input_format and length $input_format) {
1074 $r->{media_type_overridden}
1075 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1076 $r->{media_type} = $input_format;
1077 }
1078 if (defined $r->{s} and not defined $r->{media_type}) {
1079 $r->{media_type} = 'text/html';
1080 $r->{media_type_overridden} = 1;
1081 }
1082
1083 if ($r->{media_type} eq 'text/xml') {
1084 unless (defined $r->{charset}) {
1085 $r->{charset} = 'us-ascii';
1086 $r->{official_charset} = $r->{charset};
1087 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1088 $r->{charset_overridden} = 0;
1089 }
1090 }
1091
1092 if (length $r->{s} > 1000_000) {
1093 $r->{error_status_text} = 'Entity-body too large';
1094 delete $r->{s};
1095 return $r;
1096 }
1097
1098 return $r;
1099 } # get_input_document
1100
1101 package WDCC::LWPUA;
1102 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1103
1104 sub redirect_ok {
1105 my $ua = shift;
1106 unless ($ua->SUPER::redirect_ok (@_)) {
1107 return 0;
1108 }
1109
1110 my $uris = $_[1]->header ('Location');
1111 return 0 unless $uris;
1112 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1113 unless ({
1114 http => 1,
1115 }->{lc $uri->uri_scheme}) {
1116 return 0;
1117 }
1118 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1119 return 0;
1120 }
1121 return 1;
1122 } # redirect_ok
1123
1124 =head1 AUTHOR
1125
1126 Wakaba <w@suika.fam.cx>.
1127
1128 =head1 LICENSE
1129
1130 Copyright 2007 Wakaba <w@suika.fam.cx>
1131
1132 This library is free software; you can redistribute it
1133 and/or modify it under the same terms as Perl itself.
1134
1135 =cut
1136
1137 ## $Date: 2007/11/18 05:30:03 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24