/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.24 - (show annotations) (download)
Sun Nov 11 06:57:16 2007 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.23: +34 -18 lines
++ ChangeLog	11 Nov 2007 06:57:02 -0000
	* cc-style.css: Rules for "unsupported" parse errors.

	* cc.cgi (print_syntax_error_html_section): Use HTML
	parser for byte string.
	(print_result_unknown_type_section): Make output
	more consistent with other media types.

2007-11-11  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 use Message::CGI::HTTP;
24 my $http = Message::CGI::HTTP->new;
25
26 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28 exit;
29 }
30
31 binmode STDOUT, ':utf8';
32 $| = 1;
33
34 require Message::DOM::DOMImplementation;
35 my $dom = Message::DOM::DOMImplementation->new;
36
37 load_text_catalog ('en'); ## TODO: conneg
38
39 my @nav;
40 print STDOUT qq[Content-Type: text/html; charset=utf-8
41
42 <!DOCTYPE html>
43 <html lang="en">
44 <head>
45 <title>Web Document Conformance Checker (BETA)</title>
46 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 </head>
48 <body>
49 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50 (<em>beta</em>)</h1>
51 ];
52
53 $| = 0;
54 my $input = get_input_document ($http, $dom);
55 my $inner_html_element = $http->get_parameter ('e');
56 my $char_length = 0;
57 my %time;
58
59 print qq[
60 <div id="document-info" class="section">
61 <dl>
62 <dt>Request URI</dt>
63 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
64 <dt>Document URI</dt>
65 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>
66 ]; # no </dl> yet
67 push @nav, ['#document-info' => 'Information'];
68
69 if (defined $input->{s}) {
70 $char_length = length $input->{s};
71
72 print STDOUT qq[
73 <dt>Base URI</dt>
74 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
75 <dt>Internet Media Type</dt>
76 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
77 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>
78 <dt>Character Encoding</dt>
79 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
80 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
81 <dt>Length</dt>
82 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
83 </dl>
84 </div>
85 ];
86
87 my $result = {conforming_min => 1, conforming_max => 1};
88 print_http_header_section ($input, $result);
89
90 my $doc;
91 my $el;
92 my $manifest;
93
94 if ($input->{media_type} eq 'text/html') {
95 ($doc, $el) = print_syntax_error_html_section ($input, $result);
96 print_source_string_section (\($input->{s}), $input->{charset});
97 } elsif ({
98 'text/xml' => 1,
99 'application/atom+xml' => 1,
100 'application/rss+xml' => 1,
101 'application/svg+xml' => 1,
102 'application/xhtml+xml' => 1,
103 'application/xml' => 1,
104 }->{$input->{media_type}}) {
105 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
106 print_source_string_section (\($input->{s}), $doc->input_encoding);
107 } elsif ($input->{media_type} eq 'text/cache-manifest') {
108 ## TODO: MUST be text/cache-manifest
109 $manifest = print_syntax_error_manifest_section ($input, $result);
110 print_source_string_section (\($input->{s}), 'utf-8');
111 } else {
112 ## TODO: Change HTTP status code??
113 print_result_unknown_type_section ($input, $result);
114 }
115
116 if (defined $doc or defined $el) {
117 print_structure_dump_dom_section ($doc, $el);
118 my $elements = print_structure_error_dom_section ($doc, $el, $result);
119 print_table_section ($elements->{table}) if @{$elements->{table}};
120 print_id_section ($elements->{id}) if keys %{$elements->{id}};
121 print_term_section ($elements->{term}) if keys %{$elements->{term}};
122 print_class_section ($elements->{class}) if keys %{$elements->{class}};
123 } elsif (defined $manifest) {
124 print_structure_dump_manifest_section ($manifest);
125 print_structure_error_manifest_section ($manifest, $result);
126 }
127
128 print_result_section ($result);
129 } else {
130 print STDOUT qq[</dl></div>];
131 print_result_input_error_section ($input);
132 }
133
134 print STDOUT qq[
135 <ul class="navigation" id="nav-items">
136 ];
137 for (@nav) {
138 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
139 }
140 print STDOUT qq[
141 </ul>
142 </body>
143 </html>
144 ];
145
146 for (qw/decode parse parse_html parse_xml parse_manifest
147 check check_manifest/) {
148 next unless defined $time{$_};
149 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
150 print $file $char_length, "\t", $time{$_}, "\n";
151 }
152
153 exit;
154
155 sub add_error ($$$) {
156 my ($layer, $err, $result) = @_;
157 if (defined $err->{level}) {
158 if ($err->{level} eq 's') {
159 $result->{$layer}->{should}++;
160 $result->{$layer}->{score_min} -= 2;
161 $result->{conforming_min} = 0;
162 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
163 $result->{$layer}->{warning}++;
164 } elsif ($err->{level} eq 'unsupported') {
165 $result->{$layer}->{unsupported}++;
166 $result->{unsupported} = 1;
167 } else {
168 $result->{$layer}->{must}++;
169 $result->{$layer}->{score_max} -= 2;
170 $result->{$layer}->{score_min} -= 2;
171 $result->{conforming_min} = 0;
172 $result->{conforming_max} = 0;
173 }
174 } else {
175 $result->{$layer}->{must}++;
176 $result->{$layer}->{score_max} -= 2;
177 $result->{$layer}->{score_min} -= 2;
178 $result->{conforming_min} = 0;
179 $result->{conforming_max} = 0;
180 }
181 } # add_error
182
183 sub print_http_header_section ($$) {
184 my ($input, $result) = @_;
185 return unless defined $input->{header_status_code} or
186 defined $input->{header_status_text} or
187 @{$input->{header_field}};
188
189 push @nav, ['#source-header' => 'HTTP Header'];
190 print STDOUT qq[<div id="source-header" class="section">
191 <h2>HTTP Header</h2>
192
193 <p><strong>Note</strong>: Due to the limitation of the
194 network library in use, the content of this section might
195 not be the real header.</p>
196
197 <table><tbody>
198 ];
199
200 if (defined $input->{header_status_code}) {
201 print STDOUT qq[<tr><th scope="row">Status code</th>];
202 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
203 }
204 if (defined $input->{header_status_text}) {
205 print STDOUT qq[<tr><th scope="row">Status text</th>];
206 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
207 }
208
209 for (@{$input->{header_field}}) {
210 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
211 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
212 }
213
214 print STDOUT qq[</tbody></table></div>];
215 } # print_http_header_section
216
217 sub print_syntax_error_html_section ($$) {
218 my ($input, $result) = @_;
219
220 require Encode;
221 require Whatpm::HTML;
222
223 print STDOUT qq[
224 <div id="parse-errors" class="section">
225 <h2>Parse Errors</h2>
226
227 <dl>];
228 push @nav, ['#parse-errors' => 'Parse Error'];
229
230 my $onerror = sub {
231 my (%opt) = @_;
232 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
233 if ($opt{column} > 0) {
234 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
235 } else {
236 $opt{line} = $opt{line} - 1 || 1;
237 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
238 }
239 $type =~ tr/ /-/;
240 $type =~ s/\|/%7C/g;
241 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
242 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
243 print STDOUT qq[$msg</dd>\n];
244
245 add_error ('syntax', \%opt => $result);
246 };
247
248 my $doc = $dom->create_document;
249 my $el;
250 if (defined $inner_html_element and length $inner_html_element) {
251 $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
252 my $time1 = time;
253 my $t = Encode::decode ($input->{charset}, $input->{s});
254 $time{decode} = time - $time1;
255
256 $el = $doc->create_element_ns
257 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
258 $time1 = time;
259 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
260 $time{parse} = time - $time1;
261 } else {
262 my $time1 = time;
263 Whatpm::HTML->parse_byte_string
264 ($input->{charset}, $input->{s} => $doc, $onerror);
265 $time{parse_html} = time - $time1;
266 }
267
268 print STDOUT qq[</dl></div>];
269
270 return ($doc, $el);
271 } # print_syntax_error_html_section
272
273 sub print_syntax_error_xml_section ($$) {
274 my ($input, $result) = @_;
275
276 require Message::DOM::XMLParserTemp;
277
278 print STDOUT qq[
279 <div id="parse-errors" class="section">
280 <h2>Parse Errors</h2>
281
282 <dl>];
283 push @nav, ['#parse-errors' => 'Parse Error'];
284
285 my $onerror = sub {
286 my $err = shift;
287 my $line = $err->location->line_number;
288 print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
289 print STDOUT $err->location->column_number, "</dt><dd>";
290 print STDOUT htescape $err->text, "</dd>\n";
291
292 add_error ('syntax', {type => $err->text,
293 level => [
294 $err->SEVERITY_FATAL_ERROR => 'm',
295 $err->SEVERITY_ERROR => 'm',
296 $err->SEVERITY_WARNING => 's',
297 ]->[$err->severity]} => $result);
298
299 return 1;
300 };
301
302 my $time1 = time;
303 open my $fh, '<', \($input->{s});
304 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
305 ($fh => $dom, $onerror, charset => $input->{charset});
306 $time{parse_xml} = time - $time1;
307
308 print STDOUT qq[</dl></div>];
309
310 return ($doc, undef);
311 } # print_syntax_error_xml_section
312
313 sub print_syntax_error_manifest_section ($$) {
314 my ($input, $result) = @_;
315
316 require Whatpm::CacheManifest;
317
318 print STDOUT qq[
319 <div id="parse-errors" class="section">
320 <h2>Parse Errors</h2>
321
322 <dl>];
323 push @nav, ['#parse-errors' => 'Parse Error'];
324
325 my $onerror = sub {
326 my (%opt) = @_;
327 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
328 print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
329 $type =~ tr/ /-/;
330 $type =~ s/\|/%7C/g;
331 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
332 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
333 print STDOUT qq[$msg</dd>\n];
334
335 add_error ('syntax', \%opt => $result);
336 };
337
338 my $time1 = time;
339 my $manifest = Whatpm::CacheManifest->parse_byte_string
340 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
341 $time{parse_manifest} = time - $time1;
342
343 print STDOUT qq[</dl></div>];
344
345 return $manifest;
346 } # print_syntax_error_manifest_section
347
348 sub print_source_string_section ($$) {
349 require Encode;
350 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
351 return unless $enc;
352
353 my $s = \($enc->decode (${$_[0]}));
354 my $i = 1;
355 push @nav, ['#source-string' => 'Source'];
356 print STDOUT qq[<div id="source-string" class="section">
357 <h2>Document Source</h2>
358 <ol lang="">\n];
359 if (length $$s) {
360 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
361 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
362 $i++;
363 }
364 if ($$s =~ /\G([^\x0A]+)/gc) {
365 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
366 }
367 } else {
368 print STDOUT q[<li id="line-1"></li>];
369 }
370 print STDOUT "</ol></div>";
371 } # print_input_string_section
372
373 sub print_document_tree ($) {
374 my $node = shift;
375 my $r = '<ol class="xoxo">';
376
377 my @node = ($node);
378 while (@node) {
379 my $child = shift @node;
380 unless (ref $child) {
381 $r .= $child;
382 next;
383 }
384
385 my $node_id = 'node-'.refaddr $child;
386 my $nt = $child->node_type;
387 if ($nt == $child->ELEMENT_NODE) {
388 my $child_nsuri = $child->namespace_uri;
389 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
390 '</code>'; ## ISSUE: case
391
392 if ($child->has_attributes) {
393 $r .= '<ul class="attributes">';
394 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
395 @{$child->attributes}) {
396 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
397 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
398 }
399 $r .= '</ul>';
400 }
401
402 if ($child->has_child_nodes) {
403 $r .= '<ol class="children">';
404 unshift @node, @{$child->child_nodes}, '</ol></li>';
405 } else {
406 $r .= '</li>';
407 }
408 } elsif ($nt == $child->TEXT_NODE) {
409 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
410 } elsif ($nt == $child->CDATA_SECTION_NODE) {
411 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
412 } elsif ($nt == $child->COMMENT_NODE) {
413 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
414 } elsif ($nt == $child->DOCUMENT_NODE) {
415 $r .= qq'<li id="$node_id" class="tree-document">Document';
416 $r .= qq[<ul class="attributes">];
417 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
418 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
419 unless ($child->manakai_is_html) {
420 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
421 if (defined $child->xml_encoding) {
422 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
423 } else {
424 $r .= qq[<li>XML encoding = (null)</li>];
425 }
426 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
427 }
428 $r .= qq[</ul>];
429 if ($child->has_child_nodes) {
430 $r .= '<ol class="children">';
431 unshift @node, @{$child->child_nodes}, '</ol></li>';
432 }
433 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
434 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
435 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
436 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
437 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
438 $r .= '</ul></li>';
439 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
440 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
441 } else {
442 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
443 }
444 }
445
446 $r .= '</ol>';
447 print STDOUT $r;
448 } # print_document_tree
449
450 sub print_structure_dump_dom_section ($$) {
451 my ($doc, $el) = @_;
452
453 print STDOUT qq[
454 <div id="document-tree" class="section">
455 <h2>Document Tree</h2>
456 ];
457 push @nav, ['#document-tree' => 'Tree'];
458
459 print_document_tree ($el || $doc);
460
461 print STDOUT qq[</div>];
462 } # print_structure_dump_dom_section
463
464 sub print_structure_dump_manifest_section ($) {
465 my $manifest = shift;
466
467 print STDOUT qq[
468 <div id="dump-manifest" class="section">
469 <h2>Cache Manifest</h2>
470 ];
471 push @nav, ['#dump-manifest' => 'Caceh Manifest'];
472
473 print STDOUT qq[<dl><dt>Explicit entries</dt>];
474 for my $uri (@{$manifest->[0]}) {
475 my $euri = htescape ($uri);
476 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
477 }
478
479 print STDOUT qq[<dt>Fallback entries</dt><dd>
480 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
481 <th scope=row>Fallback Entry</tr><tbody>];
482 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
483 my $euri = htescape ($uri);
484 my $euri2 = htescape ($manifest->[1]->{$uri});
485 print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
486 <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
487 }
488
489 print STDOUT qq[</table><dt>Online whitelist</dt>];
490 for my $uri (@{$manifest->[2]}) {
491 my $euri = htescape ($uri);
492 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
493 }
494
495 print STDOUT qq[</dl></div>];
496 } # print_structure_dump_manifest_section
497
498 sub print_structure_error_dom_section ($$$) {
499 my ($doc, $el, $result) = @_;
500
501 print STDOUT qq[<div id="document-errors" class="section">
502 <h2>Document Errors</h2>
503
504 <dl>];
505 push @nav, ['#document-errors' => 'Document Error'];
506
507 require Whatpm::ContentChecker;
508 my $onerror = sub {
509 my %opt = @_;
510 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
511 $type =~ tr/ /-/;
512 $type =~ s/\|/%7C/g;
513 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
514 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
515 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
516 print STDOUT $msg, "</dd>\n";
517 add_error ('structure', \%opt => $result);
518 };
519
520 my $elements;
521 my $time1 = time;
522 if ($el) {
523 $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
524 } else {
525 $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
526 }
527 $time{check} = time - $time1;
528
529 print STDOUT qq[</dl></div>];
530
531 return $elements;
532 } # print_structure_error_dom_section
533
534 sub print_structure_error_manifest_section ($$$) {
535 my ($manifest, $result) = @_;
536
537 print STDOUT qq[<div id="document-errors" class="section">
538 <h2>Document Errors</h2>
539
540 <dl>];
541 push @nav, ['#document-errors' => 'Document Error'];
542
543 require Whatpm::CacheManifest;
544 Whatpm::CacheManifest->check_manifest ($manifest, sub {
545 my %opt = @_;
546 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
547 $type =~ tr/ /-/;
548 $type =~ s/\|/%7C/g;
549 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
550 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
551 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
552 add_error ('structure', \%opt => $result);
553 });
554
555 print STDOUT qq[</div>];
556 } # print_structure_error_manifest_section
557
558 sub print_table_section ($) {
559 my $tables = shift;
560
561 push @nav, ['#tables' => 'Tables'];
562 print STDOUT qq[
563 <div id="tables" class="section">
564 <h2>Tables</h2>
565
566 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
567 <script src="../table-script.js" type="text/javascript"></script>
568 <noscript>
569 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
570 </noscript>
571 ];
572
573 require JSON;
574
575 my $i = 0;
576 for my $table_el (@$tables) {
577 $i++;
578 print STDOUT qq[<div class="section" id="table-$i"><h3>] .
579 get_node_link ($table_el) . q[</h3>];
580
581 ## TODO: Make |ContentChecker| return |form_table| result
582 ## so that this script don't have to run the algorithm twice.
583 my $table = Whatpm::HTMLTable->form_table ($table_el);
584
585 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
586 next unless $_;
587 delete $_->{element};
588 }
589
590 for (@{$table->{row_group}}) {
591 next unless $_;
592 next unless $_->{element};
593 $_->{type} = $_->{element}->manakai_local_name;
594 delete $_->{element};
595 }
596
597 for (@{$table->{cell}}) {
598 next unless $_;
599 for (@{$_}) {
600 next unless $_;
601 for (@$_) {
602 $_->{id} = refaddr $_->{element} if defined $_->{element};
603 delete $_->{element};
604 $_->{is_header} = $_->{is_header} ? 1 : 0;
605 }
606 }
607 }
608
609 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
610 print STDOUT JSON::objToJson ($table);
611 print STDOUT qq[, document.getElementById ('table-$i'));</script>];
612 }
613
614 print STDOUT qq[</div>];
615 } # print_table_section
616
617 sub print_id_section ($) {
618 my $ids = shift;
619
620 push @nav, ['#identifiers' => 'IDs'];
621 print STDOUT qq[
622 <div id="identifiers" class="section">
623 <h2>Identifiers</h2>
624
625 <dl>
626 ];
627 for my $id (sort {$a cmp $b} keys %$ids) {
628 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
629 for (@{$ids->{$id}}) {
630 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
631 }
632 }
633 print STDOUT qq[</dl></div>];
634 } # print_id_section
635
636 sub print_term_section ($) {
637 my $terms = shift;
638
639 push @nav, ['#terms' => 'Terms'];
640 print STDOUT qq[
641 <div id="terms" class="section">
642 <h2>Terms</h2>
643
644 <dl>
645 ];
646 for my $term (sort {$a cmp $b} keys %$terms) {
647 print STDOUT qq[<dt>@{[htescape $term]}</dt>];
648 for (@{$terms->{$term}}) {
649 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
650 }
651 }
652 print STDOUT qq[</dl></div>];
653 } # print_term_section
654
655 sub print_class_section ($) {
656 my $classes = shift;
657
658 push @nav, ['#classes' => 'Classes'];
659 print STDOUT qq[
660 <div id="classes" class="section">
661 <h2>Classes</h2>
662
663 <dl>
664 ];
665 for my $class (sort {$a cmp $b} keys %$classes) {
666 print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
667 for (@{$classes->{$class}}) {
668 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
669 }
670 }
671 print STDOUT qq[</dl></div>];
672 } # print_class_section
673
674 sub print_result_section ($) {
675 my $result = shift;
676
677 print STDOUT qq[
678 <div id="result-summary" class="section">
679 <h2>Result</h2>];
680
681 if ($result->{unsupported} and $result->{conforming_max}) {
682 print STDOUT qq[<p class=uncertain id=result-para>The conformance
683 checker cannot decide whether the document is conforming or
684 not, since the document contains one or more unsupported
685 features. The document might or might not be conforming.</p>];
686 } elsif ($result->{conforming_min}) {
687 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
688 found in this document.</p>];
689 } elsif ($result->{conforming_max}) {
690 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
691 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
692 it might be conforming.</p>];
693 } else {
694 print STDOUT qq[<p class=FAIL id=result-para>This document is
695 <strong><em>non</em>-conforming</strong>.</p>];
696 }
697
698 print STDOUT qq[<table>
699 <colgroup><col><colgroup><col><col><col><colgroup><col>
700 <thead>
701 <tr><th scope=col></th>
702 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
703 Errors</a></th>
704 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
705 Errors</a></th>
706 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
707 <th scope=col>Score</th></tr></thead><tbody>];
708
709 my $must_error = 0;
710 my $should_error = 0;
711 my $warning = 0;
712 my $score_min = 0;
713 my $score_max = 0;
714 my $score_base = 20;
715 my $score_unit = $score_base / 100;
716 for (
717 [Transfer => 'transfer', ''],
718 [Character => 'char', ''],
719 [Syntax => 'syntax', '#parse-errors'],
720 [Structure => 'structure', '#document-errors'],
721 ) {
722 $must_error += ($result->{$_->[1]}->{must} += 0);
723 $should_error += ($result->{$_->[1]}->{should} += 0);
724 $warning += ($result->{$_->[1]}->{warning} += 0);
725 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
726 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
727
728 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
729 my $label = $_->[0];
730 if ($result->{$_->[1]}->{must} or
731 $result->{$_->[1]}->{should} or
732 $result->{$_->[1]}->{warning} or
733 $result->{$_->[1]}->{unsupported}) {
734 $label = qq[<a href="$_->[2]">$label</a>];
735 }
736
737 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
738 if ($uncertain) {
739 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
740 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
741 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
742 } else {
743 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
744 }
745 }
746
747 $score_max += $score_base;
748
749 print STDOUT qq[
750 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
751 </tbody>
752 <tfoot><tr class=uncertain><th scope=row>Total</th>
753 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
754 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
755 <td>$warning?</td>
756 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
757 </table>
758
759 <p><strong>Important</strong>: This conformance checking service
760 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
761 </div>];
762 push @nav, ['#result-summary' => 'Result'];
763 } # print_result_section
764
765 sub print_result_unknown_type_section ($$) {
766 my ($input, $result) = @_;
767
768 my $euri = htescape ($input->{uri});
769 print STDOUT qq[
770 <div id="parse-errors" class="section">
771 <h2>Errors</h2>
772
773 <dl>
774 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
775 <dd class=unsupported><strong><a href="../error-description#level-u">Not
776 supported</a></strong>:
777 Media type
778 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
779 is not supported.</dd>
780 </dl>
781 </div>
782 ];
783 push @nav, ['#parse-errors' => 'Errors'];
784 add_error (char => {level => 'unsupported'} => $result);
785 add_error (syntax => {level => 'unsupported'} => $result);
786 add_error (structure => {level => 'unsupported'} => $result);
787 } # print_result_unknown_type_section
788
789 sub print_result_input_error_section ($) {
790 my $input = shift;
791 print STDOUT qq[<div class="section" id="result-summary">
792 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
793 </div>];
794 push @nav, ['#result-summary' => 'Result'];
795 } # print_Result_input_error_section
796
797 sub get_error_label ($) {
798 my $err = shift;
799
800 my $r = '';
801
802 if (defined $err->{line}) {
803 if ($err->{column} > 0) {
804 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
805 } else {
806 $err->{line} = $err->{line} - 1 || 1;
807 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
808 }
809 }
810
811 if (defined $err->{node}) {
812 $r .= ' ' if length $r;
813 $r = get_node_link ($err->{node});
814 }
815
816 if (defined $err->{index}) {
817 $r .= ' ' if length $r;
818 $r .= 'Index ' . (0+$err->{index});
819 }
820
821 if (defined $err->{value}) {
822 $r .= ' ' if length $r;
823 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
824 }
825
826 return $r;
827 } # get_error_label
828
829 sub get_error_level_label ($) {
830 my $err = shift;
831
832 my $r = '';
833
834 if (not defined $err->{level} or $err->{level} eq 'm') {
835 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
836 error</a></strong>: ];
837 } elsif ($err->{level} eq 's') {
838 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
839 error</a></strong>: ];
840 } elsif ($err->{level} eq 'w') {
841 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
842 ];
843 } elsif ($err->{level} eq 'unsupported') {
844 $r = qq[<strong><a href="../error-description#level-u">Not
845 supported</a></strong>: ];
846 } else {
847 my $elevel = htescape ($err->{level});
848 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
849 ];
850 }
851
852 return $r;
853 } # get_error_level_label
854
855 sub get_node_path ($) {
856 my $node = shift;
857 my @r;
858 while (defined $node) {
859 my $rs;
860 if ($node->node_type == 1) {
861 $rs = $node->manakai_local_name;
862 $node = $node->parent_node;
863 } elsif ($node->node_type == 2) {
864 $rs = '@' . $node->manakai_local_name;
865 $node = $node->owner_element;
866 } elsif ($node->node_type == 3) {
867 $rs = '"' . $node->data . '"';
868 $node = $node->parent_node;
869 } elsif ($node->node_type == 9) {
870 @r = ('') unless @r;
871 $rs = '';
872 $node = $node->parent_node;
873 } else {
874 $rs = '#' . $node->node_type;
875 $node = $node->parent_node;
876 }
877 unshift @r, $rs;
878 }
879 return join '/', @r;
880 } # get_node_path
881
882 sub get_node_link ($) {
883 return qq[<a href="#node-@{[refaddr $_[0]]}">] .
884 htescape (get_node_path ($_[0])) . qq[</a>];
885 } # get_node_link
886
887 {
888 my $Msg = {};
889
890 sub load_text_catalog ($) {
891 my $lang = shift; # MUST be a canonical lang name
892 open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
893 while (<$file>) {
894 if (s/^([^;]+);([^;]*);//) {
895 my ($type, $cls, $msg) = ($1, $2, $_);
896 $msg =~ tr/\x0D\x0A//d;
897 $Msg->{$type} = [$cls, $msg];
898 }
899 }
900 } # load_text_catalog
901
902 sub get_text ($) {
903 my ($type, $level, $node) = @_;
904 $type = $level . ':' . $type if defined $level;
905 my @arg;
906 {
907 if (defined $Msg->{$type}) {
908 my $msg = $Msg->{$type}->[1];
909 $msg =~ s{<var>\$([0-9]+)</var>}{
910 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
911 }ge;
912 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
913 UNIVERSAL::can ($node, 'get_attribute_ns')
914 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
915 }ge;
916 $msg =~ s{<var>{\@}</var>}{
917 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
918 }ge;
919 $msg =~ s{<var>{local-name}</var>}{
920 UNIVERSAL::can ($node, 'manakai_local_name')
921 ? htescape ($node->manakai_local_name) : ''
922 }ge;
923 $msg =~ s{<var>{element-local-name}</var>}{
924 (UNIVERSAL::can ($node, 'owner_element') and
925 $node->owner_element)
926 ? htescape ($node->owner_element->manakai_local_name)
927 : ''
928 }ge;
929 return ($type, $Msg->{$type}->[0], $msg);
930 } elsif ($type =~ s/:([^:]*)$//) {
931 unshift @arg, $1;
932 redo;
933 }
934 }
935 return ($type, '', htescape ($_[0]));
936 } # get_text
937
938 }
939
940 sub get_input_document ($$) {
941 my ($http, $dom) = @_;
942
943 my $request_uri = $http->get_parameter ('uri');
944 my $r = {};
945 if (defined $request_uri and length $request_uri) {
946 my $uri = $dom->create_uri_reference ($request_uri);
947 unless ({
948 http => 1,
949 }->{lc $uri->uri_scheme}) {
950 return {uri => $request_uri, request_uri => $request_uri,
951 error_status_text => 'URI scheme not allowed'};
952 }
953
954 require Message::Util::HostPermit;
955 my $host_permit = new Message::Util::HostPermit;
956 $host_permit->add_rule (<<EOH);
957 Allow host=suika port=80
958 Deny host=suika
959 Allow host=suika.fam.cx port=80
960 Deny host=suika.fam.cx
961 Deny host=localhost
962 Deny host=*.localdomain
963 Deny ipv4=0.0.0.0/8
964 Deny ipv4=10.0.0.0/8
965 Deny ipv4=127.0.0.0/8
966 Deny ipv4=169.254.0.0/16
967 Deny ipv4=172.0.0.0/11
968 Deny ipv4=192.0.2.0/24
969 Deny ipv4=192.88.99.0/24
970 Deny ipv4=192.168.0.0/16
971 Deny ipv4=198.18.0.0/15
972 Deny ipv4=224.0.0.0/4
973 Deny ipv4=255.255.255.255/32
974 Deny ipv6=0::0/0
975 Allow host=*
976 EOH
977 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
978 return {uri => $request_uri, request_uri => $request_uri,
979 error_status_text => 'Connection to the host is forbidden'};
980 }
981
982 require LWP::UserAgent;
983 my $ua = WDCC::LWPUA->new;
984 $ua->{wdcc_dom} = $dom;
985 $ua->{wdcc_host_permit} = $host_permit;
986 $ua->agent ('Mozilla'); ## TODO: for now.
987 $ua->parse_head (0);
988 $ua->protocols_allowed ([qw/http/]);
989 $ua->max_size (1000_000);
990 my $req = HTTP::Request->new (GET => $request_uri);
991 my $res = $ua->request ($req);
992 ## TODO: 401 sets |is_success| true.
993 if ($res->is_success or $http->get_parameter ('error-page')) {
994 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
995 $r->{uri} = $res->request->uri;
996 $r->{request_uri} = $request_uri;
997
998 ## TODO: More strict parsing...
999 my $ct = $res->header ('Content-Type');
1000 if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {
1001 $r->{media_type} = lc $1;
1002 }
1003 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1004 $r->{charset} = lc $1;
1005 $r->{charset} =~ tr/\\//d;
1006 }
1007
1008 my $input_charset = $http->get_parameter ('charset');
1009 if (defined $input_charset and length $input_charset) {
1010 $r->{charset_overridden}
1011 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1012 $r->{charset} = $input_charset;
1013 }
1014
1015 $r->{s} = ''.$res->content;
1016 } else {
1017 $r->{uri} = $res->request->uri;
1018 $r->{request_uri} = $request_uri;
1019 $r->{error_status_text} = $res->status_line;
1020 }
1021
1022 $r->{header_field} = [];
1023 $res->scan (sub {
1024 push @{$r->{header_field}}, [$_[0], $_[1]];
1025 });
1026 $r->{header_status_code} = $res->code;
1027 $r->{header_status_text} = $res->message;
1028 } else {
1029 $r->{s} = ''.$http->get_parameter ('s');
1030 $r->{uri} = q<thismessage:/>;
1031 $r->{request_uri} = q<thismessage:/>;
1032 $r->{base_uri} = q<thismessage:/>;
1033 $r->{charset} = ''.$http->get_parameter ('_charset_');
1034 $r->{charset} =~ s/\s+//g;
1035 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1036 $r->{header_field} = [];
1037 }
1038
1039 my $input_format = $http->get_parameter ('i');
1040 if (defined $input_format and length $input_format) {
1041 $r->{media_type_overridden}
1042 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1043 $r->{media_type} = $input_format;
1044 }
1045 if (defined $r->{s} and not defined $r->{media_type}) {
1046 $r->{media_type} = 'text/html';
1047 $r->{media_type_overridden} = 1;
1048 }
1049
1050 if ($r->{media_type} eq 'text/xml') {
1051 unless (defined $r->{charset}) {
1052 $r->{charset} = 'us-ascii';
1053 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1054 $r->{charset_overridden} = 0;
1055 }
1056 }
1057
1058 if (length $r->{s} > 1000_000) {
1059 $r->{error_status_text} = 'Entity-body too large';
1060 delete $r->{s};
1061 return $r;
1062 }
1063
1064 return $r;
1065 } # get_input_document
1066
1067 package WDCC::LWPUA;
1068 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1069
1070 sub redirect_ok {
1071 my $ua = shift;
1072 unless ($ua->SUPER::redirect_ok (@_)) {
1073 return 0;
1074 }
1075
1076 my $uris = $_[1]->header ('Location');
1077 return 0 unless $uris;
1078 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1079 unless ({
1080 http => 1,
1081 }->{lc $uri->uri_scheme}) {
1082 return 0;
1083 }
1084 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1085 return 0;
1086 }
1087 return 1;
1088 } # redirect_ok
1089
1090 =head1 AUTHOR
1091
1092 Wakaba <w@suika.fam.cx>.
1093
1094 =head1 LICENSE
1095
1096 Copyright 2007 Wakaba <w@suika.fam.cx>
1097
1098 This library is free software; you can redistribute it
1099 and/or modify it under the same terms as Perl itself.
1100
1101 =cut
1102
1103 ## $Date: 2007/11/05 09:33:52 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24