/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.23 - (show annotations) (download)
Mon Nov 5 09:33:52 2007 UTC (17 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.22: +42 -9 lines
++ ChangeLog	5 Nov 2007 09:33:38 -0000
2007-11-05  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi (get_error_level_label): New.

	* cc-style.css: New rules for error level descriptions.

	* error-description-source.xml: Description for error
	levels is added.

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 use Message::CGI::HTTP;
24 my $http = Message::CGI::HTTP->new;
25
26 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28 exit;
29 }
30
31 binmode STDOUT, ':utf8';
32 $| = 1;
33
34 require Message::DOM::DOMImplementation;
35 my $dom = Message::DOM::DOMImplementation->new;
36
37 load_text_catalog ('en'); ## TODO: conneg
38
39 my @nav;
40 print STDOUT qq[Content-Type: text/html; charset=utf-8
41
42 <!DOCTYPE html>
43 <html lang="en">
44 <head>
45 <title>Web Document Conformance Checker (BETA)</title>
46 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 </head>
48 <body>
49 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50 (<em>beta</em>)</h1>
51 ];
52
53 $| = 0;
54 my $input = get_input_document ($http, $dom);
55 my $inner_html_element = $http->get_parameter ('e');
56 my $char_length = 0;
57 my %time;
58
59 print qq[
60 <div id="document-info" class="section">
61 <dl>
62 <dt>Request URI</dt>
63 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
64 <dt>Document URI</dt>
65 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>
66 ]; # no </dl> yet
67 push @nav, ['#document-info' => 'Information'];
68
69 if (defined $input->{s}) {
70 $char_length = length $input->{s};
71
72 print STDOUT qq[
73 <dt>Base URI</dt>
74 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
75 <dt>Internet Media Type</dt>
76 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
77 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>
78 <dt>Character Encoding</dt>
79 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
80 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
81 <dt>Length</dt>
82 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
83 </dl>
84 </div>
85 ];
86
87 my $result = {conforming_min => 1, conforming_max => 1};
88 print_http_header_section ($input, $result);
89
90 my $doc;
91 my $el;
92 my $manifest;
93
94 if ($input->{media_type} eq 'text/html') {
95 ($doc, $el) = print_syntax_error_html_section ($input, $result);
96 print_source_string_section (\($input->{s}), $input->{charset});
97 } elsif ({
98 'text/xml' => 1,
99 'application/atom+xml' => 1,
100 'application/rss+xml' => 1,
101 'application/svg+xml' => 1,
102 'application/xhtml+xml' => 1,
103 'application/xml' => 1,
104 }->{$input->{media_type}}) {
105 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
106 print_source_string_section (\($input->{s}), $doc->input_encoding);
107 } elsif ($input->{media_type} eq 'text/cache-manifest') {
108 ## TODO: MUST be text/cache-manifest
109 $manifest = print_syntax_error_manifest_section ($input, $result);
110 print_source_string_section (\($input->{s}), 'utf-8');
111 } else {
112 ## TODO: Change HTTP status code??
113 print_result_unknown_type_section ($input);
114 }
115
116 if (defined $doc or defined $el) {
117 print_structure_dump_dom_section ($doc, $el);
118 my $elements = print_structure_error_dom_section ($doc, $el, $result);
119 print_table_section ($elements->{table}) if @{$elements->{table}};
120 print_id_section ($elements->{id}) if keys %{$elements->{id}};
121 print_term_section ($elements->{term}) if keys %{$elements->{term}};
122 print_class_section ($elements->{class}) if keys %{$elements->{class}};
123 } elsif (defined $manifest) {
124 print_structure_dump_manifest_section ($manifest);
125 print_structure_error_manifest_section ($manifest, $result);
126 }
127
128 print_result_section ($result);
129 } else {
130 print STDOUT qq[</dl></div>];
131 print_result_input_error_section ($input);
132 }
133
134 print STDOUT qq[
135 <ul class="navigation" id="nav-items">
136 ];
137 for (@nav) {
138 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
139 }
140 print STDOUT qq[
141 </ul>
142 </body>
143 </html>
144 ];
145
146 for (qw/decode parse parse_xml parse_manifest check check_manifest/) {
147 next unless defined $time{$_};
148 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
149 print $file $char_length, "\t", $time{$_}, "\n";
150 }
151
152 exit;
153
154 sub add_error ($$$) {
155 my ($layer, $err, $result) = @_;
156 if (defined $err->{level}) {
157 if ($err->{level} eq 's') {
158 $result->{$layer}->{should}++;
159 $result->{$layer}->{score_min} -= 2;
160 $result->{conforming_min} = 0;
161 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
162 $result->{$layer}->{warning}++;
163 } elsif ($err->{level} eq 'unsupported') {
164 $result->{$layer}->{unsupported}++;
165 $result->{unsupported} = 1;
166 } else {
167 $result->{$layer}->{must}++;
168 $result->{$layer}->{score_max} -= 2;
169 $result->{$layer}->{score_min} -= 2;
170 $result->{conforming_min} = 0;
171 $result->{conforming_max} = 0;
172 }
173 } else {
174 $result->{$layer}->{must}++;
175 $result->{$layer}->{score_max} -= 2;
176 $result->{$layer}->{score_min} -= 2;
177 $result->{conforming_min} = 0;
178 $result->{conforming_max} = 0;
179 }
180 } # add_error
181
182 sub print_http_header_section ($$) {
183 my ($input, $result) = @_;
184 return unless defined $input->{header_status_code} or
185 defined $input->{header_status_text} or
186 @{$input->{header_field}};
187
188 push @nav, ['#source-header' => 'HTTP Header'];
189 print STDOUT qq[<div id="source-header" class="section">
190 <h2>HTTP Header</h2>
191
192 <p><strong>Note</strong>: Due to the limitation of the
193 network library in use, the content of this section might
194 not be the real header.</p>
195
196 <table><tbody>
197 ];
198
199 if (defined $input->{header_status_code}) {
200 print STDOUT qq[<tr><th scope="row">Status code</th>];
201 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
202 }
203 if (defined $input->{header_status_text}) {
204 print STDOUT qq[<tr><th scope="row">Status text</th>];
205 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
206 }
207
208 for (@{$input->{header_field}}) {
209 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
210 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
211 }
212
213 print STDOUT qq[</tbody></table></div>];
214 } # print_http_header_section
215
216 sub print_syntax_error_html_section ($$) {
217 my ($input, $result) = @_;
218
219 require Encode;
220 require Whatpm::HTML;
221
222 $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
223
224 my $time1 = time;
225 my $t = Encode::decode ($input->{charset}, $input->{s});
226 $time{decode} = time - $time1;
227
228 print STDOUT qq[
229 <div id="parse-errors" class="section">
230 <h2>Parse Errors</h2>
231
232 <dl>];
233 push @nav, ['#parse-errors' => 'Parse Error'];
234
235 my $onerror = sub {
236 my (%opt) = @_;
237 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
238 if ($opt{column} > 0) {
239 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
240 } else {
241 $opt{line} = $opt{line} - 1 || 1;
242 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
243 }
244 $type =~ tr/ /-/;
245 $type =~ s/\|/%7C/g;
246 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
247 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
248 print STDOUT qq[$msg</dd>\n];
249
250 add_error ('syntax', \%opt => $result);
251 };
252
253 my $doc = $dom->create_document;
254 my $el;
255 $time1 = time;
256 if (defined $inner_html_element and length $inner_html_element) {
257 $el = $doc->create_element_ns
258 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
259 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
260 } else {
261 Whatpm::HTML->parse_string ($t => $doc, $onerror);
262 }
263 $time{parse} = time - $time1;
264
265 print STDOUT qq[</dl></div>];
266
267 return ($doc, $el);
268 } # print_syntax_error_html_section
269
270 sub print_syntax_error_xml_section ($$) {
271 my ($input, $result) = @_;
272
273 require Message::DOM::XMLParserTemp;
274
275 print STDOUT qq[
276 <div id="parse-errors" class="section">
277 <h2>Parse Errors</h2>
278
279 <dl>];
280 push @nav, ['#parse-errors' => 'Parse Error'];
281
282 my $onerror = sub {
283 my $err = shift;
284 my $line = $err->location->line_number;
285 print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
286 print STDOUT $err->location->column_number, "</dt><dd>";
287 print STDOUT htescape $err->text, "</dd>\n";
288
289 add_error ('syntax', {type => $err->text,
290 level => [
291 $err->SEVERITY_FATAL_ERROR => 'm',
292 $err->SEVERITY_ERROR => 'm',
293 $err->SEVERITY_WARNING => 's',
294 ]->[$err->severity]} => $result);
295
296 return 1;
297 };
298
299 my $time1 = time;
300 open my $fh, '<', \($input->{s});
301 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
302 ($fh => $dom, $onerror, charset => $input->{charset});
303 $time{parse_xml} = time - $time1;
304
305 print STDOUT qq[</dl></div>];
306
307 return ($doc, undef);
308 } # print_syntax_error_xml_section
309
310 sub print_syntax_error_manifest_section ($$) {
311 my ($input, $result) = @_;
312
313 require Whatpm::CacheManifest;
314
315 print STDOUT qq[
316 <div id="parse-errors" class="section">
317 <h2>Parse Errors</h2>
318
319 <dl>];
320 push @nav, ['#parse-errors' => 'Parse Error'];
321
322 my $onerror = sub {
323 my (%opt) = @_;
324 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
325 print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
326 $type =~ tr/ /-/;
327 $type =~ s/\|/%7C/g;
328 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
329 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
330 print STDOUT qq[$msg</dd>\n];
331
332 add_error ('syntax', \%opt => $result);
333 };
334
335 my $time1 = time;
336 my $manifest = Whatpm::CacheManifest->parse_byte_string
337 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
338 $time{parse_manifest} = time - $time1;
339
340 print STDOUT qq[</dl></div>];
341
342 return $manifest;
343 } # print_syntax_error_manifest_section
344
345 sub print_source_string_section ($$) {
346 require Encode;
347 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
348 return unless $enc;
349
350 my $s = \($enc->decode (${$_[0]}));
351 my $i = 1;
352 push @nav, ['#source-string' => 'Source'];
353 print STDOUT qq[<div id="source-string" class="section">
354 <h2>Document Source</h2>
355 <ol lang="">\n];
356 if (length $$s) {
357 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
358 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
359 $i++;
360 }
361 if ($$s =~ /\G([^\x0A]+)/gc) {
362 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
363 }
364 } else {
365 print STDOUT q[<li id="line-1"></li>];
366 }
367 print STDOUT "</ol></div>";
368 } # print_input_string_section
369
370 sub print_document_tree ($) {
371 my $node = shift;
372 my $r = '<ol class="xoxo">';
373
374 my @node = ($node);
375 while (@node) {
376 my $child = shift @node;
377 unless (ref $child) {
378 $r .= $child;
379 next;
380 }
381
382 my $node_id = 'node-'.refaddr $child;
383 my $nt = $child->node_type;
384 if ($nt == $child->ELEMENT_NODE) {
385 my $child_nsuri = $child->namespace_uri;
386 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
387 '</code>'; ## ISSUE: case
388
389 if ($child->has_attributes) {
390 $r .= '<ul class="attributes">';
391 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
392 @{$child->attributes}) {
393 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
394 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
395 }
396 $r .= '</ul>';
397 }
398
399 if ($child->has_child_nodes) {
400 $r .= '<ol class="children">';
401 unshift @node, @{$child->child_nodes}, '</ol></li>';
402 } else {
403 $r .= '</li>';
404 }
405 } elsif ($nt == $child->TEXT_NODE) {
406 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
407 } elsif ($nt == $child->CDATA_SECTION_NODE) {
408 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
409 } elsif ($nt == $child->COMMENT_NODE) {
410 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
411 } elsif ($nt == $child->DOCUMENT_NODE) {
412 $r .= qq'<li id="$node_id" class="tree-document">Document';
413 $r .= qq[<ul class="attributes">];
414 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
415 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
416 unless ($child->manakai_is_html) {
417 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
418 if (defined $child->xml_encoding) {
419 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
420 } else {
421 $r .= qq[<li>XML encoding = (null)</li>];
422 }
423 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
424 }
425 $r .= qq[</ul>];
426 if ($child->has_child_nodes) {
427 $r .= '<ol class="children">';
428 unshift @node, @{$child->child_nodes}, '</ol></li>';
429 }
430 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
431 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
432 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
433 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
434 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
435 $r .= '</ul></li>';
436 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
437 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
438 } else {
439 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
440 }
441 }
442
443 $r .= '</ol>';
444 print STDOUT $r;
445 } # print_document_tree
446
447 sub print_structure_dump_dom_section ($$) {
448 my ($doc, $el) = @_;
449
450 print STDOUT qq[
451 <div id="document-tree" class="section">
452 <h2>Document Tree</h2>
453 ];
454 push @nav, ['#document-tree' => 'Tree'];
455
456 print_document_tree ($el || $doc);
457
458 print STDOUT qq[</div>];
459 } # print_structure_dump_dom_section
460
461 sub print_structure_dump_manifest_section ($) {
462 my $manifest = shift;
463
464 print STDOUT qq[
465 <div id="dump-manifest" class="section">
466 <h2>Cache Manifest</h2>
467 ];
468 push @nav, ['#dump-manifest' => 'Caceh Manifest'];
469
470 print STDOUT qq[<dl><dt>Explicit entries</dt>];
471 for my $uri (@{$manifest->[0]}) {
472 my $euri = htescape ($uri);
473 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
474 }
475
476 print STDOUT qq[<dt>Fallback entries</dt><dd>
477 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
478 <th scope=row>Fallback Entry</tr><tbody>];
479 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
480 my $euri = htescape ($uri);
481 my $euri2 = htescape ($manifest->[1]->{$uri});
482 print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
483 <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
484 }
485
486 print STDOUT qq[</table><dt>Online whitelist</dt>];
487 for my $uri (@{$manifest->[2]}) {
488 my $euri = htescape ($uri);
489 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
490 }
491
492 print STDOUT qq[</dl></div>];
493 } # print_structure_dump_manifest_section
494
495 sub print_structure_error_dom_section ($$$) {
496 my ($doc, $el, $result) = @_;
497
498 print STDOUT qq[<div id="document-errors" class="section">
499 <h2>Document Errors</h2>
500
501 <dl>];
502 push @nav, ['#document-errors' => 'Document Error'];
503
504 require Whatpm::ContentChecker;
505 my $onerror = sub {
506 my %opt = @_;
507 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
508 $type =~ tr/ /-/;
509 $type =~ s/\|/%7C/g;
510 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
511 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
512 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
513 print STDOUT $msg, "</dd>\n";
514 add_error ('structure', \%opt => $result);
515 };
516
517 my $elements;
518 my $time1 = time;
519 if ($el) {
520 $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
521 } else {
522 $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
523 }
524 $time{check} = time - $time1;
525
526 print STDOUT qq[</dl></div>];
527
528 return $elements;
529 } # print_structure_error_dom_section
530
531 sub print_structure_error_manifest_section ($$$) {
532 my ($manifest, $result) = @_;
533
534 print STDOUT qq[<div id="document-errors" class="section">
535 <h2>Document Errors</h2>
536
537 <dl>];
538 push @nav, ['#document-errors' => 'Document Error'];
539
540 require Whatpm::CacheManifest;
541 Whatpm::CacheManifest->check_manifest ($manifest, sub {
542 my %opt = @_;
543 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
544 $type =~ tr/ /-/;
545 $type =~ s/\|/%7C/g;
546 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
547 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
548 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
549 add_error ('structure', \%opt => $result);
550 });
551
552 print STDOUT qq[</div>];
553 } # print_structure_error_manifest_section
554
555 sub print_table_section ($) {
556 my $tables = shift;
557
558 push @nav, ['#tables' => 'Tables'];
559 print STDOUT qq[
560 <div id="tables" class="section">
561 <h2>Tables</h2>
562
563 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
564 <script src="../table-script.js" type="text/javascript"></script>
565 <noscript>
566 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
567 </noscript>
568 ];
569
570 require JSON;
571
572 my $i = 0;
573 for my $table_el (@$tables) {
574 $i++;
575 print STDOUT qq[<div class="section" id="table-$i"><h3>] .
576 get_node_link ($table_el) . q[</h3>];
577
578 ## TODO: Make |ContentChecker| return |form_table| result
579 ## so that this script don't have to run the algorithm twice.
580 my $table = Whatpm::HTMLTable->form_table ($table_el);
581
582 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
583 next unless $_;
584 delete $_->{element};
585 }
586
587 for (@{$table->{row_group}}) {
588 next unless $_;
589 next unless $_->{element};
590 $_->{type} = $_->{element}->manakai_local_name;
591 delete $_->{element};
592 }
593
594 for (@{$table->{cell}}) {
595 next unless $_;
596 for (@{$_}) {
597 next unless $_;
598 for (@$_) {
599 $_->{id} = refaddr $_->{element} if defined $_->{element};
600 delete $_->{element};
601 $_->{is_header} = $_->{is_header} ? 1 : 0;
602 }
603 }
604 }
605
606 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
607 print STDOUT JSON::objToJson ($table);
608 print STDOUT qq[, document.getElementById ('table-$i'));</script>];
609 }
610
611 print STDOUT qq[</div>];
612 } # print_table_section
613
614 sub print_id_section ($) {
615 my $ids = shift;
616
617 push @nav, ['#identifiers' => 'IDs'];
618 print STDOUT qq[
619 <div id="identifiers" class="section">
620 <h2>Identifiers</h2>
621
622 <dl>
623 ];
624 for my $id (sort {$a cmp $b} keys %$ids) {
625 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
626 for (@{$ids->{$id}}) {
627 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
628 }
629 }
630 print STDOUT qq[</dl></div>];
631 } # print_id_section
632
633 sub print_term_section ($) {
634 my $terms = shift;
635
636 push @nav, ['#terms' => 'Terms'];
637 print STDOUT qq[
638 <div id="terms" class="section">
639 <h2>Terms</h2>
640
641 <dl>
642 ];
643 for my $term (sort {$a cmp $b} keys %$terms) {
644 print STDOUT qq[<dt>@{[htescape $term]}</dt>];
645 for (@{$terms->{$term}}) {
646 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
647 }
648 }
649 print STDOUT qq[</dl></div>];
650 } # print_term_section
651
652 sub print_class_section ($) {
653 my $classes = shift;
654
655 push @nav, ['#classes' => 'Classes'];
656 print STDOUT qq[
657 <div id="classes" class="section">
658 <h2>Classes</h2>
659
660 <dl>
661 ];
662 for my $class (sort {$a cmp $b} keys %$classes) {
663 print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
664 for (@{$classes->{$class}}) {
665 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
666 }
667 }
668 print STDOUT qq[</dl></div>];
669 } # print_class_section
670
671 sub print_result_section ($) {
672 my $result = shift;
673
674 print STDOUT qq[
675 <div id="result-summary" class="section">
676 <h2>Result</h2>];
677
678 if ($result->{unsupported} and $result->{conforming_max}) {
679 print STDOUT qq[<p class=uncertain id=result-para>The conformance
680 checker cannot decide whether the document is conforming or
681 not, since the document contains one or more unsupported
682 features. The document might or might not be conforming.</p>];
683 } elsif ($result->{conforming_min}) {
684 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
685 found in this document.</p>];
686 } elsif ($result->{conforming_max}) {
687 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
688 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
689 it might be conforming.</p>];
690 } else {
691 print STDOUT qq[<p class=FAIL id=result-para>This document is
692 <strong><em>non</em>-conforming</strong>.</p>];
693 }
694
695 print STDOUT qq[<table>
696 <colgroup><col><colgroup><col><col><col><colgroup><col>
697 <thead>
698 <tr><th scope=col></th>
699 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
700 Errors</a></th>
701 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
702 Errors</a></th>
703 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
704 <th scope=col>Score</th></tr></thead><tbody>];
705
706 my $must_error = 0;
707 my $should_error = 0;
708 my $warning = 0;
709 my $score_min = 0;
710 my $score_max = 0;
711 my $score_base = 20;
712 my $score_unit = $score_base / 100;
713 for (
714 [Transfer => 'transfer', ''],
715 [Character => 'char', ''],
716 [Syntax => 'syntax', '#parse-errors'],
717 [Structure => 'structure', '#document-errors'],
718 ) {
719 $must_error += ($result->{$_->[1]}->{must} += 0);
720 $should_error += ($result->{$_->[1]}->{should} += 0);
721 $warning += ($result->{$_->[1]}->{warning} += 0);
722 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
723 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
724
725 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
726 my $label = $_->[0];
727 if ($result->{$_->[1]}->{must} or
728 $result->{$_->[1]}->{should} or
729 $result->{$_->[1]}->{warning} or
730 $result->{$_->[1]}->{unsupported}) {
731 $label = qq[<a href="$_->[2]">$label</a>];
732 }
733
734 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
735 if ($uncertain) {
736 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
737 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
738 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
739 } else {
740 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
741 }
742 }
743
744 $score_max += $score_base;
745
746 print STDOUT qq[
747 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
748 </tbody>
749 <tfoot><tr class=uncertain><th scope=row>Total</th>
750 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
751 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
752 <td>$warning?</td>
753 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
754 </table>
755
756 <p><strong>Important</strong>: This conformance checking service
757 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
758 </div>];
759 push @nav, ['#result-summary' => 'Result'];
760 } # print_result_section
761
762 sub print_result_unknown_type_section ($) {
763 my $input = shift;
764
765 print STDOUT qq[
766 <div id="result-summary" class="section">
767 <p><em>Media type <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code> is not supported!</em></p>
768 </div>
769 ];
770 push @nav, ['#result-summary' => 'Result'];
771 } # print_result_unknown_type_section
772
773 sub print_result_input_error_section ($) {
774 my $input = shift;
775 print STDOUT qq[<div class="section" id="result-summary">
776 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
777 </div>];
778 push @nav, ['#result-summary' => 'Result'];
779 } # print_Result_input_error_section
780
781 sub get_error_label ($) {
782 my $err = shift;
783
784 my $r = '';
785
786 if (defined $err->{line}) {
787 if ($err->{column} > 0) {
788 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
789 } else {
790 $err->{line} = $err->{line} - 1 || 1;
791 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
792 }
793 }
794
795 if (defined $err->{node}) {
796 $r .= ' ' if length $r;
797 $r = get_node_link ($err->{node});
798 }
799
800 if (defined $err->{index}) {
801 $r .= ' ' if length $r;
802 $r .= 'Index ' . (0+$err->{index});
803 }
804
805 if (defined $err->{value}) {
806 $r .= ' ' if length $r;
807 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
808 }
809
810 return $r;
811 } # get_error_label
812
813 sub get_error_level_label ($) {
814 my $err = shift;
815
816 my $r = '';
817
818 if (not defined $err->{level} or $err->{level} eq 'm') {
819 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
820 error</a></strong>: ];
821 } elsif ($err->{level} eq 's') {
822 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
823 error</a></strong>: ];
824 } elsif ($err->{level} eq 'w') {
825 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
826 ];
827 } elsif ($err->{level} eq 'unsupported') {
828 $r = qq[<strong><a href="../error-description#level-u">Not
829 supported</a></strong>: ];
830 } else {
831 my $elevel = htescape ($err->{level});
832 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
833 ];
834 }
835
836 return $r;
837 } # get_error_level_label
838
839 sub get_node_path ($) {
840 my $node = shift;
841 my @r;
842 while (defined $node) {
843 my $rs;
844 if ($node->node_type == 1) {
845 $rs = $node->manakai_local_name;
846 $node = $node->parent_node;
847 } elsif ($node->node_type == 2) {
848 $rs = '@' . $node->manakai_local_name;
849 $node = $node->owner_element;
850 } elsif ($node->node_type == 3) {
851 $rs = '"' . $node->data . '"';
852 $node = $node->parent_node;
853 } elsif ($node->node_type == 9) {
854 @r = ('') unless @r;
855 $rs = '';
856 $node = $node->parent_node;
857 } else {
858 $rs = '#' . $node->node_type;
859 $node = $node->parent_node;
860 }
861 unshift @r, $rs;
862 }
863 return join '/', @r;
864 } # get_node_path
865
866 sub get_node_link ($) {
867 return qq[<a href="#node-@{[refaddr $_[0]]}">] .
868 htescape (get_node_path ($_[0])) . qq[</a>];
869 } # get_node_link
870
871 {
872 my $Msg = {};
873
874 sub load_text_catalog ($) {
875 my $lang = shift; # MUST be a canonical lang name
876 open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
877 while (<$file>) {
878 if (s/^([^;]+);([^;]*);//) {
879 my ($type, $cls, $msg) = ($1, $2, $_);
880 $msg =~ tr/\x0D\x0A//d;
881 $Msg->{$type} = [$cls, $msg];
882 }
883 }
884 } # load_text_catalog
885
886 sub get_text ($) {
887 my ($type, $level, $node) = @_;
888 $type = $level . ':' . $type if defined $level;
889 my @arg;
890 {
891 if (defined $Msg->{$type}) {
892 my $msg = $Msg->{$type}->[1];
893 $msg =~ s{<var>\$([0-9]+)</var>}{
894 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
895 }ge;
896 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
897 UNIVERSAL::can ($node, 'get_attribute_ns')
898 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
899 }ge;
900 $msg =~ s{<var>{\@}</var>}{
901 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
902 }ge;
903 $msg =~ s{<var>{local-name}</var>}{
904 UNIVERSAL::can ($node, 'manakai_local_name')
905 ? htescape ($node->manakai_local_name) : ''
906 }ge;
907 $msg =~ s{<var>{element-local-name}</var>}{
908 (UNIVERSAL::can ($node, 'owner_element') and
909 $node->owner_element)
910 ? htescape ($node->owner_element->manakai_local_name)
911 : ''
912 }ge;
913 return ($type, $Msg->{$type}->[0], $msg);
914 } elsif ($type =~ s/:([^:]*)$//) {
915 unshift @arg, $1;
916 redo;
917 }
918 }
919 return ($type, '', htescape ($_[0]));
920 } # get_text
921
922 }
923
924 sub get_input_document ($$) {
925 my ($http, $dom) = @_;
926
927 my $request_uri = $http->get_parameter ('uri');
928 my $r = {};
929 if (defined $request_uri and length $request_uri) {
930 my $uri = $dom->create_uri_reference ($request_uri);
931 unless ({
932 http => 1,
933 }->{lc $uri->uri_scheme}) {
934 return {uri => $request_uri, request_uri => $request_uri,
935 error_status_text => 'URI scheme not allowed'};
936 }
937
938 require Message::Util::HostPermit;
939 my $host_permit = new Message::Util::HostPermit;
940 $host_permit->add_rule (<<EOH);
941 Allow host=suika port=80
942 Deny host=suika
943 Allow host=suika.fam.cx port=80
944 Deny host=suika.fam.cx
945 Deny host=localhost
946 Deny host=*.localdomain
947 Deny ipv4=0.0.0.0/8
948 Deny ipv4=10.0.0.0/8
949 Deny ipv4=127.0.0.0/8
950 Deny ipv4=169.254.0.0/16
951 Deny ipv4=172.0.0.0/11
952 Deny ipv4=192.0.2.0/24
953 Deny ipv4=192.88.99.0/24
954 Deny ipv4=192.168.0.0/16
955 Deny ipv4=198.18.0.0/15
956 Deny ipv4=224.0.0.0/4
957 Deny ipv4=255.255.255.255/32
958 Deny ipv6=0::0/0
959 Allow host=*
960 EOH
961 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
962 return {uri => $request_uri, request_uri => $request_uri,
963 error_status_text => 'Connection to the host is forbidden'};
964 }
965
966 require LWP::UserAgent;
967 my $ua = WDCC::LWPUA->new;
968 $ua->{wdcc_dom} = $dom;
969 $ua->{wdcc_host_permit} = $host_permit;
970 $ua->agent ('Mozilla'); ## TODO: for now.
971 $ua->parse_head (0);
972 $ua->protocols_allowed ([qw/http/]);
973 $ua->max_size (1000_000);
974 my $req = HTTP::Request->new (GET => $request_uri);
975 my $res = $ua->request ($req);
976 ## TODO: 401 sets |is_success| true.
977 if ($res->is_success or $http->get_parameter ('error-page')) {
978 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
979 $r->{uri} = $res->request->uri;
980 $r->{request_uri} = $request_uri;
981
982 ## TODO: More strict parsing...
983 my $ct = $res->header ('Content-Type');
984 if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {
985 $r->{media_type} = lc $1;
986 }
987 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
988 $r->{charset} = lc $1;
989 $r->{charset} =~ tr/\\//d;
990 }
991
992 my $input_charset = $http->get_parameter ('charset');
993 if (defined $input_charset and length $input_charset) {
994 $r->{charset_overridden}
995 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
996 $r->{charset} = $input_charset;
997 }
998
999 $r->{s} = ''.$res->content;
1000 } else {
1001 $r->{uri} = $res->request->uri;
1002 $r->{request_uri} = $request_uri;
1003 $r->{error_status_text} = $res->status_line;
1004 }
1005
1006 $r->{header_field} = [];
1007 $res->scan (sub {
1008 push @{$r->{header_field}}, [$_[0], $_[1]];
1009 });
1010 $r->{header_status_code} = $res->code;
1011 $r->{header_status_text} = $res->message;
1012 } else {
1013 $r->{s} = ''.$http->get_parameter ('s');
1014 $r->{uri} = q<thismessage:/>;
1015 $r->{request_uri} = q<thismessage:/>;
1016 $r->{base_uri} = q<thismessage:/>;
1017 $r->{charset} = ''.$http->get_parameter ('_charset_');
1018 $r->{charset} =~ s/\s+//g;
1019 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1020 $r->{header_field} = [];
1021 }
1022
1023 my $input_format = $http->get_parameter ('i');
1024 if (defined $input_format and length $input_format) {
1025 $r->{media_type_overridden}
1026 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1027 $r->{media_type} = $input_format;
1028 }
1029 if (defined $r->{s} and not defined $r->{media_type}) {
1030 $r->{media_type} = 'text/html';
1031 $r->{media_type_overridden} = 1;
1032 }
1033
1034 if ($r->{media_type} eq 'text/xml') {
1035 unless (defined $r->{charset}) {
1036 $r->{charset} = 'us-ascii';
1037 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1038 $r->{charset_overridden} = 0;
1039 }
1040 }
1041
1042 if (length $r->{s} > 1000_000) {
1043 $r->{error_status_text} = 'Entity-body too large';
1044 delete $r->{s};
1045 return $r;
1046 }
1047
1048 return $r;
1049 } # get_input_document
1050
1051 package WDCC::LWPUA;
1052 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1053
1054 sub redirect_ok {
1055 my $ua = shift;
1056 unless ($ua->SUPER::redirect_ok (@_)) {
1057 return 0;
1058 }
1059
1060 my $uris = $_[1]->header ('Location');
1061 return 0 unless $uris;
1062 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1063 unless ({
1064 http => 1,
1065 }->{lc $uri->uri_scheme}) {
1066 return 0;
1067 }
1068 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1069 return 0;
1070 }
1071 return 1;
1072 } # redirect_ok
1073
1074 =head1 AUTHOR
1075
1076 Wakaba <w@suika.fam.cx>.
1077
1078 =head1 LICENSE
1079
1080 Copyright 2007 Wakaba <w@suika.fam.cx>
1081
1082 This library is free software; you can redistribute it
1083 and/or modify it under the same terms as Perl itself.
1084
1085 =cut
1086
1087 ## $Date: 2007/11/04 09:15:02 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24