/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.33 - (show annotations) (download)
Sun Feb 10 02:42:01 2008 UTC (16 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.32: +16 -48 lines
++ ChangeLog	10 Feb 2008 02:41:59 -0000
	* cc.cgi (print_listing_section): ID, class, and term
	section functions are merged.

2008-02-10  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 use Message::CGI::HTTP;
24 my $http = Message::CGI::HTTP->new;
25
26 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28 exit;
29 }
30
31 binmode STDOUT, ':utf8';
32 $| = 1;
33
34 require Message::DOM::DOMImplementation;
35 my $dom = Message::DOM::DOMImplementation->new;
36
37 load_text_catalog ('en'); ## TODO: conneg
38
39 my @nav;
40 print STDOUT qq[Content-Type: text/html; charset=utf-8
41
42 <!DOCTYPE html>
43 <html lang="en">
44 <head>
45 <title>Web Document Conformance Checker (BETA)</title>
46 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 </head>
48 <body>
49 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50 (<em>beta</em>)</h1>
51 ];
52
53 $| = 0;
54 my $input = get_input_document ($http, $dom);
55 my $char_length = 0;
56 my %time;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89 ];
90
91 my $result = {conforming_min => 1, conforming_max => 1};
92 check_and_print ($input => $result);
93 print_result_section ($result);
94 } else {
95 print STDOUT qq[</dl></div>];
96 print_result_input_error_section ($input);
97 }
98
99 print STDOUT qq[
100 <ul class="navigation" id="nav-items">
101 ];
102 for (@nav) {
103 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
104 }
105 print STDOUT qq[
106 </ul>
107 </body>
108 </html>
109 ];
110
111 for (qw/decode parse parse_html parse_xml parse_manifest
112 check check_manifest/) {
113 next unless defined $time{$_};
114 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
115 print $file $char_length, "\t", $time{$_}, "\n";
116 }
117
118 exit;
119
120 sub add_error ($$$) {
121 my ($layer, $err, $result) = @_;
122 if (defined $err->{level}) {
123 if ($err->{level} eq 's') {
124 $result->{$layer}->{should}++;
125 $result->{$layer}->{score_min} -= 2;
126 $result->{conforming_min} = 0;
127 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
128 $result->{$layer}->{warning}++;
129 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
130 $result->{$layer}->{unsupported}++;
131 $result->{unsupported} = 1;
132 } else {
133 $result->{$layer}->{must}++;
134 $result->{$layer}->{score_max} -= 2;
135 $result->{$layer}->{score_min} -= 2;
136 $result->{conforming_min} = 0;
137 $result->{conforming_max} = 0;
138 }
139 } else {
140 $result->{$layer}->{must}++;
141 $result->{$layer}->{score_max} -= 2;
142 $result->{$layer}->{score_min} -= 2;
143 $result->{conforming_min} = 0;
144 $result->{conforming_max} = 0;
145 }
146 } # add_error
147
148 sub check_and_print ($$) {
149 my ($input, $result) = @_;
150 $input->{id_prefix} = '';
151 #$input->{nested} = 1/0;
152
153 print_http_header_section ($input, $result);
154
155 my $doc;
156 my $el;
157 my $manifest;
158
159 if ($input->{media_type} eq 'text/html') {
160 ($doc, $el) = print_syntax_error_html_section ($input, $result);
161 print_source_string_section
162 (\($input->{s}), $input->{charset} || $doc->input_encoding);
163 } elsif ({
164 'text/xml' => 1,
165 'application/atom+xml' => 1,
166 'application/rss+xml' => 1,
167 'application/svg+xml' => 1,
168 'application/xhtml+xml' => 1,
169 'application/xml' => 1,
170 }->{$input->{media_type}}) {
171 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
172 print_source_string_section (\($input->{s}), $doc->input_encoding);
173 } elsif ($input->{media_type} eq 'text/cache-manifest') {
174 ## TODO: MUST be text/cache-manifest
175 $manifest = print_syntax_error_manifest_section ($input, $result);
176 print_source_string_section (\($input->{s}), 'utf-8');
177 } else {
178 ## TODO: Change HTTP status code??
179 print_result_unknown_type_section ($input, $result);
180 }
181
182 if (defined $doc or defined $el) {
183 print_structure_dump_dom_section ($input, $doc, $el);
184 my $elements = print_structure_error_dom_section
185 ($input, $doc, $el, $result);
186 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
187 print_listing_section ({
188 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
189 }, $input, $elements->{id}) if keys %{$elements->{id}};
190 print_listing_section ({
191 id => 'terms', label => 'Terms', heading => 'Terms',
192 }, $input, $elements->{term}) if keys %{$elements->{term}};
193 print_listing_section ({
194 id => 'classes', label => 'Classes', heading => 'Classes',
195 }, $input, $elements->{class}) if keys %{$elements->{class}};
196 } elsif (defined $manifest) {
197 print_structure_dump_manifest_section ($input, $manifest);
198 print_structure_error_manifest_section ($input, $manifest, $result);
199 }
200 } # check_and_print
201
202 sub print_http_header_section ($$) {
203 my ($input, $result) = @_;
204 return unless defined $input->{header_status_code} or
205 defined $input->{header_status_text} or
206 @{$input->{header_field}};
207
208 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
209 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
210 <h2>HTTP Header</h2>
211
212 <p><strong>Note</strong>: Due to the limitation of the
213 network library in use, the content of this section might
214 not be the real header.</p>
215
216 <table><tbody>
217 ];
218
219 if (defined $input->{header_status_code}) {
220 print STDOUT qq[<tr><th scope="row">Status code</th>];
221 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
222 }
223 if (defined $input->{header_status_text}) {
224 print STDOUT qq[<tr><th scope="row">Status text</th>];
225 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
226 }
227
228 for (@{$input->{header_field}}) {
229 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
230 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
231 }
232
233 print STDOUT qq[</tbody></table></div>];
234 } # print_http_header_section
235
236 sub print_syntax_error_html_section ($$) {
237 my ($input, $result) = @_;
238
239 require Encode;
240 require Whatpm::HTML;
241
242 print STDOUT qq[
243 <div id="$input->{id_prefix}parse-errors" class="section">
244 <h2>Parse Errors</h2>
245
246 <dl>];
247 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
248
249 my $onerror = sub {
250 my (%opt) = @_;
251 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
252 if ($opt{column} > 0) {
253 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
254 } else {
255 $opt{line} = $opt{line} - 1 || 1;
256 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
257 }
258 $type =~ tr/ /-/;
259 $type =~ s/\|/%7C/g;
260 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
261 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
262 print STDOUT qq[$msg</dd>\n];
263
264 add_error ('syntax', \%opt => $result);
265 };
266
267 my $doc = $dom->create_document;
268 my $el;
269 my $inner_html_element = $http->get_parameter ('e');
270 if (defined $inner_html_element and length $inner_html_element) {
271 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
272 my $time1 = time;
273 my $t = Encode::decode ($input->{charset}, $input->{s});
274 $time{decode} = time - $time1;
275
276 $el = $doc->create_element_ns
277 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
278 $time1 = time;
279 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
280 $time{parse} = time - $time1;
281 } else {
282 my $time1 = time;
283 Whatpm::HTML->parse_byte_string
284 ($input->{charset}, $input->{s} => $doc, $onerror);
285 $time{parse_html} = time - $time1;
286 }
287 $doc->manakai_charset ($input->{official_charset})
288 if defined $input->{official_charset};
289
290 print STDOUT qq[</dl></div>];
291
292 return ($doc, $el);
293 } # print_syntax_error_html_section
294
295 sub print_syntax_error_xml_section ($$) {
296 my ($input, $result) = @_;
297
298 require Message::DOM::XMLParserTemp;
299
300 print STDOUT qq[
301 <div id="$input->{id_prefix}parse-errors" class="section">
302 <h2>Parse Errors</h2>
303
304 <dl>];
305 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
306
307 my $onerror = sub {
308 my $err = shift;
309 my $line = $err->location->line_number;
310 print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
311 print STDOUT $err->location->column_number, "</dt><dd>";
312 print STDOUT htescape $err->text, "</dd>\n";
313
314 add_error ('syntax', {type => $err->text,
315 level => [
316 $err->SEVERITY_FATAL_ERROR => 'm',
317 $err->SEVERITY_ERROR => 'm',
318 $err->SEVERITY_WARNING => 's',
319 ]->[$err->severity]} => $result);
320
321 return 1;
322 };
323
324 my $time1 = time;
325 open my $fh, '<', \($input->{s});
326 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
327 ($fh => $dom, $onerror, charset => $input->{charset});
328 $time{parse_xml} = time - $time1;
329 $doc->manakai_charset ($input->{official_charset})
330 if defined $input->{official_charset};
331
332 print STDOUT qq[</dl></div>];
333
334 return ($doc, undef);
335 } # print_syntax_error_xml_section
336
337 sub print_syntax_error_manifest_section ($$) {
338 my ($input, $result) = @_;
339
340 require Whatpm::CacheManifest;
341
342 print STDOUT qq[
343 <div id="$input->{id_prefix}parse-errors" class="section">
344 <h2>Parse Errors</h2>
345
346 <dl>];
347 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
348
349 my $onerror = sub {
350 my (%opt) = @_;
351 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
352 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
353 qq[</dt>];
354 $type =~ tr/ /-/;
355 $type =~ s/\|/%7C/g;
356 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
357 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
358 print STDOUT qq[$msg</dd>\n];
359
360 add_error ('syntax', \%opt => $result);
361 };
362
363 my $time1 = time;
364 my $manifest = Whatpm::CacheManifest->parse_byte_string
365 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
366 $time{parse_manifest} = time - $time1;
367
368 print STDOUT qq[</dl></div>];
369
370 return $manifest;
371 } # print_syntax_error_manifest_section
372
373 sub print_source_string_section ($$) {
374 require Encode;
375 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
376 return unless $enc;
377
378 my $s = \($enc->decode (${$_[0]}));
379 my $i = 1;
380 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
381 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
382 <h2>Document Source</h2>
383 <ol lang="">\n];
384 if (length $$s) {
385 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
386 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
387 "</li>\n";
388 $i++;
389 }
390 if ($$s =~ /\G([^\x0A]+)/gc) {
391 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
392 "</li>\n";
393 }
394 } else {
395 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
396 }
397 print STDOUT "</ol></div>";
398 } # print_input_string_section
399
400 sub print_document_tree ($) {
401 my $node = shift;
402 my $r = '<ol class="xoxo">';
403
404 my @node = ($node);
405 while (@node) {
406 my $child = shift @node;
407 unless (ref $child) {
408 $r .= $child;
409 next;
410 }
411
412 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
413 my $nt = $child->node_type;
414 if ($nt == $child->ELEMENT_NODE) {
415 my $child_nsuri = $child->namespace_uri;
416 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
417 '</code>'; ## ISSUE: case
418
419 if ($child->has_attributes) {
420 $r .= '<ul class="attributes">';
421 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
422 @{$child->attributes}) {
423 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
424 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
425 }
426 $r .= '</ul>';
427 }
428
429 if ($child->has_child_nodes) {
430 $r .= '<ol class="children">';
431 unshift @node, @{$child->child_nodes}, '</ol></li>';
432 } else {
433 $r .= '</li>';
434 }
435 } elsif ($nt == $child->TEXT_NODE) {
436 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
437 } elsif ($nt == $child->CDATA_SECTION_NODE) {
438 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
439 } elsif ($nt == $child->COMMENT_NODE) {
440 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
441 } elsif ($nt == $child->DOCUMENT_NODE) {
442 $r .= qq'<li id="$node_id" class="tree-document">Document';
443 $r .= qq[<ul class="attributes">];
444 my $cp = $child->manakai_charset;
445 if (defined $cp) {
446 $r .= qq[<li><code>charset</code> parameter = <code>];
447 $r .= htescape ($cp) . qq[</code></li>];
448 }
449 $r .= qq[<li><code>inputEncoding</code> = ];
450 my $ie = $child->input_encoding;
451 if (defined $ie) {
452 $r .= qq[<code>@{[htescape ($ie)]}</code>];
453 if ($child->manakai_has_bom) {
454 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
455 }
456 } else {
457 $r .= qq[(<code>null</code>)];
458 }
459 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
460 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
461 unless ($child->manakai_is_html) {
462 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
463 if (defined $child->xml_encoding) {
464 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
465 } else {
466 $r .= qq[<li>XML encoding = (null)</li>];
467 }
468 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
469 }
470 $r .= qq[</ul>];
471 if ($child->has_child_nodes) {
472 $r .= '<ol class="children">';
473 unshift @node, @{$child->child_nodes}, '</ol></li>';
474 }
475 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
476 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
477 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
478 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
479 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
480 $r .= '</ul></li>';
481 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
482 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
483 } else {
484 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
485 }
486 }
487
488 $r .= '</ol>';
489 print STDOUT $r;
490 } # print_document_tree
491
492 sub print_structure_dump_dom_section ($$$) {
493 my ($input, $doc, $el) = @_;
494
495 print STDOUT qq[
496 <div id="$input->{id_prefix}document-tree" class="section">
497 <h2>Document Tree</h2>
498 ];
499 push @nav, ['#document-tree' => 'Tree'] unless $input->{nested};
500
501 print_document_tree ($el || $doc);
502
503 print STDOUT qq[</div>];
504 } # print_structure_dump_dom_section
505
506 sub print_structure_dump_manifest_section ($$) {
507 my ($input, $manifest) = @_;
508
509 print STDOUT qq[
510 <div id="$input->{id_prefix}dump-manifest" class="section">
511 <h2>Cache Manifest</h2>
512 ];
513 push @nav, ['#dump-manifest' => 'Caceh Manifest'] unless $input->{nested};
514
515 print STDOUT qq[<dl><dt>Explicit entries</dt>];
516 for my $uri (@{$manifest->[0]}) {
517 my $euri = htescape ($uri);
518 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
519 }
520
521 print STDOUT qq[<dt>Fallback entries</dt><dd>
522 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
523 <th scope=row>Fallback Entry</tr><tbody>];
524 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
525 my $euri = htescape ($uri);
526 my $euri2 = htescape ($manifest->[1]->{$uri});
527 print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
528 <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
529 }
530
531 print STDOUT qq[</table><dt>Online whitelist</dt>];
532 for my $uri (@{$manifest->[2]}) {
533 my $euri = htescape ($uri);
534 print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
535 }
536
537 print STDOUT qq[</dl></div>];
538 } # print_structure_dump_manifest_section
539
540 sub print_structure_error_dom_section ($$$$) {
541 my ($input, $doc, $el, $result) = @_;
542
543 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
544 <h2>Document Errors</h2>
545
546 <dl>];
547 push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested};
548
549 require Whatpm::ContentChecker;
550 my $onerror = sub {
551 my %opt = @_;
552 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
553 $type =~ tr/ /-/;
554 $type =~ s/\|/%7C/g;
555 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
556 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
557 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
558 print STDOUT $msg, "</dd>\n";
559 add_error ('structure', \%opt => $result);
560 };
561
562 my $elements;
563 my $time1 = time;
564 if ($el) {
565 $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
566 } else {
567 $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
568 }
569 $time{check} = time - $time1;
570
571 print STDOUT qq[</dl></div>];
572
573 return $elements;
574 } # print_structure_error_dom_section
575
576 sub print_structure_error_manifest_section ($$$) {
577 my ($input, $manifest, $result) = @_;
578
579 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
580 <h2>Document Errors</h2>
581
582 <dl>];
583 push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested};
584
585 require Whatpm::CacheManifest;
586 Whatpm::CacheManifest->check_manifest ($manifest, sub {
587 my %opt = @_;
588 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
589 $type =~ tr/ /-/;
590 $type =~ s/\|/%7C/g;
591 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
592 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
593 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
594 add_error ('structure', \%opt => $result);
595 });
596
597 print STDOUT qq[</div>];
598 } # print_structure_error_manifest_section
599
600 sub print_table_section ($$) {
601 my ($input, $tables) = @_;
602
603 push @nav, ['#tables' => 'Tables'] unless $input->{nested};
604 print STDOUT qq[
605 <div id="$input->{id_prefix}tables" class="section">
606 <h2>Tables</h2>
607
608 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
609 <script src="../table-script.js" type="text/javascript"></script>
610 <noscript>
611 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
612 </noscript>
613 ];
614
615 require JSON;
616
617 my $i = 0;
618 for my $table_el (@$tables) {
619 $i++;
620 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
621 get_node_link ($input, $table_el) . q[</h3>];
622
623 ## TODO: Make |ContentChecker| return |form_table| result
624 ## so that this script don't have to run the algorithm twice.
625 my $table = Whatpm::HTMLTable->form_table ($table_el);
626
627 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
628 next unless $_;
629 delete $_->{element};
630 }
631
632 for (@{$table->{row_group}}) {
633 next unless $_;
634 next unless $_->{element};
635 $_->{type} = $_->{element}->manakai_local_name;
636 delete $_->{element};
637 }
638
639 for (@{$table->{cell}}) {
640 next unless $_;
641 for (@{$_}) {
642 next unless $_;
643 for (@$_) {
644 $_->{id} = refaddr $_->{element} if defined $_->{element};
645 delete $_->{element};
646 $_->{is_header} = $_->{is_header} ? 1 : 0;
647 }
648 }
649 }
650
651 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
652 print STDOUT JSON::objToJson ($table);
653 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
654 print STDOUT qq[, '$input->{id_prefix}');</script>];
655 }
656
657 print STDOUT qq[</div>];
658 } # print_table_section
659
660 sub print_listing_section ($$$) {
661 my ($opt, $input, $ids) = @_;
662
663 push @nav, ['#' . $opt->{id} => $opt->{label}] unless $input->{nested};
664 print STDOUT qq[
665 <div id="$input->{id_prefix}$opt->{id}" class="section">
666 <h2>$opt->{heading}</h2>
667
668 <dl>
669 ];
670 for my $id (sort {$a cmp $b} keys %$ids) {
671 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
672 for (@{$ids->{$id}}) {
673 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
674 }
675 }
676 print STDOUT qq[</dl></div>];
677 } # print_listing_section
678
679 sub print_result_section ($) {
680 my $result = shift;
681
682 print STDOUT qq[
683 <div id="result-summary" class="section">
684 <h2>Result</h2>];
685
686 if ($result->{unsupported} and $result->{conforming_max}) {
687 print STDOUT qq[<p class=uncertain id=result-para>The conformance
688 checker cannot decide whether the document is conforming or
689 not, since the document contains one or more unsupported
690 features. The document might or might not be conforming.</p>];
691 } elsif ($result->{conforming_min}) {
692 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
693 found in this document.</p>];
694 } elsif ($result->{conforming_max}) {
695 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
696 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
697 it might be conforming.</p>];
698 } else {
699 print STDOUT qq[<p class=FAIL id=result-para>This document is
700 <strong><em>non</em>-conforming</strong>.</p>];
701 }
702
703 print STDOUT qq[<table>
704 <colgroup><col><colgroup><col><col><col><colgroup><col>
705 <thead>
706 <tr><th scope=col></th>
707 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
708 Errors</a></th>
709 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
710 Errors</a></th>
711 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
712 <th scope=col>Score</th></tr></thead><tbody>];
713
714 my $must_error = 0;
715 my $should_error = 0;
716 my $warning = 0;
717 my $score_min = 0;
718 my $score_max = 0;
719 my $score_base = 20;
720 my $score_unit = $score_base / 100;
721 for (
722 [Transfer => 'transfer', ''],
723 [Character => 'char', ''],
724 [Syntax => 'syntax', '#parse-errors'],
725 [Structure => 'structure', '#document-errors'],
726 ) {
727 $must_error += ($result->{$_->[1]}->{must} += 0);
728 $should_error += ($result->{$_->[1]}->{should} += 0);
729 $warning += ($result->{$_->[1]}->{warning} += 0);
730 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
731 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
732
733 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
734 my $label = $_->[0];
735 if ($result->{$_->[1]}->{must} or
736 $result->{$_->[1]}->{should} or
737 $result->{$_->[1]}->{warning} or
738 $result->{$_->[1]}->{unsupported}) {
739 $label = qq[<a href="$_->[2]">$label</a>];
740 }
741
742 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
743 if ($uncertain) {
744 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
745 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
746 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
747 } else {
748 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
749 }
750 }
751
752 $score_max += $score_base;
753
754 print STDOUT qq[
755 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
756 </tbody>
757 <tfoot><tr class=uncertain><th scope=row>Total</th>
758 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
759 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
760 <td>$warning?</td>
761 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
762 </table>
763
764 <p><strong>Important</strong>: This conformance checking service
765 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
766 </div>];
767 push @nav, ['#result-summary' => 'Result'];
768 } # print_result_section
769
770 sub print_result_unknown_type_section ($$) {
771 my ($input, $result) = @_;
772
773 my $euri = htescape ($input->{uri});
774 print STDOUT qq[
775 <div id="parse-errors" class="section">
776 <h2>Errors</h2>
777
778 <dl>
779 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
780 <dd class=unsupported><strong><a href="../error-description#level-u">Not
781 supported</a></strong>:
782 Media type
783 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
784 is not supported.</dd>
785 </dl>
786 </div>
787 ];
788 push @nav, ['#parse-errors' => 'Errors'];
789 add_error (char => {level => 'u'} => $result);
790 add_error (syntax => {level => 'u'} => $result);
791 add_error (structure => {level => 'u'} => $result);
792 } # print_result_unknown_type_section
793
794 sub print_result_input_error_section ($) {
795 my $input = shift;
796 print STDOUT qq[<div class="section" id="result-summary">
797 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
798 </div>];
799 push @nav, ['#result-summary' => 'Result'];
800 } # print_result_input_error_section
801
802 sub get_error_label ($$) {
803 my ($input, $err) = @_;
804
805 my $r = '';
806
807 if (defined $err->{line}) {
808 if ($err->{column} > 0) {
809 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
810 } else {
811 $err->{line} = $err->{line} - 1 || 1;
812 $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
813 }
814 }
815
816 if (defined $err->{node}) {
817 $r .= ' ' if length $r;
818 $r = get_node_link ($input, $err->{node});
819 }
820
821 if (defined $err->{index}) {
822 $r .= ' ' if length $r;
823 $r .= 'Index ' . (0+$err->{index});
824 }
825
826 if (defined $err->{value}) {
827 $r .= ' ' if length $r;
828 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
829 }
830
831 return $r;
832 } # get_error_label
833
834 sub get_error_level_label ($) {
835 my $err = shift;
836
837 my $r = '';
838
839 if (not defined $err->{level} or $err->{level} eq 'm') {
840 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
841 error</a></strong>: ];
842 } elsif ($err->{level} eq 's') {
843 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
844 error</a></strong>: ];
845 } elsif ($err->{level} eq 'w') {
846 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
847 ];
848 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
849 $r = qq[<strong><a href="../error-description#level-u">Not
850 supported</a></strong>: ];
851 } else {
852 my $elevel = htescape ($err->{level});
853 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
854 ];
855 }
856
857 return $r;
858 } # get_error_level_label
859
860 sub get_node_path ($) {
861 my $node = shift;
862 my @r;
863 while (defined $node) {
864 my $rs;
865 if ($node->node_type == 1) {
866 $rs = $node->manakai_local_name;
867 $node = $node->parent_node;
868 } elsif ($node->node_type == 2) {
869 $rs = '@' . $node->manakai_local_name;
870 $node = $node->owner_element;
871 } elsif ($node->node_type == 3) {
872 $rs = '"' . $node->data . '"';
873 $node = $node->parent_node;
874 } elsif ($node->node_type == 9) {
875 @r = ('') unless @r;
876 $rs = '';
877 $node = $node->parent_node;
878 } else {
879 $rs = '#' . $node->node_type;
880 $node = $node->parent_node;
881 }
882 unshift @r, $rs;
883 }
884 return join '/', @r;
885 } # get_node_path
886
887 sub get_node_link ($$) {
888 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
889 htescape (get_node_path ($_[1])) . qq[</a>];
890 } # get_node_link
891
892 {
893 my $Msg = {};
894
895 sub load_text_catalog ($) {
896 my $lang = shift; # MUST be a canonical lang name
897 open my $file, '<:utf8', "cc-msg.$lang.txt"
898 or die "$0: cc-msg.$lang.txt: $!";
899 while (<$file>) {
900 if (s/^([^;]+);([^;]*);//) {
901 my ($type, $cls, $msg) = ($1, $2, $_);
902 $msg =~ tr/\x0D\x0A//d;
903 $Msg->{$type} = [$cls, $msg];
904 }
905 }
906 } # load_text_catalog
907
908 sub get_text ($) {
909 my ($type, $level, $node) = @_;
910 $type = $level . ':' . $type if defined $level;
911 $level = 'm' unless defined $level;
912 my @arg;
913 {
914 if (defined $Msg->{$type}) {
915 my $msg = $Msg->{$type}->[1];
916 $msg =~ s{<var>\$([0-9]+)</var>}{
917 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
918 }ge;
919 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
920 UNIVERSAL::can ($node, 'get_attribute_ns')
921 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
922 }ge;
923 $msg =~ s{<var>{\@}</var>}{
924 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
925 }ge;
926 $msg =~ s{<var>{local-name}</var>}{
927 UNIVERSAL::can ($node, 'manakai_local_name')
928 ? htescape ($node->manakai_local_name) : ''
929 }ge;
930 $msg =~ s{<var>{element-local-name}</var>}{
931 (UNIVERSAL::can ($node, 'owner_element') and
932 $node->owner_element)
933 ? htescape ($node->owner_element->manakai_local_name)
934 : ''
935 }ge;
936 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
937 } elsif ($type =~ s/:([^:]*)$//) {
938 unshift @arg, $1;
939 redo;
940 }
941 }
942 return ($type, 'level-'.$level, htescape ($_[0]));
943 } # get_text
944
945 }
946
947 sub get_input_document ($$) {
948 my ($http, $dom) = @_;
949
950 my $request_uri = $http->get_parameter ('uri');
951 my $r = {};
952 if (defined $request_uri and length $request_uri) {
953 my $uri = $dom->create_uri_reference ($request_uri);
954 unless ({
955 http => 1,
956 }->{lc $uri->uri_scheme}) {
957 return {uri => $request_uri, request_uri => $request_uri,
958 error_status_text => 'URI scheme not allowed'};
959 }
960
961 require Message::Util::HostPermit;
962 my $host_permit = new Message::Util::HostPermit;
963 $host_permit->add_rule (<<EOH);
964 Allow host=suika port=80
965 Deny host=suika
966 Allow host=suika.fam.cx port=80
967 Deny host=suika.fam.cx
968 Deny host=localhost
969 Deny host=*.localdomain
970 Deny ipv4=0.0.0.0/8
971 Deny ipv4=10.0.0.0/8
972 Deny ipv4=127.0.0.0/8
973 Deny ipv4=169.254.0.0/16
974 Deny ipv4=172.0.0.0/11
975 Deny ipv4=192.0.2.0/24
976 Deny ipv4=192.88.99.0/24
977 Deny ipv4=192.168.0.0/16
978 Deny ipv4=198.18.0.0/15
979 Deny ipv4=224.0.0.0/4
980 Deny ipv4=255.255.255.255/32
981 Deny ipv6=0::0/0
982 Allow host=*
983 EOH
984 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
985 return {uri => $request_uri, request_uri => $request_uri,
986 error_status_text => 'Connection to the host is forbidden'};
987 }
988
989 require LWP::UserAgent;
990 my $ua = WDCC::LWPUA->new;
991 $ua->{wdcc_dom} = $dom;
992 $ua->{wdcc_host_permit} = $host_permit;
993 $ua->agent ('Mozilla'); ## TODO: for now.
994 $ua->parse_head (0);
995 $ua->protocols_allowed ([qw/http/]);
996 $ua->max_size (1000_000);
997 my $req = HTTP::Request->new (GET => $request_uri);
998 $req->header ('Accept-Encoding' => 'identity, *; q=0');
999 my $res = $ua->request ($req);
1000 ## TODO: 401 sets |is_success| true.
1001 if ($res->is_success or $http->get_parameter ('error-page')) {
1002 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1003 $r->{uri} = $res->request->uri;
1004 $r->{request_uri} = $request_uri;
1005
1006 ## TODO: More strict parsing...
1007 my $ct = $res->header ('Content-Type');
1008 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1009 $r->{charset} = lc $1;
1010 $r->{charset} =~ tr/\\//d;
1011 $r->{official_charset} = $r->{charset};
1012 }
1013
1014 my $input_charset = $http->get_parameter ('charset');
1015 if (defined $input_charset and length $input_charset) {
1016 $r->{charset_overridden}
1017 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1018 $r->{charset} = $input_charset;
1019 }
1020
1021 ## TODO: Support for HTTP Content-Encoding
1022
1023 $r->{s} = ''.$res->content;
1024
1025 require Whatpm::ContentType;
1026 ($r->{official_type}, $r->{media_type})
1027 = Whatpm::ContentType->get_sniffed_type
1028 (get_file_head => sub {
1029 return substr $r->{s}, 0, shift;
1030 },
1031 http_content_type_byte => $ct,
1032 has_http_content_encoding =>
1033 defined $res->header ('Content-Encoding'),
1034 supported_image_types => {});
1035 } else {
1036 $r->{uri} = $res->request->uri;
1037 $r->{request_uri} = $request_uri;
1038 $r->{error_status_text} = $res->status_line;
1039 }
1040
1041 $r->{header_field} = [];
1042 $res->scan (sub {
1043 push @{$r->{header_field}}, [$_[0], $_[1]];
1044 });
1045 $r->{header_status_code} = $res->code;
1046 $r->{header_status_text} = $res->message;
1047 } else {
1048 $r->{s} = ''.$http->get_parameter ('s');
1049 $r->{uri} = q<thismessage:/>;
1050 $r->{request_uri} = q<thismessage:/>;
1051 $r->{base_uri} = q<thismessage:/>;
1052 $r->{charset} = ''.$http->get_parameter ('_charset_');
1053 $r->{charset} =~ s/\s+//g;
1054 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1055 $r->{official_charset} = $r->{charset};
1056 $r->{header_field} = [];
1057
1058 require Whatpm::ContentType;
1059 ($r->{official_type}, $r->{media_type})
1060 = Whatpm::ContentType->get_sniffed_type
1061 (get_file_head => sub {
1062 return substr $r->{s}, 0, shift;
1063 },
1064 http_content_type_byte => undef,
1065 has_http_content_encoding => 0,
1066 supported_image_types => {});
1067 }
1068
1069 my $input_format = $http->get_parameter ('i');
1070 if (defined $input_format and length $input_format) {
1071 $r->{media_type_overridden}
1072 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1073 $r->{media_type} = $input_format;
1074 }
1075 if (defined $r->{s} and not defined $r->{media_type}) {
1076 $r->{media_type} = 'text/html';
1077 $r->{media_type_overridden} = 1;
1078 }
1079
1080 if ($r->{media_type} eq 'text/xml') {
1081 unless (defined $r->{charset}) {
1082 $r->{charset} = 'us-ascii';
1083 $r->{official_charset} = $r->{charset};
1084 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1085 $r->{charset_overridden} = 0;
1086 }
1087 }
1088
1089 if (length $r->{s} > 1000_000) {
1090 $r->{error_status_text} = 'Entity-body too large';
1091 delete $r->{s};
1092 return $r;
1093 }
1094
1095 return $r;
1096 } # get_input_document
1097
1098 package WDCC::LWPUA;
1099 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1100
1101 sub redirect_ok {
1102 my $ua = shift;
1103 unless ($ua->SUPER::redirect_ok (@_)) {
1104 return 0;
1105 }
1106
1107 my $uris = $_[1]->header ('Location');
1108 return 0 unless $uris;
1109 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1110 unless ({
1111 http => 1,
1112 }->{lc $uri->uri_scheme}) {
1113 return 0;
1114 }
1115 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1116 return 0;
1117 }
1118 return 1;
1119 } # redirect_ok
1120
1121 =head1 AUTHOR
1122
1123 Wakaba <w@suika.fam.cx>.
1124
1125 =head1 LICENSE
1126
1127 Copyright 2007 Wakaba <w@suika.fam.cx>
1128
1129 This library is free software; you can redistribute it
1130 and/or modify it under the same terms as Perl itself.
1131
1132 =cut
1133
1134 ## $Date: 2008/02/10 02:30:14 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24