/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.22 - (hide annotations) (download)
Sun Nov 4 09:15:02 2007 UTC (17 years ago) by wakaba
Branch: MAIN
Changes since 1.21: +142 -10 lines
++ ChangeLog	4 Nov 2007 09:14:24 -0000
2007-11-04  Wakaba  <wakaba@suika.fam.cx>

	* cc-interface.en.html (i): |text/cache-manifest| is added.

	* cc-style.css: New rules for manifest dump.

	* cc.cgi: Support for |text/cache-manifest|.

	* error-description-soruce.en.xml (#cache-manifest-errors): New
	section.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4     use lib qw[/home/httpd/html/www/markup/html/whatpm
5 wakaba 1.16 /home/wakaba/work/manakai2/lib];
6 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
7 wakaba 1.2 use Scalar::Util qw[refaddr];
8 wakaba 1.16 use Time::HiRes qw/time/;
9 wakaba 1.1
10 wakaba 1.2 sub htescape ($) {
11     my $s = $_[0];
12     $s =~ s/&/&amp;/g;
13     $s =~ s/</&lt;/g;
14     $s =~ s/>/&gt;/g;
15     $s =~ s/"/&quot;/g;
16 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
17     sprintf '<var>U+%04X</var>', ord $1;
18     }ge;
19 wakaba 1.2 return $s;
20     } # htescape
21    
22 wakaba 1.16 use Message::CGI::HTTP;
23     my $http = Message::CGI::HTTP->new;
24 wakaba 1.1
25 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
26 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
27     exit;
28     }
29    
30 wakaba 1.12 binmode STDOUT, ':utf8';
31 wakaba 1.14 $| = 1;
32 wakaba 1.12
33 wakaba 1.9 require Message::DOM::DOMImplementation;
34     my $dom = Message::DOM::DOMImplementation->new;
35    
36 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
37    
38 wakaba 1.3 my @nav;
39 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
40    
41     <!DOCTYPE html>
42     <html lang="en">
43     <head>
44     <title>Web Document Conformance Checker (BETA)</title>
45 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
46 wakaba 1.2 </head>
47     <body>
48 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
49     (<em>beta</em>)</h1>
50 wakaba 1.14 ];
51 wakaba 1.2
52 wakaba 1.14 $| = 0;
53     my $input = get_input_document ($http, $dom);
54 wakaba 1.16 my $inner_html_element = $http->get_parameter ('e');
55     my $char_length = 0;
56     my %time;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.9 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>
65 wakaba 1.2 ]; # no </dl> yet
66 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
67 wakaba 1.1
68 wakaba 1.9 if (defined $input->{s}) {
69 wakaba 1.16 $char_length = length $input->{s};
70 wakaba 1.9
71     print STDOUT qq[
72     <dt>Base URI</dt>
73     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
74     <dt>Internet Media Type</dt>
75     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
76     @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>
77     <dt>Character Encoding</dt>
78     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
79     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
80 wakaba 1.16 <dt>Length</dt>
81     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
82 wakaba 1.9 </dl>
83     </div>
84     ];
85    
86 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
87 wakaba 1.19 print_http_header_section ($input, $result);
88 wakaba 1.9
89 wakaba 1.1 my $doc;
90     my $el;
91 wakaba 1.22 my $manifest;
92 wakaba 1.1
93 wakaba 1.9 if ($input->{media_type} eq 'text/html') {
94 wakaba 1.19 ($doc, $el) = print_syntax_error_html_section ($input, $result);
95 wakaba 1.9 print_source_string_section (\($input->{s}), $input->{charset});
96     } elsif ({
97     'text/xml' => 1,
98 wakaba 1.16 'application/atom+xml' => 1,
99     'application/rss+xml' => 1,
100     'application/svg+xml' => 1,
101 wakaba 1.9 'application/xhtml+xml' => 1,
102     'application/xml' => 1,
103     }->{$input->{media_type}}) {
104 wakaba 1.19 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
105 wakaba 1.9 print_source_string_section (\($input->{s}), $doc->input_encoding);
106 wakaba 1.22 } elsif ($input->{media_type} eq 'text/cache-manifest') {
107     ## TODO: MUST be text/cache-manifest
108     $manifest = print_syntax_error_manifest_section ($input, $result);
109     print_source_string_section (\($input->{s}), 'utf-8');
110 wakaba 1.2 } else {
111 wakaba 1.9 ## TODO: Change HTTP status code??
112 wakaba 1.18 print_result_unknown_type_section ($input);
113 wakaba 1.2 }
114    
115     if (defined $doc or defined $el) {
116 wakaba 1.22 print_structure_dump_dom_section ($doc, $el);
117     my $elements = print_structure_error_dom_section ($doc, $el, $result);
118 wakaba 1.18 print_table_section ($elements->{table}) if @{$elements->{table}};
119     print_id_section ($elements->{id}) if keys %{$elements->{id}};
120     print_term_section ($elements->{term}) if keys %{$elements->{term}};
121     print_class_section ($elements->{class}) if keys %{$elements->{class}};
122 wakaba 1.22 } elsif (defined $manifest) {
123     print_structure_dump_manifest_section ($manifest);
124     print_structure_error_manifest_section ($manifest, $result);
125 wakaba 1.1 }
126    
127 wakaba 1.19 print_result_section ($result);
128 wakaba 1.9 } else {
129 wakaba 1.18 print STDOUT qq[</dl></div>];
130     print_result_input_error_section ($input);
131 wakaba 1.9 }
132 wakaba 1.3
133 wakaba 1.2 print STDOUT qq[
134 wakaba 1.3 <ul class="navigation" id="nav-items">
135     ];
136     for (@nav) {
137     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
138     }
139     print STDOUT qq[
140     </ul>
141 wakaba 1.2 </body>
142     </html>
143     ];
144 wakaba 1.1
145 wakaba 1.22 for (qw/decode parse parse_xml parse_manifest check check_manifest/) {
146 wakaba 1.16 next unless defined $time{$_};
147     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
148     print $file $char_length, "\t", $time{$_}, "\n";
149     }
150    
151 wakaba 1.1 exit;
152    
153 wakaba 1.19 sub add_error ($$$) {
154     my ($layer, $err, $result) = @_;
155     if (defined $err->{level}) {
156     if ($err->{level} eq 's') {
157     $result->{$layer}->{should}++;
158     $result->{$layer}->{score_min} -= 2;
159     $result->{conforming_min} = 0;
160     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
161     $result->{$layer}->{warning}++;
162     } elsif ($err->{level} eq 'unsupported') {
163     $result->{$layer}->{unsupported}++;
164     $result->{unsupported} = 1;
165     } else {
166     $result->{$layer}->{must}++;
167     $result->{$layer}->{score_max} -= 2;
168     $result->{$layer}->{score_min} -= 2;
169     $result->{conforming_min} = 0;
170     $result->{conforming_max} = 0;
171     }
172     } else {
173     $result->{$layer}->{must}++;
174     $result->{$layer}->{score_max} -= 2;
175     $result->{$layer}->{score_min} -= 2;
176     $result->{conforming_min} = 0;
177     $result->{conforming_max} = 0;
178     }
179     } # add_error
180    
181     sub print_http_header_section ($$) {
182     my ($input, $result) = @_;
183 wakaba 1.9 return unless defined $input->{header_status_code} or
184     defined $input->{header_status_text} or
185     @{$input->{header_field}};
186    
187     push @nav, ['#source-header' => 'HTTP Header'];
188     print STDOUT qq[<div id="source-header" class="section">
189     <h2>HTTP Header</h2>
190    
191     <p><strong>Note</strong>: Due to the limitation of the
192     network library in use, the content of this section might
193     not be the real header.</p>
194    
195     <table><tbody>
196     ];
197    
198     if (defined $input->{header_status_code}) {
199     print STDOUT qq[<tr><th scope="row">Status code</th>];
200     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
201     }
202     if (defined $input->{header_status_text}) {
203     print STDOUT qq[<tr><th scope="row">Status text</th>];
204     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
205     }
206    
207     for (@{$input->{header_field}}) {
208     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
209     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
210     }
211    
212     print STDOUT qq[</tbody></table></div>];
213     } # print_http_header_section
214    
215 wakaba 1.19 sub print_syntax_error_html_section ($$) {
216     my ($input, $result) = @_;
217 wakaba 1.18
218     require Encode;
219     require Whatpm::HTML;
220    
221     $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
222    
223     my $time1 = time;
224     my $t = Encode::decode ($input->{charset}, $input->{s});
225     $time{decode} = time - $time1;
226    
227     print STDOUT qq[
228     <div id="parse-errors" class="section">
229     <h2>Parse Errors</h2>
230    
231     <dl>];
232     push @nav, ['#parse-errors' => 'Parse Error'];
233    
234     my $onerror = sub {
235     my (%opt) = @_;
236     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
237     if ($opt{column} > 0) {
238     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
239     } else {
240     $opt{line} = $opt{line} - 1 || 1;
241     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
242     }
243     $type =~ tr/ /-/;
244     $type =~ s/\|/%7C/g;
245     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
246     print STDOUT qq[<dd class="$cls">$msg</dd>\n];
247 wakaba 1.19
248     add_error ('syntax', \%opt => $result);
249 wakaba 1.18 };
250    
251     my $doc = $dom->create_document;
252     my $el;
253     $time1 = time;
254     if (defined $inner_html_element and length $inner_html_element) {
255     $el = $doc->create_element_ns
256     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
257     Whatpm::HTML->set_inner_html ($el, $t, $onerror);
258     } else {
259     Whatpm::HTML->parse_string ($t => $doc, $onerror);
260     }
261     $time{parse} = time - $time1;
262    
263     print STDOUT qq[</dl></div>];
264    
265     return ($doc, $el);
266     } # print_syntax_error_html_section
267    
268 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
269     my ($input, $result) = @_;
270 wakaba 1.18
271     require Message::DOM::XMLParserTemp;
272    
273     print STDOUT qq[
274     <div id="parse-errors" class="section">
275     <h2>Parse Errors</h2>
276    
277     <dl>];
278     push @nav, ['#parse-errors' => 'Parse Error'];
279    
280     my $onerror = sub {
281     my $err = shift;
282     my $line = $err->location->line_number;
283     print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
284     print STDOUT $err->location->column_number, "</dt><dd>";
285     print STDOUT htescape $err->text, "</dd>\n";
286 wakaba 1.19
287     add_error ('syntax', {type => $err->text,
288     level => [
289     $err->SEVERITY_FATAL_ERROR => 'm',
290     $err->SEVERITY_ERROR => 'm',
291     $err->SEVERITY_WARNING => 's',
292     ]->[$err->severity]} => $result);
293    
294 wakaba 1.18 return 1;
295     };
296    
297     my $time1 = time;
298     open my $fh, '<', \($input->{s});
299     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
300     ($fh => $dom, $onerror, charset => $input->{charset});
301     $time{parse_xml} = time - $time1;
302    
303     print STDOUT qq[</dl></div>];
304    
305     return ($doc, undef);
306     } # print_syntax_error_xml_section
307    
308 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
309     my ($input, $result) = @_;
310    
311     require Whatpm::CacheManifest;
312    
313     print STDOUT qq[
314     <div id="parse-errors" class="section">
315     <h2>Parse Errors</h2>
316    
317     <dl>];
318     push @nav, ['#parse-errors' => 'Parse Error'];
319    
320     my $onerror = sub {
321     my (%opt) = @_;
322     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
323     print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
324     $type =~ tr/ /-/;
325     $type =~ s/\|/%7C/g;
326     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
327     print STDOUT qq[<dd class="$cls">$msg</dd>\n];
328    
329     add_error ('syntax', \%opt => $result);
330     };
331    
332     my $time1 = time;
333     my $manifest = Whatpm::CacheManifest->parse_byte_string
334     ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
335     $time{parse_manifest} = time - $time1;
336    
337     print STDOUT qq[</dl></div>];
338    
339     return $manifest;
340     } # print_syntax_error_manifest_section
341    
342 wakaba 1.9 sub print_source_string_section ($$) {
343     require Encode;
344     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
345     return unless $enc;
346    
347     my $s = \($enc->decode (${$_[0]}));
348     my $i = 1;
349     push @nav, ['#source-string' => 'Source'];
350     print STDOUT qq[<div id="source-string" class="section">
351     <h2>Document Source</h2>
352     <ol lang="">\n];
353 wakaba 1.7 if (length $$s) {
354     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
355     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
356     $i++;
357     }
358     if ($$s =~ /\G([^\x0A]+)/gc) {
359     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
360     }
361     } else {
362     print STDOUT q[<li id="line-1"></li>];
363 wakaba 1.2 }
364 wakaba 1.9 print STDOUT "</ol></div>";
365     } # print_input_string_section
366 wakaba 1.2
367     sub print_document_tree ($) {
368 wakaba 1.1 my $node = shift;
369 wakaba 1.2 my $r = '<ol class="xoxo">';
370 wakaba 1.1
371 wakaba 1.2 my @node = ($node);
372 wakaba 1.1 while (@node) {
373     my $child = shift @node;
374 wakaba 1.2 unless (ref $child) {
375     $r .= $child;
376     next;
377     }
378    
379     my $node_id = 'node-'.refaddr $child;
380     my $nt = $child->node_type;
381     if ($nt == $child->ELEMENT_NODE) {
382 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
383     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
384 wakaba 1.2 '</code>'; ## ISSUE: case
385    
386     if ($child->has_attributes) {
387     $r .= '<ul class="attributes">';
388 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
389 wakaba 1.2 @{$child->attributes}) {
390 wakaba 1.4 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
391 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
392     }
393     $r .= '</ul>';
394     }
395    
396 wakaba 1.7 if ($child->has_child_nodes) {
397 wakaba 1.2 $r .= '<ol class="children">';
398 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
399     } else {
400     $r .= '</li>';
401 wakaba 1.2 }
402     } elsif ($nt == $child->TEXT_NODE) {
403 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
404 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
405 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
406 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
407 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
408 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
409 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
410 wakaba 1.7 $r .= qq[<ul class="attributes">];
411     $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
412     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
413 wakaba 1.9 unless ($child->manakai_is_html) {
414     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
415     if (defined $child->xml_encoding) {
416     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
417     } else {
418     $r .= qq[<li>XML encoding = (null)</li>];
419     }
420     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
421     }
422 wakaba 1.7 $r .= qq[</ul>];
423 wakaba 1.2 if ($child->has_child_nodes) {
424 wakaba 1.7 $r .= '<ol class="children">';
425 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
426 wakaba 1.1 }
427 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
428 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
429     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
430     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
431     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
432 wakaba 1.2 $r .= '</ul></li>';
433     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
434 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
435 wakaba 1.1 } else {
436 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
437 wakaba 1.1 }
438     }
439 wakaba 1.2
440     $r .= '</ol>';
441     print STDOUT $r;
442     } # print_document_tree
443 wakaba 1.1
444 wakaba 1.22 sub print_structure_dump_dom_section ($$) {
445 wakaba 1.18 my ($doc, $el) = @_;
446    
447     print STDOUT qq[
448     <div id="document-tree" class="section">
449     <h2>Document Tree</h2>
450     ];
451     push @nav, ['#document-tree' => 'Tree'];
452    
453     print_document_tree ($el || $doc);
454    
455     print STDOUT qq[</div>];
456 wakaba 1.22 } # print_structure_dump_dom_section
457    
458     sub print_structure_dump_manifest_section ($) {
459     my $manifest = shift;
460    
461     print STDOUT qq[
462     <div id="dump-manifest" class="section">
463     <h2>Cache Manifest</h2>
464     ];
465     push @nav, ['#dump-manifest' => 'Caceh Manifest'];
466    
467     print STDOUT qq[<dl><dt>Explicit entries</dt>];
468     for my $uri (@{$manifest->[0]}) {
469     my $euri = htescape ($uri);
470     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
471     }
472    
473     print STDOUT qq[<dt>Fallback entries</dt><dd>
474     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
475     <th scope=row>Fallback Entry</tr><tbody>];
476     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
477     my $euri = htescape ($uri);
478     my $euri2 = htescape ($manifest->[1]->{$uri});
479     print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
480     <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
481     }
482    
483     print STDOUT qq[</table><dt>Online whitelist</dt>];
484     for my $uri (@{$manifest->[2]}) {
485     my $euri = htescape ($uri);
486     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
487     }
488 wakaba 1.18
489 wakaba 1.22 print STDOUT qq[</dl></div>];
490     } # print_structure_dump_manifest_section
491    
492     sub print_structure_error_dom_section ($$$) {
493 wakaba 1.19 my ($doc, $el, $result) = @_;
494 wakaba 1.18
495     print STDOUT qq[<div id="document-errors" class="section">
496     <h2>Document Errors</h2>
497    
498     <dl>];
499     push @nav, ['#document-errors' => 'Document Error'];
500    
501     require Whatpm::ContentChecker;
502     my $onerror = sub {
503     my %opt = @_;
504     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
505     $type =~ tr/ /-/;
506     $type =~ s/\|/%7C/g;
507     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
508 wakaba 1.22 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
509 wakaba 1.18 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
510 wakaba 1.19 add_error ('structure', \%opt => $result);
511 wakaba 1.18 };
512    
513     my $elements;
514     my $time1 = time;
515     if ($el) {
516     $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
517     } else {
518     $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
519     }
520     $time{check} = time - $time1;
521    
522     print STDOUT qq[</dl></div>];
523    
524     return $elements;
525 wakaba 1.22 } # print_structure_error_dom_section
526    
527     sub print_structure_error_manifest_section ($$$) {
528     my ($manifest, $result) = @_;
529    
530     print STDOUT qq[<div id="document-errors" class="section">
531     <h2>Document Errors</h2>
532    
533     <dl>];
534     push @nav, ['#document-errors' => 'Document Error'];
535    
536     require Whatpm::CacheManifest;
537     Whatpm::CacheManifest->check_manifest ($manifest, sub {
538     my %opt = @_;
539     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
540     $type =~ tr/ /-/;
541     $type =~ s/\|/%7C/g;
542     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
543     print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
544     qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
545     add_error ('structure', \%opt => $result);
546     });
547    
548     print STDOUT qq[</div>];
549     } # print_structure_error_manifest_section
550 wakaba 1.18
551     sub print_table_section ($) {
552     my $tables = shift;
553    
554     push @nav, ['#tables' => 'Tables'];
555     print STDOUT qq[
556     <div id="tables" class="section">
557     <h2>Tables</h2>
558    
559     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
560     <script src="../table-script.js" type="text/javascript"></script>
561     <noscript>
562     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
563     </noscript>
564     ];
565    
566     require JSON;
567    
568     my $i = 0;
569     for my $table_el (@$tables) {
570     $i++;
571     print STDOUT qq[<div class="section" id="table-$i"><h3>] .
572     get_node_link ($table_el) . q[</h3>];
573    
574     ## TODO: Make |ContentChecker| return |form_table| result
575     ## so that this script don't have to run the algorithm twice.
576     my $table = Whatpm::HTMLTable->form_table ($table_el);
577    
578     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
579     next unless $_;
580     delete $_->{element};
581     }
582    
583     for (@{$table->{row_group}}) {
584     next unless $_;
585     next unless $_->{element};
586     $_->{type} = $_->{element}->manakai_local_name;
587     delete $_->{element};
588     }
589    
590     for (@{$table->{cell}}) {
591     next unless $_;
592     for (@{$_}) {
593     next unless $_;
594     for (@$_) {
595     $_->{id} = refaddr $_->{element} if defined $_->{element};
596     delete $_->{element};
597     $_->{is_header} = $_->{is_header} ? 1 : 0;
598     }
599     }
600     }
601    
602     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
603     print STDOUT JSON::objToJson ($table);
604     print STDOUT qq[, document.getElementById ('table-$i'));</script>];
605     }
606    
607     print STDOUT qq[</div>];
608     } # print_table_section
609    
610     sub print_id_section ($) {
611     my $ids = shift;
612    
613     push @nav, ['#identifiers' => 'IDs'];
614     print STDOUT qq[
615     <div id="identifiers" class="section">
616     <h2>Identifiers</h2>
617    
618     <dl>
619     ];
620     for my $id (sort {$a cmp $b} keys %$ids) {
621     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
622     for (@{$ids->{$id}}) {
623     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
624     }
625     }
626     print STDOUT qq[</dl></div>];
627     } # print_id_section
628    
629     sub print_term_section ($) {
630     my $terms = shift;
631    
632     push @nav, ['#terms' => 'Terms'];
633     print STDOUT qq[
634     <div id="terms" class="section">
635     <h2>Terms</h2>
636    
637     <dl>
638     ];
639     for my $term (sort {$a cmp $b} keys %$terms) {
640     print STDOUT qq[<dt>@{[htescape $term]}</dt>];
641     for (@{$terms->{$term}}) {
642     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
643     }
644     }
645     print STDOUT qq[</dl></div>];
646     } # print_term_section
647    
648     sub print_class_section ($) {
649     my $classes = shift;
650    
651     push @nav, ['#classes' => 'Classes'];
652     print STDOUT qq[
653     <div id="classes" class="section">
654     <h2>Classes</h2>
655    
656     <dl>
657     ];
658     for my $class (sort {$a cmp $b} keys %$classes) {
659     print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
660     for (@{$classes->{$class}}) {
661     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
662     }
663     }
664     print STDOUT qq[</dl></div>];
665     } # print_class_section
666    
667 wakaba 1.19 sub print_result_section ($) {
668     my $result = shift;
669    
670     print STDOUT qq[
671     <div id="result-summary" class="section">
672     <h2>Result</h2>];
673    
674 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
675 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
676     checker cannot decide whether the document is conforming or
677     not, since the document contains one or more unsupported
678 wakaba 1.21 features. The document might or might not be conforming.</p>];
679 wakaba 1.19 } elsif ($result->{conforming_min}) {
680     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
681     found in this document.</p>];
682     } elsif ($result->{conforming_max}) {
683     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
684     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
685     it might be conforming.</p>];
686     } else {
687     print STDOUT qq[<p class=FAIL id=result-para>This document is
688     <strong><em>non</em>-conforming</strong>.</p>];
689     }
690    
691     print STDOUT qq[<table>
692     <colgroup><col><colgroup><col><col><col><colgroup><col>
693     <thead>
694     <tr><th scope=col></th><th scope=col><em class=rfc2119>MUST</em>-level
695     Errors</th><th scope=col><em class=rfc2119>SHOULD</em>-level
696     Errors</th><th scope=col>Warnings</th><th scope=col>Score</th></tr>
697     </thead><tbody>];
698    
699     my $must_error = 0;
700     my $should_error = 0;
701     my $warning = 0;
702     my $score_min = 0;
703     my $score_max = 0;
704     my $score_base = 20;
705 wakaba 1.21 my $score_unit = $score_base / 100;
706 wakaba 1.19 for (
707     [Transfer => 'transfer', ''],
708     [Character => 'char', ''],
709     [Syntax => 'syntax', '#parse-errors'],
710     [Structure => 'structure', '#document-errors'],
711     ) {
712     $must_error += ($result->{$_->[1]}->{must} += 0);
713     $should_error += ($result->{$_->[1]}->{should} += 0);
714     $warning += ($result->{$_->[1]}->{warning} += 0);
715 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
716     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
717 wakaba 1.19
718     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
719     my $label = $_->[0];
720     if ($result->{$_->[1]}->{must} or
721     $result->{$_->[1]}->{should} or
722     $result->{$_->[1]}->{warning} or
723     $result->{$_->[1]}->{unsupported}) {
724     $label = qq[<a href="$_->[2]">$label</a>];
725     }
726    
727     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
728     if ($uncertain) {
729 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
730 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
731 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
732 wakaba 1.19 } else {
733 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
734 wakaba 1.19 }
735     }
736    
737     $score_max += $score_base;
738    
739     print STDOUT qq[
740     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
741     </tbody>
742 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
743     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
744     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
745     <td>$warning?</td>
746     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
747 wakaba 1.19 </table>
748    
749     <p><strong>Important</strong>: This conformance checking service
750     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
751     </div>];
752     push @nav, ['#result-summary' => 'Result'];
753     } # print_result_section
754    
755 wakaba 1.18 sub print_result_unknown_type_section ($) {
756     my $input = shift;
757    
758     print STDOUT qq[
759     <div id="result-summary" class="section">
760     <p><em>Media type <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code> is not supported!</em></p>
761     </div>
762     ];
763     push @nav, ['#result-summary' => 'Result'];
764     } # print_result_unknown_type_section
765    
766     sub print_result_input_error_section ($) {
767     my $input = shift;
768     print STDOUT qq[<div class="section" id="result-summary">
769     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
770     </div>];
771     push @nav, ['#result-summary' => 'Result'];
772     } # print_Result_input_error_section
773    
774 wakaba 1.22 sub get_error_label ($) {
775     my $err = shift;
776    
777     my $r = '';
778    
779     if (defined $err->{line}) {
780     if ($err->{column} > 0) {
781     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
782     } else {
783     $err->{line} = $err->{line} - 1 || 1;
784     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
785     }
786     }
787    
788     if (defined $err->{node}) {
789     $r .= ' ' if length $r;
790     $r = get_node_path ($err->{node});
791     }
792    
793     if (defined $err->{index}) {
794     $r .= ' ' if length $r;
795     $r .= 'Index ' . (0+$err->{index});
796     }
797    
798     if (defined $err->{value}) {
799     $r .= ' ' if length $r;
800     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
801     }
802    
803     return $r;
804     } # get_error_label
805    
806 wakaba 1.1 sub get_node_path ($) {
807     my $node = shift;
808     my @r;
809     while (defined $node) {
810     my $rs;
811     if ($node->node_type == 1) {
812     $rs = $node->manakai_local_name;
813     $node = $node->parent_node;
814     } elsif ($node->node_type == 2) {
815     $rs = '@' . $node->manakai_local_name;
816     $node = $node->owner_element;
817     } elsif ($node->node_type == 3) {
818     $rs = '"' . $node->data . '"';
819     $node = $node->parent_node;
820     } elsif ($node->node_type == 9) {
821 wakaba 1.9 @r = ('') unless @r;
822 wakaba 1.1 $rs = '';
823     $node = $node->parent_node;
824     } else {
825     $rs = '#' . $node->node_type;
826     $node = $node->parent_node;
827     }
828     unshift @r, $rs;
829     }
830     return join '/', @r;
831     } # get_node_path
832    
833 wakaba 1.6 sub get_node_link ($) {
834     return qq[<a href="#node-@{[refaddr $_[0]]}">] .
835     htescape (get_node_path ($_[0])) . qq[</a>];
836     } # get_node_link
837    
838 wakaba 1.7 {
839     my $Msg = {};
840    
841     sub load_text_catalog ($) {
842     my $lang = shift; # MUST be a canonical lang name
843     open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
844     while (<$file>) {
845     if (s/^([^;]+);([^;]*);//) {
846     my ($type, $cls, $msg) = ($1, $2, $_);
847     $msg =~ tr/\x0D\x0A//d;
848     $Msg->{$type} = [$cls, $msg];
849     }
850     }
851     } # load_text_catalog
852    
853     sub get_text ($) {
854 wakaba 1.15 my ($type, $level, $node) = @_;
855 wakaba 1.7 $type = $level . ':' . $type if defined $level;
856     my @arg;
857     {
858     if (defined $Msg->{$type}) {
859     my $msg = $Msg->{$type}->[1];
860 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
861     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
862     }ge;
863 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
864     UNIVERSAL::can ($node, 'get_attribute_ns')
865     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
866     }ge;
867     $msg =~ s{<var>{\@}</var>}{
868     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
869     }ge;
870 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
871     UNIVERSAL::can ($node, 'manakai_local_name')
872     ? htescape ($node->manakai_local_name) : ''
873     }ge;
874     $msg =~ s{<var>{element-local-name}</var>}{
875     (UNIVERSAL::can ($node, 'owner_element') and
876     $node->owner_element)
877     ? htescape ($node->owner_element->manakai_local_name)
878     : ''
879     }ge;
880 wakaba 1.11 return ($type, $Msg->{$type}->[0], $msg);
881 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
882     unshift @arg, $1;
883     redo;
884     }
885     }
886 wakaba 1.11 return ($type, '', htescape ($_[0]));
887 wakaba 1.7 } # get_text
888    
889     }
890    
891 wakaba 1.9 sub get_input_document ($$) {
892     my ($http, $dom) = @_;
893    
894 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
895 wakaba 1.9 my $r = {};
896     if (defined $request_uri and length $request_uri) {
897     my $uri = $dom->create_uri_reference ($request_uri);
898     unless ({
899     http => 1,
900     }->{lc $uri->uri_scheme}) {
901     return {uri => $request_uri, request_uri => $request_uri,
902     error_status_text => 'URI scheme not allowed'};
903     }
904    
905     require Message::Util::HostPermit;
906     my $host_permit = new Message::Util::HostPermit;
907     $host_permit->add_rule (<<EOH);
908     Allow host=suika port=80
909     Deny host=suika
910     Allow host=suika.fam.cx port=80
911     Deny host=suika.fam.cx
912     Deny host=localhost
913     Deny host=*.localdomain
914     Deny ipv4=0.0.0.0/8
915     Deny ipv4=10.0.0.0/8
916     Deny ipv4=127.0.0.0/8
917     Deny ipv4=169.254.0.0/16
918     Deny ipv4=172.0.0.0/11
919     Deny ipv4=192.0.2.0/24
920     Deny ipv4=192.88.99.0/24
921     Deny ipv4=192.168.0.0/16
922     Deny ipv4=198.18.0.0/15
923     Deny ipv4=224.0.0.0/4
924     Deny ipv4=255.255.255.255/32
925     Deny ipv6=0::0/0
926     Allow host=*
927     EOH
928     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
929     return {uri => $request_uri, request_uri => $request_uri,
930     error_status_text => 'Connection to the host is forbidden'};
931     }
932    
933     require LWP::UserAgent;
934     my $ua = WDCC::LWPUA->new;
935     $ua->{wdcc_dom} = $dom;
936     $ua->{wdcc_host_permit} = $host_permit;
937     $ua->agent ('Mozilla'); ## TODO: for now.
938     $ua->parse_head (0);
939     $ua->protocols_allowed ([qw/http/]);
940     $ua->max_size (1000_000);
941     my $req = HTTP::Request->new (GET => $request_uri);
942     my $res = $ua->request ($req);
943 wakaba 1.16 ## TODO: 401 sets |is_success| true.
944     if ($res->is_success or $http->get_parameter ('error-page')) {
945 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
946     $r->{uri} = $res->request->uri;
947     $r->{request_uri} = $request_uri;
948    
949     ## TODO: More strict parsing...
950     my $ct = $res->header ('Content-Type');
951     if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {
952     $r->{media_type} = lc $1;
953     }
954 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
955 wakaba 1.9 $r->{charset} = lc $1;
956     $r->{charset} =~ tr/\\//d;
957     }
958    
959 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
960 wakaba 1.9 if (defined $input_charset and length $input_charset) {
961     $r->{charset_overridden}
962     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
963     $r->{charset} = $input_charset;
964     }
965    
966     $r->{s} = ''.$res->content;
967     } else {
968     $r->{uri} = $res->request->uri;
969     $r->{request_uri} = $request_uri;
970     $r->{error_status_text} = $res->status_line;
971     }
972    
973     $r->{header_field} = [];
974     $res->scan (sub {
975     push @{$r->{header_field}}, [$_[0], $_[1]];
976     });
977     $r->{header_status_code} = $res->code;
978     $r->{header_status_text} = $res->message;
979     } else {
980 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
981 wakaba 1.9 $r->{uri} = q<thismessage:/>;
982     $r->{request_uri} = q<thismessage:/>;
983     $r->{base_uri} = q<thismessage:/>;
984 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
985 wakaba 1.9 $r->{charset} =~ s/\s+//g;
986     $r->{charset} = 'utf-8' if $r->{charset} eq '';
987     $r->{header_field} = [];
988     }
989    
990 wakaba 1.16 my $input_format = $http->get_parameter ('i');
991 wakaba 1.9 if (defined $input_format and length $input_format) {
992     $r->{media_type_overridden}
993     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
994     $r->{media_type} = $input_format;
995     }
996     if (defined $r->{s} and not defined $r->{media_type}) {
997     $r->{media_type} = 'text/html';
998     $r->{media_type_overridden} = 1;
999     }
1000    
1001     if ($r->{media_type} eq 'text/xml') {
1002     unless (defined $r->{charset}) {
1003     $r->{charset} = 'us-ascii';
1004     } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1005     $r->{charset_overridden} = 0;
1006     }
1007     }
1008    
1009     if (length $r->{s} > 1000_000) {
1010     $r->{error_status_text} = 'Entity-body too large';
1011     delete $r->{s};
1012     return $r;
1013     }
1014    
1015     return $r;
1016     } # get_input_document
1017    
1018     package WDCC::LWPUA;
1019     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1020    
1021     sub redirect_ok {
1022     my $ua = shift;
1023     unless ($ua->SUPER::redirect_ok (@_)) {
1024     return 0;
1025     }
1026    
1027     my $uris = $_[1]->header ('Location');
1028     return 0 unless $uris;
1029     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1030     unless ({
1031     http => 1,
1032     }->{lc $uri->uri_scheme}) {
1033     return 0;
1034     }
1035     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1036     return 0;
1037     }
1038     return 1;
1039     } # redirect_ok
1040    
1041 wakaba 1.1 =head1 AUTHOR
1042    
1043     Wakaba <w@suika.fam.cx>.
1044    
1045     =head1 LICENSE
1046    
1047     Copyright 2007 Wakaba <w@suika.fam.cx>
1048    
1049     This library is free software; you can redistribute it
1050     and/or modify it under the same terms as Perl itself.
1051    
1052     =cut
1053    
1054 wakaba 1.22 ## $Date: 2007/09/11 08:25:23 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24