/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.19 - (hide annotations) (download)
Mon Sep 10 11:51:09 2007 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.18: +137 -14 lines
++ ChangeLog	10 Sep 2007 11:50:30 -0000
2007-09-10  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi, cc-style.css: Scoring support.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4     use lib qw[/home/httpd/html/www/markup/html/whatpm
5 wakaba 1.16 /home/wakaba/work/manakai2/lib];
6 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
7 wakaba 1.2 use Scalar::Util qw[refaddr];
8 wakaba 1.16 use Time::HiRes qw/time/;
9 wakaba 1.1
10 wakaba 1.2 sub htescape ($) {
11     my $s = $_[0];
12     $s =~ s/&/&amp;/g;
13     $s =~ s/</&lt;/g;
14     $s =~ s/>/&gt;/g;
15     $s =~ s/"/&quot;/g;
16 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
17     sprintf '<var>U+%04X</var>', ord $1;
18     }ge;
19 wakaba 1.2 return $s;
20     } # htescape
21    
22 wakaba 1.16 use Message::CGI::HTTP;
23     my $http = Message::CGI::HTTP->new;
24 wakaba 1.1
25 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
26 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
27     exit;
28     }
29    
30 wakaba 1.12 binmode STDOUT, ':utf8';
31 wakaba 1.14 $| = 1;
32 wakaba 1.12
33 wakaba 1.9 require Message::DOM::DOMImplementation;
34     my $dom = Message::DOM::DOMImplementation->new;
35    
36 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
37    
38 wakaba 1.3 my @nav;
39 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
40    
41     <!DOCTYPE html>
42     <html lang="en">
43     <head>
44     <title>Web Document Conformance Checker (BETA)</title>
45 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
46 wakaba 1.2 </head>
47     <body>
48 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
49     (<em>beta</em>)</h1>
50 wakaba 1.14 ];
51 wakaba 1.2
52 wakaba 1.14 $| = 0;
53     my $input = get_input_document ($http, $dom);
54 wakaba 1.16 my $inner_html_element = $http->get_parameter ('e');
55     my $char_length = 0;
56     my %time;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.9 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>
65 wakaba 1.2 ]; # no </dl> yet
66 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
67 wakaba 1.1
68 wakaba 1.9 if (defined $input->{s}) {
69 wakaba 1.16 $char_length = length $input->{s};
70 wakaba 1.9
71     print STDOUT qq[
72     <dt>Base URI</dt>
73     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
74     <dt>Internet Media Type</dt>
75     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
76     @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>
77     <dt>Character Encoding</dt>
78     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
79     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
80 wakaba 1.16 <dt>Length</dt>
81     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
82 wakaba 1.9 </dl>
83     </div>
84     ];
85    
86 wakaba 1.19 my $result = {};
87     print_http_header_section ($input, $result);
88 wakaba 1.9
89 wakaba 1.1 my $doc;
90     my $el;
91    
92 wakaba 1.9 if ($input->{media_type} eq 'text/html') {
93 wakaba 1.19 ($doc, $el) = print_syntax_error_html_section ($input, $result);
94 wakaba 1.9 print_source_string_section (\($input->{s}), $input->{charset});
95     } elsif ({
96     'text/xml' => 1,
97 wakaba 1.16 'application/atom+xml' => 1,
98     'application/rss+xml' => 1,
99     'application/svg+xml' => 1,
100 wakaba 1.9 'application/xhtml+xml' => 1,
101     'application/xml' => 1,
102     }->{$input->{media_type}}) {
103 wakaba 1.19 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
104 wakaba 1.9 print_source_string_section (\($input->{s}), $doc->input_encoding);
105 wakaba 1.2 } else {
106 wakaba 1.9 ## TODO: Change HTTP status code??
107 wakaba 1.18 print_result_unknown_type_section ($input);
108 wakaba 1.2 }
109    
110     if (defined $doc or defined $el) {
111 wakaba 1.18 print_structure_dump_section ($doc, $el);
112 wakaba 1.19 my $elements = print_structure_error_section ($doc, $el, $result);
113 wakaba 1.18 print_table_section ($elements->{table}) if @{$elements->{table}};
114     print_id_section ($elements->{id}) if keys %{$elements->{id}};
115     print_term_section ($elements->{term}) if keys %{$elements->{term}};
116     print_class_section ($elements->{class}) if keys %{$elements->{class}};
117 wakaba 1.1 }
118    
119 wakaba 1.19 print_result_section ($result);
120 wakaba 1.9 } else {
121 wakaba 1.18 print STDOUT qq[</dl></div>];
122     print_result_input_error_section ($input);
123 wakaba 1.9 }
124 wakaba 1.3
125 wakaba 1.2 print STDOUT qq[
126 wakaba 1.3 <ul class="navigation" id="nav-items">
127     ];
128     for (@nav) {
129     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
130     }
131     print STDOUT qq[
132     </ul>
133 wakaba 1.2 </body>
134     </html>
135     ];
136 wakaba 1.1
137 wakaba 1.16 for (qw/decode parse parse_xml check/) {
138     next unless defined $time{$_};
139     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
140     print $file $char_length, "\t", $time{$_}, "\n";
141     }
142    
143 wakaba 1.1 exit;
144    
145 wakaba 1.19 sub add_error ($$$) {
146     my ($layer, $err, $result) = @_;
147     if (defined $err->{level}) {
148     if ($err->{level} eq 's') {
149     $result->{$layer}->{should}++;
150     $result->{$layer}->{score_min} -= 2;
151     $result->{conforming_min} = 0;
152     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
153     $result->{$layer}->{warning}++;
154     } elsif ($err->{level} eq 'unsupported') {
155     $result->{$layer}->{unsupported}++;
156     $result->{unsupported} = 1;
157     } else {
158     $result->{$layer}->{must}++;
159     $result->{$layer}->{score_max} -= 2;
160     $result->{$layer}->{score_min} -= 2;
161     $result->{conforming_min} = 0;
162     $result->{conforming_max} = 0;
163     }
164     } else {
165     $result->{$layer}->{must}++;
166     $result->{$layer}->{score_max} -= 2;
167     $result->{$layer}->{score_min} -= 2;
168     $result->{conforming_min} = 0;
169     $result->{conforming_max} = 0;
170     }
171     } # add_error
172    
173     sub print_http_header_section ($$) {
174     my ($input, $result) = @_;
175 wakaba 1.9 return unless defined $input->{header_status_code} or
176     defined $input->{header_status_text} or
177     @{$input->{header_field}};
178    
179     push @nav, ['#source-header' => 'HTTP Header'];
180     print STDOUT qq[<div id="source-header" class="section">
181     <h2>HTTP Header</h2>
182    
183     <p><strong>Note</strong>: Due to the limitation of the
184     network library in use, the content of this section might
185     not be the real header.</p>
186    
187     <table><tbody>
188     ];
189    
190     if (defined $input->{header_status_code}) {
191     print STDOUT qq[<tr><th scope="row">Status code</th>];
192     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
193     }
194     if (defined $input->{header_status_text}) {
195     print STDOUT qq[<tr><th scope="row">Status text</th>];
196     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
197     }
198    
199     for (@{$input->{header_field}}) {
200     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
201     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
202     }
203    
204     print STDOUT qq[</tbody></table></div>];
205     } # print_http_header_section
206    
207 wakaba 1.19 sub print_syntax_error_html_section ($$) {
208     my ($input, $result) = @_;
209 wakaba 1.18
210     require Encode;
211     require Whatpm::HTML;
212    
213     $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
214    
215     my $time1 = time;
216     my $t = Encode::decode ($input->{charset}, $input->{s});
217     $time{decode} = time - $time1;
218    
219     print STDOUT qq[
220     <div id="parse-errors" class="section">
221     <h2>Parse Errors</h2>
222    
223     <dl>];
224     push @nav, ['#parse-errors' => 'Parse Error'];
225    
226     my $onerror = sub {
227     my (%opt) = @_;
228     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
229     if ($opt{column} > 0) {
230     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
231     } else {
232     $opt{line} = $opt{line} - 1 || 1;
233     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
234     }
235     $type =~ tr/ /-/;
236     $type =~ s/\|/%7C/g;
237     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
238     print STDOUT qq[<dd class="$cls">$msg</dd>\n];
239 wakaba 1.19
240     add_error ('syntax', \%opt => $result);
241 wakaba 1.18 };
242    
243     my $doc = $dom->create_document;
244     my $el;
245     $time1 = time;
246     if (defined $inner_html_element and length $inner_html_element) {
247     $el = $doc->create_element_ns
248     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
249     Whatpm::HTML->set_inner_html ($el, $t, $onerror);
250     } else {
251     Whatpm::HTML->parse_string ($t => $doc, $onerror);
252     }
253     $time{parse} = time - $time1;
254    
255     print STDOUT qq[</dl></div>];
256    
257     return ($doc, $el);
258     } # print_syntax_error_html_section
259    
260 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
261     my ($input, $result) = @_;
262 wakaba 1.18
263     require Message::DOM::XMLParserTemp;
264    
265     print STDOUT qq[
266     <div id="parse-errors" class="section">
267     <h2>Parse Errors</h2>
268    
269     <dl>];
270     push @nav, ['#parse-errors' => 'Parse Error'];
271    
272     my $onerror = sub {
273     my $err = shift;
274     my $line = $err->location->line_number;
275     print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
276     print STDOUT $err->location->column_number, "</dt><dd>";
277     print STDOUT htescape $err->text, "</dd>\n";
278 wakaba 1.19
279     add_error ('syntax', {type => $err->text,
280     level => [
281     $err->SEVERITY_FATAL_ERROR => 'm',
282     $err->SEVERITY_ERROR => 'm',
283     $err->SEVERITY_WARNING => 's',
284     ]->[$err->severity]} => $result);
285    
286 wakaba 1.18 return 1;
287     };
288    
289     my $time1 = time;
290     open my $fh, '<', \($input->{s});
291     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
292     ($fh => $dom, $onerror, charset => $input->{charset});
293     $time{parse_xml} = time - $time1;
294    
295     print STDOUT qq[</dl></div>];
296    
297     return ($doc, undef);
298     } # print_syntax_error_xml_section
299    
300 wakaba 1.9 sub print_source_string_section ($$) {
301     require Encode;
302     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
303     return unless $enc;
304    
305     my $s = \($enc->decode (${$_[0]}));
306     my $i = 1;
307     push @nav, ['#source-string' => 'Source'];
308     print STDOUT qq[<div id="source-string" class="section">
309     <h2>Document Source</h2>
310     <ol lang="">\n];
311 wakaba 1.7 if (length $$s) {
312     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
313     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
314     $i++;
315     }
316     if ($$s =~ /\G([^\x0A]+)/gc) {
317     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
318     }
319     } else {
320     print STDOUT q[<li id="line-1"></li>];
321 wakaba 1.2 }
322 wakaba 1.9 print STDOUT "</ol></div>";
323     } # print_input_string_section
324 wakaba 1.2
325     sub print_document_tree ($) {
326 wakaba 1.1 my $node = shift;
327 wakaba 1.2 my $r = '<ol class="xoxo">';
328 wakaba 1.1
329 wakaba 1.2 my @node = ($node);
330 wakaba 1.1 while (@node) {
331     my $child = shift @node;
332 wakaba 1.2 unless (ref $child) {
333     $r .= $child;
334     next;
335     }
336    
337     my $node_id = 'node-'.refaddr $child;
338     my $nt = $child->node_type;
339     if ($nt == $child->ELEMENT_NODE) {
340 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
341     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
342 wakaba 1.2 '</code>'; ## ISSUE: case
343    
344     if ($child->has_attributes) {
345     $r .= '<ul class="attributes">';
346 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
347 wakaba 1.2 @{$child->attributes}) {
348 wakaba 1.4 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
349 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
350     }
351     $r .= '</ul>';
352     }
353    
354 wakaba 1.7 if ($child->has_child_nodes) {
355 wakaba 1.2 $r .= '<ol class="children">';
356 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
357     } else {
358     $r .= '</li>';
359 wakaba 1.2 }
360     } elsif ($nt == $child->TEXT_NODE) {
361 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
362 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
363 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
364 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
365 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
366 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
367 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
368 wakaba 1.7 $r .= qq[<ul class="attributes">];
369     $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
370     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
371 wakaba 1.9 unless ($child->manakai_is_html) {
372     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
373     if (defined $child->xml_encoding) {
374     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
375     } else {
376     $r .= qq[<li>XML encoding = (null)</li>];
377     }
378     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
379     }
380 wakaba 1.7 $r .= qq[</ul>];
381 wakaba 1.2 if ($child->has_child_nodes) {
382 wakaba 1.7 $r .= '<ol class="children">';
383 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
384 wakaba 1.1 }
385 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
386 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
387     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
388     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
389     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
390 wakaba 1.2 $r .= '</ul></li>';
391     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
392 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
393 wakaba 1.1 } else {
394 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
395 wakaba 1.1 }
396     }
397 wakaba 1.2
398     $r .= '</ol>';
399     print STDOUT $r;
400     } # print_document_tree
401 wakaba 1.1
402 wakaba 1.18 sub print_structure_dump_section ($$) {
403     my ($doc, $el) = @_;
404    
405     print STDOUT qq[
406     <div id="document-tree" class="section">
407     <h2>Document Tree</h2>
408     ];
409     push @nav, ['#document-tree' => 'Tree'];
410    
411     print_document_tree ($el || $doc);
412    
413     print STDOUT qq[</div>];
414     } # print_structure_dump_section
415    
416 wakaba 1.19 sub print_structure_error_section ($$$) {
417     my ($doc, $el, $result) = @_;
418 wakaba 1.18
419     print STDOUT qq[<div id="document-errors" class="section">
420     <h2>Document Errors</h2>
421    
422     <dl>];
423     push @nav, ['#document-errors' => 'Document Error'];
424    
425     require Whatpm::ContentChecker;
426     my $onerror = sub {
427     my %opt = @_;
428     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
429     $type =~ tr/ /-/;
430     $type =~ s/\|/%7C/g;
431     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
432     print STDOUT qq[<dt class="$cls">] . get_node_link ($opt{node}) .
433     qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
434 wakaba 1.19 add_error ('structure', \%opt => $result);
435 wakaba 1.18 };
436    
437     my $elements;
438     my $time1 = time;
439     if ($el) {
440     $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
441     } else {
442     $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
443     }
444     $time{check} = time - $time1;
445    
446     print STDOUT qq[</dl></div>];
447    
448     return $elements;
449     } # print_structure_error_section
450    
451     sub print_table_section ($) {
452     my $tables = shift;
453    
454     push @nav, ['#tables' => 'Tables'];
455     print STDOUT qq[
456     <div id="tables" class="section">
457     <h2>Tables</h2>
458    
459     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
460     <script src="../table-script.js" type="text/javascript"></script>
461     <noscript>
462     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
463     </noscript>
464     ];
465    
466     require JSON;
467    
468     my $i = 0;
469     for my $table_el (@$tables) {
470     $i++;
471     print STDOUT qq[<div class="section" id="table-$i"><h3>] .
472     get_node_link ($table_el) . q[</h3>];
473    
474     ## TODO: Make |ContentChecker| return |form_table| result
475     ## so that this script don't have to run the algorithm twice.
476     my $table = Whatpm::HTMLTable->form_table ($table_el);
477    
478     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
479     next unless $_;
480     delete $_->{element};
481     }
482    
483     for (@{$table->{row_group}}) {
484     next unless $_;
485     next unless $_->{element};
486     $_->{type} = $_->{element}->manakai_local_name;
487     delete $_->{element};
488     }
489    
490     for (@{$table->{cell}}) {
491     next unless $_;
492     for (@{$_}) {
493     next unless $_;
494     for (@$_) {
495     $_->{id} = refaddr $_->{element} if defined $_->{element};
496     delete $_->{element};
497     $_->{is_header} = $_->{is_header} ? 1 : 0;
498     }
499     }
500     }
501    
502     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
503     print STDOUT JSON::objToJson ($table);
504     print STDOUT qq[, document.getElementById ('table-$i'));</script>];
505     }
506    
507     print STDOUT qq[</div>];
508     } # print_table_section
509    
510     sub print_id_section ($) {
511     my $ids = shift;
512    
513     push @nav, ['#identifiers' => 'IDs'];
514     print STDOUT qq[
515     <div id="identifiers" class="section">
516     <h2>Identifiers</h2>
517    
518     <dl>
519     ];
520     for my $id (sort {$a cmp $b} keys %$ids) {
521     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
522     for (@{$ids->{$id}}) {
523     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
524     }
525     }
526     print STDOUT qq[</dl></div>];
527     } # print_id_section
528    
529     sub print_term_section ($) {
530     my $terms = shift;
531    
532     push @nav, ['#terms' => 'Terms'];
533     print STDOUT qq[
534     <div id="terms" class="section">
535     <h2>Terms</h2>
536    
537     <dl>
538     ];
539     for my $term (sort {$a cmp $b} keys %$terms) {
540     print STDOUT qq[<dt>@{[htescape $term]}</dt>];
541     for (@{$terms->{$term}}) {
542     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
543     }
544     }
545     print STDOUT qq[</dl></div>];
546     } # print_term_section
547    
548     sub print_class_section ($) {
549     my $classes = shift;
550    
551     push @nav, ['#classes' => 'Classes'];
552     print STDOUT qq[
553     <div id="classes" class="section">
554     <h2>Classes</h2>
555    
556     <dl>
557     ];
558     for my $class (sort {$a cmp $b} keys %$classes) {
559     print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
560     for (@{$classes->{$class}}) {
561     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
562     }
563     }
564     print STDOUT qq[</dl></div>];
565     } # print_class_section
566    
567 wakaba 1.19 sub print_result_section ($) {
568     my $result = shift;
569    
570     print STDOUT qq[
571     <div id="result-summary" class="section">
572     <h2>Result</h2>];
573    
574     if ($result->{unsupported}) {
575     print STDOUT qq[<p class=uncertain id=result-para>The conformance
576     checker cannot decide whether the document is conforming or
577     not, since the document contains one or more unsupported
578     features.</p>];
579     } elsif ($result->{conforming_min}) {
580     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
581     found in this document.</p>];
582     } elsif ($result->{conforming_max}) {
583     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
584     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
585     it might be conforming.</p>];
586     } else {
587     print STDOUT qq[<p class=FAIL id=result-para>This document is
588     <strong><em>non</em>-conforming</strong>.</p>];
589     }
590    
591     print STDOUT qq[<table>
592     <colgroup><col><colgroup><col><col><col><colgroup><col>
593     <thead>
594     <tr><th scope=col></th><th scope=col><em class=rfc2119>MUST</em>-level
595     Errors</th><th scope=col><em class=rfc2119>SHOULD</em>-level
596     Errors</th><th scope=col>Warnings</th><th scope=col>Score</th></tr>
597     </thead><tbody>];
598    
599     my $must_error = 0;
600     my $should_error = 0;
601     my $warning = 0;
602     my $score_min = 0;
603     my $score_max = 0;
604     my $score_base = 20;
605     for (
606     [Transfer => 'transfer', ''],
607     [Character => 'char', ''],
608     [Syntax => 'syntax', '#parse-errors'],
609     [Structure => 'structure', '#document-errors'],
610     ) {
611     $must_error += ($result->{$_->[1]}->{must} += 0);
612     $should_error += ($result->{$_->[1]}->{should} += 0);
613     $warning += ($result->{$_->[1]}->{warning} += 0);
614     $score_min += ($result->{$_->[1]}->{score_min} += $score_base);
615     $score_max += ($result->{$_->[1]}->{score_max} += $score_base);
616    
617     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
618     my $label = $_->[0];
619     if ($result->{$_->[1]}->{must} or
620     $result->{$_->[1]}->{should} or
621     $result->{$_->[1]}->{warning} or
622     $result->{$_->[1]}->{unsupported}) {
623     $label = qq[<a href="$_->[2]">$label</a>];
624     }
625    
626     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
627     if ($uncertain) {
628     print qq[<td class="@{[$score_max < $score_base ? $score_min < $score_max ? 'FAIL' : 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
629     } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
630     print qq[<td class="@{[$score_max < $score_base ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max} + $score_base</td></tr>];
631     } else {
632     print qq[<td class="@{[$score_max < $score_base ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
633     }
634     }
635    
636     $score_max += $score_base;
637    
638     print STDOUT qq[
639     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
640     </tbody>
641     <tfoot><tr class=uncertain><th scope=row>Total</th><td>$must_error?</td><td>$should_error?</td><td>$warning?</td><td><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
642     </table>
643    
644     <p><strong>Important</strong>: This conformance checking service
645     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
646     </div>];
647     push @nav, ['#result-summary' => 'Result'];
648     } # print_result_section
649    
650 wakaba 1.18 sub print_result_unknown_type_section ($) {
651     my $input = shift;
652    
653     print STDOUT qq[
654     <div id="result-summary" class="section">
655     <p><em>Media type <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code> is not supported!</em></p>
656     </div>
657     ];
658     push @nav, ['#result-summary' => 'Result'];
659     } # print_result_unknown_type_section
660    
661     sub print_result_input_error_section ($) {
662     my $input = shift;
663     print STDOUT qq[<div class="section" id="result-summary">
664     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
665     </div>];
666     push @nav, ['#result-summary' => 'Result'];
667     } # print_Result_input_error_section
668    
669 wakaba 1.1 sub get_node_path ($) {
670     my $node = shift;
671     my @r;
672     while (defined $node) {
673     my $rs;
674     if ($node->node_type == 1) {
675     $rs = $node->manakai_local_name;
676     $node = $node->parent_node;
677     } elsif ($node->node_type == 2) {
678     $rs = '@' . $node->manakai_local_name;
679     $node = $node->owner_element;
680     } elsif ($node->node_type == 3) {
681     $rs = '"' . $node->data . '"';
682     $node = $node->parent_node;
683     } elsif ($node->node_type == 9) {
684 wakaba 1.9 @r = ('') unless @r;
685 wakaba 1.1 $rs = '';
686     $node = $node->parent_node;
687     } else {
688     $rs = '#' . $node->node_type;
689     $node = $node->parent_node;
690     }
691     unshift @r, $rs;
692     }
693     return join '/', @r;
694     } # get_node_path
695    
696 wakaba 1.6 sub get_node_link ($) {
697     return qq[<a href="#node-@{[refaddr $_[0]]}">] .
698     htescape (get_node_path ($_[0])) . qq[</a>];
699     } # get_node_link
700    
701 wakaba 1.7 {
702     my $Msg = {};
703    
704     sub load_text_catalog ($) {
705     my $lang = shift; # MUST be a canonical lang name
706     open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
707     while (<$file>) {
708     if (s/^([^;]+);([^;]*);//) {
709     my ($type, $cls, $msg) = ($1, $2, $_);
710     $msg =~ tr/\x0D\x0A//d;
711     $Msg->{$type} = [$cls, $msg];
712     }
713     }
714     } # load_text_catalog
715    
716     sub get_text ($) {
717 wakaba 1.15 my ($type, $level, $node) = @_;
718 wakaba 1.7 $type = $level . ':' . $type if defined $level;
719     my @arg;
720     {
721     if (defined $Msg->{$type}) {
722     my $msg = $Msg->{$type}->[1];
723 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
724     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
725     }ge;
726 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
727     UNIVERSAL::can ($node, 'get_attribute_ns')
728     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
729     }ge;
730     $msg =~ s{<var>{\@}</var>}{
731     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
732     }ge;
733 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
734     UNIVERSAL::can ($node, 'manakai_local_name')
735     ? htescape ($node->manakai_local_name) : ''
736     }ge;
737     $msg =~ s{<var>{element-local-name}</var>}{
738     (UNIVERSAL::can ($node, 'owner_element') and
739     $node->owner_element)
740     ? htescape ($node->owner_element->manakai_local_name)
741     : ''
742     }ge;
743 wakaba 1.11 return ($type, $Msg->{$type}->[0], $msg);
744 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
745     unshift @arg, $1;
746     redo;
747     }
748     }
749 wakaba 1.11 return ($type, '', htescape ($_[0]));
750 wakaba 1.7 } # get_text
751    
752     }
753    
754 wakaba 1.9 sub get_input_document ($$) {
755     my ($http, $dom) = @_;
756    
757 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
758 wakaba 1.9 my $r = {};
759     if (defined $request_uri and length $request_uri) {
760     my $uri = $dom->create_uri_reference ($request_uri);
761     unless ({
762     http => 1,
763     }->{lc $uri->uri_scheme}) {
764     return {uri => $request_uri, request_uri => $request_uri,
765     error_status_text => 'URI scheme not allowed'};
766     }
767    
768     require Message::Util::HostPermit;
769     my $host_permit = new Message::Util::HostPermit;
770     $host_permit->add_rule (<<EOH);
771     Allow host=suika port=80
772     Deny host=suika
773     Allow host=suika.fam.cx port=80
774     Deny host=suika.fam.cx
775     Deny host=localhost
776     Deny host=*.localdomain
777     Deny ipv4=0.0.0.0/8
778     Deny ipv4=10.0.0.0/8
779     Deny ipv4=127.0.0.0/8
780     Deny ipv4=169.254.0.0/16
781     Deny ipv4=172.0.0.0/11
782     Deny ipv4=192.0.2.0/24
783     Deny ipv4=192.88.99.0/24
784     Deny ipv4=192.168.0.0/16
785     Deny ipv4=198.18.0.0/15
786     Deny ipv4=224.0.0.0/4
787     Deny ipv4=255.255.255.255/32
788     Deny ipv6=0::0/0
789     Allow host=*
790     EOH
791     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
792     return {uri => $request_uri, request_uri => $request_uri,
793     error_status_text => 'Connection to the host is forbidden'};
794     }
795    
796     require LWP::UserAgent;
797     my $ua = WDCC::LWPUA->new;
798     $ua->{wdcc_dom} = $dom;
799     $ua->{wdcc_host_permit} = $host_permit;
800     $ua->agent ('Mozilla'); ## TODO: for now.
801     $ua->parse_head (0);
802     $ua->protocols_allowed ([qw/http/]);
803     $ua->max_size (1000_000);
804     my $req = HTTP::Request->new (GET => $request_uri);
805     my $res = $ua->request ($req);
806 wakaba 1.16 ## TODO: 401 sets |is_success| true.
807     if ($res->is_success or $http->get_parameter ('error-page')) {
808 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
809     $r->{uri} = $res->request->uri;
810     $r->{request_uri} = $request_uri;
811    
812     ## TODO: More strict parsing...
813     my $ct = $res->header ('Content-Type');
814     if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {
815     $r->{media_type} = lc $1;
816     }
817     if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?(\S+)"?/i) {
818     $r->{charset} = lc $1;
819     $r->{charset} =~ tr/\\//d;
820     }
821    
822 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
823 wakaba 1.9 if (defined $input_charset and length $input_charset) {
824     $r->{charset_overridden}
825     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
826     $r->{charset} = $input_charset;
827     }
828    
829     $r->{s} = ''.$res->content;
830     } else {
831     $r->{uri} = $res->request->uri;
832     $r->{request_uri} = $request_uri;
833     $r->{error_status_text} = $res->status_line;
834     }
835    
836     $r->{header_field} = [];
837     $res->scan (sub {
838     push @{$r->{header_field}}, [$_[0], $_[1]];
839     });
840     $r->{header_status_code} = $res->code;
841     $r->{header_status_text} = $res->message;
842     } else {
843 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
844 wakaba 1.9 $r->{uri} = q<thismessage:/>;
845     $r->{request_uri} = q<thismessage:/>;
846     $r->{base_uri} = q<thismessage:/>;
847 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
848 wakaba 1.9 $r->{charset} =~ s/\s+//g;
849     $r->{charset} = 'utf-8' if $r->{charset} eq '';
850     $r->{header_field} = [];
851     }
852    
853 wakaba 1.16 my $input_format = $http->get_parameter ('i');
854 wakaba 1.9 if (defined $input_format and length $input_format) {
855     $r->{media_type_overridden}
856     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
857     $r->{media_type} = $input_format;
858     }
859     if (defined $r->{s} and not defined $r->{media_type}) {
860     $r->{media_type} = 'text/html';
861     $r->{media_type_overridden} = 1;
862     }
863    
864     if ($r->{media_type} eq 'text/xml') {
865     unless (defined $r->{charset}) {
866     $r->{charset} = 'us-ascii';
867     } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
868     $r->{charset_overridden} = 0;
869     }
870     }
871    
872     if (length $r->{s} > 1000_000) {
873     $r->{error_status_text} = 'Entity-body too large';
874     delete $r->{s};
875     return $r;
876     }
877    
878     return $r;
879     } # get_input_document
880    
881     package WDCC::LWPUA;
882     BEGIN { push our @ISA, 'LWP::UserAgent'; }
883    
884     sub redirect_ok {
885     my $ua = shift;
886     unless ($ua->SUPER::redirect_ok (@_)) {
887     return 0;
888     }
889    
890     my $uris = $_[1]->header ('Location');
891     return 0 unless $uris;
892     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
893     unless ({
894     http => 1,
895     }->{lc $uri->uri_scheme}) {
896     return 0;
897     }
898     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
899     return 0;
900     }
901     return 1;
902     } # redirect_ok
903    
904 wakaba 1.1 =head1 AUTHOR
905    
906     Wakaba <w@suika.fam.cx>.
907    
908     =head1 LICENSE
909    
910     Copyright 2007 Wakaba <w@suika.fam.cx>
911    
912     This library is free software; you can redistribute it
913     and/or modify it under the same terms as Perl itself.
914    
915     =cut
916    
917 wakaba 1.19 ## $Date: 2007/09/02 08:40:49 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24