/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.21 - (hide annotations) (download)
Tue Sep 11 08:25:23 2007 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.20: +14 -9 lines
Scoring bug fixes

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4     use lib qw[/home/httpd/html/www/markup/html/whatpm
5 wakaba 1.16 /home/wakaba/work/manakai2/lib];
6 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
7 wakaba 1.2 use Scalar::Util qw[refaddr];
8 wakaba 1.16 use Time::HiRes qw/time/;
9 wakaba 1.1
10 wakaba 1.2 sub htescape ($) {
11     my $s = $_[0];
12     $s =~ s/&/&/g;
13     $s =~ s/</&lt;/g;
14     $s =~ s/>/&gt;/g;
15     $s =~ s/"/&quot;/g;
16 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
17     sprintf '<var>U+%04X</var>', ord $1;
18     }ge;
19 wakaba 1.2 return $s;
20     } # htescape
21    
22 wakaba 1.16 use Message::CGI::HTTP;
23     my $http = Message::CGI::HTTP->new;
24 wakaba 1.1
25 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
26 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
27     exit;
28     }
29    
30 wakaba 1.12 binmode STDOUT, ':utf8';
31 wakaba 1.14 $| = 1;
32 wakaba 1.12
33 wakaba 1.9 require Message::DOM::DOMImplementation;
34     my $dom = Message::DOM::DOMImplementation->new;
35    
36 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
37    
38 wakaba 1.3 my @nav;
39 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
40    
41     <!DOCTYPE html>
42     <html lang="en">
43     <head>
44     <title>Web Document Conformance Checker (BETA)</title>
45 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
46 wakaba 1.2 </head>
47     <body>
48 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
49     (<em>beta</em>)</h1>
50 wakaba 1.14 ];
51 wakaba 1.2
52 wakaba 1.14 $| = 0;
53     my $input = get_input_document ($http, $dom);
54 wakaba 1.16 my $inner_html_element = $http->get_parameter ('e');
55     my $char_length = 0;
56     my %time;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.9 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>
65 wakaba 1.2 ]; # no </dl> yet
66 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
67 wakaba 1.1
68 wakaba 1.9 if (defined $input->{s}) {
69 wakaba 1.16 $char_length = length $input->{s};
70 wakaba 1.9
71     print STDOUT qq[
72     <dt>Base URI</dt>
73     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
74     <dt>Internet Media Type</dt>
75     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
76     @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>
77     <dt>Character Encoding</dt>
78     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
79     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
80 wakaba 1.16 <dt>Length</dt>
81     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
82 wakaba 1.9 </dl>
83     </div>
84     ];
85    
86 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
87 wakaba 1.19 print_http_header_section ($input, $result);
88 wakaba 1.9
89 wakaba 1.1 my $doc;
90     my $el;
91    
92 wakaba 1.9 if ($input->{media_type} eq 'text/html') {
93 wakaba 1.19 ($doc, $el) = print_syntax_error_html_section ($input, $result);
94 wakaba 1.9 print_source_string_section (\($input->{s}), $input->{charset});
95     } elsif ({
96     'text/xml' => 1,
97 wakaba 1.16 'application/atom+xml' => 1,
98     'application/rss+xml' => 1,
99     'application/svg+xml' => 1,
100 wakaba 1.9 'application/xhtml+xml' => 1,
101     'application/xml' => 1,
102     }->{$input->{media_type}}) {
103 wakaba 1.19 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
104 wakaba 1.9 print_source_string_section (\($input->{s}), $doc->input_encoding);
105 wakaba 1.2 } else {
106 wakaba 1.9 ## TODO: Change HTTP status code??
107 wakaba 1.18 print_result_unknown_type_section ($input);
108 wakaba 1.2 }
109    
110     if (defined $doc or defined $el) {
111 wakaba 1.18 print_structure_dump_section ($doc, $el);
112 wakaba 1.19 my $elements = print_structure_error_section ($doc, $el, $result);
113 wakaba 1.18 print_table_section ($elements->{table}) if @{$elements->{table}};
114     print_id_section ($elements->{id}) if keys %{$elements->{id}};
115     print_term_section ($elements->{term}) if keys %{$elements->{term}};
116     print_class_section ($elements->{class}) if keys %{$elements->{class}};
117 wakaba 1.1 }
118    
119 wakaba 1.19 print_result_section ($result);
120 wakaba 1.9 } else {
121 wakaba 1.18 print STDOUT qq[</dl></div>];
122     print_result_input_error_section ($input);
123 wakaba 1.9 }
124 wakaba 1.3
125 wakaba 1.2 print STDOUT qq[
126 wakaba 1.3 <ul class="navigation" id="nav-items">
127     ];
128     for (@nav) {
129     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
130     }
131     print STDOUT qq[
132     </ul>
133 wakaba 1.2 </body>
134     </html>
135     ];
136 wakaba 1.1
137 wakaba 1.16 for (qw/decode parse parse_xml check/) {
138     next unless defined $time{$_};
139     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
140     print $file $char_length, "\t", $time{$_}, "\n";
141     }
142    
143 wakaba 1.1 exit;
144    
145 wakaba 1.19 sub add_error ($$$) {
146     my ($layer, $err, $result) = @_;
147     if (defined $err->{level}) {
148     if ($err->{level} eq 's') {
149     $result->{$layer}->{should}++;
150     $result->{$layer}->{score_min} -= 2;
151     $result->{conforming_min} = 0;
152     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
153     $result->{$layer}->{warning}++;
154     } elsif ($err->{level} eq 'unsupported') {
155     $result->{$layer}->{unsupported}++;
156     $result->{unsupported} = 1;
157     } else {
158     $result->{$layer}->{must}++;
159     $result->{$layer}->{score_max} -= 2;
160     $result->{$layer}->{score_min} -= 2;
161     $result->{conforming_min} = 0;
162     $result->{conforming_max} = 0;
163     }
164     } else {
165     $result->{$layer}->{must}++;
166     $result->{$layer}->{score_max} -= 2;
167     $result->{$layer}->{score_min} -= 2;
168     $result->{conforming_min} = 0;
169     $result->{conforming_max} = 0;
170     }
171     } # add_error
172    
173     sub print_http_header_section ($$) {
174     my ($input, $result) = @_;
175 wakaba 1.9 return unless defined $input->{header_status_code} or
176     defined $input->{header_status_text} or
177     @{$input->{header_field}};
178    
179     push @nav, ['#source-header' => 'HTTP Header'];
180     print STDOUT qq[<div id="source-header" class="section">
181     <h2>HTTP Header</h2>
182    
183     <p><strong>Note</strong>: Due to the limitation of the
184     network library in use, the content of this section might
185     not be the real header.</p>
186    
187     <table><tbody>
188     ];
189    
190     if (defined $input->{header_status_code}) {
191     print STDOUT qq[<tr><th scope="row">Status code</th>];
192     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
193     }
194     if (defined $input->{header_status_text}) {
195     print STDOUT qq[<tr><th scope="row">Status text</th>];
196     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
197     }
198    
199     for (@{$input->{header_field}}) {
200     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
201     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
202     }
203    
204     print STDOUT qq[</tbody></table></div>];
205     } # print_http_header_section
206    
207 wakaba 1.19 sub print_syntax_error_html_section ($$) {
208     my ($input, $result) = @_;
209 wakaba 1.18
210     require Encode;
211     require Whatpm::HTML;
212    
213     $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
214    
215     my $time1 = time;
216     my $t = Encode::decode ($input->{charset}, $input->{s});
217     $time{decode} = time - $time1;
218    
219     print STDOUT qq[
220     <div id="parse-errors" class="section">
221     <h2>Parse Errors</h2>
222    
223     <dl>];
224     push @nav, ['#parse-errors' => 'Parse Error'];
225    
226     my $onerror = sub {
227     my (%opt) = @_;
228     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
229     if ($opt{column} > 0) {
230     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
231     } else {
232     $opt{line} = $opt{line} - 1 || 1;
233     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
234     }
235     $type =~ tr/ /-/;
236     $type =~ s/\|/%7C/g;
237     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
238     print STDOUT qq[<dd class="$cls">$msg</dd>\n];
239 wakaba 1.19
240     add_error ('syntax', \%opt => $result);
241 wakaba 1.18 };
242    
243     my $doc = $dom->create_document;
244     my $el;
245     $time1 = time;
246     if (defined $inner_html_element and length $inner_html_element) {
247     $el = $doc->create_element_ns
248     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
249     Whatpm::HTML->set_inner_html ($el, $t, $onerror);
250     } else {
251     Whatpm::HTML->parse_string ($t => $doc, $onerror);
252     }
253     $time{parse} = time - $time1;
254    
255     print STDOUT qq[</dl></div>];
256    
257     return ($doc, $el);
258     } # print_syntax_error_html_section
259    
260 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
261     my ($input, $result) = @_;
262 wakaba 1.18
263     require Message::DOM::XMLParserTemp;
264    
265     print STDOUT qq[
266     <div id="parse-errors" class="section">
267     <h2>Parse Errors</h2>
268    
269     <dl>];
270     push @nav, ['#parse-errors' => 'Parse Error'];
271    
272     my $onerror = sub {
273     my $err = shift;
274     my $line = $err->location->line_number;
275     print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
276     print STDOUT $err->location->column_number, "</dt><dd>";
277     print STDOUT htescape $err->text, "</dd>\n";
278 wakaba 1.19
279     add_error ('syntax', {type => $err->text,
280     level => [
281     $err->SEVERITY_FATAL_ERROR => 'm',
282     $err->SEVERITY_ERROR => 'm',
283     $err->SEVERITY_WARNING => 's',
284     ]->[$err->severity]} => $result);
285    
286 wakaba 1.18 return 1;
287     };
288    
289     my $time1 = time;
290     open my $fh, '<', \($input->{s});
291     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
292     ($fh => $dom, $onerror, charset => $input->{charset});
293     $time{parse_xml} = time - $time1;
294    
295     print STDOUT qq[</dl></div>];
296    
297     return ($doc, undef);
298     } # print_syntax_error_xml_section
299    
300 wakaba 1.9 sub print_source_string_section ($$) {
301     require Encode;
302     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
303     return unless $enc;
304    
305     my $s = \($enc->decode (${$_[0]}));
306     my $i = 1;
307     push @nav, ['#source-string' => 'Source'];
308     print STDOUT qq[<div id="source-string" class="section">
309     <h2>Document Source</h2>
310     <ol lang="">\n];
311 wakaba 1.7 if (length $$s) {
312     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
313     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
314     $i++;
315     }
316     if ($$s =~ /\G([^\x0A]+)/gc) {
317     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
318     }
319     } else {
320     print STDOUT q[<li id="line-1"></li>];
321 wakaba 1.2 }
322 wakaba 1.9 print STDOUT "</ol></div>";
323     } # print_input_string_section
324 wakaba 1.2
325     sub print_document_tree ($) {
326 wakaba 1.1 my $node = shift;
327 wakaba 1.2 my $r = '<ol class="xoxo">';
328 wakaba 1.1
329 wakaba 1.2 my @node = ($node);
330 wakaba 1.1 while (@node) {
331     my $child = shift @node;
332 wakaba 1.2 unless (ref $child) {
333     $r .= $child;
334     next;
335     }
336    
337     my $node_id = 'node-'.refaddr $child;
338     my $nt = $child->node_type;
339     if ($nt == $child->ELEMENT_NODE) {
340 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
341     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
342 wakaba 1.2 '</code>'; ## ISSUE: case
343    
344     if ($child->has_attributes) {
345     $r .= '<ul class="attributes">';
346 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
347 wakaba 1.2 @{$child->attributes}) {
348 wakaba 1.4 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
349 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
350     }
351     $r .= '</ul>';
352     }
353    
354 wakaba 1.7 if ($child->has_child_nodes) {
355 wakaba 1.2 $r .= '<ol class="children">';
356 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
357     } else {
358     $r .= '</li>';
359 wakaba 1.2 }
360     } elsif ($nt == $child->TEXT_NODE) {
361 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
362 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
363 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
364 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
365 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
366 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
367 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
368 wakaba 1.7 $r .= qq[<ul class="attributes">];
369     $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
370     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
371 wakaba 1.9 unless ($child->manakai_is_html) {
372     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
373     if (defined $child->xml_encoding) {
374     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
375     } else {
376     $r .= qq[<li>XML encoding = (null)</li>];
377     }
378     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
379     }
380 wakaba 1.7 $r .= qq[</ul>];
381 wakaba 1.2 if ($child->has_child_nodes) {
382 wakaba 1.7 $r .= '<ol class="children">';
383 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
384 wakaba 1.1 }
385 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
386 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
387     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
388     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
389     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
390 wakaba 1.2 $r .= '</ul></li>';
391     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
392 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
393 wakaba 1.1 } else {
394 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
395 wakaba 1.1 }
396     }
397 wakaba 1.2
398     $r .= '</ol>';
399     print STDOUT $r;
400     } # print_document_tree
401 wakaba 1.1
402 wakaba 1.18 sub print_structure_dump_section ($$) {
403     my ($doc, $el) = @_;
404    
405     print STDOUT qq[
406     <div id="document-tree" class="section">
407     <h2>Document Tree</h2>
408     ];
409     push @nav, ['#document-tree' => 'Tree'];
410    
411     print_document_tree ($el || $doc);
412    
413     print STDOUT qq[</div>];
414     } # print_structure_dump_section
415    
416 wakaba 1.19 sub print_structure_error_section ($$$) {
417     my ($doc, $el, $result) = @_;
418 wakaba 1.18
419     print STDOUT qq[<div id="document-errors" class="section">
420     <h2>Document Errors</h2>
421    
422     <dl>];
423     push @nav, ['#document-errors' => 'Document Error'];
424    
425     require Whatpm::ContentChecker;
426     my $onerror = sub {
427     my %opt = @_;
428     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
429     $type =~ tr/ /-/;
430     $type =~ s/\|/%7C/g;
431     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
432     print STDOUT qq[<dt class="$cls">] . get_node_link ($opt{node}) .
433     qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
434 wakaba 1.19 add_error ('structure', \%opt => $result);
435 wakaba 1.18 };
436    
437     my $elements;
438     my $time1 = time;
439     if ($el) {
440     $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
441     } else {
442     $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
443     }
444     $time{check} = time - $time1;
445    
446     print STDOUT qq[</dl></div>];
447    
448     return $elements;
449     } # print_structure_error_section
450    
451     sub print_table_section ($) {
452     my $tables = shift;
453    
454     push @nav, ['#tables' => 'Tables'];
455     print STDOUT qq[
456     <div id="tables" class="section">
457     <h2>Tables</h2>
458    
459     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
460     <script src="../table-script.js" type="text/javascript"></script>
461     <noscript>
462     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
463     </noscript>
464     ];
465    
466     require JSON;
467    
468     my $i = 0;
469     for my $table_el (@$tables) {
470     $i++;
471     print STDOUT qq[<div class="section" id="table-$i"><h3>] .
472     get_node_link ($table_el) . q[</h3>];
473    
474     ## TODO: Make |ContentChecker| return |form_table| result
475     ## so that this script don't have to run the algorithm twice.
476     my $table = Whatpm::HTMLTable->form_table ($table_el);
477    
478     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
479     next unless $_;
480     delete $_->{element};
481     }
482    
483     for (@{$table->{row_group}}) {
484     next unless $_;
485     next unless $_->{element};
486     $_->{type} = $_->{element}->manakai_local_name;
487     delete $_->{element};
488     }
489    
490     for (@{$table->{cell}}) {
491     next unless $_;
492     for (@{$_}) {
493     next unless $_;
494     for (@$_) {
495     $_->{id} = refaddr $_->{element} if defined $_->{element};
496     delete $_->{element};
497     $_->{is_header} = $_->{is_header} ? 1 : 0;
498     }
499     }
500     }
501    
502     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
503     print STDOUT JSON::objToJson ($table);
504     print STDOUT qq[, document.getElementById ('table-$i'));</script>];
505     }
506    
507     print STDOUT qq[</div>];
508     } # print_table_section
509    
510     sub print_id_section ($) {
511     my $ids = shift;
512    
513     push @nav, ['#identifiers' => 'IDs'];
514     print STDOUT qq[
515     <div id="identifiers" class="section">
516     <h2>Identifiers</h2>
517    
518     <dl>
519     ];
520     for my $id (sort {$a cmp $b} keys %$ids) {
521     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
522     for (@{$ids->{$id}}) {
523     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
524     }
525     }
526     print STDOUT qq[</dl></div>];
527     } # print_id_section
528    
529     sub print_term_section ($) {
530     my $terms = shift;
531    
532     push @nav, ['#terms' => 'Terms'];
533     print STDOUT qq[
534     <div id="terms" class="section">
535     <h2>Terms</h2>
536    
537     <dl>
538     ];
539     for my $term (sort {$a cmp $b} keys %$terms) {
540     print STDOUT qq[<dt>@{[htescape $term]}</dt>];
541     for (@{$terms->{$term}}) {
542     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
543     }
544     }
545     print STDOUT qq[</dl></div>];
546     } # print_term_section
547    
548     sub print_class_section ($) {
549     my $classes = shift;
550    
551     push @nav, ['#classes' => 'Classes'];
552     print STDOUT qq[
553     <div id="classes" class="section">
554     <h2>Classes</h2>
555    
556     <dl>
557     ];
558     for my $class (sort {$a cmp $b} keys %$classes) {
559     print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
560     for (@{$classes->{$class}}) {
561     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
562     }
563     }
564     print STDOUT qq[</dl></div>];
565     } # print_class_section
566    
567 wakaba 1.19 sub print_result_section ($) {
568     my $result = shift;
569    
570     print STDOUT qq[
571     <div id="result-summary" class="section">
572     <h2>Result</h2>];
573    
574 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
575 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
576     checker cannot decide whether the document is conforming or
577     not, since the document contains one or more unsupported
578 wakaba 1.21 features. The document might or might not be conforming.</p>];
579 wakaba 1.19 } elsif ($result->{conforming_min}) {
580     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
581     found in this document.</p>];
582     } elsif ($result->{conforming_max}) {
583     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
584     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
585     it might be conforming.</p>];
586     } else {
587     print STDOUT qq[<p class=FAIL id=result-para>This document is
588     <strong><em>non</em>-conforming</strong>.</p>];
589     }
590    
591     print STDOUT qq[<table>
592     <colgroup><col><colgroup><col><col><col><colgroup><col>
593     <thead>
594     <tr><th scope=col></th><th scope=col><em class=rfc2119>MUST</em>-level
595     Errors</th><th scope=col><em class=rfc2119>SHOULD</em>-level
596     Errors</th><th scope=col>Warnings</th><th scope=col>Score</th></tr>
597     </thead><tbody>];
598    
599     my $must_error = 0;
600     my $should_error = 0;
601     my $warning = 0;
602     my $score_min = 0;
603     my $score_max = 0;
604     my $score_base = 20;
605 wakaba 1.21 my $score_unit = $score_base / 100;
606 wakaba 1.19 for (
607     [Transfer => 'transfer', ''],
608     [Character => 'char', ''],
609     [Syntax => 'syntax', '#parse-errors'],
610     [Structure => 'structure', '#document-errors'],
611     ) {
612     $must_error += ($result->{$_->[1]}->{must} += 0);
613     $should_error += ($result->{$_->[1]}->{should} += 0);
614     $warning += ($result->{$_->[1]}->{warning} += 0);
615 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
616     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
617 wakaba 1.19
618     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
619     my $label = $_->[0];
620     if ($result->{$_->[1]}->{must} or
621     $result->{$_->[1]}->{should} or
622     $result->{$_->[1]}->{warning} or
623     $result->{$_->[1]}->{unsupported}) {
624     $label = qq[<a href="$_->[2]">$label</a>];
625     }
626    
627     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
628     if ($uncertain) {
629 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
630 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
631 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
632 wakaba 1.19 } else {
633 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
634 wakaba 1.19 }
635     }
636    
637     $score_max += $score_base;
638    
639     print STDOUT qq[
640     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
641     </tbody>
642 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
643     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
644     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
645     <td>$warning?</td>
646     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
647 wakaba 1.19 </table>
648    
649     <p><strong>Important</strong>: This conformance checking service
650     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
651     </div>];
652     push @nav, ['#result-summary' => 'Result'];
653     } # print_result_section
654    
655 wakaba 1.18 sub print_result_unknown_type_section ($) {
656     my $input = shift;
657    
658     print STDOUT qq[
659     <div id="result-summary" class="section">
660     <p><em>Media type <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code> is not supported!</em></p>
661     </div>
662     ];
663     push @nav, ['#result-summary' => 'Result'];
664     } # print_result_unknown_type_section
665    
666     sub print_result_input_error_section ($) {
667     my $input = shift;
668     print STDOUT qq[<div class="section" id="result-summary">
669     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
670     </div>];
671     push @nav, ['#result-summary' => 'Result'];
672     } # print_Result_input_error_section
673    
674 wakaba 1.1 sub get_node_path ($) {
675     my $node = shift;
676     my @r;
677     while (defined $node) {
678     my $rs;
679     if ($node->node_type == 1) {
680     $rs = $node->manakai_local_name;
681     $node = $node->parent_node;
682     } elsif ($node->node_type == 2) {
683     $rs = '@' . $node->manakai_local_name;
684     $node = $node->owner_element;
685     } elsif ($node->node_type == 3) {
686     $rs = '"' . $node->data . '"';
687     $node = $node->parent_node;
688     } elsif ($node->node_type == 9) {
689 wakaba 1.9 @r = ('') unless @r;
690 wakaba 1.1 $rs = '';
691     $node = $node->parent_node;
692     } else {
693     $rs = '#' . $node->node_type;
694     $node = $node->parent_node;
695     }
696     unshift @r, $rs;
697     }
698     return join '/', @r;
699     } # get_node_path
700    
701 wakaba 1.6 sub get_node_link ($) {
702     return qq[<a href="#node-@{[refaddr $_[0]]}">] .
703     htescape (get_node_path ($_[0])) . qq[</a>];
704     } # get_node_link
705    
706 wakaba 1.7 {
707     my $Msg = {};
708    
709     sub load_text_catalog ($) {
710     my $lang = shift; # MUST be a canonical lang name
711     open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
712     while (<$file>) {
713     if (s/^([^;]+);([^;]*);//) {
714     my ($type, $cls, $msg) = ($1, $2, $_);
715     $msg =~ tr/\x0D\x0A//d;
716     $Msg->{$type} = [$cls, $msg];
717     }
718     }
719     } # load_text_catalog
720    
721     sub get_text ($) {
722 wakaba 1.15 my ($type, $level, $node) = @_;
723 wakaba 1.7 $type = $level . ':' . $type if defined $level;
724     my @arg;
725     {
726     if (defined $Msg->{$type}) {
727     my $msg = $Msg->{$type}->[1];
728 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
729     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
730     }ge;
731 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
732     UNIVERSAL::can ($node, 'get_attribute_ns')
733     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
734     }ge;
735     $msg =~ s{<var>{\@}</var>}{
736     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
737     }ge;
738 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
739     UNIVERSAL::can ($node, 'manakai_local_name')
740     ? htescape ($node->manakai_local_name) : ''
741     }ge;
742     $msg =~ s{<var>{element-local-name}</var>}{
743     (UNIVERSAL::can ($node, 'owner_element') and
744     $node->owner_element)
745     ? htescape ($node->owner_element->manakai_local_name)
746     : ''
747     }ge;
748 wakaba 1.11 return ($type, $Msg->{$type}->[0], $msg);
749 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
750     unshift @arg, $1;
751     redo;
752     }
753     }
754 wakaba 1.11 return ($type, '', htescape ($_[0]));
755 wakaba 1.7 } # get_text
756    
757     }
758    
759 wakaba 1.9 sub get_input_document ($$) {
760     my ($http, $dom) = @_;
761    
762 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
763 wakaba 1.9 my $r = {};
764     if (defined $request_uri and length $request_uri) {
765     my $uri = $dom->create_uri_reference ($request_uri);
766     unless ({
767     http => 1,
768     }->{lc $uri->uri_scheme}) {
769     return {uri => $request_uri, request_uri => $request_uri,
770     error_status_text => 'URI scheme not allowed'};
771     }
772    
773     require Message::Util::HostPermit;
774     my $host_permit = new Message::Util::HostPermit;
775     $host_permit->add_rule (<<EOH);
776     Allow host=suika port=80
777     Deny host=suika
778     Allow host=suika.fam.cx port=80
779     Deny host=suika.fam.cx
780     Deny host=localhost
781     Deny host=*.localdomain
782     Deny ipv4=0.0.0.0/8
783     Deny ipv4=10.0.0.0/8
784     Deny ipv4=127.0.0.0/8
785     Deny ipv4=169.254.0.0/16
786     Deny ipv4=172.0.0.0/11
787     Deny ipv4=192.0.2.0/24
788     Deny ipv4=192.88.99.0/24
789     Deny ipv4=192.168.0.0/16
790     Deny ipv4=198.18.0.0/15
791     Deny ipv4=224.0.0.0/4
792     Deny ipv4=255.255.255.255/32
793     Deny ipv6=0::0/0
794     Allow host=*
795     EOH
796     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
797     return {uri => $request_uri, request_uri => $request_uri,
798     error_status_text => 'Connection to the host is forbidden'};
799     }
800    
801     require LWP::UserAgent;
802     my $ua = WDCC::LWPUA->new;
803     $ua->{wdcc_dom} = $dom;
804     $ua->{wdcc_host_permit} = $host_permit;
805     $ua->agent ('Mozilla'); ## TODO: for now.
806     $ua->parse_head (0);
807     $ua->protocols_allowed ([qw/http/]);
808     $ua->max_size (1000_000);
809     my $req = HTTP::Request->new (GET => $request_uri);
810     my $res = $ua->request ($req);
811 wakaba 1.16 ## TODO: 401 sets |is_success| true.
812     if ($res->is_success or $http->get_parameter ('error-page')) {
813 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
814     $r->{uri} = $res->request->uri;
815     $r->{request_uri} = $request_uri;
816    
817     ## TODO: More strict parsing...
818     my $ct = $res->header ('Content-Type');
819     if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {
820     $r->{media_type} = lc $1;
821     }
822     if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?(\S+)"?/i) {
823     $r->{charset} = lc $1;
824     $r->{charset} =~ tr/\\//d;
825     }
826    
827 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
828 wakaba 1.9 if (defined $input_charset and length $input_charset) {
829     $r->{charset_overridden}
830     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
831     $r->{charset} = $input_charset;
832     }
833    
834     $r->{s} = ''.$res->content;
835     } else {
836     $r->{uri} = $res->request->uri;
837     $r->{request_uri} = $request_uri;
838     $r->{error_status_text} = $res->status_line;
839     }
840    
841     $r->{header_field} = [];
842     $res->scan (sub {
843     push @{$r->{header_field}}, [$_[0], $_[1]];
844     });
845     $r->{header_status_code} = $res->code;
846     $r->{header_status_text} = $res->message;
847     } else {
848 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
849 wakaba 1.9 $r->{uri} = q<thismessage:/>;
850     $r->{request_uri} = q<thismessage:/>;
851     $r->{base_uri} = q<thismessage:/>;
852 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
853 wakaba 1.9 $r->{charset} =~ s/\s+//g;
854     $r->{charset} = 'utf-8' if $r->{charset} eq '';
855     $r->{header_field} = [];
856     }
857    
858 wakaba 1.16 my $input_format = $http->get_parameter ('i');
859 wakaba 1.9 if (defined $input_format and length $input_format) {
860     $r->{media_type_overridden}
861     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
862     $r->{media_type} = $input_format;
863     }
864     if (defined $r->{s} and not defined $r->{media_type}) {
865     $r->{media_type} = 'text/html';
866     $r->{media_type_overridden} = 1;
867     }
868    
869     if ($r->{media_type} eq 'text/xml') {
870     unless (defined $r->{charset}) {
871     $r->{charset} = 'us-ascii';
872     } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
873     $r->{charset_overridden} = 0;
874     }
875     }
876    
877     if (length $r->{s} > 1000_000) {
878     $r->{error_status_text} = 'Entity-body too large';
879     delete $r->{s};
880     return $r;
881     }
882    
883     return $r;
884     } # get_input_document
885    
886     package WDCC::LWPUA;
887     BEGIN { push our @ISA, 'LWP::UserAgent'; }
888    
889     sub redirect_ok {
890     my $ua = shift;
891     unless ($ua->SUPER::redirect_ok (@_)) {
892     return 0;
893     }
894    
895     my $uris = $_[1]->header ('Location');
896     return 0 unless $uris;
897     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
898     unless ({
899     http => 1,
900     }->{lc $uri->uri_scheme}) {
901     return 0;
902     }
903     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
904     return 0;
905     }
906     return 1;
907     } # redirect_ok
908    
909 wakaba 1.1 =head1 AUTHOR
910    
911     Wakaba <w@suika.fam.cx>.
912    
913     =head1 LICENSE
914    
915     Copyright 2007 Wakaba <w@suika.fam.cx>
916    
917     This library is free software; you can redistribute it
918     and/or modify it under the same terms as Perl itself.
919    
920     =cut
921    
922 wakaba 1.21 ## $Date: 2007/09/10 12:09:34 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24