/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.21 - (show annotations) (download)
Tue Sep 11 08:25:23 2007 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.20: +14 -9 lines
Scoring bug fixes

1 #!/usr/bin/perl
2 use strict;
3
4 use lib qw[/home/httpd/html/www/markup/html/whatpm
5 /home/wakaba/work/manakai2/lib];
6 use CGI::Carp qw[fatalsToBrowser];
7 use Scalar::Util qw[refaddr];
8 use Time::HiRes qw/time/;
9
10 sub htescape ($) {
11 my $s = $_[0];
12 $s =~ s/&/&/g;
13 $s =~ s/</&lt;/g;
14 $s =~ s/>/&gt;/g;
15 $s =~ s/"/&quot;/g;
16 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
17 sprintf '<var>U+%04X</var>', ord $1;
18 }ge;
19 return $s;
20 } # htescape
21
22 use Message::CGI::HTTP;
23 my $http = Message::CGI::HTTP->new;
24
25 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
26 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
27 exit;
28 }
29
30 binmode STDOUT, ':utf8';
31 $| = 1;
32
33 require Message::DOM::DOMImplementation;
34 my $dom = Message::DOM::DOMImplementation->new;
35
36 load_text_catalog ('en'); ## TODO: conneg
37
38 my @nav;
39 print STDOUT qq[Content-Type: text/html; charset=utf-8
40
41 <!DOCTYPE html>
42 <html lang="en">
43 <head>
44 <title>Web Document Conformance Checker (BETA)</title>
45 <link rel="stylesheet" href="../cc-style.css" type="text/css">
46 </head>
47 <body>
48 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
49 (<em>beta</em>)</h1>
50 ];
51
52 $| = 0;
53 my $input = get_input_document ($http, $dom);
54 my $inner_html_element = $http->get_parameter ('e');
55 my $char_length = 0;
56 my %time;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>
65 ]; # no </dl> yet
66 push @nav, ['#document-info' => 'Information'];
67
68 if (defined $input->{s}) {
69 $char_length = length $input->{s};
70
71 print STDOUT qq[
72 <dt>Base URI</dt>
73 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
74 <dt>Internet Media Type</dt>
75 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
76 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>
77 <dt>Character Encoding</dt>
78 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
79 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
80 <dt>Length</dt>
81 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
82 </dl>
83 </div>
84 ];
85
86 my $result = {conforming_min => 1, conforming_max => 1};
87 print_http_header_section ($input, $result);
88
89 my $doc;
90 my $el;
91
92 if ($input->{media_type} eq 'text/html') {
93 ($doc, $el) = print_syntax_error_html_section ($input, $result);
94 print_source_string_section (\($input->{s}), $input->{charset});
95 } elsif ({
96 'text/xml' => 1,
97 'application/atom+xml' => 1,
98 'application/rss+xml' => 1,
99 'application/svg+xml' => 1,
100 'application/xhtml+xml' => 1,
101 'application/xml' => 1,
102 }->{$input->{media_type}}) {
103 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
104 print_source_string_section (\($input->{s}), $doc->input_encoding);
105 } else {
106 ## TODO: Change HTTP status code??
107 print_result_unknown_type_section ($input);
108 }
109
110 if (defined $doc or defined $el) {
111 print_structure_dump_section ($doc, $el);
112 my $elements = print_structure_error_section ($doc, $el, $result);
113 print_table_section ($elements->{table}) if @{$elements->{table}};
114 print_id_section ($elements->{id}) if keys %{$elements->{id}};
115 print_term_section ($elements->{term}) if keys %{$elements->{term}};
116 print_class_section ($elements->{class}) if keys %{$elements->{class}};
117 }
118
119 print_result_section ($result);
120 } else {
121 print STDOUT qq[</dl></div>];
122 print_result_input_error_section ($input);
123 }
124
125 print STDOUT qq[
126 <ul class="navigation" id="nav-items">
127 ];
128 for (@nav) {
129 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
130 }
131 print STDOUT qq[
132 </ul>
133 </body>
134 </html>
135 ];
136
137 for (qw/decode parse parse_xml check/) {
138 next unless defined $time{$_};
139 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
140 print $file $char_length, "\t", $time{$_}, "\n";
141 }
142
143 exit;
144
145 sub add_error ($$$) {
146 my ($layer, $err, $result) = @_;
147 if (defined $err->{level}) {
148 if ($err->{level} eq 's') {
149 $result->{$layer}->{should}++;
150 $result->{$layer}->{score_min} -= 2;
151 $result->{conforming_min} = 0;
152 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
153 $result->{$layer}->{warning}++;
154 } elsif ($err->{level} eq 'unsupported') {
155 $result->{$layer}->{unsupported}++;
156 $result->{unsupported} = 1;
157 } else {
158 $result->{$layer}->{must}++;
159 $result->{$layer}->{score_max} -= 2;
160 $result->{$layer}->{score_min} -= 2;
161 $result->{conforming_min} = 0;
162 $result->{conforming_max} = 0;
163 }
164 } else {
165 $result->{$layer}->{must}++;
166 $result->{$layer}->{score_max} -= 2;
167 $result->{$layer}->{score_min} -= 2;
168 $result->{conforming_min} = 0;
169 $result->{conforming_max} = 0;
170 }
171 } # add_error
172
173 sub print_http_header_section ($$) {
174 my ($input, $result) = @_;
175 return unless defined $input->{header_status_code} or
176 defined $input->{header_status_text} or
177 @{$input->{header_field}};
178
179 push @nav, ['#source-header' => 'HTTP Header'];
180 print STDOUT qq[<div id="source-header" class="section">
181 <h2>HTTP Header</h2>
182
183 <p><strong>Note</strong>: Due to the limitation of the
184 network library in use, the content of this section might
185 not be the real header.</p>
186
187 <table><tbody>
188 ];
189
190 if (defined $input->{header_status_code}) {
191 print STDOUT qq[<tr><th scope="row">Status code</th>];
192 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
193 }
194 if (defined $input->{header_status_text}) {
195 print STDOUT qq[<tr><th scope="row">Status text</th>];
196 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
197 }
198
199 for (@{$input->{header_field}}) {
200 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
201 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
202 }
203
204 print STDOUT qq[</tbody></table></div>];
205 } # print_http_header_section
206
207 sub print_syntax_error_html_section ($$) {
208 my ($input, $result) = @_;
209
210 require Encode;
211 require Whatpm::HTML;
212
213 $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
214
215 my $time1 = time;
216 my $t = Encode::decode ($input->{charset}, $input->{s});
217 $time{decode} = time - $time1;
218
219 print STDOUT qq[
220 <div id="parse-errors" class="section">
221 <h2>Parse Errors</h2>
222
223 <dl>];
224 push @nav, ['#parse-errors' => 'Parse Error'];
225
226 my $onerror = sub {
227 my (%opt) = @_;
228 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
229 if ($opt{column} > 0) {
230 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
231 } else {
232 $opt{line} = $opt{line} - 1 || 1;
233 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
234 }
235 $type =~ tr/ /-/;
236 $type =~ s/\|/%7C/g;
237 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
238 print STDOUT qq[<dd class="$cls">$msg</dd>\n];
239
240 add_error ('syntax', \%opt => $result);
241 };
242
243 my $doc = $dom->create_document;
244 my $el;
245 $time1 = time;
246 if (defined $inner_html_element and length $inner_html_element) {
247 $el = $doc->create_element_ns
248 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
249 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
250 } else {
251 Whatpm::HTML->parse_string ($t => $doc, $onerror);
252 }
253 $time{parse} = time - $time1;
254
255 print STDOUT qq[</dl></div>];
256
257 return ($doc, $el);
258 } # print_syntax_error_html_section
259
260 sub print_syntax_error_xml_section ($$) {
261 my ($input, $result) = @_;
262
263 require Message::DOM::XMLParserTemp;
264
265 print STDOUT qq[
266 <div id="parse-errors" class="section">
267 <h2>Parse Errors</h2>
268
269 <dl>];
270 push @nav, ['#parse-errors' => 'Parse Error'];
271
272 my $onerror = sub {
273 my $err = shift;
274 my $line = $err->location->line_number;
275 print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
276 print STDOUT $err->location->column_number, "</dt><dd>";
277 print STDOUT htescape $err->text, "</dd>\n";
278
279 add_error ('syntax', {type => $err->text,
280 level => [
281 $err->SEVERITY_FATAL_ERROR => 'm',
282 $err->SEVERITY_ERROR => 'm',
283 $err->SEVERITY_WARNING => 's',
284 ]->[$err->severity]} => $result);
285
286 return 1;
287 };
288
289 my $time1 = time;
290 open my $fh, '<', \($input->{s});
291 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
292 ($fh => $dom, $onerror, charset => $input->{charset});
293 $time{parse_xml} = time - $time1;
294
295 print STDOUT qq[</dl></div>];
296
297 return ($doc, undef);
298 } # print_syntax_error_xml_section
299
300 sub print_source_string_section ($$) {
301 require Encode;
302 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
303 return unless $enc;
304
305 my $s = \($enc->decode (${$_[0]}));
306 my $i = 1;
307 push @nav, ['#source-string' => 'Source'];
308 print STDOUT qq[<div id="source-string" class="section">
309 <h2>Document Source</h2>
310 <ol lang="">\n];
311 if (length $$s) {
312 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
313 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
314 $i++;
315 }
316 if ($$s =~ /\G([^\x0A]+)/gc) {
317 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
318 }
319 } else {
320 print STDOUT q[<li id="line-1"></li>];
321 }
322 print STDOUT "</ol></div>";
323 } # print_input_string_section
324
325 sub print_document_tree ($) {
326 my $node = shift;
327 my $r = '<ol class="xoxo">';
328
329 my @node = ($node);
330 while (@node) {
331 my $child = shift @node;
332 unless (ref $child) {
333 $r .= $child;
334 next;
335 }
336
337 my $node_id = 'node-'.refaddr $child;
338 my $nt = $child->node_type;
339 if ($nt == $child->ELEMENT_NODE) {
340 my $child_nsuri = $child->namespace_uri;
341 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
342 '</code>'; ## ISSUE: case
343
344 if ($child->has_attributes) {
345 $r .= '<ul class="attributes">';
346 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
347 @{$child->attributes}) {
348 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
349 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
350 }
351 $r .= '</ul>';
352 }
353
354 if ($child->has_child_nodes) {
355 $r .= '<ol class="children">';
356 unshift @node, @{$child->child_nodes}, '</ol></li>';
357 } else {
358 $r .= '</li>';
359 }
360 } elsif ($nt == $child->TEXT_NODE) {
361 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
362 } elsif ($nt == $child->CDATA_SECTION_NODE) {
363 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
364 } elsif ($nt == $child->COMMENT_NODE) {
365 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
366 } elsif ($nt == $child->DOCUMENT_NODE) {
367 $r .= qq'<li id="$node_id" class="tree-document">Document';
368 $r .= qq[<ul class="attributes">];
369 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
370 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
371 unless ($child->manakai_is_html) {
372 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
373 if (defined $child->xml_encoding) {
374 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
375 } else {
376 $r .= qq[<li>XML encoding = (null)</li>];
377 }
378 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
379 }
380 $r .= qq[</ul>];
381 if ($child->has_child_nodes) {
382 $r .= '<ol class="children">';
383 unshift @node, @{$child->child_nodes}, '</ol></li>';
384 }
385 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
386 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
387 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
388 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
389 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
390 $r .= '</ul></li>';
391 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
392 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
393 } else {
394 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
395 }
396 }
397
398 $r .= '</ol>';
399 print STDOUT $r;
400 } # print_document_tree
401
402 sub print_structure_dump_section ($$) {
403 my ($doc, $el) = @_;
404
405 print STDOUT qq[
406 <div id="document-tree" class="section">
407 <h2>Document Tree</h2>
408 ];
409 push @nav, ['#document-tree' => 'Tree'];
410
411 print_document_tree ($el || $doc);
412
413 print STDOUT qq[</div>];
414 } # print_structure_dump_section
415
416 sub print_structure_error_section ($$$) {
417 my ($doc, $el, $result) = @_;
418
419 print STDOUT qq[<div id="document-errors" class="section">
420 <h2>Document Errors</h2>
421
422 <dl>];
423 push @nav, ['#document-errors' => 'Document Error'];
424
425 require Whatpm::ContentChecker;
426 my $onerror = sub {
427 my %opt = @_;
428 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
429 $type =~ tr/ /-/;
430 $type =~ s/\|/%7C/g;
431 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
432 print STDOUT qq[<dt class="$cls">] . get_node_link ($opt{node}) .
433 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
434 add_error ('structure', \%opt => $result);
435 };
436
437 my $elements;
438 my $time1 = time;
439 if ($el) {
440 $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
441 } else {
442 $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
443 }
444 $time{check} = time - $time1;
445
446 print STDOUT qq[</dl></div>];
447
448 return $elements;
449 } # print_structure_error_section
450
451 sub print_table_section ($) {
452 my $tables = shift;
453
454 push @nav, ['#tables' => 'Tables'];
455 print STDOUT qq[
456 <div id="tables" class="section">
457 <h2>Tables</h2>
458
459 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
460 <script src="../table-script.js" type="text/javascript"></script>
461 <noscript>
462 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
463 </noscript>
464 ];
465
466 require JSON;
467
468 my $i = 0;
469 for my $table_el (@$tables) {
470 $i++;
471 print STDOUT qq[<div class="section" id="table-$i"><h3>] .
472 get_node_link ($table_el) . q[</h3>];
473
474 ## TODO: Make |ContentChecker| return |form_table| result
475 ## so that this script don't have to run the algorithm twice.
476 my $table = Whatpm::HTMLTable->form_table ($table_el);
477
478 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
479 next unless $_;
480 delete $_->{element};
481 }
482
483 for (@{$table->{row_group}}) {
484 next unless $_;
485 next unless $_->{element};
486 $_->{type} = $_->{element}->manakai_local_name;
487 delete $_->{element};
488 }
489
490 for (@{$table->{cell}}) {
491 next unless $_;
492 for (@{$_}) {
493 next unless $_;
494 for (@$_) {
495 $_->{id} = refaddr $_->{element} if defined $_->{element};
496 delete $_->{element};
497 $_->{is_header} = $_->{is_header} ? 1 : 0;
498 }
499 }
500 }
501
502 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
503 print STDOUT JSON::objToJson ($table);
504 print STDOUT qq[, document.getElementById ('table-$i'));</script>];
505 }
506
507 print STDOUT qq[</div>];
508 } # print_table_section
509
510 sub print_id_section ($) {
511 my $ids = shift;
512
513 push @nav, ['#identifiers' => 'IDs'];
514 print STDOUT qq[
515 <div id="identifiers" class="section">
516 <h2>Identifiers</h2>
517
518 <dl>
519 ];
520 for my $id (sort {$a cmp $b} keys %$ids) {
521 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
522 for (@{$ids->{$id}}) {
523 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
524 }
525 }
526 print STDOUT qq[</dl></div>];
527 } # print_id_section
528
529 sub print_term_section ($) {
530 my $terms = shift;
531
532 push @nav, ['#terms' => 'Terms'];
533 print STDOUT qq[
534 <div id="terms" class="section">
535 <h2>Terms</h2>
536
537 <dl>
538 ];
539 for my $term (sort {$a cmp $b} keys %$terms) {
540 print STDOUT qq[<dt>@{[htescape $term]}</dt>];
541 for (@{$terms->{$term}}) {
542 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
543 }
544 }
545 print STDOUT qq[</dl></div>];
546 } # print_term_section
547
548 sub print_class_section ($) {
549 my $classes = shift;
550
551 push @nav, ['#classes' => 'Classes'];
552 print STDOUT qq[
553 <div id="classes" class="section">
554 <h2>Classes</h2>
555
556 <dl>
557 ];
558 for my $class (sort {$a cmp $b} keys %$classes) {
559 print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
560 for (@{$classes->{$class}}) {
561 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
562 }
563 }
564 print STDOUT qq[</dl></div>];
565 } # print_class_section
566
567 sub print_result_section ($) {
568 my $result = shift;
569
570 print STDOUT qq[
571 <div id="result-summary" class="section">
572 <h2>Result</h2>];
573
574 if ($result->{unsupported} and $result->{conforming_max}) {
575 print STDOUT qq[<p class=uncertain id=result-para>The conformance
576 checker cannot decide whether the document is conforming or
577 not, since the document contains one or more unsupported
578 features. The document might or might not be conforming.</p>];
579 } elsif ($result->{conforming_min}) {
580 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
581 found in this document.</p>];
582 } elsif ($result->{conforming_max}) {
583 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
584 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
585 it might be conforming.</p>];
586 } else {
587 print STDOUT qq[<p class=FAIL id=result-para>This document is
588 <strong><em>non</em>-conforming</strong>.</p>];
589 }
590
591 print STDOUT qq[<table>
592 <colgroup><col><colgroup><col><col><col><colgroup><col>
593 <thead>
594 <tr><th scope=col></th><th scope=col><em class=rfc2119>MUST</em>-level
595 Errors</th><th scope=col><em class=rfc2119>SHOULD</em>-level
596 Errors</th><th scope=col>Warnings</th><th scope=col>Score</th></tr>
597 </thead><tbody>];
598
599 my $must_error = 0;
600 my $should_error = 0;
601 my $warning = 0;
602 my $score_min = 0;
603 my $score_max = 0;
604 my $score_base = 20;
605 my $score_unit = $score_base / 100;
606 for (
607 [Transfer => 'transfer', ''],
608 [Character => 'char', ''],
609 [Syntax => 'syntax', '#parse-errors'],
610 [Structure => 'structure', '#document-errors'],
611 ) {
612 $must_error += ($result->{$_->[1]}->{must} += 0);
613 $should_error += ($result->{$_->[1]}->{should} += 0);
614 $warning += ($result->{$_->[1]}->{warning} += 0);
615 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
616 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
617
618 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
619 my $label = $_->[0];
620 if ($result->{$_->[1]}->{must} or
621 $result->{$_->[1]}->{should} or
622 $result->{$_->[1]}->{warning} or
623 $result->{$_->[1]}->{unsupported}) {
624 $label = qq[<a href="$_->[2]">$label</a>];
625 }
626
627 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
628 if ($uncertain) {
629 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
630 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
631 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
632 } else {
633 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
634 }
635 }
636
637 $score_max += $score_base;
638
639 print STDOUT qq[
640 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
641 </tbody>
642 <tfoot><tr class=uncertain><th scope=row>Total</th>
643 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
644 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
645 <td>$warning?</td>
646 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
647 </table>
648
649 <p><strong>Important</strong>: This conformance checking service
650 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
651 </div>];
652 push @nav, ['#result-summary' => 'Result'];
653 } # print_result_section
654
655 sub print_result_unknown_type_section ($) {
656 my $input = shift;
657
658 print STDOUT qq[
659 <div id="result-summary" class="section">
660 <p><em>Media type <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code> is not supported!</em></p>
661 </div>
662 ];
663 push @nav, ['#result-summary' => 'Result'];
664 } # print_result_unknown_type_section
665
666 sub print_result_input_error_section ($) {
667 my $input = shift;
668 print STDOUT qq[<div class="section" id="result-summary">
669 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
670 </div>];
671 push @nav, ['#result-summary' => 'Result'];
672 } # print_Result_input_error_section
673
674 sub get_node_path ($) {
675 my $node = shift;
676 my @r;
677 while (defined $node) {
678 my $rs;
679 if ($node->node_type == 1) {
680 $rs = $node->manakai_local_name;
681 $node = $node->parent_node;
682 } elsif ($node->node_type == 2) {
683 $rs = '@' . $node->manakai_local_name;
684 $node = $node->owner_element;
685 } elsif ($node->node_type == 3) {
686 $rs = '"' . $node->data . '"';
687 $node = $node->parent_node;
688 } elsif ($node->node_type == 9) {
689 @r = ('') unless @r;
690 $rs = '';
691 $node = $node->parent_node;
692 } else {
693 $rs = '#' . $node->node_type;
694 $node = $node->parent_node;
695 }
696 unshift @r, $rs;
697 }
698 return join '/', @r;
699 } # get_node_path
700
701 sub get_node_link ($) {
702 return qq[<a href="#node-@{[refaddr $_[0]]}">] .
703 htescape (get_node_path ($_[0])) . qq[</a>];
704 } # get_node_link
705
706 {
707 my $Msg = {};
708
709 sub load_text_catalog ($) {
710 my $lang = shift; # MUST be a canonical lang name
711 open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
712 while (<$file>) {
713 if (s/^([^;]+);([^;]*);//) {
714 my ($type, $cls, $msg) = ($1, $2, $_);
715 $msg =~ tr/\x0D\x0A//d;
716 $Msg->{$type} = [$cls, $msg];
717 }
718 }
719 } # load_text_catalog
720
721 sub get_text ($) {
722 my ($type, $level, $node) = @_;
723 $type = $level . ':' . $type if defined $level;
724 my @arg;
725 {
726 if (defined $Msg->{$type}) {
727 my $msg = $Msg->{$type}->[1];
728 $msg =~ s{<var>\$([0-9]+)</var>}{
729 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
730 }ge;
731 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
732 UNIVERSAL::can ($node, 'get_attribute_ns')
733 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
734 }ge;
735 $msg =~ s{<var>{\@}</var>}{
736 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
737 }ge;
738 $msg =~ s{<var>{local-name}</var>}{
739 UNIVERSAL::can ($node, 'manakai_local_name')
740 ? htescape ($node->manakai_local_name) : ''
741 }ge;
742 $msg =~ s{<var>{element-local-name}</var>}{
743 (UNIVERSAL::can ($node, 'owner_element') and
744 $node->owner_element)
745 ? htescape ($node->owner_element->manakai_local_name)
746 : ''
747 }ge;
748 return ($type, $Msg->{$type}->[0], $msg);
749 } elsif ($type =~ s/:([^:]*)$//) {
750 unshift @arg, $1;
751 redo;
752 }
753 }
754 return ($type, '', htescape ($_[0]));
755 } # get_text
756
757 }
758
759 sub get_input_document ($$) {
760 my ($http, $dom) = @_;
761
762 my $request_uri = $http->get_parameter ('uri');
763 my $r = {};
764 if (defined $request_uri and length $request_uri) {
765 my $uri = $dom->create_uri_reference ($request_uri);
766 unless ({
767 http => 1,
768 }->{lc $uri->uri_scheme}) {
769 return {uri => $request_uri, request_uri => $request_uri,
770 error_status_text => 'URI scheme not allowed'};
771 }
772
773 require Message::Util::HostPermit;
774 my $host_permit = new Message::Util::HostPermit;
775 $host_permit->add_rule (<<EOH);
776 Allow host=suika port=80
777 Deny host=suika
778 Allow host=suika.fam.cx port=80
779 Deny host=suika.fam.cx
780 Deny host=localhost
781 Deny host=*.localdomain
782 Deny ipv4=0.0.0.0/8
783 Deny ipv4=10.0.0.0/8
784 Deny ipv4=127.0.0.0/8
785 Deny ipv4=169.254.0.0/16
786 Deny ipv4=172.0.0.0/11
787 Deny ipv4=192.0.2.0/24
788 Deny ipv4=192.88.99.0/24
789 Deny ipv4=192.168.0.0/16
790 Deny ipv4=198.18.0.0/15
791 Deny ipv4=224.0.0.0/4
792 Deny ipv4=255.255.255.255/32
793 Deny ipv6=0::0/0
794 Allow host=*
795 EOH
796 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
797 return {uri => $request_uri, request_uri => $request_uri,
798 error_status_text => 'Connection to the host is forbidden'};
799 }
800
801 require LWP::UserAgent;
802 my $ua = WDCC::LWPUA->new;
803 $ua->{wdcc_dom} = $dom;
804 $ua->{wdcc_host_permit} = $host_permit;
805 $ua->agent ('Mozilla'); ## TODO: for now.
806 $ua->parse_head (0);
807 $ua->protocols_allowed ([qw/http/]);
808 $ua->max_size (1000_000);
809 my $req = HTTP::Request->new (GET => $request_uri);
810 my $res = $ua->request ($req);
811 ## TODO: 401 sets |is_success| true.
812 if ($res->is_success or $http->get_parameter ('error-page')) {
813 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
814 $r->{uri} = $res->request->uri;
815 $r->{request_uri} = $request_uri;
816
817 ## TODO: More strict parsing...
818 my $ct = $res->header ('Content-Type');
819 if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {
820 $r->{media_type} = lc $1;
821 }
822 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?(\S+)"?/i) {
823 $r->{charset} = lc $1;
824 $r->{charset} =~ tr/\\//d;
825 }
826
827 my $input_charset = $http->get_parameter ('charset');
828 if (defined $input_charset and length $input_charset) {
829 $r->{charset_overridden}
830 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
831 $r->{charset} = $input_charset;
832 }
833
834 $r->{s} = ''.$res->content;
835 } else {
836 $r->{uri} = $res->request->uri;
837 $r->{request_uri} = $request_uri;
838 $r->{error_status_text} = $res->status_line;
839 }
840
841 $r->{header_field} = [];
842 $res->scan (sub {
843 push @{$r->{header_field}}, [$_[0], $_[1]];
844 });
845 $r->{header_status_code} = $res->code;
846 $r->{header_status_text} = $res->message;
847 } else {
848 $r->{s} = ''.$http->get_parameter ('s');
849 $r->{uri} = q<thismessage:/>;
850 $r->{request_uri} = q<thismessage:/>;
851 $r->{base_uri} = q<thismessage:/>;
852 $r->{charset} = ''.$http->get_parameter ('_charset_');
853 $r->{charset} =~ s/\s+//g;
854 $r->{charset} = 'utf-8' if $r->{charset} eq '';
855 $r->{header_field} = [];
856 }
857
858 my $input_format = $http->get_parameter ('i');
859 if (defined $input_format and length $input_format) {
860 $r->{media_type_overridden}
861 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
862 $r->{media_type} = $input_format;
863 }
864 if (defined $r->{s} and not defined $r->{media_type}) {
865 $r->{media_type} = 'text/html';
866 $r->{media_type_overridden} = 1;
867 }
868
869 if ($r->{media_type} eq 'text/xml') {
870 unless (defined $r->{charset}) {
871 $r->{charset} = 'us-ascii';
872 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
873 $r->{charset_overridden} = 0;
874 }
875 }
876
877 if (length $r->{s} > 1000_000) {
878 $r->{error_status_text} = 'Entity-body too large';
879 delete $r->{s};
880 return $r;
881 }
882
883 return $r;
884 } # get_input_document
885
886 package WDCC::LWPUA;
887 BEGIN { push our @ISA, 'LWP::UserAgent'; }
888
889 sub redirect_ok {
890 my $ua = shift;
891 unless ($ua->SUPER::redirect_ok (@_)) {
892 return 0;
893 }
894
895 my $uris = $_[1]->header ('Location');
896 return 0 unless $uris;
897 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
898 unless ({
899 http => 1,
900 }->{lc $uri->uri_scheme}) {
901 return 0;
902 }
903 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
904 return 0;
905 }
906 return 1;
907 } # redirect_ok
908
909 =head1 AUTHOR
910
911 Wakaba <w@suika.fam.cx>.
912
913 =head1 LICENSE
914
915 Copyright 2007 Wakaba <w@suika.fam.cx>
916
917 This library is free software; you can redistribute it
918 and/or modify it under the same terms as Perl itself.
919
920 =cut
921
922 ## $Date: 2007/09/10 12:09:34 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24