/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.19 - (show annotations) (download)
Mon Sep 10 11:51:09 2007 UTC (17 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.18: +137 -14 lines
++ ChangeLog	10 Sep 2007 11:50:30 -0000
2007-09-10  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi, cc-style.css: Scoring support.

1 #!/usr/bin/perl
2 use strict;
3
4 use lib qw[/home/httpd/html/www/markup/html/whatpm
5 /home/wakaba/work/manakai2/lib];
6 use CGI::Carp qw[fatalsToBrowser];
7 use Scalar::Util qw[refaddr];
8 use Time::HiRes qw/time/;
9
10 sub htescape ($) {
11 my $s = $_[0];
12 $s =~ s/&/&amp;/g;
13 $s =~ s/</&lt;/g;
14 $s =~ s/>/&gt;/g;
15 $s =~ s/"/&quot;/g;
16 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
17 sprintf '<var>U+%04X</var>', ord $1;
18 }ge;
19 return $s;
20 } # htescape
21
22 use Message::CGI::HTTP;
23 my $http = Message::CGI::HTTP->new;
24
25 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
26 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
27 exit;
28 }
29
30 binmode STDOUT, ':utf8';
31 $| = 1;
32
33 require Message::DOM::DOMImplementation;
34 my $dom = Message::DOM::DOMImplementation->new;
35
36 load_text_catalog ('en'); ## TODO: conneg
37
38 my @nav;
39 print STDOUT qq[Content-Type: text/html; charset=utf-8
40
41 <!DOCTYPE html>
42 <html lang="en">
43 <head>
44 <title>Web Document Conformance Checker (BETA)</title>
45 <link rel="stylesheet" href="../cc-style.css" type="text/css">
46 </head>
47 <body>
48 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
49 (<em>beta</em>)</h1>
50 ];
51
52 $| = 0;
53 my $input = get_input_document ($http, $dom);
54 my $inner_html_element = $http->get_parameter ('e');
55 my $char_length = 0;
56 my %time;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}">@{[htescape $input->{uri}]}</a>&gt;</code></dd>
65 ]; # no </dl> yet
66 push @nav, ['#document-info' => 'Information'];
67
68 if (defined $input->{s}) {
69 $char_length = length $input->{s};
70
71 print STDOUT qq[
72 <dt>Base URI</dt>
73 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
74 <dt>Internet Media Type</dt>
75 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
76 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : '']}</dd>
77 <dt>Character Encoding</dt>
78 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
79 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
80 <dt>Length</dt>
81 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
82 </dl>
83 </div>
84 ];
85
86 my $result = {};
87 print_http_header_section ($input, $result);
88
89 my $doc;
90 my $el;
91
92 if ($input->{media_type} eq 'text/html') {
93 ($doc, $el) = print_syntax_error_html_section ($input, $result);
94 print_source_string_section (\($input->{s}), $input->{charset});
95 } elsif ({
96 'text/xml' => 1,
97 'application/atom+xml' => 1,
98 'application/rss+xml' => 1,
99 'application/svg+xml' => 1,
100 'application/xhtml+xml' => 1,
101 'application/xml' => 1,
102 }->{$input->{media_type}}) {
103 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
104 print_source_string_section (\($input->{s}), $doc->input_encoding);
105 } else {
106 ## TODO: Change HTTP status code??
107 print_result_unknown_type_section ($input);
108 }
109
110 if (defined $doc or defined $el) {
111 print_structure_dump_section ($doc, $el);
112 my $elements = print_structure_error_section ($doc, $el, $result);
113 print_table_section ($elements->{table}) if @{$elements->{table}};
114 print_id_section ($elements->{id}) if keys %{$elements->{id}};
115 print_term_section ($elements->{term}) if keys %{$elements->{term}};
116 print_class_section ($elements->{class}) if keys %{$elements->{class}};
117 }
118
119 print_result_section ($result);
120 } else {
121 print STDOUT qq[</dl></div>];
122 print_result_input_error_section ($input);
123 }
124
125 print STDOUT qq[
126 <ul class="navigation" id="nav-items">
127 ];
128 for (@nav) {
129 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
130 }
131 print STDOUT qq[
132 </ul>
133 </body>
134 </html>
135 ];
136
137 for (qw/decode parse parse_xml check/) {
138 next unless defined $time{$_};
139 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
140 print $file $char_length, "\t", $time{$_}, "\n";
141 }
142
143 exit;
144
145 sub add_error ($$$) {
146 my ($layer, $err, $result) = @_;
147 if (defined $err->{level}) {
148 if ($err->{level} eq 's') {
149 $result->{$layer}->{should}++;
150 $result->{$layer}->{score_min} -= 2;
151 $result->{conforming_min} = 0;
152 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
153 $result->{$layer}->{warning}++;
154 } elsif ($err->{level} eq 'unsupported') {
155 $result->{$layer}->{unsupported}++;
156 $result->{unsupported} = 1;
157 } else {
158 $result->{$layer}->{must}++;
159 $result->{$layer}->{score_max} -= 2;
160 $result->{$layer}->{score_min} -= 2;
161 $result->{conforming_min} = 0;
162 $result->{conforming_max} = 0;
163 }
164 } else {
165 $result->{$layer}->{must}++;
166 $result->{$layer}->{score_max} -= 2;
167 $result->{$layer}->{score_min} -= 2;
168 $result->{conforming_min} = 0;
169 $result->{conforming_max} = 0;
170 }
171 } # add_error
172
173 sub print_http_header_section ($$) {
174 my ($input, $result) = @_;
175 return unless defined $input->{header_status_code} or
176 defined $input->{header_status_text} or
177 @{$input->{header_field}};
178
179 push @nav, ['#source-header' => 'HTTP Header'];
180 print STDOUT qq[<div id="source-header" class="section">
181 <h2>HTTP Header</h2>
182
183 <p><strong>Note</strong>: Due to the limitation of the
184 network library in use, the content of this section might
185 not be the real header.</p>
186
187 <table><tbody>
188 ];
189
190 if (defined $input->{header_status_code}) {
191 print STDOUT qq[<tr><th scope="row">Status code</th>];
192 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
193 }
194 if (defined $input->{header_status_text}) {
195 print STDOUT qq[<tr><th scope="row">Status text</th>];
196 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
197 }
198
199 for (@{$input->{header_field}}) {
200 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
201 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
202 }
203
204 print STDOUT qq[</tbody></table></div>];
205 } # print_http_header_section
206
207 sub print_syntax_error_html_section ($$) {
208 my ($input, $result) = @_;
209
210 require Encode;
211 require Whatpm::HTML;
212
213 $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
214
215 my $time1 = time;
216 my $t = Encode::decode ($input->{charset}, $input->{s});
217 $time{decode} = time - $time1;
218
219 print STDOUT qq[
220 <div id="parse-errors" class="section">
221 <h2>Parse Errors</h2>
222
223 <dl>];
224 push @nav, ['#parse-errors' => 'Parse Error'];
225
226 my $onerror = sub {
227 my (%opt) = @_;
228 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
229 if ($opt{column} > 0) {
230 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
231 } else {
232 $opt{line} = $opt{line} - 1 || 1;
233 print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
234 }
235 $type =~ tr/ /-/;
236 $type =~ s/\|/%7C/g;
237 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
238 print STDOUT qq[<dd class="$cls">$msg</dd>\n];
239
240 add_error ('syntax', \%opt => $result);
241 };
242
243 my $doc = $dom->create_document;
244 my $el;
245 $time1 = time;
246 if (defined $inner_html_element and length $inner_html_element) {
247 $el = $doc->create_element_ns
248 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
249 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
250 } else {
251 Whatpm::HTML->parse_string ($t => $doc, $onerror);
252 }
253 $time{parse} = time - $time1;
254
255 print STDOUT qq[</dl></div>];
256
257 return ($doc, $el);
258 } # print_syntax_error_html_section
259
260 sub print_syntax_error_xml_section ($$) {
261 my ($input, $result) = @_;
262
263 require Message::DOM::XMLParserTemp;
264
265 print STDOUT qq[
266 <div id="parse-errors" class="section">
267 <h2>Parse Errors</h2>
268
269 <dl>];
270 push @nav, ['#parse-errors' => 'Parse Error'];
271
272 my $onerror = sub {
273 my $err = shift;
274 my $line = $err->location->line_number;
275 print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
276 print STDOUT $err->location->column_number, "</dt><dd>";
277 print STDOUT htescape $err->text, "</dd>\n";
278
279 add_error ('syntax', {type => $err->text,
280 level => [
281 $err->SEVERITY_FATAL_ERROR => 'm',
282 $err->SEVERITY_ERROR => 'm',
283 $err->SEVERITY_WARNING => 's',
284 ]->[$err->severity]} => $result);
285
286 return 1;
287 };
288
289 my $time1 = time;
290 open my $fh, '<', \($input->{s});
291 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
292 ($fh => $dom, $onerror, charset => $input->{charset});
293 $time{parse_xml} = time - $time1;
294
295 print STDOUT qq[</dl></div>];
296
297 return ($doc, undef);
298 } # print_syntax_error_xml_section
299
300 sub print_source_string_section ($$) {
301 require Encode;
302 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
303 return unless $enc;
304
305 my $s = \($enc->decode (${$_[0]}));
306 my $i = 1;
307 push @nav, ['#source-string' => 'Source'];
308 print STDOUT qq[<div id="source-string" class="section">
309 <h2>Document Source</h2>
310 <ol lang="">\n];
311 if (length $$s) {
312 while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
313 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
314 $i++;
315 }
316 if ($$s =~ /\G([^\x0A]+)/gc) {
317 print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
318 }
319 } else {
320 print STDOUT q[<li id="line-1"></li>];
321 }
322 print STDOUT "</ol></div>";
323 } # print_input_string_section
324
325 sub print_document_tree ($) {
326 my $node = shift;
327 my $r = '<ol class="xoxo">';
328
329 my @node = ($node);
330 while (@node) {
331 my $child = shift @node;
332 unless (ref $child) {
333 $r .= $child;
334 next;
335 }
336
337 my $node_id = 'node-'.refaddr $child;
338 my $nt = $child->node_type;
339 if ($nt == $child->ELEMENT_NODE) {
340 my $child_nsuri = $child->namespace_uri;
341 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
342 '</code>'; ## ISSUE: case
343
344 if ($child->has_attributes) {
345 $r .= '<ul class="attributes">';
346 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
347 @{$child->attributes}) {
348 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
349 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
350 }
351 $r .= '</ul>';
352 }
353
354 if ($child->has_child_nodes) {
355 $r .= '<ol class="children">';
356 unshift @node, @{$child->child_nodes}, '</ol></li>';
357 } else {
358 $r .= '</li>';
359 }
360 } elsif ($nt == $child->TEXT_NODE) {
361 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
362 } elsif ($nt == $child->CDATA_SECTION_NODE) {
363 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
364 } elsif ($nt == $child->COMMENT_NODE) {
365 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
366 } elsif ($nt == $child->DOCUMENT_NODE) {
367 $r .= qq'<li id="$node_id" class="tree-document">Document';
368 $r .= qq[<ul class="attributes">];
369 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
370 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
371 unless ($child->manakai_is_html) {
372 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
373 if (defined $child->xml_encoding) {
374 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
375 } else {
376 $r .= qq[<li>XML encoding = (null)</li>];
377 }
378 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
379 }
380 $r .= qq[</ul>];
381 if ($child->has_child_nodes) {
382 $r .= '<ol class="children">';
383 unshift @node, @{$child->child_nodes}, '</ol></li>';
384 }
385 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
386 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
387 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
388 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
389 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
390 $r .= '</ul></li>';
391 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
392 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
393 } else {
394 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
395 }
396 }
397
398 $r .= '</ol>';
399 print STDOUT $r;
400 } # print_document_tree
401
402 sub print_structure_dump_section ($$) {
403 my ($doc, $el) = @_;
404
405 print STDOUT qq[
406 <div id="document-tree" class="section">
407 <h2>Document Tree</h2>
408 ];
409 push @nav, ['#document-tree' => 'Tree'];
410
411 print_document_tree ($el || $doc);
412
413 print STDOUT qq[</div>];
414 } # print_structure_dump_section
415
416 sub print_structure_error_section ($$$) {
417 my ($doc, $el, $result) = @_;
418
419 print STDOUT qq[<div id="document-errors" class="section">
420 <h2>Document Errors</h2>
421
422 <dl>];
423 push @nav, ['#document-errors' => 'Document Error'];
424
425 require Whatpm::ContentChecker;
426 my $onerror = sub {
427 my %opt = @_;
428 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
429 $type =~ tr/ /-/;
430 $type =~ s/\|/%7C/g;
431 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
432 print STDOUT qq[<dt class="$cls">] . get_node_link ($opt{node}) .
433 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
434 add_error ('structure', \%opt => $result);
435 };
436
437 my $elements;
438 my $time1 = time;
439 if ($el) {
440 $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
441 } else {
442 $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
443 }
444 $time{check} = time - $time1;
445
446 print STDOUT qq[</dl></div>];
447
448 return $elements;
449 } # print_structure_error_section
450
451 sub print_table_section ($) {
452 my $tables = shift;
453
454 push @nav, ['#tables' => 'Tables'];
455 print STDOUT qq[
456 <div id="tables" class="section">
457 <h2>Tables</h2>
458
459 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
460 <script src="../table-script.js" type="text/javascript"></script>
461 <noscript>
462 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
463 </noscript>
464 ];
465
466 require JSON;
467
468 my $i = 0;
469 for my $table_el (@$tables) {
470 $i++;
471 print STDOUT qq[<div class="section" id="table-$i"><h3>] .
472 get_node_link ($table_el) . q[</h3>];
473
474 ## TODO: Make |ContentChecker| return |form_table| result
475 ## so that this script don't have to run the algorithm twice.
476 my $table = Whatpm::HTMLTable->form_table ($table_el);
477
478 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
479 next unless $_;
480 delete $_->{element};
481 }
482
483 for (@{$table->{row_group}}) {
484 next unless $_;
485 next unless $_->{element};
486 $_->{type} = $_->{element}->manakai_local_name;
487 delete $_->{element};
488 }
489
490 for (@{$table->{cell}}) {
491 next unless $_;
492 for (@{$_}) {
493 next unless $_;
494 for (@$_) {
495 $_->{id} = refaddr $_->{element} if defined $_->{element};
496 delete $_->{element};
497 $_->{is_header} = $_->{is_header} ? 1 : 0;
498 }
499 }
500 }
501
502 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
503 print STDOUT JSON::objToJson ($table);
504 print STDOUT qq[, document.getElementById ('table-$i'));</script>];
505 }
506
507 print STDOUT qq[</div>];
508 } # print_table_section
509
510 sub print_id_section ($) {
511 my $ids = shift;
512
513 push @nav, ['#identifiers' => 'IDs'];
514 print STDOUT qq[
515 <div id="identifiers" class="section">
516 <h2>Identifiers</h2>
517
518 <dl>
519 ];
520 for my $id (sort {$a cmp $b} keys %$ids) {
521 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
522 for (@{$ids->{$id}}) {
523 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
524 }
525 }
526 print STDOUT qq[</dl></div>];
527 } # print_id_section
528
529 sub print_term_section ($) {
530 my $terms = shift;
531
532 push @nav, ['#terms' => 'Terms'];
533 print STDOUT qq[
534 <div id="terms" class="section">
535 <h2>Terms</h2>
536
537 <dl>
538 ];
539 for my $term (sort {$a cmp $b} keys %$terms) {
540 print STDOUT qq[<dt>@{[htescape $term]}</dt>];
541 for (@{$terms->{$term}}) {
542 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
543 }
544 }
545 print STDOUT qq[</dl></div>];
546 } # print_term_section
547
548 sub print_class_section ($) {
549 my $classes = shift;
550
551 push @nav, ['#classes' => 'Classes'];
552 print STDOUT qq[
553 <div id="classes" class="section">
554 <h2>Classes</h2>
555
556 <dl>
557 ];
558 for my $class (sort {$a cmp $b} keys %$classes) {
559 print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
560 for (@{$classes->{$class}}) {
561 print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
562 }
563 }
564 print STDOUT qq[</dl></div>];
565 } # print_class_section
566
567 sub print_result_section ($) {
568 my $result = shift;
569
570 print STDOUT qq[
571 <div id="result-summary" class="section">
572 <h2>Result</h2>];
573
574 if ($result->{unsupported}) {
575 print STDOUT qq[<p class=uncertain id=result-para>The conformance
576 checker cannot decide whether the document is conforming or
577 not, since the document contains one or more unsupported
578 features.</p>];
579 } elsif ($result->{conforming_min}) {
580 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
581 found in this document.</p>];
582 } elsif ($result->{conforming_max}) {
583 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
584 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
585 it might be conforming.</p>];
586 } else {
587 print STDOUT qq[<p class=FAIL id=result-para>This document is
588 <strong><em>non</em>-conforming</strong>.</p>];
589 }
590
591 print STDOUT qq[<table>
592 <colgroup><col><colgroup><col><col><col><colgroup><col>
593 <thead>
594 <tr><th scope=col></th><th scope=col><em class=rfc2119>MUST</em>-level
595 Errors</th><th scope=col><em class=rfc2119>SHOULD</em>-level
596 Errors</th><th scope=col>Warnings</th><th scope=col>Score</th></tr>
597 </thead><tbody>];
598
599 my $must_error = 0;
600 my $should_error = 0;
601 my $warning = 0;
602 my $score_min = 0;
603 my $score_max = 0;
604 my $score_base = 20;
605 for (
606 [Transfer => 'transfer', ''],
607 [Character => 'char', ''],
608 [Syntax => 'syntax', '#parse-errors'],
609 [Structure => 'structure', '#document-errors'],
610 ) {
611 $must_error += ($result->{$_->[1]}->{must} += 0);
612 $should_error += ($result->{$_->[1]}->{should} += 0);
613 $warning += ($result->{$_->[1]}->{warning} += 0);
614 $score_min += ($result->{$_->[1]}->{score_min} += $score_base);
615 $score_max += ($result->{$_->[1]}->{score_max} += $score_base);
616
617 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
618 my $label = $_->[0];
619 if ($result->{$_->[1]}->{must} or
620 $result->{$_->[1]}->{should} or
621 $result->{$_->[1]}->{warning} or
622 $result->{$_->[1]}->{unsupported}) {
623 $label = qq[<a href="$_->[2]">$label</a>];
624 }
625
626 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
627 if ($uncertain) {
628 print qq[<td class="@{[$score_max < $score_base ? $score_min < $score_max ? 'FAIL' : 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
629 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
630 print qq[<td class="@{[$score_max < $score_base ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max} + $score_base</td></tr>];
631 } else {
632 print qq[<td class="@{[$score_max < $score_base ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
633 }
634 }
635
636 $score_max += $score_base;
637
638 print STDOUT qq[
639 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
640 </tbody>
641 <tfoot><tr class=uncertain><th scope=row>Total</th><td>$must_error?</td><td>$should_error?</td><td>$warning?</td><td><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
642 </table>
643
644 <p><strong>Important</strong>: This conformance checking service
645 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
646 </div>];
647 push @nav, ['#result-summary' => 'Result'];
648 } # print_result_section
649
650 sub print_result_unknown_type_section ($) {
651 my $input = shift;
652
653 print STDOUT qq[
654 <div id="result-summary" class="section">
655 <p><em>Media type <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code> is not supported!</em></p>
656 </div>
657 ];
658 push @nav, ['#result-summary' => 'Result'];
659 } # print_result_unknown_type_section
660
661 sub print_result_input_error_section ($) {
662 my $input = shift;
663 print STDOUT qq[<div class="section" id="result-summary">
664 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
665 </div>];
666 push @nav, ['#result-summary' => 'Result'];
667 } # print_Result_input_error_section
668
669 sub get_node_path ($) {
670 my $node = shift;
671 my @r;
672 while (defined $node) {
673 my $rs;
674 if ($node->node_type == 1) {
675 $rs = $node->manakai_local_name;
676 $node = $node->parent_node;
677 } elsif ($node->node_type == 2) {
678 $rs = '@' . $node->manakai_local_name;
679 $node = $node->owner_element;
680 } elsif ($node->node_type == 3) {
681 $rs = '"' . $node->data . '"';
682 $node = $node->parent_node;
683 } elsif ($node->node_type == 9) {
684 @r = ('') unless @r;
685 $rs = '';
686 $node = $node->parent_node;
687 } else {
688 $rs = '#' . $node->node_type;
689 $node = $node->parent_node;
690 }
691 unshift @r, $rs;
692 }
693 return join '/', @r;
694 } # get_node_path
695
696 sub get_node_link ($) {
697 return qq[<a href="#node-@{[refaddr $_[0]]}">] .
698 htescape (get_node_path ($_[0])) . qq[</a>];
699 } # get_node_link
700
701 {
702 my $Msg = {};
703
704 sub load_text_catalog ($) {
705 my $lang = shift; # MUST be a canonical lang name
706 open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
707 while (<$file>) {
708 if (s/^([^;]+);([^;]*);//) {
709 my ($type, $cls, $msg) = ($1, $2, $_);
710 $msg =~ tr/\x0D\x0A//d;
711 $Msg->{$type} = [$cls, $msg];
712 }
713 }
714 } # load_text_catalog
715
716 sub get_text ($) {
717 my ($type, $level, $node) = @_;
718 $type = $level . ':' . $type if defined $level;
719 my @arg;
720 {
721 if (defined $Msg->{$type}) {
722 my $msg = $Msg->{$type}->[1];
723 $msg =~ s{<var>\$([0-9]+)</var>}{
724 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
725 }ge;
726 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
727 UNIVERSAL::can ($node, 'get_attribute_ns')
728 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
729 }ge;
730 $msg =~ s{<var>{\@}</var>}{
731 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
732 }ge;
733 $msg =~ s{<var>{local-name}</var>}{
734 UNIVERSAL::can ($node, 'manakai_local_name')
735 ? htescape ($node->manakai_local_name) : ''
736 }ge;
737 $msg =~ s{<var>{element-local-name}</var>}{
738 (UNIVERSAL::can ($node, 'owner_element') and
739 $node->owner_element)
740 ? htescape ($node->owner_element->manakai_local_name)
741 : ''
742 }ge;
743 return ($type, $Msg->{$type}->[0], $msg);
744 } elsif ($type =~ s/:([^:]*)$//) {
745 unshift @arg, $1;
746 redo;
747 }
748 }
749 return ($type, '', htescape ($_[0]));
750 } # get_text
751
752 }
753
754 sub get_input_document ($$) {
755 my ($http, $dom) = @_;
756
757 my $request_uri = $http->get_parameter ('uri');
758 my $r = {};
759 if (defined $request_uri and length $request_uri) {
760 my $uri = $dom->create_uri_reference ($request_uri);
761 unless ({
762 http => 1,
763 }->{lc $uri->uri_scheme}) {
764 return {uri => $request_uri, request_uri => $request_uri,
765 error_status_text => 'URI scheme not allowed'};
766 }
767
768 require Message::Util::HostPermit;
769 my $host_permit = new Message::Util::HostPermit;
770 $host_permit->add_rule (<<EOH);
771 Allow host=suika port=80
772 Deny host=suika
773 Allow host=suika.fam.cx port=80
774 Deny host=suika.fam.cx
775 Deny host=localhost
776 Deny host=*.localdomain
777 Deny ipv4=0.0.0.0/8
778 Deny ipv4=10.0.0.0/8
779 Deny ipv4=127.0.0.0/8
780 Deny ipv4=169.254.0.0/16
781 Deny ipv4=172.0.0.0/11
782 Deny ipv4=192.0.2.0/24
783 Deny ipv4=192.88.99.0/24
784 Deny ipv4=192.168.0.0/16
785 Deny ipv4=198.18.0.0/15
786 Deny ipv4=224.0.0.0/4
787 Deny ipv4=255.255.255.255/32
788 Deny ipv6=0::0/0
789 Allow host=*
790 EOH
791 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
792 return {uri => $request_uri, request_uri => $request_uri,
793 error_status_text => 'Connection to the host is forbidden'};
794 }
795
796 require LWP::UserAgent;
797 my $ua = WDCC::LWPUA->new;
798 $ua->{wdcc_dom} = $dom;
799 $ua->{wdcc_host_permit} = $host_permit;
800 $ua->agent ('Mozilla'); ## TODO: for now.
801 $ua->parse_head (0);
802 $ua->protocols_allowed ([qw/http/]);
803 $ua->max_size (1000_000);
804 my $req = HTTP::Request->new (GET => $request_uri);
805 my $res = $ua->request ($req);
806 ## TODO: 401 sets |is_success| true.
807 if ($res->is_success or $http->get_parameter ('error-page')) {
808 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
809 $r->{uri} = $res->request->uri;
810 $r->{request_uri} = $request_uri;
811
812 ## TODO: More strict parsing...
813 my $ct = $res->header ('Content-Type');
814 if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {
815 $r->{media_type} = lc $1;
816 }
817 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?(\S+)"?/i) {
818 $r->{charset} = lc $1;
819 $r->{charset} =~ tr/\\//d;
820 }
821
822 my $input_charset = $http->get_parameter ('charset');
823 if (defined $input_charset and length $input_charset) {
824 $r->{charset_overridden}
825 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
826 $r->{charset} = $input_charset;
827 }
828
829 $r->{s} = ''.$res->content;
830 } else {
831 $r->{uri} = $res->request->uri;
832 $r->{request_uri} = $request_uri;
833 $r->{error_status_text} = $res->status_line;
834 }
835
836 $r->{header_field} = [];
837 $res->scan (sub {
838 push @{$r->{header_field}}, [$_[0], $_[1]];
839 });
840 $r->{header_status_code} = $res->code;
841 $r->{header_status_text} = $res->message;
842 } else {
843 $r->{s} = ''.$http->get_parameter ('s');
844 $r->{uri} = q<thismessage:/>;
845 $r->{request_uri} = q<thismessage:/>;
846 $r->{base_uri} = q<thismessage:/>;
847 $r->{charset} = ''.$http->get_parameter ('_charset_');
848 $r->{charset} =~ s/\s+//g;
849 $r->{charset} = 'utf-8' if $r->{charset} eq '';
850 $r->{header_field} = [];
851 }
852
853 my $input_format = $http->get_parameter ('i');
854 if (defined $input_format and length $input_format) {
855 $r->{media_type_overridden}
856 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
857 $r->{media_type} = $input_format;
858 }
859 if (defined $r->{s} and not defined $r->{media_type}) {
860 $r->{media_type} = 'text/html';
861 $r->{media_type_overridden} = 1;
862 }
863
864 if ($r->{media_type} eq 'text/xml') {
865 unless (defined $r->{charset}) {
866 $r->{charset} = 'us-ascii';
867 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
868 $r->{charset_overridden} = 0;
869 }
870 }
871
872 if (length $r->{s} > 1000_000) {
873 $r->{error_status_text} = 'Entity-body too large';
874 delete $r->{s};
875 return $r;
876 }
877
878 return $r;
879 } # get_input_document
880
881 package WDCC::LWPUA;
882 BEGIN { push our @ISA, 'LWP::UserAgent'; }
883
884 sub redirect_ok {
885 my $ua = shift;
886 unless ($ua->SUPER::redirect_ok (@_)) {
887 return 0;
888 }
889
890 my $uris = $_[1]->header ('Location');
891 return 0 unless $uris;
892 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
893 unless ({
894 http => 1,
895 }->{lc $uri->uri_scheme}) {
896 return 0;
897 }
898 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
899 return 0;
900 }
901 return 1;
902 } # redirect_ok
903
904 =head1 AUTHOR
905
906 Wakaba <w@suika.fam.cx>.
907
908 =head1 LICENSE
909
910 Copyright 2007 Wakaba <w@suika.fam.cx>
911
912 This library is free software; you can redistribute it
913 and/or modify it under the same terms as Perl itself.
914
915 =cut
916
917 ## $Date: 2007/09/02 08:40:49 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24