/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.32 - (hide annotations) (download)
Sun Feb 10 02:30:14 2008 UTC (16 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.31: +78 -71 lines
++ ChangeLog	10 Feb 2008 02:28:48 -0000
	* table-interface.en.html: Typo fixed.

	* cc.cgi: Use |$input->{id_prefix}| as the prefix for the
	identifiers in report sections.  Don't add headings
	if the |$input->{nested}| flag is set.

	* table-script.js (tableToCanvas): Now it aceepts third
	argument, |idPrefix|, for setting ID prefix.

	* table.cgi: Set the third argument to |tableToCanvas| as an
	empty string.

2008-02-10  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.16 use Message::CGI::HTTP;
24     my $http = Message::CGI::HTTP->new;
25 wakaba 1.1
26 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28     exit;
29     }
30    
31 wakaba 1.12 binmode STDOUT, ':utf8';
32 wakaba 1.14 $| = 1;
33 wakaba 1.12
34 wakaba 1.9 require Message::DOM::DOMImplementation;
35     my $dom = Message::DOM::DOMImplementation->new;
36    
37 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
38    
39 wakaba 1.3 my @nav;
40 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
41    
42     <!DOCTYPE html>
43     <html lang="en">
44     <head>
45     <title>Web Document Conformance Checker (BETA)</title>
46 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 wakaba 1.2 </head>
48     <body>
49 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50     (<em>beta</em>)</h1>
51 wakaba 1.14 ];
52 wakaba 1.2
53 wakaba 1.14 $| = 0;
54     my $input = get_input_document ($http, $dom);
55 wakaba 1.16 my $char_length = 0;
56     my %time;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89     ];
90    
91 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
92 wakaba 1.31 check_and_print ($input => $result);
93 wakaba 1.19 print_result_section ($result);
94 wakaba 1.9 } else {
95 wakaba 1.18 print STDOUT qq[</dl></div>];
96     print_result_input_error_section ($input);
97 wakaba 1.9 }
98 wakaba 1.3
99 wakaba 1.2 print STDOUT qq[
100 wakaba 1.3 <ul class="navigation" id="nav-items">
101     ];
102     for (@nav) {
103     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
104     }
105     print STDOUT qq[
106     </ul>
107 wakaba 1.2 </body>
108     </html>
109     ];
110 wakaba 1.1
111 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
112     check check_manifest/) {
113 wakaba 1.16 next unless defined $time{$_};
114     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
115     print $file $char_length, "\t", $time{$_}, "\n";
116     }
117    
118 wakaba 1.1 exit;
119    
120 wakaba 1.19 sub add_error ($$$) {
121     my ($layer, $err, $result) = @_;
122     if (defined $err->{level}) {
123     if ($err->{level} eq 's') {
124     $result->{$layer}->{should}++;
125     $result->{$layer}->{score_min} -= 2;
126     $result->{conforming_min} = 0;
127     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
128     $result->{$layer}->{warning}++;
129 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
130 wakaba 1.19 $result->{$layer}->{unsupported}++;
131     $result->{unsupported} = 1;
132     } else {
133     $result->{$layer}->{must}++;
134     $result->{$layer}->{score_max} -= 2;
135     $result->{$layer}->{score_min} -= 2;
136     $result->{conforming_min} = 0;
137     $result->{conforming_max} = 0;
138     }
139     } else {
140     $result->{$layer}->{must}++;
141     $result->{$layer}->{score_max} -= 2;
142     $result->{$layer}->{score_min} -= 2;
143     $result->{conforming_min} = 0;
144     $result->{conforming_max} = 0;
145     }
146     } # add_error
147    
148 wakaba 1.31 sub check_and_print ($$) {
149     my ($input, $result) = @_;
150 wakaba 1.32 $input->{id_prefix} = '';
151     #$input->{nested} = 1/0;
152 wakaba 1.31
153     print_http_header_section ($input, $result);
154    
155     my $doc;
156     my $el;
157     my $manifest;
158    
159     if ($input->{media_type} eq 'text/html') {
160     ($doc, $el) = print_syntax_error_html_section ($input, $result);
161     print_source_string_section
162     (\($input->{s}), $input->{charset} || $doc->input_encoding);
163     } elsif ({
164     'text/xml' => 1,
165     'application/atom+xml' => 1,
166     'application/rss+xml' => 1,
167     'application/svg+xml' => 1,
168     'application/xhtml+xml' => 1,
169     'application/xml' => 1,
170     }->{$input->{media_type}}) {
171     ($doc, $el) = print_syntax_error_xml_section ($input, $result);
172     print_source_string_section (\($input->{s}), $doc->input_encoding);
173     } elsif ($input->{media_type} eq 'text/cache-manifest') {
174     ## TODO: MUST be text/cache-manifest
175     $manifest = print_syntax_error_manifest_section ($input, $result);
176     print_source_string_section (\($input->{s}), 'utf-8');
177     } else {
178     ## TODO: Change HTTP status code??
179     print_result_unknown_type_section ($input, $result);
180     }
181    
182     if (defined $doc or defined $el) {
183 wakaba 1.32 print_structure_dump_dom_section ($input, $doc, $el);
184     my $elements = print_structure_error_dom_section
185     ($input, $doc, $el, $result);
186     print_table_section ($input, $elements->{table}) if @{$elements->{table}};
187     print_id_section ($input, $elements->{id}) if keys %{$elements->{id}};
188     print_term_section ($input, $elements->{term}) if keys %{$elements->{term}};
189     print_class_section ($input, $elements->{class}) if keys %{$elements->{class}};
190 wakaba 1.31 } elsif (defined $manifest) {
191 wakaba 1.32 print_structure_dump_manifest_section ($input, $manifest);
192     print_structure_error_manifest_section ($input, $manifest, $result);
193 wakaba 1.31 }
194     } # check_and_print
195    
196 wakaba 1.19 sub print_http_header_section ($$) {
197     my ($input, $result) = @_;
198 wakaba 1.9 return unless defined $input->{header_status_code} or
199     defined $input->{header_status_text} or
200     @{$input->{header_field}};
201    
202 wakaba 1.32 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
203     print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
204 wakaba 1.9 <h2>HTTP Header</h2>
205    
206     <p><strong>Note</strong>: Due to the limitation of the
207     network library in use, the content of this section might
208     not be the real header.</p>
209    
210     <table><tbody>
211     ];
212    
213     if (defined $input->{header_status_code}) {
214     print STDOUT qq[<tr><th scope="row">Status code</th>];
215     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
216     }
217     if (defined $input->{header_status_text}) {
218     print STDOUT qq[<tr><th scope="row">Status text</th>];
219     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
220     }
221    
222     for (@{$input->{header_field}}) {
223     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
224     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
225     }
226    
227     print STDOUT qq[</tbody></table></div>];
228     } # print_http_header_section
229    
230 wakaba 1.19 sub print_syntax_error_html_section ($$) {
231     my ($input, $result) = @_;
232 wakaba 1.18
233     require Encode;
234     require Whatpm::HTML;
235    
236     print STDOUT qq[
237 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
238 wakaba 1.18 <h2>Parse Errors</h2>
239    
240     <dl>];
241 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
242 wakaba 1.18
243     my $onerror = sub {
244     my (%opt) = @_;
245     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
246     if ($opt{column} > 0) {
247     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
248     } else {
249     $opt{line} = $opt{line} - 1 || 1;
250     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
251     }
252     $type =~ tr/ /-/;
253     $type =~ s/\|/%7C/g;
254     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
255 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
256     print STDOUT qq[$msg</dd>\n];
257 wakaba 1.19
258     add_error ('syntax', \%opt => $result);
259 wakaba 1.18 };
260    
261     my $doc = $dom->create_document;
262     my $el;
263 wakaba 1.26 my $inner_html_element = $http->get_parameter ('e');
264 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
265 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
266 wakaba 1.24 my $time1 = time;
267     my $t = Encode::decode ($input->{charset}, $input->{s});
268     $time{decode} = time - $time1;
269    
270 wakaba 1.18 $el = $doc->create_element_ns
271     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
272 wakaba 1.24 $time1 = time;
273 wakaba 1.18 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
274 wakaba 1.24 $time{parse} = time - $time1;
275 wakaba 1.18 } else {
276 wakaba 1.24 my $time1 = time;
277     Whatpm::HTML->parse_byte_string
278     ($input->{charset}, $input->{s} => $doc, $onerror);
279     $time{parse_html} = time - $time1;
280 wakaba 1.18 }
281 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
282     if defined $input->{official_charset};
283 wakaba 1.24
284 wakaba 1.18 print STDOUT qq[</dl></div>];
285    
286     return ($doc, $el);
287     } # print_syntax_error_html_section
288    
289 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
290     my ($input, $result) = @_;
291 wakaba 1.18
292     require Message::DOM::XMLParserTemp;
293    
294     print STDOUT qq[
295 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
296 wakaba 1.18 <h2>Parse Errors</h2>
297    
298     <dl>];
299 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
300 wakaba 1.18
301     my $onerror = sub {
302     my $err = shift;
303     my $line = $err->location->line_number;
304     print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
305     print STDOUT $err->location->column_number, "</dt><dd>";
306     print STDOUT htescape $err->text, "</dd>\n";
307 wakaba 1.19
308     add_error ('syntax', {type => $err->text,
309     level => [
310     $err->SEVERITY_FATAL_ERROR => 'm',
311     $err->SEVERITY_ERROR => 'm',
312     $err->SEVERITY_WARNING => 's',
313     ]->[$err->severity]} => $result);
314    
315 wakaba 1.18 return 1;
316     };
317    
318     my $time1 = time;
319     open my $fh, '<', \($input->{s});
320     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
321     ($fh => $dom, $onerror, charset => $input->{charset});
322     $time{parse_xml} = time - $time1;
323 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
324     if defined $input->{official_charset};
325 wakaba 1.18
326     print STDOUT qq[</dl></div>];
327    
328     return ($doc, undef);
329     } # print_syntax_error_xml_section
330    
331 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
332     my ($input, $result) = @_;
333    
334     require Whatpm::CacheManifest;
335    
336     print STDOUT qq[
337 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
338 wakaba 1.22 <h2>Parse Errors</h2>
339    
340     <dl>];
341 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
342 wakaba 1.22
343     my $onerror = sub {
344     my (%opt) = @_;
345     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
346 wakaba 1.32 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
347     qq[</dt>];
348 wakaba 1.22 $type =~ tr/ /-/;
349     $type =~ s/\|/%7C/g;
350     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
351 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
352     print STDOUT qq[$msg</dd>\n];
353 wakaba 1.22
354     add_error ('syntax', \%opt => $result);
355     };
356    
357     my $time1 = time;
358     my $manifest = Whatpm::CacheManifest->parse_byte_string
359     ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
360     $time{parse_manifest} = time - $time1;
361    
362     print STDOUT qq[</dl></div>];
363    
364     return $manifest;
365     } # print_syntax_error_manifest_section
366    
367 wakaba 1.9 sub print_source_string_section ($$) {
368     require Encode;
369     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
370     return unless $enc;
371    
372     my $s = \($enc->decode (${$_[0]}));
373     my $i = 1;
374 wakaba 1.32 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
375     print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
376 wakaba 1.9 <h2>Document Source</h2>
377     <ol lang="">\n];
378 wakaba 1.7 if (length $$s) {
379     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
380 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
381     "</li>\n";
382 wakaba 1.7 $i++;
383     }
384     if ($$s =~ /\G([^\x0A]+)/gc) {
385 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
386     "</li>\n";
387 wakaba 1.7 }
388     } else {
389 wakaba 1.32 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
390 wakaba 1.2 }
391 wakaba 1.9 print STDOUT "</ol></div>";
392     } # print_input_string_section
393 wakaba 1.2
394     sub print_document_tree ($) {
395 wakaba 1.1 my $node = shift;
396 wakaba 1.2 my $r = '<ol class="xoxo">';
397 wakaba 1.1
398 wakaba 1.2 my @node = ($node);
399 wakaba 1.1 while (@node) {
400     my $child = shift @node;
401 wakaba 1.2 unless (ref $child) {
402     $r .= $child;
403     next;
404     }
405    
406 wakaba 1.32 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
407 wakaba 1.2 my $nt = $child->node_type;
408     if ($nt == $child->ELEMENT_NODE) {
409 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
410     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
411 wakaba 1.2 '</code>'; ## ISSUE: case
412    
413     if ($child->has_attributes) {
414     $r .= '<ul class="attributes">';
415 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
416 wakaba 1.2 @{$child->attributes}) {
417 wakaba 1.32 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
418 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
419     }
420     $r .= '</ul>';
421     }
422    
423 wakaba 1.7 if ($child->has_child_nodes) {
424 wakaba 1.2 $r .= '<ol class="children">';
425 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
426     } else {
427     $r .= '</li>';
428 wakaba 1.2 }
429     } elsif ($nt == $child->TEXT_NODE) {
430 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
431 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
432 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
433 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
434 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
435 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
436 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
437 wakaba 1.7 $r .= qq[<ul class="attributes">];
438 wakaba 1.27 my $cp = $child->manakai_charset;
439     if (defined $cp) {
440     $r .= qq[<li><code>charset</code> parameter = <code>];
441     $r .= htescape ($cp) . qq[</code></li>];
442     }
443     $r .= qq[<li><code>inputEncoding</code> = ];
444     my $ie = $child->input_encoding;
445     if (defined $ie) {
446     $r .= qq[<code>@{[htescape ($ie)]}</code>];
447     if ($child->manakai_has_bom) {
448     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
449     }
450     } else {
451     $r .= qq[(<code>null</code>)];
452     }
453 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
454     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
455 wakaba 1.9 unless ($child->manakai_is_html) {
456     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
457     if (defined $child->xml_encoding) {
458     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
459     } else {
460     $r .= qq[<li>XML encoding = (null)</li>];
461     }
462     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
463     }
464 wakaba 1.7 $r .= qq[</ul>];
465 wakaba 1.2 if ($child->has_child_nodes) {
466 wakaba 1.7 $r .= '<ol class="children">';
467 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
468 wakaba 1.1 }
469 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
470 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
471     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
472     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
473     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
474 wakaba 1.2 $r .= '</ul></li>';
475     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
476 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
477 wakaba 1.1 } else {
478 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
479 wakaba 1.1 }
480     }
481 wakaba 1.2
482     $r .= '</ol>';
483     print STDOUT $r;
484     } # print_document_tree
485 wakaba 1.1
486 wakaba 1.32 sub print_structure_dump_dom_section ($$$) {
487     my ($input, $doc, $el) = @_;
488 wakaba 1.18
489     print STDOUT qq[
490 wakaba 1.32 <div id="$input->{id_prefix}document-tree" class="section">
491 wakaba 1.18 <h2>Document Tree</h2>
492     ];
493 wakaba 1.32 push @nav, ['#document-tree' => 'Tree'] unless $input->{nested};
494 wakaba 1.18
495     print_document_tree ($el || $doc);
496    
497     print STDOUT qq[</div>];
498 wakaba 1.22 } # print_structure_dump_dom_section
499    
500 wakaba 1.32 sub print_structure_dump_manifest_section ($$) {
501     my ($input, $manifest) = @_;
502 wakaba 1.22
503     print STDOUT qq[
504 wakaba 1.32 <div id="$input->{id_prefix}dump-manifest" class="section">
505 wakaba 1.22 <h2>Cache Manifest</h2>
506     ];
507 wakaba 1.32 push @nav, ['#dump-manifest' => 'Caceh Manifest'] unless $input->{nested};
508 wakaba 1.22
509     print STDOUT qq[<dl><dt>Explicit entries</dt>];
510     for my $uri (@{$manifest->[0]}) {
511     my $euri = htescape ($uri);
512     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
513     }
514    
515     print STDOUT qq[<dt>Fallback entries</dt><dd>
516     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
517     <th scope=row>Fallback Entry</tr><tbody>];
518     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
519     my $euri = htescape ($uri);
520     my $euri2 = htescape ($manifest->[1]->{$uri});
521     print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
522     <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
523     }
524    
525     print STDOUT qq[</table><dt>Online whitelist</dt>];
526     for my $uri (@{$manifest->[2]}) {
527     my $euri = htescape ($uri);
528     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
529     }
530 wakaba 1.18
531 wakaba 1.22 print STDOUT qq[</dl></div>];
532     } # print_structure_dump_manifest_section
533    
534 wakaba 1.32 sub print_structure_error_dom_section ($$$$) {
535     my ($input, $doc, $el, $result) = @_;
536 wakaba 1.18
537 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
538 wakaba 1.18 <h2>Document Errors</h2>
539    
540     <dl>];
541 wakaba 1.32 push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested};
542 wakaba 1.18
543     require Whatpm::ContentChecker;
544     my $onerror = sub {
545     my %opt = @_;
546     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
547     $type =~ tr/ /-/;
548     $type =~ s/\|/%7C/g;
549     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
550 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
551 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
552     print STDOUT $msg, "</dd>\n";
553 wakaba 1.19 add_error ('structure', \%opt => $result);
554 wakaba 1.18 };
555    
556     my $elements;
557     my $time1 = time;
558     if ($el) {
559     $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
560     } else {
561     $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
562     }
563     $time{check} = time - $time1;
564    
565     print STDOUT qq[</dl></div>];
566    
567     return $elements;
568 wakaba 1.22 } # print_structure_error_dom_section
569    
570     sub print_structure_error_manifest_section ($$$) {
571 wakaba 1.32 my ($input, $manifest, $result) = @_;
572 wakaba 1.22
573 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
574 wakaba 1.22 <h2>Document Errors</h2>
575    
576     <dl>];
577 wakaba 1.32 push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested};
578 wakaba 1.22
579     require Whatpm::CacheManifest;
580     Whatpm::CacheManifest->check_manifest ($manifest, sub {
581     my %opt = @_;
582     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
583     $type =~ tr/ /-/;
584     $type =~ s/\|/%7C/g;
585     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
586 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
587 wakaba 1.22 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
588     add_error ('structure', \%opt => $result);
589     });
590    
591     print STDOUT qq[</div>];
592     } # print_structure_error_manifest_section
593 wakaba 1.18
594 wakaba 1.32 sub print_table_section ($$) {
595     my ($input, $tables) = @_;
596 wakaba 1.18
597 wakaba 1.32 push @nav, ['#tables' => 'Tables'] unless $input->{nested};
598 wakaba 1.18 print STDOUT qq[
599 wakaba 1.32 <div id="$input->{id_prefix}tables" class="section">
600 wakaba 1.18 <h2>Tables</h2>
601    
602     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
603     <script src="../table-script.js" type="text/javascript"></script>
604     <noscript>
605     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
606     </noscript>
607     ];
608    
609     require JSON;
610    
611     my $i = 0;
612     for my $table_el (@$tables) {
613     $i++;
614 wakaba 1.32 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
615     get_node_link ($input, $table_el) . q[</h3>];
616 wakaba 1.18
617     ## TODO: Make |ContentChecker| return |form_table| result
618     ## so that this script don't have to run the algorithm twice.
619     my $table = Whatpm::HTMLTable->form_table ($table_el);
620    
621     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
622     next unless $_;
623     delete $_->{element};
624     }
625    
626     for (@{$table->{row_group}}) {
627     next unless $_;
628     next unless $_->{element};
629     $_->{type} = $_->{element}->manakai_local_name;
630     delete $_->{element};
631     }
632    
633     for (@{$table->{cell}}) {
634     next unless $_;
635     for (@{$_}) {
636     next unless $_;
637     for (@$_) {
638     $_->{id} = refaddr $_->{element} if defined $_->{element};
639     delete $_->{element};
640     $_->{is_header} = $_->{is_header} ? 1 : 0;
641     }
642     }
643     }
644    
645     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
646     print STDOUT JSON::objToJson ($table);
647 wakaba 1.32 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
648     print STDOUT qq[, '$input->{id_prefix}');</script>];
649 wakaba 1.18 }
650    
651     print STDOUT qq[</div>];
652     } # print_table_section
653    
654 wakaba 1.32 sub print_id_section ($$) {
655     my ($input, $ids) = @_;
656 wakaba 1.18
657 wakaba 1.32 push @nav, ['#identifiers' => 'IDs'] unless $input->{nested};
658 wakaba 1.18 print STDOUT qq[
659 wakaba 1.32 <div id="$input->{id_prefix}identifiers" class="section">
660 wakaba 1.18 <h2>Identifiers</h2>
661    
662     <dl>
663     ];
664     for my $id (sort {$a cmp $b} keys %$ids) {
665     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
666     for (@{$ids->{$id}}) {
667 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
668 wakaba 1.18 }
669     }
670     print STDOUT qq[</dl></div>];
671     } # print_id_section
672    
673 wakaba 1.32 sub print_term_section ($$) {
674     my ($input, $terms) = @_;
675 wakaba 1.18
676 wakaba 1.32 push @nav, ['#terms' => 'Terms'] unless $input->{nested};
677 wakaba 1.18 print STDOUT qq[
678 wakaba 1.32 <div id="$input->{id_prefix}terms" class="section">
679 wakaba 1.18 <h2>Terms</h2>
680    
681     <dl>
682     ];
683     for my $term (sort {$a cmp $b} keys %$terms) {
684     print STDOUT qq[<dt>@{[htescape $term]}</dt>];
685     for (@{$terms->{$term}}) {
686 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
687 wakaba 1.18 }
688     }
689     print STDOUT qq[</dl></div>];
690     } # print_term_section
691    
692 wakaba 1.32 sub print_class_section ($$) {
693     my ($input, $classes) = @_;
694 wakaba 1.18
695 wakaba 1.32 push @nav, ['#classes' => 'Classes'] unless $input->{nested};
696 wakaba 1.18 print STDOUT qq[
697 wakaba 1.32 <div id="$input->{id_prefix}classes" class="section">
698 wakaba 1.18 <h2>Classes</h2>
699    
700     <dl>
701     ];
702     for my $class (sort {$a cmp $b} keys %$classes) {
703     print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
704     for (@{$classes->{$class}}) {
705 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
706 wakaba 1.18 }
707     }
708     print STDOUT qq[</dl></div>];
709     } # print_class_section
710    
711 wakaba 1.19 sub print_result_section ($) {
712     my $result = shift;
713    
714     print STDOUT qq[
715     <div id="result-summary" class="section">
716     <h2>Result</h2>];
717    
718 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
719 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
720     checker cannot decide whether the document is conforming or
721     not, since the document contains one or more unsupported
722 wakaba 1.21 features. The document might or might not be conforming.</p>];
723 wakaba 1.19 } elsif ($result->{conforming_min}) {
724     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
725     found in this document.</p>];
726     } elsif ($result->{conforming_max}) {
727     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
728     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
729     it might be conforming.</p>];
730     } else {
731     print STDOUT qq[<p class=FAIL id=result-para>This document is
732     <strong><em>non</em>-conforming</strong>.</p>];
733     }
734    
735     print STDOUT qq[<table>
736     <colgroup><col><colgroup><col><col><col><colgroup><col>
737     <thead>
738 wakaba 1.23 <tr><th scope=col></th>
739     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
740     Errors</a></th>
741     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
742     Errors</a></th>
743     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
744     <th scope=col>Score</th></tr></thead><tbody>];
745 wakaba 1.19
746     my $must_error = 0;
747     my $should_error = 0;
748     my $warning = 0;
749     my $score_min = 0;
750     my $score_max = 0;
751     my $score_base = 20;
752 wakaba 1.21 my $score_unit = $score_base / 100;
753 wakaba 1.19 for (
754     [Transfer => 'transfer', ''],
755     [Character => 'char', ''],
756     [Syntax => 'syntax', '#parse-errors'],
757     [Structure => 'structure', '#document-errors'],
758     ) {
759     $must_error += ($result->{$_->[1]}->{must} += 0);
760     $should_error += ($result->{$_->[1]}->{should} += 0);
761     $warning += ($result->{$_->[1]}->{warning} += 0);
762 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
763     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
764 wakaba 1.19
765     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
766     my $label = $_->[0];
767     if ($result->{$_->[1]}->{must} or
768     $result->{$_->[1]}->{should} or
769     $result->{$_->[1]}->{warning} or
770     $result->{$_->[1]}->{unsupported}) {
771     $label = qq[<a href="$_->[2]">$label</a>];
772     }
773    
774     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
775     if ($uncertain) {
776 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
777 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
778 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
779 wakaba 1.19 } else {
780 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
781 wakaba 1.19 }
782     }
783    
784     $score_max += $score_base;
785    
786     print STDOUT qq[
787     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
788     </tbody>
789 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
790     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
791     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
792     <td>$warning?</td>
793     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
794 wakaba 1.19 </table>
795    
796     <p><strong>Important</strong>: This conformance checking service
797     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
798     </div>];
799     push @nav, ['#result-summary' => 'Result'];
800     } # print_result_section
801    
802 wakaba 1.24 sub print_result_unknown_type_section ($$) {
803     my ($input, $result) = @_;
804 wakaba 1.18
805 wakaba 1.24 my $euri = htescape ($input->{uri});
806 wakaba 1.18 print STDOUT qq[
807 wakaba 1.24 <div id="parse-errors" class="section">
808     <h2>Errors</h2>
809    
810     <dl>
811     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
812     <dd class=unsupported><strong><a href="../error-description#level-u">Not
813     supported</a></strong>:
814     Media type
815     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
816     is not supported.</dd>
817     </dl>
818 wakaba 1.18 </div>
819     ];
820 wakaba 1.24 push @nav, ['#parse-errors' => 'Errors'];
821 wakaba 1.30 add_error (char => {level => 'u'} => $result);
822     add_error (syntax => {level => 'u'} => $result);
823     add_error (structure => {level => 'u'} => $result);
824 wakaba 1.18 } # print_result_unknown_type_section
825    
826     sub print_result_input_error_section ($) {
827     my $input = shift;
828     print STDOUT qq[<div class="section" id="result-summary">
829     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
830     </div>];
831     push @nav, ['#result-summary' => 'Result'];
832 wakaba 1.32 } # print_result_input_error_section
833 wakaba 1.18
834 wakaba 1.32 sub get_error_label ($$) {
835     my ($input, $err) = @_;
836 wakaba 1.22
837     my $r = '';
838    
839     if (defined $err->{line}) {
840     if ($err->{column} > 0) {
841     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
842     } else {
843     $err->{line} = $err->{line} - 1 || 1;
844     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
845     }
846     }
847    
848     if (defined $err->{node}) {
849     $r .= ' ' if length $r;
850 wakaba 1.32 $r = get_node_link ($input, $err->{node});
851 wakaba 1.22 }
852    
853     if (defined $err->{index}) {
854     $r .= ' ' if length $r;
855     $r .= 'Index ' . (0+$err->{index});
856     }
857    
858     if (defined $err->{value}) {
859     $r .= ' ' if length $r;
860     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
861     }
862    
863     return $r;
864     } # get_error_label
865    
866 wakaba 1.23 sub get_error_level_label ($) {
867     my $err = shift;
868    
869     my $r = '';
870    
871     if (not defined $err->{level} or $err->{level} eq 'm') {
872     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
873     error</a></strong>: ];
874     } elsif ($err->{level} eq 's') {
875     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
876     error</a></strong>: ];
877     } elsif ($err->{level} eq 'w') {
878     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
879     ];
880 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
881 wakaba 1.23 $r = qq[<strong><a href="../error-description#level-u">Not
882     supported</a></strong>: ];
883     } else {
884     my $elevel = htescape ($err->{level});
885     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
886     ];
887     }
888    
889     return $r;
890     } # get_error_level_label
891    
892 wakaba 1.1 sub get_node_path ($) {
893     my $node = shift;
894     my @r;
895     while (defined $node) {
896     my $rs;
897     if ($node->node_type == 1) {
898     $rs = $node->manakai_local_name;
899     $node = $node->parent_node;
900     } elsif ($node->node_type == 2) {
901     $rs = '@' . $node->manakai_local_name;
902     $node = $node->owner_element;
903     } elsif ($node->node_type == 3) {
904     $rs = '"' . $node->data . '"';
905     $node = $node->parent_node;
906     } elsif ($node->node_type == 9) {
907 wakaba 1.9 @r = ('') unless @r;
908 wakaba 1.1 $rs = '';
909     $node = $node->parent_node;
910     } else {
911     $rs = '#' . $node->node_type;
912     $node = $node->parent_node;
913     }
914     unshift @r, $rs;
915     }
916     return join '/', @r;
917     } # get_node_path
918    
919 wakaba 1.32 sub get_node_link ($$) {
920     return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
921     htescape (get_node_path ($_[1])) . qq[</a>];
922 wakaba 1.6 } # get_node_link
923    
924 wakaba 1.7 {
925     my $Msg = {};
926    
927     sub load_text_catalog ($) {
928     my $lang = shift; # MUST be a canonical lang name
929 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
930     or die "$0: cc-msg.$lang.txt: $!";
931 wakaba 1.7 while (<$file>) {
932     if (s/^([^;]+);([^;]*);//) {
933     my ($type, $cls, $msg) = ($1, $2, $_);
934     $msg =~ tr/\x0D\x0A//d;
935     $Msg->{$type} = [$cls, $msg];
936     }
937     }
938     } # load_text_catalog
939    
940     sub get_text ($) {
941 wakaba 1.15 my ($type, $level, $node) = @_;
942 wakaba 1.7 $type = $level . ':' . $type if defined $level;
943 wakaba 1.29 $level = 'm' unless defined $level;
944 wakaba 1.7 my @arg;
945     {
946     if (defined $Msg->{$type}) {
947     my $msg = $Msg->{$type}->[1];
948 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
949     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
950     }ge;
951 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
952     UNIVERSAL::can ($node, 'get_attribute_ns')
953     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
954     }ge;
955     $msg =~ s{<var>{\@}</var>}{
956     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
957     }ge;
958 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
959     UNIVERSAL::can ($node, 'manakai_local_name')
960     ? htescape ($node->manakai_local_name) : ''
961     }ge;
962     $msg =~ s{<var>{element-local-name}</var>}{
963     (UNIVERSAL::can ($node, 'owner_element') and
964     $node->owner_element)
965     ? htescape ($node->owner_element->manakai_local_name)
966     : ''
967     }ge;
968 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
969 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
970     unshift @arg, $1;
971     redo;
972     }
973     }
974 wakaba 1.29 return ($type, 'level-'.$level, htescape ($_[0]));
975 wakaba 1.7 } # get_text
976    
977     }
978    
979 wakaba 1.9 sub get_input_document ($$) {
980     my ($http, $dom) = @_;
981    
982 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
983 wakaba 1.9 my $r = {};
984     if (defined $request_uri and length $request_uri) {
985     my $uri = $dom->create_uri_reference ($request_uri);
986     unless ({
987     http => 1,
988     }->{lc $uri->uri_scheme}) {
989     return {uri => $request_uri, request_uri => $request_uri,
990     error_status_text => 'URI scheme not allowed'};
991     }
992    
993     require Message::Util::HostPermit;
994     my $host_permit = new Message::Util::HostPermit;
995     $host_permit->add_rule (<<EOH);
996     Allow host=suika port=80
997     Deny host=suika
998     Allow host=suika.fam.cx port=80
999     Deny host=suika.fam.cx
1000     Deny host=localhost
1001     Deny host=*.localdomain
1002     Deny ipv4=0.0.0.0/8
1003     Deny ipv4=10.0.0.0/8
1004     Deny ipv4=127.0.0.0/8
1005     Deny ipv4=169.254.0.0/16
1006     Deny ipv4=172.0.0.0/11
1007     Deny ipv4=192.0.2.0/24
1008     Deny ipv4=192.88.99.0/24
1009     Deny ipv4=192.168.0.0/16
1010     Deny ipv4=198.18.0.0/15
1011     Deny ipv4=224.0.0.0/4
1012     Deny ipv4=255.255.255.255/32
1013     Deny ipv6=0::0/0
1014     Allow host=*
1015     EOH
1016     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1017     return {uri => $request_uri, request_uri => $request_uri,
1018     error_status_text => 'Connection to the host is forbidden'};
1019     }
1020    
1021     require LWP::UserAgent;
1022     my $ua = WDCC::LWPUA->new;
1023     $ua->{wdcc_dom} = $dom;
1024     $ua->{wdcc_host_permit} = $host_permit;
1025     $ua->agent ('Mozilla'); ## TODO: for now.
1026     $ua->parse_head (0);
1027     $ua->protocols_allowed ([qw/http/]);
1028     $ua->max_size (1000_000);
1029     my $req = HTTP::Request->new (GET => $request_uri);
1030 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1031 wakaba 1.9 my $res = $ua->request ($req);
1032 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1033     if ($res->is_success or $http->get_parameter ('error-page')) {
1034 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1035     $r->{uri} = $res->request->uri;
1036     $r->{request_uri} = $request_uri;
1037    
1038     ## TODO: More strict parsing...
1039     my $ct = $res->header ('Content-Type');
1040 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1041 wakaba 1.9 $r->{charset} = lc $1;
1042     $r->{charset} =~ tr/\\//d;
1043 wakaba 1.26 $r->{official_charset} = $r->{charset};
1044 wakaba 1.9 }
1045    
1046 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1047 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1048     $r->{charset_overridden}
1049     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1050     $r->{charset} = $input_charset;
1051 wakaba 1.25 }
1052    
1053     ## TODO: Support for HTTP Content-Encoding
1054 wakaba 1.9
1055     $r->{s} = ''.$res->content;
1056 wakaba 1.25
1057     require Whatpm::ContentType;
1058     ($r->{official_type}, $r->{media_type})
1059     = Whatpm::ContentType->get_sniffed_type
1060     (get_file_head => sub {
1061     return substr $r->{s}, 0, shift;
1062     },
1063     http_content_type_byte => $ct,
1064     has_http_content_encoding =>
1065     defined $res->header ('Content-Encoding'),
1066     supported_image_types => {});
1067 wakaba 1.9 } else {
1068     $r->{uri} = $res->request->uri;
1069     $r->{request_uri} = $request_uri;
1070     $r->{error_status_text} = $res->status_line;
1071     }
1072    
1073     $r->{header_field} = [];
1074     $res->scan (sub {
1075     push @{$r->{header_field}}, [$_[0], $_[1]];
1076     });
1077     $r->{header_status_code} = $res->code;
1078     $r->{header_status_text} = $res->message;
1079     } else {
1080 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1081 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1082     $r->{request_uri} = q<thismessage:/>;
1083     $r->{base_uri} = q<thismessage:/>;
1084 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1085 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1086     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1087 wakaba 1.26 $r->{official_charset} = $r->{charset};
1088 wakaba 1.9 $r->{header_field} = [];
1089 wakaba 1.25
1090     require Whatpm::ContentType;
1091     ($r->{official_type}, $r->{media_type})
1092     = Whatpm::ContentType->get_sniffed_type
1093     (get_file_head => sub {
1094     return substr $r->{s}, 0, shift;
1095     },
1096     http_content_type_byte => undef,
1097     has_http_content_encoding => 0,
1098     supported_image_types => {});
1099 wakaba 1.9 }
1100    
1101 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1102 wakaba 1.9 if (defined $input_format and length $input_format) {
1103     $r->{media_type_overridden}
1104     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1105     $r->{media_type} = $input_format;
1106     }
1107     if (defined $r->{s} and not defined $r->{media_type}) {
1108     $r->{media_type} = 'text/html';
1109     $r->{media_type_overridden} = 1;
1110     }
1111    
1112     if ($r->{media_type} eq 'text/xml') {
1113     unless (defined $r->{charset}) {
1114     $r->{charset} = 'us-ascii';
1115 wakaba 1.26 $r->{official_charset} = $r->{charset};
1116 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1117     $r->{charset_overridden} = 0;
1118     }
1119     }
1120    
1121     if (length $r->{s} > 1000_000) {
1122     $r->{error_status_text} = 'Entity-body too large';
1123     delete $r->{s};
1124     return $r;
1125     }
1126    
1127     return $r;
1128     } # get_input_document
1129    
1130     package WDCC::LWPUA;
1131     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1132    
1133     sub redirect_ok {
1134     my $ua = shift;
1135     unless ($ua->SUPER::redirect_ok (@_)) {
1136     return 0;
1137     }
1138    
1139     my $uris = $_[1]->header ('Location');
1140     return 0 unless $uris;
1141     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1142     unless ({
1143     http => 1,
1144     }->{lc $uri->uri_scheme}) {
1145     return 0;
1146     }
1147     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1148     return 0;
1149     }
1150     return 1;
1151     } # redirect_ok
1152    
1153 wakaba 1.1 =head1 AUTHOR
1154    
1155     Wakaba <w@suika.fam.cx>.
1156    
1157     =head1 LICENSE
1158    
1159     Copyright 2007 Wakaba <w@suika.fam.cx>
1160    
1161     This library is free software; you can redistribute it
1162     and/or modify it under the same terms as Perl itself.
1163    
1164     =cut
1165    
1166 wakaba 1.32 ## $Date: 2008/02/10 02:05:30 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24