/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.34 - (hide annotations) (download)
Sun Feb 10 03:11:06 2008 UTC (16 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.33: +39 -7 lines
++ ChangeLog	10 Feb 2008 03:11:04 -0000
	* cc.cgi: Subdocument validation framework implemented.

2008-02-10  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.16 use Message::CGI::HTTP;
24     my $http = Message::CGI::HTTP->new;
25 wakaba 1.1
26 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28     exit;
29     }
30    
31 wakaba 1.12 binmode STDOUT, ':utf8';
32 wakaba 1.14 $| = 1;
33 wakaba 1.12
34 wakaba 1.9 require Message::DOM::DOMImplementation;
35     my $dom = Message::DOM::DOMImplementation->new;
36    
37 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
38    
39 wakaba 1.3 my @nav;
40 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
41    
42     <!DOCTYPE html>
43     <html lang="en">
44     <head>
45     <title>Web Document Conformance Checker (BETA)</title>
46 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 wakaba 1.2 </head>
48     <body>
49 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50     (<em>beta</em>)</h1>
51 wakaba 1.14 ];
52 wakaba 1.2
53 wakaba 1.14 $| = 0;
54     my $input = get_input_document ($http, $dom);
55 wakaba 1.16 my $char_length = 0;
56     my %time;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89     ];
90    
91 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
92 wakaba 1.31 check_and_print ($input => $result);
93 wakaba 1.19 print_result_section ($result);
94 wakaba 1.9 } else {
95 wakaba 1.18 print STDOUT qq[</dl></div>];
96     print_result_input_error_section ($input);
97 wakaba 1.9 }
98 wakaba 1.3
99 wakaba 1.2 print STDOUT qq[
100 wakaba 1.3 <ul class="navigation" id="nav-items">
101     ];
102     for (@nav) {
103     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
104     }
105     print STDOUT qq[
106     </ul>
107 wakaba 1.2 </body>
108     </html>
109     ];
110 wakaba 1.1
111 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
112     check check_manifest/) {
113 wakaba 1.16 next unless defined $time{$_};
114     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
115     print $file $char_length, "\t", $time{$_}, "\n";
116     }
117    
118 wakaba 1.1 exit;
119    
120 wakaba 1.19 sub add_error ($$$) {
121     my ($layer, $err, $result) = @_;
122     if (defined $err->{level}) {
123     if ($err->{level} eq 's') {
124     $result->{$layer}->{should}++;
125     $result->{$layer}->{score_min} -= 2;
126     $result->{conforming_min} = 0;
127     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
128     $result->{$layer}->{warning}++;
129 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
130 wakaba 1.19 $result->{$layer}->{unsupported}++;
131     $result->{unsupported} = 1;
132     } else {
133     $result->{$layer}->{must}++;
134     $result->{$layer}->{score_max} -= 2;
135     $result->{$layer}->{score_min} -= 2;
136     $result->{conforming_min} = 0;
137     $result->{conforming_max} = 0;
138     }
139     } else {
140     $result->{$layer}->{must}++;
141     $result->{$layer}->{score_max} -= 2;
142     $result->{$layer}->{score_min} -= 2;
143     $result->{conforming_min} = 0;
144     $result->{conforming_max} = 0;
145     }
146     } # add_error
147    
148 wakaba 1.31 sub check_and_print ($$) {
149     my ($input, $result) = @_;
150 wakaba 1.32 $input->{id_prefix} = '';
151     #$input->{nested} = 1/0;
152 wakaba 1.31
153     print_http_header_section ($input, $result);
154    
155     my $doc;
156     my $el;
157     my $manifest;
158 wakaba 1.34 my @subdoc;
159 wakaba 1.31
160     if ($input->{media_type} eq 'text/html') {
161     ($doc, $el) = print_syntax_error_html_section ($input, $result);
162     print_source_string_section
163     (\($input->{s}), $input->{charset} || $doc->input_encoding);
164     } elsif ({
165     'text/xml' => 1,
166     'application/atom+xml' => 1,
167     'application/rss+xml' => 1,
168     'application/svg+xml' => 1,
169     'application/xhtml+xml' => 1,
170     'application/xml' => 1,
171     }->{$input->{media_type}}) {
172     ($doc, $el) = print_syntax_error_xml_section ($input, $result);
173     print_source_string_section (\($input->{s}), $doc->input_encoding);
174     } elsif ($input->{media_type} eq 'text/cache-manifest') {
175     ## TODO: MUST be text/cache-manifest
176     $manifest = print_syntax_error_manifest_section ($input, $result);
177     print_source_string_section (\($input->{s}), 'utf-8');
178     } else {
179     ## TODO: Change HTTP status code??
180     print_result_unknown_type_section ($input, $result);
181     }
182    
183     if (defined $doc or defined $el) {
184 wakaba 1.34 $doc->document_uri ($input->{uri});
185     $doc->manakai_entity_base_uri ($input->{base_uri});
186 wakaba 1.32 print_structure_dump_dom_section ($input, $doc, $el);
187     my $elements = print_structure_error_dom_section
188 wakaba 1.34 ($input, $doc, $el, $result, sub {
189     push @subdoc, shift;
190     });
191 wakaba 1.32 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
192 wakaba 1.33 print_listing_section ({
193     id => 'identifiers', label => 'IDs', heading => 'Identifiers',
194     }, $input, $elements->{id}) if keys %{$elements->{id}};
195     print_listing_section ({
196     id => 'terms', label => 'Terms', heading => 'Terms',
197     }, $input, $elements->{term}) if keys %{$elements->{term}};
198     print_listing_section ({
199     id => 'classes', label => 'Classes', heading => 'Classes',
200     }, $input, $elements->{class}) if keys %{$elements->{class}};
201 wakaba 1.31 } elsif (defined $manifest) {
202 wakaba 1.32 print_structure_dump_manifest_section ($input, $manifest);
203     print_structure_error_manifest_section ($input, $manifest, $result);
204 wakaba 1.31 }
205 wakaba 1.34
206     my $id_prefix = 0;
207     for my $subinput (@subdoc) {
208     $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
209     $subinput->{nested} = 1;
210     $subinput->{base_uri} = $subinput->{container_node}->base_uri
211     unless defined $subinput->{base_uri};
212     my $ebaseuri = htescape ($subinput->{base_uri});
213     push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
214     print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
215     <h2>Subdocument #$id_prefix</h2>
216    
217     <dl>
218     <dt>Internet Media Type</dt>
219     <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
220     <dt>Container Node</dt>
221     <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
222     <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
223     <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
224     </dl>];
225    
226     check_and_print ($subinput => $result);
227    
228     print STDOUT qq[</div>];
229     }
230 wakaba 1.31 } # check_and_print
231    
232 wakaba 1.19 sub print_http_header_section ($$) {
233     my ($input, $result) = @_;
234 wakaba 1.9 return unless defined $input->{header_status_code} or
235     defined $input->{header_status_text} or
236 wakaba 1.34 @{$input->{header_field} or []};
237 wakaba 1.9
238 wakaba 1.32 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
239     print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
240 wakaba 1.9 <h2>HTTP Header</h2>
241    
242     <p><strong>Note</strong>: Due to the limitation of the
243     network library in use, the content of this section might
244     not be the real header.</p>
245    
246     <table><tbody>
247     ];
248    
249     if (defined $input->{header_status_code}) {
250     print STDOUT qq[<tr><th scope="row">Status code</th>];
251     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
252     }
253     if (defined $input->{header_status_text}) {
254     print STDOUT qq[<tr><th scope="row">Status text</th>];
255     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
256     }
257    
258     for (@{$input->{header_field}}) {
259     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
260     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
261     }
262    
263     print STDOUT qq[</tbody></table></div>];
264     } # print_http_header_section
265    
266 wakaba 1.19 sub print_syntax_error_html_section ($$) {
267     my ($input, $result) = @_;
268 wakaba 1.18
269     require Encode;
270     require Whatpm::HTML;
271    
272     print STDOUT qq[
273 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
274 wakaba 1.18 <h2>Parse Errors</h2>
275    
276     <dl>];
277 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
278 wakaba 1.18
279     my $onerror = sub {
280     my (%opt) = @_;
281     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
282     if ($opt{column} > 0) {
283     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
284     } else {
285     $opt{line} = $opt{line} - 1 || 1;
286     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
287     }
288     $type =~ tr/ /-/;
289     $type =~ s/\|/%7C/g;
290     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
291 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
292     print STDOUT qq[$msg</dd>\n];
293 wakaba 1.19
294     add_error ('syntax', \%opt => $result);
295 wakaba 1.18 };
296    
297     my $doc = $dom->create_document;
298     my $el;
299 wakaba 1.26 my $inner_html_element = $http->get_parameter ('e');
300 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
301 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
302 wakaba 1.24 my $time1 = time;
303     my $t = Encode::decode ($input->{charset}, $input->{s});
304     $time{decode} = time - $time1;
305    
306 wakaba 1.18 $el = $doc->create_element_ns
307     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
308 wakaba 1.24 $time1 = time;
309 wakaba 1.18 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
310 wakaba 1.24 $time{parse} = time - $time1;
311 wakaba 1.18 } else {
312 wakaba 1.24 my $time1 = time;
313     Whatpm::HTML->parse_byte_string
314     ($input->{charset}, $input->{s} => $doc, $onerror);
315     $time{parse_html} = time - $time1;
316 wakaba 1.18 }
317 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
318     if defined $input->{official_charset};
319 wakaba 1.24
320 wakaba 1.18 print STDOUT qq[</dl></div>];
321    
322     return ($doc, $el);
323     } # print_syntax_error_html_section
324    
325 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
326     my ($input, $result) = @_;
327 wakaba 1.18
328     require Message::DOM::XMLParserTemp;
329    
330     print STDOUT qq[
331 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
332 wakaba 1.18 <h2>Parse Errors</h2>
333    
334     <dl>];
335 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
336 wakaba 1.18
337     my $onerror = sub {
338     my $err = shift;
339     my $line = $err->location->line_number;
340     print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
341     print STDOUT $err->location->column_number, "</dt><dd>";
342     print STDOUT htescape $err->text, "</dd>\n";
343 wakaba 1.19
344     add_error ('syntax', {type => $err->text,
345     level => [
346     $err->SEVERITY_FATAL_ERROR => 'm',
347     $err->SEVERITY_ERROR => 'm',
348     $err->SEVERITY_WARNING => 's',
349     ]->[$err->severity]} => $result);
350    
351 wakaba 1.18 return 1;
352     };
353    
354     my $time1 = time;
355     open my $fh, '<', \($input->{s});
356     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
357     ($fh => $dom, $onerror, charset => $input->{charset});
358     $time{parse_xml} = time - $time1;
359 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
360     if defined $input->{official_charset};
361 wakaba 1.18
362     print STDOUT qq[</dl></div>];
363    
364     return ($doc, undef);
365     } # print_syntax_error_xml_section
366    
367 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
368     my ($input, $result) = @_;
369    
370     require Whatpm::CacheManifest;
371    
372     print STDOUT qq[
373 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
374 wakaba 1.22 <h2>Parse Errors</h2>
375    
376     <dl>];
377 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
378 wakaba 1.22
379     my $onerror = sub {
380     my (%opt) = @_;
381     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
382 wakaba 1.32 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
383     qq[</dt>];
384 wakaba 1.22 $type =~ tr/ /-/;
385     $type =~ s/\|/%7C/g;
386     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
387 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
388     print STDOUT qq[$msg</dd>\n];
389 wakaba 1.22
390     add_error ('syntax', \%opt => $result);
391     };
392    
393     my $time1 = time;
394     my $manifest = Whatpm::CacheManifest->parse_byte_string
395     ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
396     $time{parse_manifest} = time - $time1;
397    
398     print STDOUT qq[</dl></div>];
399    
400     return $manifest;
401     } # print_syntax_error_manifest_section
402    
403 wakaba 1.9 sub print_source_string_section ($$) {
404     require Encode;
405     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
406     return unless $enc;
407    
408     my $s = \($enc->decode (${$_[0]}));
409     my $i = 1;
410 wakaba 1.32 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
411     print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
412 wakaba 1.9 <h2>Document Source</h2>
413     <ol lang="">\n];
414 wakaba 1.7 if (length $$s) {
415     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
416 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
417     "</li>\n";
418 wakaba 1.7 $i++;
419     }
420     if ($$s =~ /\G([^\x0A]+)/gc) {
421 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
422     "</li>\n";
423 wakaba 1.7 }
424     } else {
425 wakaba 1.32 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
426 wakaba 1.2 }
427 wakaba 1.9 print STDOUT "</ol></div>";
428     } # print_input_string_section
429 wakaba 1.2
430     sub print_document_tree ($) {
431 wakaba 1.1 my $node = shift;
432 wakaba 1.2 my $r = '<ol class="xoxo">';
433 wakaba 1.1
434 wakaba 1.2 my @node = ($node);
435 wakaba 1.1 while (@node) {
436     my $child = shift @node;
437 wakaba 1.2 unless (ref $child) {
438     $r .= $child;
439     next;
440     }
441    
442 wakaba 1.32 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
443 wakaba 1.2 my $nt = $child->node_type;
444     if ($nt == $child->ELEMENT_NODE) {
445 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
446     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
447 wakaba 1.2 '</code>'; ## ISSUE: case
448    
449     if ($child->has_attributes) {
450     $r .= '<ul class="attributes">';
451 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
452 wakaba 1.2 @{$child->attributes}) {
453 wakaba 1.32 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
454 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
455     }
456     $r .= '</ul>';
457     }
458    
459 wakaba 1.7 if ($child->has_child_nodes) {
460 wakaba 1.2 $r .= '<ol class="children">';
461 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
462     } else {
463     $r .= '</li>';
464 wakaba 1.2 }
465     } elsif ($nt == $child->TEXT_NODE) {
466 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
467 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
468 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
469 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
470 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
471 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
472 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
473 wakaba 1.7 $r .= qq[<ul class="attributes">];
474 wakaba 1.27 my $cp = $child->manakai_charset;
475     if (defined $cp) {
476     $r .= qq[<li><code>charset</code> parameter = <code>];
477     $r .= htescape ($cp) . qq[</code></li>];
478     }
479     $r .= qq[<li><code>inputEncoding</code> = ];
480     my $ie = $child->input_encoding;
481     if (defined $ie) {
482     $r .= qq[<code>@{[htescape ($ie)]}</code>];
483     if ($child->manakai_has_bom) {
484     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
485     }
486     } else {
487     $r .= qq[(<code>null</code>)];
488     }
489 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
490     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
491 wakaba 1.9 unless ($child->manakai_is_html) {
492     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
493     if (defined $child->xml_encoding) {
494     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
495     } else {
496     $r .= qq[<li>XML encoding = (null)</li>];
497     }
498     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
499     }
500 wakaba 1.7 $r .= qq[</ul>];
501 wakaba 1.2 if ($child->has_child_nodes) {
502 wakaba 1.7 $r .= '<ol class="children">';
503 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
504 wakaba 1.1 }
505 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
506 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
507     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
508     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
509     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
510 wakaba 1.2 $r .= '</ul></li>';
511     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
512 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
513 wakaba 1.1 } else {
514 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
515 wakaba 1.1 }
516     }
517 wakaba 1.2
518     $r .= '</ol>';
519     print STDOUT $r;
520     } # print_document_tree
521 wakaba 1.1
522 wakaba 1.32 sub print_structure_dump_dom_section ($$$) {
523     my ($input, $doc, $el) = @_;
524 wakaba 1.18
525     print STDOUT qq[
526 wakaba 1.32 <div id="$input->{id_prefix}document-tree" class="section">
527 wakaba 1.18 <h2>Document Tree</h2>
528     ];
529 wakaba 1.32 push @nav, ['#document-tree' => 'Tree'] unless $input->{nested};
530 wakaba 1.18
531     print_document_tree ($el || $doc);
532    
533     print STDOUT qq[</div>];
534 wakaba 1.22 } # print_structure_dump_dom_section
535    
536 wakaba 1.32 sub print_structure_dump_manifest_section ($$) {
537     my ($input, $manifest) = @_;
538 wakaba 1.22
539     print STDOUT qq[
540 wakaba 1.32 <div id="$input->{id_prefix}dump-manifest" class="section">
541 wakaba 1.22 <h2>Cache Manifest</h2>
542     ];
543 wakaba 1.32 push @nav, ['#dump-manifest' => 'Caceh Manifest'] unless $input->{nested};
544 wakaba 1.22
545     print STDOUT qq[<dl><dt>Explicit entries</dt>];
546     for my $uri (@{$manifest->[0]}) {
547     my $euri = htescape ($uri);
548     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
549     }
550    
551     print STDOUT qq[<dt>Fallback entries</dt><dd>
552     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
553     <th scope=row>Fallback Entry</tr><tbody>];
554     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
555     my $euri = htescape ($uri);
556     my $euri2 = htescape ($manifest->[1]->{$uri});
557     print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
558     <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
559     }
560    
561     print STDOUT qq[</table><dt>Online whitelist</dt>];
562     for my $uri (@{$manifest->[2]}) {
563     my $euri = htescape ($uri);
564     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
565     }
566 wakaba 1.18
567 wakaba 1.22 print STDOUT qq[</dl></div>];
568     } # print_structure_dump_manifest_section
569    
570 wakaba 1.34 sub print_structure_error_dom_section ($$$$$) {
571     my ($input, $doc, $el, $result, $onsubdoc) = @_;
572 wakaba 1.18
573 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
574 wakaba 1.18 <h2>Document Errors</h2>
575    
576     <dl>];
577 wakaba 1.32 push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested};
578 wakaba 1.18
579     require Whatpm::ContentChecker;
580     my $onerror = sub {
581     my %opt = @_;
582     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
583     $type =~ tr/ /-/;
584     $type =~ s/\|/%7C/g;
585     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
586 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
587 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
588     print STDOUT $msg, "</dd>\n";
589 wakaba 1.19 add_error ('structure', \%opt => $result);
590 wakaba 1.18 };
591    
592     my $elements;
593     my $time1 = time;
594     if ($el) {
595 wakaba 1.34 $elements = Whatpm::ContentChecker->check_element
596     ($el, $onerror, $onsubdoc);
597 wakaba 1.18 } else {
598 wakaba 1.34 $elements = Whatpm::ContentChecker->check_document
599     ($doc, $onerror, $onsubdoc);
600 wakaba 1.18 }
601     $time{check} = time - $time1;
602    
603     print STDOUT qq[</dl></div>];
604    
605     return $elements;
606 wakaba 1.22 } # print_structure_error_dom_section
607    
608     sub print_structure_error_manifest_section ($$$) {
609 wakaba 1.32 my ($input, $manifest, $result) = @_;
610 wakaba 1.22
611 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
612 wakaba 1.22 <h2>Document Errors</h2>
613    
614     <dl>];
615 wakaba 1.32 push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested};
616 wakaba 1.22
617     require Whatpm::CacheManifest;
618     Whatpm::CacheManifest->check_manifest ($manifest, sub {
619     my %opt = @_;
620     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
621     $type =~ tr/ /-/;
622     $type =~ s/\|/%7C/g;
623     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
624 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
625 wakaba 1.22 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
626     add_error ('structure', \%opt => $result);
627     });
628    
629     print STDOUT qq[</div>];
630     } # print_structure_error_manifest_section
631 wakaba 1.18
632 wakaba 1.32 sub print_table_section ($$) {
633     my ($input, $tables) = @_;
634 wakaba 1.18
635 wakaba 1.32 push @nav, ['#tables' => 'Tables'] unless $input->{nested};
636 wakaba 1.18 print STDOUT qq[
637 wakaba 1.32 <div id="$input->{id_prefix}tables" class="section">
638 wakaba 1.18 <h2>Tables</h2>
639    
640     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
641     <script src="../table-script.js" type="text/javascript"></script>
642     <noscript>
643     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
644     </noscript>
645     ];
646    
647     require JSON;
648    
649     my $i = 0;
650     for my $table_el (@$tables) {
651     $i++;
652 wakaba 1.32 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
653     get_node_link ($input, $table_el) . q[</h3>];
654 wakaba 1.18
655     ## TODO: Make |ContentChecker| return |form_table| result
656     ## so that this script don't have to run the algorithm twice.
657     my $table = Whatpm::HTMLTable->form_table ($table_el);
658    
659     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
660     next unless $_;
661     delete $_->{element};
662     }
663    
664     for (@{$table->{row_group}}) {
665     next unless $_;
666     next unless $_->{element};
667     $_->{type} = $_->{element}->manakai_local_name;
668     delete $_->{element};
669     }
670    
671     for (@{$table->{cell}}) {
672     next unless $_;
673     for (@{$_}) {
674     next unless $_;
675     for (@$_) {
676     $_->{id} = refaddr $_->{element} if defined $_->{element};
677     delete $_->{element};
678     $_->{is_header} = $_->{is_header} ? 1 : 0;
679     }
680     }
681     }
682    
683     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
684     print STDOUT JSON::objToJson ($table);
685 wakaba 1.32 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
686     print STDOUT qq[, '$input->{id_prefix}');</script>];
687 wakaba 1.18 }
688    
689     print STDOUT qq[</div>];
690     } # print_table_section
691    
692 wakaba 1.33 sub print_listing_section ($$$) {
693     my ($opt, $input, $ids) = @_;
694 wakaba 1.18
695 wakaba 1.33 push @nav, ['#' . $opt->{id} => $opt->{label}] unless $input->{nested};
696 wakaba 1.18 print STDOUT qq[
697 wakaba 1.33 <div id="$input->{id_prefix}$opt->{id}" class="section">
698     <h2>$opt->{heading}</h2>
699 wakaba 1.18
700     <dl>
701     ];
702     for my $id (sort {$a cmp $b} keys %$ids) {
703     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
704     for (@{$ids->{$id}}) {
705 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
706 wakaba 1.18 }
707     }
708     print STDOUT qq[</dl></div>];
709 wakaba 1.33 } # print_listing_section
710 wakaba 1.18
711 wakaba 1.19 sub print_result_section ($) {
712     my $result = shift;
713    
714     print STDOUT qq[
715     <div id="result-summary" class="section">
716     <h2>Result</h2>];
717    
718 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
719 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
720     checker cannot decide whether the document is conforming or
721     not, since the document contains one or more unsupported
722 wakaba 1.21 features. The document might or might not be conforming.</p>];
723 wakaba 1.19 } elsif ($result->{conforming_min}) {
724     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
725     found in this document.</p>];
726     } elsif ($result->{conforming_max}) {
727     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
728     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
729     it might be conforming.</p>];
730     } else {
731     print STDOUT qq[<p class=FAIL id=result-para>This document is
732     <strong><em>non</em>-conforming</strong>.</p>];
733     }
734    
735     print STDOUT qq[<table>
736     <colgroup><col><colgroup><col><col><col><colgroup><col>
737     <thead>
738 wakaba 1.23 <tr><th scope=col></th>
739     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
740     Errors</a></th>
741     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
742     Errors</a></th>
743     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
744     <th scope=col>Score</th></tr></thead><tbody>];
745 wakaba 1.19
746     my $must_error = 0;
747     my $should_error = 0;
748     my $warning = 0;
749     my $score_min = 0;
750     my $score_max = 0;
751     my $score_base = 20;
752 wakaba 1.21 my $score_unit = $score_base / 100;
753 wakaba 1.19 for (
754     [Transfer => 'transfer', ''],
755     [Character => 'char', ''],
756     [Syntax => 'syntax', '#parse-errors'],
757     [Structure => 'structure', '#document-errors'],
758     ) {
759     $must_error += ($result->{$_->[1]}->{must} += 0);
760     $should_error += ($result->{$_->[1]}->{should} += 0);
761     $warning += ($result->{$_->[1]}->{warning} += 0);
762 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
763     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
764 wakaba 1.19
765     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
766     my $label = $_->[0];
767     if ($result->{$_->[1]}->{must} or
768     $result->{$_->[1]}->{should} or
769     $result->{$_->[1]}->{warning} or
770     $result->{$_->[1]}->{unsupported}) {
771     $label = qq[<a href="$_->[2]">$label</a>];
772     }
773    
774     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
775     if ($uncertain) {
776 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
777 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
778 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
779 wakaba 1.19 } else {
780 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
781 wakaba 1.19 }
782     }
783    
784     $score_max += $score_base;
785    
786     print STDOUT qq[
787     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
788     </tbody>
789 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
790     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
791     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
792     <td>$warning?</td>
793     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
794 wakaba 1.19 </table>
795    
796     <p><strong>Important</strong>: This conformance checking service
797     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
798     </div>];
799     push @nav, ['#result-summary' => 'Result'];
800     } # print_result_section
801    
802 wakaba 1.24 sub print_result_unknown_type_section ($$) {
803     my ($input, $result) = @_;
804 wakaba 1.18
805 wakaba 1.24 my $euri = htescape ($input->{uri});
806 wakaba 1.18 print STDOUT qq[
807 wakaba 1.24 <div id="parse-errors" class="section">
808     <h2>Errors</h2>
809    
810     <dl>
811     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
812     <dd class=unsupported><strong><a href="../error-description#level-u">Not
813     supported</a></strong>:
814     Media type
815     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
816     is not supported.</dd>
817     </dl>
818 wakaba 1.18 </div>
819     ];
820 wakaba 1.24 push @nav, ['#parse-errors' => 'Errors'];
821 wakaba 1.30 add_error (char => {level => 'u'} => $result);
822     add_error (syntax => {level => 'u'} => $result);
823     add_error (structure => {level => 'u'} => $result);
824 wakaba 1.18 } # print_result_unknown_type_section
825    
826     sub print_result_input_error_section ($) {
827     my $input = shift;
828     print STDOUT qq[<div class="section" id="result-summary">
829     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
830     </div>];
831     push @nav, ['#result-summary' => 'Result'];
832 wakaba 1.32 } # print_result_input_error_section
833 wakaba 1.18
834 wakaba 1.32 sub get_error_label ($$) {
835     my ($input, $err) = @_;
836 wakaba 1.22
837     my $r = '';
838    
839     if (defined $err->{line}) {
840     if ($err->{column} > 0) {
841     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
842     } else {
843     $err->{line} = $err->{line} - 1 || 1;
844     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
845     }
846     }
847    
848     if (defined $err->{node}) {
849     $r .= ' ' if length $r;
850 wakaba 1.32 $r = get_node_link ($input, $err->{node});
851 wakaba 1.22 }
852    
853     if (defined $err->{index}) {
854     $r .= ' ' if length $r;
855     $r .= 'Index ' . (0+$err->{index});
856     }
857    
858     if (defined $err->{value}) {
859     $r .= ' ' if length $r;
860     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
861     }
862    
863     return $r;
864     } # get_error_label
865    
866 wakaba 1.23 sub get_error_level_label ($) {
867     my $err = shift;
868    
869     my $r = '';
870    
871     if (not defined $err->{level} or $err->{level} eq 'm') {
872     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
873     error</a></strong>: ];
874     } elsif ($err->{level} eq 's') {
875     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
876     error</a></strong>: ];
877     } elsif ($err->{level} eq 'w') {
878     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
879     ];
880 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
881 wakaba 1.23 $r = qq[<strong><a href="../error-description#level-u">Not
882     supported</a></strong>: ];
883     } else {
884     my $elevel = htescape ($err->{level});
885     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
886     ];
887     }
888    
889     return $r;
890     } # get_error_level_label
891    
892 wakaba 1.1 sub get_node_path ($) {
893     my $node = shift;
894     my @r;
895     while (defined $node) {
896     my $rs;
897     if ($node->node_type == 1) {
898     $rs = $node->manakai_local_name;
899     $node = $node->parent_node;
900     } elsif ($node->node_type == 2) {
901     $rs = '@' . $node->manakai_local_name;
902     $node = $node->owner_element;
903     } elsif ($node->node_type == 3) {
904     $rs = '"' . $node->data . '"';
905     $node = $node->parent_node;
906     } elsif ($node->node_type == 9) {
907 wakaba 1.9 @r = ('') unless @r;
908 wakaba 1.1 $rs = '';
909     $node = $node->parent_node;
910     } else {
911     $rs = '#' . $node->node_type;
912     $node = $node->parent_node;
913     }
914     unshift @r, $rs;
915     }
916     return join '/', @r;
917     } # get_node_path
918    
919 wakaba 1.32 sub get_node_link ($$) {
920     return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
921     htescape (get_node_path ($_[1])) . qq[</a>];
922 wakaba 1.6 } # get_node_link
923    
924 wakaba 1.7 {
925     my $Msg = {};
926    
927     sub load_text_catalog ($) {
928     my $lang = shift; # MUST be a canonical lang name
929 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
930     or die "$0: cc-msg.$lang.txt: $!";
931 wakaba 1.7 while (<$file>) {
932     if (s/^([^;]+);([^;]*);//) {
933     my ($type, $cls, $msg) = ($1, $2, $_);
934     $msg =~ tr/\x0D\x0A//d;
935     $Msg->{$type} = [$cls, $msg];
936     }
937     }
938     } # load_text_catalog
939    
940     sub get_text ($) {
941 wakaba 1.15 my ($type, $level, $node) = @_;
942 wakaba 1.7 $type = $level . ':' . $type if defined $level;
943 wakaba 1.29 $level = 'm' unless defined $level;
944 wakaba 1.7 my @arg;
945     {
946     if (defined $Msg->{$type}) {
947     my $msg = $Msg->{$type}->[1];
948 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
949     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
950     }ge;
951 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
952     UNIVERSAL::can ($node, 'get_attribute_ns')
953     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
954     }ge;
955     $msg =~ s{<var>{\@}</var>}{
956     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
957     }ge;
958 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
959     UNIVERSAL::can ($node, 'manakai_local_name')
960     ? htescape ($node->manakai_local_name) : ''
961     }ge;
962     $msg =~ s{<var>{element-local-name}</var>}{
963     (UNIVERSAL::can ($node, 'owner_element') and
964     $node->owner_element)
965     ? htescape ($node->owner_element->manakai_local_name)
966     : ''
967     }ge;
968 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
969 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
970     unshift @arg, $1;
971     redo;
972     }
973     }
974 wakaba 1.29 return ($type, 'level-'.$level, htescape ($_[0]));
975 wakaba 1.7 } # get_text
976    
977     }
978    
979 wakaba 1.9 sub get_input_document ($$) {
980     my ($http, $dom) = @_;
981    
982 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
983 wakaba 1.9 my $r = {};
984     if (defined $request_uri and length $request_uri) {
985     my $uri = $dom->create_uri_reference ($request_uri);
986     unless ({
987     http => 1,
988     }->{lc $uri->uri_scheme}) {
989     return {uri => $request_uri, request_uri => $request_uri,
990     error_status_text => 'URI scheme not allowed'};
991     }
992    
993     require Message::Util::HostPermit;
994     my $host_permit = new Message::Util::HostPermit;
995     $host_permit->add_rule (<<EOH);
996     Allow host=suika port=80
997     Deny host=suika
998     Allow host=suika.fam.cx port=80
999     Deny host=suika.fam.cx
1000     Deny host=localhost
1001     Deny host=*.localdomain
1002     Deny ipv4=0.0.0.0/8
1003     Deny ipv4=10.0.0.0/8
1004     Deny ipv4=127.0.0.0/8
1005     Deny ipv4=169.254.0.0/16
1006     Deny ipv4=172.0.0.0/11
1007     Deny ipv4=192.0.2.0/24
1008     Deny ipv4=192.88.99.0/24
1009     Deny ipv4=192.168.0.0/16
1010     Deny ipv4=198.18.0.0/15
1011     Deny ipv4=224.0.0.0/4
1012     Deny ipv4=255.255.255.255/32
1013     Deny ipv6=0::0/0
1014     Allow host=*
1015     EOH
1016     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1017     return {uri => $request_uri, request_uri => $request_uri,
1018     error_status_text => 'Connection to the host is forbidden'};
1019     }
1020    
1021     require LWP::UserAgent;
1022     my $ua = WDCC::LWPUA->new;
1023     $ua->{wdcc_dom} = $dom;
1024     $ua->{wdcc_host_permit} = $host_permit;
1025     $ua->agent ('Mozilla'); ## TODO: for now.
1026     $ua->parse_head (0);
1027     $ua->protocols_allowed ([qw/http/]);
1028     $ua->max_size (1000_000);
1029     my $req = HTTP::Request->new (GET => $request_uri);
1030 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1031 wakaba 1.9 my $res = $ua->request ($req);
1032 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1033     if ($res->is_success or $http->get_parameter ('error-page')) {
1034 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1035     $r->{uri} = $res->request->uri;
1036     $r->{request_uri} = $request_uri;
1037    
1038     ## TODO: More strict parsing...
1039     my $ct = $res->header ('Content-Type');
1040 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1041 wakaba 1.9 $r->{charset} = lc $1;
1042     $r->{charset} =~ tr/\\//d;
1043 wakaba 1.26 $r->{official_charset} = $r->{charset};
1044 wakaba 1.9 }
1045    
1046 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1047 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1048     $r->{charset_overridden}
1049     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1050     $r->{charset} = $input_charset;
1051 wakaba 1.25 }
1052    
1053     ## TODO: Support for HTTP Content-Encoding
1054 wakaba 1.9
1055     $r->{s} = ''.$res->content;
1056 wakaba 1.25
1057     require Whatpm::ContentType;
1058     ($r->{official_type}, $r->{media_type})
1059     = Whatpm::ContentType->get_sniffed_type
1060     (get_file_head => sub {
1061     return substr $r->{s}, 0, shift;
1062     },
1063     http_content_type_byte => $ct,
1064     has_http_content_encoding =>
1065     defined $res->header ('Content-Encoding'),
1066     supported_image_types => {});
1067 wakaba 1.9 } else {
1068     $r->{uri} = $res->request->uri;
1069     $r->{request_uri} = $request_uri;
1070     $r->{error_status_text} = $res->status_line;
1071     }
1072    
1073     $r->{header_field} = [];
1074     $res->scan (sub {
1075     push @{$r->{header_field}}, [$_[0], $_[1]];
1076     });
1077     $r->{header_status_code} = $res->code;
1078     $r->{header_status_text} = $res->message;
1079     } else {
1080 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1081 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1082     $r->{request_uri} = q<thismessage:/>;
1083     $r->{base_uri} = q<thismessage:/>;
1084 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1085 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1086     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1087 wakaba 1.26 $r->{official_charset} = $r->{charset};
1088 wakaba 1.9 $r->{header_field} = [];
1089 wakaba 1.25
1090     require Whatpm::ContentType;
1091     ($r->{official_type}, $r->{media_type})
1092     = Whatpm::ContentType->get_sniffed_type
1093     (get_file_head => sub {
1094     return substr $r->{s}, 0, shift;
1095     },
1096     http_content_type_byte => undef,
1097     has_http_content_encoding => 0,
1098     supported_image_types => {});
1099 wakaba 1.9 }
1100    
1101 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1102 wakaba 1.9 if (defined $input_format and length $input_format) {
1103     $r->{media_type_overridden}
1104     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1105     $r->{media_type} = $input_format;
1106     }
1107     if (defined $r->{s} and not defined $r->{media_type}) {
1108     $r->{media_type} = 'text/html';
1109     $r->{media_type_overridden} = 1;
1110     }
1111    
1112     if ($r->{media_type} eq 'text/xml') {
1113     unless (defined $r->{charset}) {
1114     $r->{charset} = 'us-ascii';
1115 wakaba 1.26 $r->{official_charset} = $r->{charset};
1116 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1117     $r->{charset_overridden} = 0;
1118     }
1119     }
1120    
1121     if (length $r->{s} > 1000_000) {
1122     $r->{error_status_text} = 'Entity-body too large';
1123     delete $r->{s};
1124     return $r;
1125     }
1126    
1127     return $r;
1128     } # get_input_document
1129    
1130     package WDCC::LWPUA;
1131     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1132    
1133     sub redirect_ok {
1134     my $ua = shift;
1135     unless ($ua->SUPER::redirect_ok (@_)) {
1136     return 0;
1137     }
1138    
1139     my $uris = $_[1]->header ('Location');
1140     return 0 unless $uris;
1141     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1142     unless ({
1143     http => 1,
1144     }->{lc $uri->uri_scheme}) {
1145     return 0;
1146     }
1147     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1148     return 0;
1149     }
1150     return 1;
1151     } # redirect_ok
1152    
1153 wakaba 1.1 =head1 AUTHOR
1154    
1155     Wakaba <w@suika.fam.cx>.
1156    
1157     =head1 LICENSE
1158    
1159     Copyright 2007 Wakaba <w@suika.fam.cx>
1160    
1161     This library is free software; you can redistribute it
1162     and/or modify it under the same terms as Perl itself.
1163    
1164     =cut
1165    
1166 wakaba 1.34 ## $Date: 2008/02/10 02:42:01 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24