/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.33 - (hide annotations) (download)
Sun Feb 10 02:42:01 2008 UTC (16 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.32: +16 -48 lines
++ ChangeLog	10 Feb 2008 02:41:59 -0000
	* cc.cgi (print_listing_section): ID, class, and term
	section functions are merged.

2008-02-10  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.16 use Message::CGI::HTTP;
24     my $http = Message::CGI::HTTP->new;
25 wakaba 1.1
26 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28     exit;
29     }
30    
31 wakaba 1.12 binmode STDOUT, ':utf8';
32 wakaba 1.14 $| = 1;
33 wakaba 1.12
34 wakaba 1.9 require Message::DOM::DOMImplementation;
35     my $dom = Message::DOM::DOMImplementation->new;
36    
37 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
38    
39 wakaba 1.3 my @nav;
40 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
41    
42     <!DOCTYPE html>
43     <html lang="en">
44     <head>
45     <title>Web Document Conformance Checker (BETA)</title>
46 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 wakaba 1.2 </head>
48     <body>
49 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50     (<em>beta</em>)</h1>
51 wakaba 1.14 ];
52 wakaba 1.2
53 wakaba 1.14 $| = 0;
54     my $input = get_input_document ($http, $dom);
55 wakaba 1.16 my $char_length = 0;
56     my %time;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89     ];
90    
91 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
92 wakaba 1.31 check_and_print ($input => $result);
93 wakaba 1.19 print_result_section ($result);
94 wakaba 1.9 } else {
95 wakaba 1.18 print STDOUT qq[</dl></div>];
96     print_result_input_error_section ($input);
97 wakaba 1.9 }
98 wakaba 1.3
99 wakaba 1.2 print STDOUT qq[
100 wakaba 1.3 <ul class="navigation" id="nav-items">
101     ];
102     for (@nav) {
103     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
104     }
105     print STDOUT qq[
106     </ul>
107 wakaba 1.2 </body>
108     </html>
109     ];
110 wakaba 1.1
111 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
112     check check_manifest/) {
113 wakaba 1.16 next unless defined $time{$_};
114     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
115     print $file $char_length, "\t", $time{$_}, "\n";
116     }
117    
118 wakaba 1.1 exit;
119    
120 wakaba 1.19 sub add_error ($$$) {
121     my ($layer, $err, $result) = @_;
122     if (defined $err->{level}) {
123     if ($err->{level} eq 's') {
124     $result->{$layer}->{should}++;
125     $result->{$layer}->{score_min} -= 2;
126     $result->{conforming_min} = 0;
127     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
128     $result->{$layer}->{warning}++;
129 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
130 wakaba 1.19 $result->{$layer}->{unsupported}++;
131     $result->{unsupported} = 1;
132     } else {
133     $result->{$layer}->{must}++;
134     $result->{$layer}->{score_max} -= 2;
135     $result->{$layer}->{score_min} -= 2;
136     $result->{conforming_min} = 0;
137     $result->{conforming_max} = 0;
138     }
139     } else {
140     $result->{$layer}->{must}++;
141     $result->{$layer}->{score_max} -= 2;
142     $result->{$layer}->{score_min} -= 2;
143     $result->{conforming_min} = 0;
144     $result->{conforming_max} = 0;
145     }
146     } # add_error
147    
148 wakaba 1.31 sub check_and_print ($$) {
149     my ($input, $result) = @_;
150 wakaba 1.32 $input->{id_prefix} = '';
151     #$input->{nested} = 1/0;
152 wakaba 1.31
153     print_http_header_section ($input, $result);
154    
155     my $doc;
156     my $el;
157     my $manifest;
158    
159     if ($input->{media_type} eq 'text/html') {
160     ($doc, $el) = print_syntax_error_html_section ($input, $result);
161     print_source_string_section
162     (\($input->{s}), $input->{charset} || $doc->input_encoding);
163     } elsif ({
164     'text/xml' => 1,
165     'application/atom+xml' => 1,
166     'application/rss+xml' => 1,
167     'application/svg+xml' => 1,
168     'application/xhtml+xml' => 1,
169     'application/xml' => 1,
170     }->{$input->{media_type}}) {
171     ($doc, $el) = print_syntax_error_xml_section ($input, $result);
172     print_source_string_section (\($input->{s}), $doc->input_encoding);
173     } elsif ($input->{media_type} eq 'text/cache-manifest') {
174     ## TODO: MUST be text/cache-manifest
175     $manifest = print_syntax_error_manifest_section ($input, $result);
176     print_source_string_section (\($input->{s}), 'utf-8');
177     } else {
178     ## TODO: Change HTTP status code??
179     print_result_unknown_type_section ($input, $result);
180     }
181    
182     if (defined $doc or defined $el) {
183 wakaba 1.32 print_structure_dump_dom_section ($input, $doc, $el);
184     my $elements = print_structure_error_dom_section
185     ($input, $doc, $el, $result);
186     print_table_section ($input, $elements->{table}) if @{$elements->{table}};
187 wakaba 1.33 print_listing_section ({
188     id => 'identifiers', label => 'IDs', heading => 'Identifiers',
189     }, $input, $elements->{id}) if keys %{$elements->{id}};
190     print_listing_section ({
191     id => 'terms', label => 'Terms', heading => 'Terms',
192     }, $input, $elements->{term}) if keys %{$elements->{term}};
193     print_listing_section ({
194     id => 'classes', label => 'Classes', heading => 'Classes',
195     }, $input, $elements->{class}) if keys %{$elements->{class}};
196 wakaba 1.31 } elsif (defined $manifest) {
197 wakaba 1.32 print_structure_dump_manifest_section ($input, $manifest);
198     print_structure_error_manifest_section ($input, $manifest, $result);
199 wakaba 1.31 }
200     } # check_and_print
201    
202 wakaba 1.19 sub print_http_header_section ($$) {
203     my ($input, $result) = @_;
204 wakaba 1.9 return unless defined $input->{header_status_code} or
205     defined $input->{header_status_text} or
206     @{$input->{header_field}};
207    
208 wakaba 1.32 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
209     print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
210 wakaba 1.9 <h2>HTTP Header</h2>
211    
212     <p><strong>Note</strong>: Due to the limitation of the
213     network library in use, the content of this section might
214     not be the real header.</p>
215    
216     <table><tbody>
217     ];
218    
219     if (defined $input->{header_status_code}) {
220     print STDOUT qq[<tr><th scope="row">Status code</th>];
221     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
222     }
223     if (defined $input->{header_status_text}) {
224     print STDOUT qq[<tr><th scope="row">Status text</th>];
225     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
226     }
227    
228     for (@{$input->{header_field}}) {
229     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
230     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
231     }
232    
233     print STDOUT qq[</tbody></table></div>];
234     } # print_http_header_section
235    
236 wakaba 1.19 sub print_syntax_error_html_section ($$) {
237     my ($input, $result) = @_;
238 wakaba 1.18
239     require Encode;
240     require Whatpm::HTML;
241    
242     print STDOUT qq[
243 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
244 wakaba 1.18 <h2>Parse Errors</h2>
245    
246     <dl>];
247 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
248 wakaba 1.18
249     my $onerror = sub {
250     my (%opt) = @_;
251     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
252     if ($opt{column} > 0) {
253     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
254     } else {
255     $opt{line} = $opt{line} - 1 || 1;
256     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
257     }
258     $type =~ tr/ /-/;
259     $type =~ s/\|/%7C/g;
260     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
261 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
262     print STDOUT qq[$msg</dd>\n];
263 wakaba 1.19
264     add_error ('syntax', \%opt => $result);
265 wakaba 1.18 };
266    
267     my $doc = $dom->create_document;
268     my $el;
269 wakaba 1.26 my $inner_html_element = $http->get_parameter ('e');
270 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
271 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
272 wakaba 1.24 my $time1 = time;
273     my $t = Encode::decode ($input->{charset}, $input->{s});
274     $time{decode} = time - $time1;
275    
276 wakaba 1.18 $el = $doc->create_element_ns
277     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
278 wakaba 1.24 $time1 = time;
279 wakaba 1.18 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
280 wakaba 1.24 $time{parse} = time - $time1;
281 wakaba 1.18 } else {
282 wakaba 1.24 my $time1 = time;
283     Whatpm::HTML->parse_byte_string
284     ($input->{charset}, $input->{s} => $doc, $onerror);
285     $time{parse_html} = time - $time1;
286 wakaba 1.18 }
287 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
288     if defined $input->{official_charset};
289 wakaba 1.24
290 wakaba 1.18 print STDOUT qq[</dl></div>];
291    
292     return ($doc, $el);
293     } # print_syntax_error_html_section
294    
295 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
296     my ($input, $result) = @_;
297 wakaba 1.18
298     require Message::DOM::XMLParserTemp;
299    
300     print STDOUT qq[
301 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
302 wakaba 1.18 <h2>Parse Errors</h2>
303    
304     <dl>];
305 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
306 wakaba 1.18
307     my $onerror = sub {
308     my $err = shift;
309     my $line = $err->location->line_number;
310     print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
311     print STDOUT $err->location->column_number, "</dt><dd>";
312     print STDOUT htescape $err->text, "</dd>\n";
313 wakaba 1.19
314     add_error ('syntax', {type => $err->text,
315     level => [
316     $err->SEVERITY_FATAL_ERROR => 'm',
317     $err->SEVERITY_ERROR => 'm',
318     $err->SEVERITY_WARNING => 's',
319     ]->[$err->severity]} => $result);
320    
321 wakaba 1.18 return 1;
322     };
323    
324     my $time1 = time;
325     open my $fh, '<', \($input->{s});
326     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
327     ($fh => $dom, $onerror, charset => $input->{charset});
328     $time{parse_xml} = time - $time1;
329 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
330     if defined $input->{official_charset};
331 wakaba 1.18
332     print STDOUT qq[</dl></div>];
333    
334     return ($doc, undef);
335     } # print_syntax_error_xml_section
336    
337 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
338     my ($input, $result) = @_;
339    
340     require Whatpm::CacheManifest;
341    
342     print STDOUT qq[
343 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
344 wakaba 1.22 <h2>Parse Errors</h2>
345    
346     <dl>];
347 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
348 wakaba 1.22
349     my $onerror = sub {
350     my (%opt) = @_;
351     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
352 wakaba 1.32 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
353     qq[</dt>];
354 wakaba 1.22 $type =~ tr/ /-/;
355     $type =~ s/\|/%7C/g;
356     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
357 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
358     print STDOUT qq[$msg</dd>\n];
359 wakaba 1.22
360     add_error ('syntax', \%opt => $result);
361     };
362    
363     my $time1 = time;
364     my $manifest = Whatpm::CacheManifest->parse_byte_string
365     ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
366     $time{parse_manifest} = time - $time1;
367    
368     print STDOUT qq[</dl></div>];
369    
370     return $manifest;
371     } # print_syntax_error_manifest_section
372    
373 wakaba 1.9 sub print_source_string_section ($$) {
374     require Encode;
375     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
376     return unless $enc;
377    
378     my $s = \($enc->decode (${$_[0]}));
379     my $i = 1;
380 wakaba 1.32 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
381     print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
382 wakaba 1.9 <h2>Document Source</h2>
383     <ol lang="">\n];
384 wakaba 1.7 if (length $$s) {
385     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
386 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
387     "</li>\n";
388 wakaba 1.7 $i++;
389     }
390     if ($$s =~ /\G([^\x0A]+)/gc) {
391 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
392     "</li>\n";
393 wakaba 1.7 }
394     } else {
395 wakaba 1.32 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
396 wakaba 1.2 }
397 wakaba 1.9 print STDOUT "</ol></div>";
398     } # print_input_string_section
399 wakaba 1.2
400     sub print_document_tree ($) {
401 wakaba 1.1 my $node = shift;
402 wakaba 1.2 my $r = '<ol class="xoxo">';
403 wakaba 1.1
404 wakaba 1.2 my @node = ($node);
405 wakaba 1.1 while (@node) {
406     my $child = shift @node;
407 wakaba 1.2 unless (ref $child) {
408     $r .= $child;
409     next;
410     }
411    
412 wakaba 1.32 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
413 wakaba 1.2 my $nt = $child->node_type;
414     if ($nt == $child->ELEMENT_NODE) {
415 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
416     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
417 wakaba 1.2 '</code>'; ## ISSUE: case
418    
419     if ($child->has_attributes) {
420     $r .= '<ul class="attributes">';
421 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
422 wakaba 1.2 @{$child->attributes}) {
423 wakaba 1.32 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
424 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
425     }
426     $r .= '</ul>';
427     }
428    
429 wakaba 1.7 if ($child->has_child_nodes) {
430 wakaba 1.2 $r .= '<ol class="children">';
431 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
432     } else {
433     $r .= '</li>';
434 wakaba 1.2 }
435     } elsif ($nt == $child->TEXT_NODE) {
436 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
437 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
438 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
439 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
440 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
441 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
442 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
443 wakaba 1.7 $r .= qq[<ul class="attributes">];
444 wakaba 1.27 my $cp = $child->manakai_charset;
445     if (defined $cp) {
446     $r .= qq[<li><code>charset</code> parameter = <code>];
447     $r .= htescape ($cp) . qq[</code></li>];
448     }
449     $r .= qq[<li><code>inputEncoding</code> = ];
450     my $ie = $child->input_encoding;
451     if (defined $ie) {
452     $r .= qq[<code>@{[htescape ($ie)]}</code>];
453     if ($child->manakai_has_bom) {
454     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
455     }
456     } else {
457     $r .= qq[(<code>null</code>)];
458     }
459 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
460     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
461 wakaba 1.9 unless ($child->manakai_is_html) {
462     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
463     if (defined $child->xml_encoding) {
464     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
465     } else {
466     $r .= qq[<li>XML encoding = (null)</li>];
467     }
468     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
469     }
470 wakaba 1.7 $r .= qq[</ul>];
471 wakaba 1.2 if ($child->has_child_nodes) {
472 wakaba 1.7 $r .= '<ol class="children">';
473 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
474 wakaba 1.1 }
475 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
476 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
477     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
478     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
479     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
480 wakaba 1.2 $r .= '</ul></li>';
481     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
482 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
483 wakaba 1.1 } else {
484 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
485 wakaba 1.1 }
486     }
487 wakaba 1.2
488     $r .= '</ol>';
489     print STDOUT $r;
490     } # print_document_tree
491 wakaba 1.1
492 wakaba 1.32 sub print_structure_dump_dom_section ($$$) {
493     my ($input, $doc, $el) = @_;
494 wakaba 1.18
495     print STDOUT qq[
496 wakaba 1.32 <div id="$input->{id_prefix}document-tree" class="section">
497 wakaba 1.18 <h2>Document Tree</h2>
498     ];
499 wakaba 1.32 push @nav, ['#document-tree' => 'Tree'] unless $input->{nested};
500 wakaba 1.18
501     print_document_tree ($el || $doc);
502    
503     print STDOUT qq[</div>];
504 wakaba 1.22 } # print_structure_dump_dom_section
505    
506 wakaba 1.32 sub print_structure_dump_manifest_section ($$) {
507     my ($input, $manifest) = @_;
508 wakaba 1.22
509     print STDOUT qq[
510 wakaba 1.32 <div id="$input->{id_prefix}dump-manifest" class="section">
511 wakaba 1.22 <h2>Cache Manifest</h2>
512     ];
513 wakaba 1.32 push @nav, ['#dump-manifest' => 'Caceh Manifest'] unless $input->{nested};
514 wakaba 1.22
515     print STDOUT qq[<dl><dt>Explicit entries</dt>];
516     for my $uri (@{$manifest->[0]}) {
517     my $euri = htescape ($uri);
518     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
519     }
520    
521     print STDOUT qq[<dt>Fallback entries</dt><dd>
522     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
523     <th scope=row>Fallback Entry</tr><tbody>];
524     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
525     my $euri = htescape ($uri);
526     my $euri2 = htescape ($manifest->[1]->{$uri});
527     print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
528     <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
529     }
530    
531     print STDOUT qq[</table><dt>Online whitelist</dt>];
532     for my $uri (@{$manifest->[2]}) {
533     my $euri = htescape ($uri);
534     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
535     }
536 wakaba 1.18
537 wakaba 1.22 print STDOUT qq[</dl></div>];
538     } # print_structure_dump_manifest_section
539    
540 wakaba 1.32 sub print_structure_error_dom_section ($$$$) {
541     my ($input, $doc, $el, $result) = @_;
542 wakaba 1.18
543 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
544 wakaba 1.18 <h2>Document Errors</h2>
545    
546     <dl>];
547 wakaba 1.32 push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested};
548 wakaba 1.18
549     require Whatpm::ContentChecker;
550     my $onerror = sub {
551     my %opt = @_;
552     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
553     $type =~ tr/ /-/;
554     $type =~ s/\|/%7C/g;
555     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
556 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
557 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
558     print STDOUT $msg, "</dd>\n";
559 wakaba 1.19 add_error ('structure', \%opt => $result);
560 wakaba 1.18 };
561    
562     my $elements;
563     my $time1 = time;
564     if ($el) {
565     $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
566     } else {
567     $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
568     }
569     $time{check} = time - $time1;
570    
571     print STDOUT qq[</dl></div>];
572    
573     return $elements;
574 wakaba 1.22 } # print_structure_error_dom_section
575    
576     sub print_structure_error_manifest_section ($$$) {
577 wakaba 1.32 my ($input, $manifest, $result) = @_;
578 wakaba 1.22
579 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
580 wakaba 1.22 <h2>Document Errors</h2>
581    
582     <dl>];
583 wakaba 1.32 push @nav, ['#document-errors' => 'Document Error'] unless $input->{nested};
584 wakaba 1.22
585     require Whatpm::CacheManifest;
586     Whatpm::CacheManifest->check_manifest ($manifest, sub {
587     my %opt = @_;
588     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
589     $type =~ tr/ /-/;
590     $type =~ s/\|/%7C/g;
591     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
592 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
593 wakaba 1.22 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
594     add_error ('structure', \%opt => $result);
595     });
596    
597     print STDOUT qq[</div>];
598     } # print_structure_error_manifest_section
599 wakaba 1.18
600 wakaba 1.32 sub print_table_section ($$) {
601     my ($input, $tables) = @_;
602 wakaba 1.18
603 wakaba 1.32 push @nav, ['#tables' => 'Tables'] unless $input->{nested};
604 wakaba 1.18 print STDOUT qq[
605 wakaba 1.32 <div id="$input->{id_prefix}tables" class="section">
606 wakaba 1.18 <h2>Tables</h2>
607    
608     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
609     <script src="../table-script.js" type="text/javascript"></script>
610     <noscript>
611     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
612     </noscript>
613     ];
614    
615     require JSON;
616    
617     my $i = 0;
618     for my $table_el (@$tables) {
619     $i++;
620 wakaba 1.32 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
621     get_node_link ($input, $table_el) . q[</h3>];
622 wakaba 1.18
623     ## TODO: Make |ContentChecker| return |form_table| result
624     ## so that this script don't have to run the algorithm twice.
625     my $table = Whatpm::HTMLTable->form_table ($table_el);
626    
627     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
628     next unless $_;
629     delete $_->{element};
630     }
631    
632     for (@{$table->{row_group}}) {
633     next unless $_;
634     next unless $_->{element};
635     $_->{type} = $_->{element}->manakai_local_name;
636     delete $_->{element};
637     }
638    
639     for (@{$table->{cell}}) {
640     next unless $_;
641     for (@{$_}) {
642     next unless $_;
643     for (@$_) {
644     $_->{id} = refaddr $_->{element} if defined $_->{element};
645     delete $_->{element};
646     $_->{is_header} = $_->{is_header} ? 1 : 0;
647     }
648     }
649     }
650    
651     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
652     print STDOUT JSON::objToJson ($table);
653 wakaba 1.32 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
654     print STDOUT qq[, '$input->{id_prefix}');</script>];
655 wakaba 1.18 }
656    
657     print STDOUT qq[</div>];
658     } # print_table_section
659    
660 wakaba 1.33 sub print_listing_section ($$$) {
661     my ($opt, $input, $ids) = @_;
662 wakaba 1.18
663 wakaba 1.33 push @nav, ['#' . $opt->{id} => $opt->{label}] unless $input->{nested};
664 wakaba 1.18 print STDOUT qq[
665 wakaba 1.33 <div id="$input->{id_prefix}$opt->{id}" class="section">
666     <h2>$opt->{heading}</h2>
667 wakaba 1.18
668     <dl>
669     ];
670     for my $id (sort {$a cmp $b} keys %$ids) {
671     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
672     for (@{$ids->{$id}}) {
673 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
674 wakaba 1.18 }
675     }
676     print STDOUT qq[</dl></div>];
677 wakaba 1.33 } # print_listing_section
678 wakaba 1.18
679 wakaba 1.19 sub print_result_section ($) {
680     my $result = shift;
681    
682     print STDOUT qq[
683     <div id="result-summary" class="section">
684     <h2>Result</h2>];
685    
686 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
687 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
688     checker cannot decide whether the document is conforming or
689     not, since the document contains one or more unsupported
690 wakaba 1.21 features. The document might or might not be conforming.</p>];
691 wakaba 1.19 } elsif ($result->{conforming_min}) {
692     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
693     found in this document.</p>];
694     } elsif ($result->{conforming_max}) {
695     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
696     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
697     it might be conforming.</p>];
698     } else {
699     print STDOUT qq[<p class=FAIL id=result-para>This document is
700     <strong><em>non</em>-conforming</strong>.</p>];
701     }
702    
703     print STDOUT qq[<table>
704     <colgroup><col><colgroup><col><col><col><colgroup><col>
705     <thead>
706 wakaba 1.23 <tr><th scope=col></th>
707     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
708     Errors</a></th>
709     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
710     Errors</a></th>
711     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
712     <th scope=col>Score</th></tr></thead><tbody>];
713 wakaba 1.19
714     my $must_error = 0;
715     my $should_error = 0;
716     my $warning = 0;
717     my $score_min = 0;
718     my $score_max = 0;
719     my $score_base = 20;
720 wakaba 1.21 my $score_unit = $score_base / 100;
721 wakaba 1.19 for (
722     [Transfer => 'transfer', ''],
723     [Character => 'char', ''],
724     [Syntax => 'syntax', '#parse-errors'],
725     [Structure => 'structure', '#document-errors'],
726     ) {
727     $must_error += ($result->{$_->[1]}->{must} += 0);
728     $should_error += ($result->{$_->[1]}->{should} += 0);
729     $warning += ($result->{$_->[1]}->{warning} += 0);
730 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
731     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
732 wakaba 1.19
733     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
734     my $label = $_->[0];
735     if ($result->{$_->[1]}->{must} or
736     $result->{$_->[1]}->{should} or
737     $result->{$_->[1]}->{warning} or
738     $result->{$_->[1]}->{unsupported}) {
739     $label = qq[<a href="$_->[2]">$label</a>];
740     }
741    
742     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
743     if ($uncertain) {
744 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
745 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
746 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
747 wakaba 1.19 } else {
748 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
749 wakaba 1.19 }
750     }
751    
752     $score_max += $score_base;
753    
754     print STDOUT qq[
755     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
756     </tbody>
757 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
758     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
759     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
760     <td>$warning?</td>
761     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
762 wakaba 1.19 </table>
763    
764     <p><strong>Important</strong>: This conformance checking service
765     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
766     </div>];
767     push @nav, ['#result-summary' => 'Result'];
768     } # print_result_section
769    
770 wakaba 1.24 sub print_result_unknown_type_section ($$) {
771     my ($input, $result) = @_;
772 wakaba 1.18
773 wakaba 1.24 my $euri = htescape ($input->{uri});
774 wakaba 1.18 print STDOUT qq[
775 wakaba 1.24 <div id="parse-errors" class="section">
776     <h2>Errors</h2>
777    
778     <dl>
779     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
780     <dd class=unsupported><strong><a href="../error-description#level-u">Not
781     supported</a></strong>:
782     Media type
783     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
784     is not supported.</dd>
785     </dl>
786 wakaba 1.18 </div>
787     ];
788 wakaba 1.24 push @nav, ['#parse-errors' => 'Errors'];
789 wakaba 1.30 add_error (char => {level => 'u'} => $result);
790     add_error (syntax => {level => 'u'} => $result);
791     add_error (structure => {level => 'u'} => $result);
792 wakaba 1.18 } # print_result_unknown_type_section
793    
794     sub print_result_input_error_section ($) {
795     my $input = shift;
796     print STDOUT qq[<div class="section" id="result-summary">
797     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
798     </div>];
799     push @nav, ['#result-summary' => 'Result'];
800 wakaba 1.32 } # print_result_input_error_section
801 wakaba 1.18
802 wakaba 1.32 sub get_error_label ($$) {
803     my ($input, $err) = @_;
804 wakaba 1.22
805     my $r = '';
806    
807     if (defined $err->{line}) {
808     if ($err->{column} > 0) {
809     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
810     } else {
811     $err->{line} = $err->{line} - 1 || 1;
812     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
813     }
814     }
815    
816     if (defined $err->{node}) {
817     $r .= ' ' if length $r;
818 wakaba 1.32 $r = get_node_link ($input, $err->{node});
819 wakaba 1.22 }
820    
821     if (defined $err->{index}) {
822     $r .= ' ' if length $r;
823     $r .= 'Index ' . (0+$err->{index});
824     }
825    
826     if (defined $err->{value}) {
827     $r .= ' ' if length $r;
828     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
829     }
830    
831     return $r;
832     } # get_error_label
833    
834 wakaba 1.23 sub get_error_level_label ($) {
835     my $err = shift;
836    
837     my $r = '';
838    
839     if (not defined $err->{level} or $err->{level} eq 'm') {
840     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
841     error</a></strong>: ];
842     } elsif ($err->{level} eq 's') {
843     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
844     error</a></strong>: ];
845     } elsif ($err->{level} eq 'w') {
846     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
847     ];
848 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
849 wakaba 1.23 $r = qq[<strong><a href="../error-description#level-u">Not
850     supported</a></strong>: ];
851     } else {
852     my $elevel = htescape ($err->{level});
853     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
854     ];
855     }
856    
857     return $r;
858     } # get_error_level_label
859    
860 wakaba 1.1 sub get_node_path ($) {
861     my $node = shift;
862     my @r;
863     while (defined $node) {
864     my $rs;
865     if ($node->node_type == 1) {
866     $rs = $node->manakai_local_name;
867     $node = $node->parent_node;
868     } elsif ($node->node_type == 2) {
869     $rs = '@' . $node->manakai_local_name;
870     $node = $node->owner_element;
871     } elsif ($node->node_type == 3) {
872     $rs = '"' . $node->data . '"';
873     $node = $node->parent_node;
874     } elsif ($node->node_type == 9) {
875 wakaba 1.9 @r = ('') unless @r;
876 wakaba 1.1 $rs = '';
877     $node = $node->parent_node;
878     } else {
879     $rs = '#' . $node->node_type;
880     $node = $node->parent_node;
881     }
882     unshift @r, $rs;
883     }
884     return join '/', @r;
885     } # get_node_path
886    
887 wakaba 1.32 sub get_node_link ($$) {
888     return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
889     htescape (get_node_path ($_[1])) . qq[</a>];
890 wakaba 1.6 } # get_node_link
891    
892 wakaba 1.7 {
893     my $Msg = {};
894    
895     sub load_text_catalog ($) {
896     my $lang = shift; # MUST be a canonical lang name
897 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
898     or die "$0: cc-msg.$lang.txt: $!";
899 wakaba 1.7 while (<$file>) {
900     if (s/^([^;]+);([^;]*);//) {
901     my ($type, $cls, $msg) = ($1, $2, $_);
902     $msg =~ tr/\x0D\x0A//d;
903     $Msg->{$type} = [$cls, $msg];
904     }
905     }
906     } # load_text_catalog
907    
908     sub get_text ($) {
909 wakaba 1.15 my ($type, $level, $node) = @_;
910 wakaba 1.7 $type = $level . ':' . $type if defined $level;
911 wakaba 1.29 $level = 'm' unless defined $level;
912 wakaba 1.7 my @arg;
913     {
914     if (defined $Msg->{$type}) {
915     my $msg = $Msg->{$type}->[1];
916 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
917     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
918     }ge;
919 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
920     UNIVERSAL::can ($node, 'get_attribute_ns')
921     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
922     }ge;
923     $msg =~ s{<var>{\@}</var>}{
924     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
925     }ge;
926 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
927     UNIVERSAL::can ($node, 'manakai_local_name')
928     ? htescape ($node->manakai_local_name) : ''
929     }ge;
930     $msg =~ s{<var>{element-local-name}</var>}{
931     (UNIVERSAL::can ($node, 'owner_element') and
932     $node->owner_element)
933     ? htescape ($node->owner_element->manakai_local_name)
934     : ''
935     }ge;
936 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
937 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
938     unshift @arg, $1;
939     redo;
940     }
941     }
942 wakaba 1.29 return ($type, 'level-'.$level, htescape ($_[0]));
943 wakaba 1.7 } # get_text
944    
945     }
946    
947 wakaba 1.9 sub get_input_document ($$) {
948     my ($http, $dom) = @_;
949    
950 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
951 wakaba 1.9 my $r = {};
952     if (defined $request_uri and length $request_uri) {
953     my $uri = $dom->create_uri_reference ($request_uri);
954     unless ({
955     http => 1,
956     }->{lc $uri->uri_scheme}) {
957     return {uri => $request_uri, request_uri => $request_uri,
958     error_status_text => 'URI scheme not allowed'};
959     }
960    
961     require Message::Util::HostPermit;
962     my $host_permit = new Message::Util::HostPermit;
963     $host_permit->add_rule (<<EOH);
964     Allow host=suika port=80
965     Deny host=suika
966     Allow host=suika.fam.cx port=80
967     Deny host=suika.fam.cx
968     Deny host=localhost
969     Deny host=*.localdomain
970     Deny ipv4=0.0.0.0/8
971     Deny ipv4=10.0.0.0/8
972     Deny ipv4=127.0.0.0/8
973     Deny ipv4=169.254.0.0/16
974     Deny ipv4=172.0.0.0/11
975     Deny ipv4=192.0.2.0/24
976     Deny ipv4=192.88.99.0/24
977     Deny ipv4=192.168.0.0/16
978     Deny ipv4=198.18.0.0/15
979     Deny ipv4=224.0.0.0/4
980     Deny ipv4=255.255.255.255/32
981     Deny ipv6=0::0/0
982     Allow host=*
983     EOH
984     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
985     return {uri => $request_uri, request_uri => $request_uri,
986     error_status_text => 'Connection to the host is forbidden'};
987     }
988    
989     require LWP::UserAgent;
990     my $ua = WDCC::LWPUA->new;
991     $ua->{wdcc_dom} = $dom;
992     $ua->{wdcc_host_permit} = $host_permit;
993     $ua->agent ('Mozilla'); ## TODO: for now.
994     $ua->parse_head (0);
995     $ua->protocols_allowed ([qw/http/]);
996     $ua->max_size (1000_000);
997     my $req = HTTP::Request->new (GET => $request_uri);
998 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
999 wakaba 1.9 my $res = $ua->request ($req);
1000 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1001     if ($res->is_success or $http->get_parameter ('error-page')) {
1002 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1003     $r->{uri} = $res->request->uri;
1004     $r->{request_uri} = $request_uri;
1005    
1006     ## TODO: More strict parsing...
1007     my $ct = $res->header ('Content-Type');
1008 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1009 wakaba 1.9 $r->{charset} = lc $1;
1010     $r->{charset} =~ tr/\\//d;
1011 wakaba 1.26 $r->{official_charset} = $r->{charset};
1012 wakaba 1.9 }
1013    
1014 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1015 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1016     $r->{charset_overridden}
1017     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1018     $r->{charset} = $input_charset;
1019 wakaba 1.25 }
1020    
1021     ## TODO: Support for HTTP Content-Encoding
1022 wakaba 1.9
1023     $r->{s} = ''.$res->content;
1024 wakaba 1.25
1025     require Whatpm::ContentType;
1026     ($r->{official_type}, $r->{media_type})
1027     = Whatpm::ContentType->get_sniffed_type
1028     (get_file_head => sub {
1029     return substr $r->{s}, 0, shift;
1030     },
1031     http_content_type_byte => $ct,
1032     has_http_content_encoding =>
1033     defined $res->header ('Content-Encoding'),
1034     supported_image_types => {});
1035 wakaba 1.9 } else {
1036     $r->{uri} = $res->request->uri;
1037     $r->{request_uri} = $request_uri;
1038     $r->{error_status_text} = $res->status_line;
1039     }
1040    
1041     $r->{header_field} = [];
1042     $res->scan (sub {
1043     push @{$r->{header_field}}, [$_[0], $_[1]];
1044     });
1045     $r->{header_status_code} = $res->code;
1046     $r->{header_status_text} = $res->message;
1047     } else {
1048 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1049 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1050     $r->{request_uri} = q<thismessage:/>;
1051     $r->{base_uri} = q<thismessage:/>;
1052 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1053 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1054     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1055 wakaba 1.26 $r->{official_charset} = $r->{charset};
1056 wakaba 1.9 $r->{header_field} = [];
1057 wakaba 1.25
1058     require Whatpm::ContentType;
1059     ($r->{official_type}, $r->{media_type})
1060     = Whatpm::ContentType->get_sniffed_type
1061     (get_file_head => sub {
1062     return substr $r->{s}, 0, shift;
1063     },
1064     http_content_type_byte => undef,
1065     has_http_content_encoding => 0,
1066     supported_image_types => {});
1067 wakaba 1.9 }
1068    
1069 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1070 wakaba 1.9 if (defined $input_format and length $input_format) {
1071     $r->{media_type_overridden}
1072     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1073     $r->{media_type} = $input_format;
1074     }
1075     if (defined $r->{s} and not defined $r->{media_type}) {
1076     $r->{media_type} = 'text/html';
1077     $r->{media_type_overridden} = 1;
1078     }
1079    
1080     if ($r->{media_type} eq 'text/xml') {
1081     unless (defined $r->{charset}) {
1082     $r->{charset} = 'us-ascii';
1083 wakaba 1.26 $r->{official_charset} = $r->{charset};
1084 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1085     $r->{charset_overridden} = 0;
1086     }
1087     }
1088    
1089     if (length $r->{s} > 1000_000) {
1090     $r->{error_status_text} = 'Entity-body too large';
1091     delete $r->{s};
1092     return $r;
1093     }
1094    
1095     return $r;
1096     } # get_input_document
1097    
1098     package WDCC::LWPUA;
1099     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1100    
1101     sub redirect_ok {
1102     my $ua = shift;
1103     unless ($ua->SUPER::redirect_ok (@_)) {
1104     return 0;
1105     }
1106    
1107     my $uris = $_[1]->header ('Location');
1108     return 0 unless $uris;
1109     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1110     unless ({
1111     http => 1,
1112     }->{lc $uri->uri_scheme}) {
1113     return 0;
1114     }
1115     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1116     return 0;
1117     }
1118     return 1;
1119     } # redirect_ok
1120    
1121 wakaba 1.1 =head1 AUTHOR
1122    
1123     Wakaba <w@suika.fam.cx>.
1124    
1125     =head1 LICENSE
1126    
1127     Copyright 2007 Wakaba <w@suika.fam.cx>
1128    
1129     This library is free software; you can redistribute it
1130     and/or modify it under the same terms as Perl itself.
1131    
1132     =cut
1133    
1134 wakaba 1.33 ## $Date: 2008/02/10 02:30:14 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24