/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.31 - (hide annotations) (download)
Sun Feb 10 02:05:30 2008 UTC (16 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.30: +47 -42 lines
++ ChangeLog	10 Feb 2008 02:05:20 -0000
2008-02-10  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi (check_and_print): Now this is a subroutine.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.16 use Message::CGI::HTTP;
24     my $http = Message::CGI::HTTP->new;
25 wakaba 1.1
26 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28     exit;
29     }
30    
31 wakaba 1.12 binmode STDOUT, ':utf8';
32 wakaba 1.14 $| = 1;
33 wakaba 1.12
34 wakaba 1.9 require Message::DOM::DOMImplementation;
35     my $dom = Message::DOM::DOMImplementation->new;
36    
37 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
38    
39 wakaba 1.3 my @nav;
40 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
41    
42     <!DOCTYPE html>
43     <html lang="en">
44     <head>
45     <title>Web Document Conformance Checker (BETA)</title>
46 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 wakaba 1.2 </head>
48     <body>
49 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50     (<em>beta</em>)</h1>
51 wakaba 1.14 ];
52 wakaba 1.2
53 wakaba 1.14 $| = 0;
54     my $input = get_input_document ($http, $dom);
55 wakaba 1.16 my $char_length = 0;
56     my %time;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89     ];
90    
91 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
92 wakaba 1.31 check_and_print ($input => $result);
93 wakaba 1.19 print_result_section ($result);
94 wakaba 1.9 } else {
95 wakaba 1.18 print STDOUT qq[</dl></div>];
96     print_result_input_error_section ($input);
97 wakaba 1.9 }
98 wakaba 1.3
99 wakaba 1.2 print STDOUT qq[
100 wakaba 1.3 <ul class="navigation" id="nav-items">
101     ];
102     for (@nav) {
103     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
104     }
105     print STDOUT qq[
106     </ul>
107 wakaba 1.2 </body>
108     </html>
109     ];
110 wakaba 1.1
111 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
112     check check_manifest/) {
113 wakaba 1.16 next unless defined $time{$_};
114     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
115     print $file $char_length, "\t", $time{$_}, "\n";
116     }
117    
118 wakaba 1.1 exit;
119    
120 wakaba 1.19 sub add_error ($$$) {
121     my ($layer, $err, $result) = @_;
122     if (defined $err->{level}) {
123     if ($err->{level} eq 's') {
124     $result->{$layer}->{should}++;
125     $result->{$layer}->{score_min} -= 2;
126     $result->{conforming_min} = 0;
127     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
128     $result->{$layer}->{warning}++;
129 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
130 wakaba 1.19 $result->{$layer}->{unsupported}++;
131     $result->{unsupported} = 1;
132     } else {
133     $result->{$layer}->{must}++;
134     $result->{$layer}->{score_max} -= 2;
135     $result->{$layer}->{score_min} -= 2;
136     $result->{conforming_min} = 0;
137     $result->{conforming_max} = 0;
138     }
139     } else {
140     $result->{$layer}->{must}++;
141     $result->{$layer}->{score_max} -= 2;
142     $result->{$layer}->{score_min} -= 2;
143     $result->{conforming_min} = 0;
144     $result->{conforming_max} = 0;
145     }
146     } # add_error
147    
148 wakaba 1.31 sub check_and_print ($$) {
149     my ($input, $result) = @_;
150    
151     print_http_header_section ($input, $result);
152    
153     my $doc;
154     my $el;
155     my $manifest;
156    
157     if ($input->{media_type} eq 'text/html') {
158     ($doc, $el) = print_syntax_error_html_section ($input, $result);
159     print_source_string_section
160     (\($input->{s}), $input->{charset} || $doc->input_encoding);
161     } elsif ({
162     'text/xml' => 1,
163     'application/atom+xml' => 1,
164     'application/rss+xml' => 1,
165     'application/svg+xml' => 1,
166     'application/xhtml+xml' => 1,
167     'application/xml' => 1,
168     }->{$input->{media_type}}) {
169     ($doc, $el) = print_syntax_error_xml_section ($input, $result);
170     print_source_string_section (\($input->{s}), $doc->input_encoding);
171     } elsif ($input->{media_type} eq 'text/cache-manifest') {
172     ## TODO: MUST be text/cache-manifest
173     $manifest = print_syntax_error_manifest_section ($input, $result);
174     print_source_string_section (\($input->{s}), 'utf-8');
175     } else {
176     ## TODO: Change HTTP status code??
177     print_result_unknown_type_section ($input, $result);
178     }
179    
180     if (defined $doc or defined $el) {
181     print_structure_dump_dom_section ($doc, $el);
182     my $elements = print_structure_error_dom_section ($doc, $el, $result);
183     print_table_section ($elements->{table}) if @{$elements->{table}};
184     print_id_section ($elements->{id}) if keys %{$elements->{id}};
185     print_term_section ($elements->{term}) if keys %{$elements->{term}};
186     print_class_section ($elements->{class}) if keys %{$elements->{class}};
187     } elsif (defined $manifest) {
188     print_structure_dump_manifest_section ($manifest);
189     print_structure_error_manifest_section ($manifest, $result);
190     }
191     } # check_and_print
192    
193 wakaba 1.19 sub print_http_header_section ($$) {
194     my ($input, $result) = @_;
195 wakaba 1.9 return unless defined $input->{header_status_code} or
196     defined $input->{header_status_text} or
197     @{$input->{header_field}};
198    
199     push @nav, ['#source-header' => 'HTTP Header'];
200     print STDOUT qq[<div id="source-header" class="section">
201     <h2>HTTP Header</h2>
202    
203     <p><strong>Note</strong>: Due to the limitation of the
204     network library in use, the content of this section might
205     not be the real header.</p>
206    
207     <table><tbody>
208     ];
209    
210     if (defined $input->{header_status_code}) {
211     print STDOUT qq[<tr><th scope="row">Status code</th>];
212     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
213     }
214     if (defined $input->{header_status_text}) {
215     print STDOUT qq[<tr><th scope="row">Status text</th>];
216     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
217     }
218    
219     for (@{$input->{header_field}}) {
220     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
221     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
222     }
223    
224     print STDOUT qq[</tbody></table></div>];
225     } # print_http_header_section
226    
227 wakaba 1.19 sub print_syntax_error_html_section ($$) {
228     my ($input, $result) = @_;
229 wakaba 1.18
230     require Encode;
231     require Whatpm::HTML;
232    
233     print STDOUT qq[
234     <div id="parse-errors" class="section">
235     <h2>Parse Errors</h2>
236    
237     <dl>];
238     push @nav, ['#parse-errors' => 'Parse Error'];
239    
240     my $onerror = sub {
241     my (%opt) = @_;
242     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
243     if ($opt{column} > 0) {
244     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
245     } else {
246     $opt{line} = $opt{line} - 1 || 1;
247     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
248     }
249     $type =~ tr/ /-/;
250     $type =~ s/\|/%7C/g;
251     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
252 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
253     print STDOUT qq[$msg</dd>\n];
254 wakaba 1.19
255     add_error ('syntax', \%opt => $result);
256 wakaba 1.18 };
257    
258     my $doc = $dom->create_document;
259     my $el;
260 wakaba 1.26 my $inner_html_element = $http->get_parameter ('e');
261 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
262 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
263 wakaba 1.24 my $time1 = time;
264     my $t = Encode::decode ($input->{charset}, $input->{s});
265     $time{decode} = time - $time1;
266    
267 wakaba 1.18 $el = $doc->create_element_ns
268     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
269 wakaba 1.24 $time1 = time;
270 wakaba 1.18 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
271 wakaba 1.24 $time{parse} = time - $time1;
272 wakaba 1.18 } else {
273 wakaba 1.24 my $time1 = time;
274     Whatpm::HTML->parse_byte_string
275     ($input->{charset}, $input->{s} => $doc, $onerror);
276     $time{parse_html} = time - $time1;
277 wakaba 1.18 }
278 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
279     if defined $input->{official_charset};
280 wakaba 1.24
281 wakaba 1.18 print STDOUT qq[</dl></div>];
282    
283     return ($doc, $el);
284     } # print_syntax_error_html_section
285    
286 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
287     my ($input, $result) = @_;
288 wakaba 1.18
289     require Message::DOM::XMLParserTemp;
290    
291     print STDOUT qq[
292     <div id="parse-errors" class="section">
293     <h2>Parse Errors</h2>
294    
295     <dl>];
296     push @nav, ['#parse-errors' => 'Parse Error'];
297    
298     my $onerror = sub {
299     my $err = shift;
300     my $line = $err->location->line_number;
301     print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
302     print STDOUT $err->location->column_number, "</dt><dd>";
303     print STDOUT htescape $err->text, "</dd>\n";
304 wakaba 1.19
305     add_error ('syntax', {type => $err->text,
306     level => [
307     $err->SEVERITY_FATAL_ERROR => 'm',
308     $err->SEVERITY_ERROR => 'm',
309     $err->SEVERITY_WARNING => 's',
310     ]->[$err->severity]} => $result);
311    
312 wakaba 1.18 return 1;
313     };
314    
315     my $time1 = time;
316     open my $fh, '<', \($input->{s});
317     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
318     ($fh => $dom, $onerror, charset => $input->{charset});
319     $time{parse_xml} = time - $time1;
320 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
321     if defined $input->{official_charset};
322 wakaba 1.18
323     print STDOUT qq[</dl></div>];
324    
325     return ($doc, undef);
326     } # print_syntax_error_xml_section
327    
328 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
329     my ($input, $result) = @_;
330    
331     require Whatpm::CacheManifest;
332    
333     print STDOUT qq[
334     <div id="parse-errors" class="section">
335     <h2>Parse Errors</h2>
336    
337     <dl>];
338     push @nav, ['#parse-errors' => 'Parse Error'];
339    
340     my $onerror = sub {
341     my (%opt) = @_;
342     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
343     print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
344     $type =~ tr/ /-/;
345     $type =~ s/\|/%7C/g;
346     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
347 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
348     print STDOUT qq[$msg</dd>\n];
349 wakaba 1.22
350     add_error ('syntax', \%opt => $result);
351     };
352    
353     my $time1 = time;
354     my $manifest = Whatpm::CacheManifest->parse_byte_string
355     ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
356     $time{parse_manifest} = time - $time1;
357    
358     print STDOUT qq[</dl></div>];
359    
360     return $manifest;
361     } # print_syntax_error_manifest_section
362    
363 wakaba 1.9 sub print_source_string_section ($$) {
364     require Encode;
365     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
366     return unless $enc;
367    
368     my $s = \($enc->decode (${$_[0]}));
369     my $i = 1;
370     push @nav, ['#source-string' => 'Source'];
371     print STDOUT qq[<div id="source-string" class="section">
372     <h2>Document Source</h2>
373     <ol lang="">\n];
374 wakaba 1.7 if (length $$s) {
375     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
376     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
377     $i++;
378     }
379     if ($$s =~ /\G([^\x0A]+)/gc) {
380     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
381     }
382     } else {
383     print STDOUT q[<li id="line-1"></li>];
384 wakaba 1.2 }
385 wakaba 1.9 print STDOUT "</ol></div>";
386     } # print_input_string_section
387 wakaba 1.2
388     sub print_document_tree ($) {
389 wakaba 1.1 my $node = shift;
390 wakaba 1.2 my $r = '<ol class="xoxo">';
391 wakaba 1.1
392 wakaba 1.2 my @node = ($node);
393 wakaba 1.1 while (@node) {
394     my $child = shift @node;
395 wakaba 1.2 unless (ref $child) {
396     $r .= $child;
397     next;
398     }
399    
400     my $node_id = 'node-'.refaddr $child;
401     my $nt = $child->node_type;
402     if ($nt == $child->ELEMENT_NODE) {
403 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
404     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
405 wakaba 1.2 '</code>'; ## ISSUE: case
406    
407     if ($child->has_attributes) {
408     $r .= '<ul class="attributes">';
409 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
410 wakaba 1.2 @{$child->attributes}) {
411 wakaba 1.27 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
412 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
413     }
414     $r .= '</ul>';
415     }
416    
417 wakaba 1.7 if ($child->has_child_nodes) {
418 wakaba 1.2 $r .= '<ol class="children">';
419 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
420     } else {
421     $r .= '</li>';
422 wakaba 1.2 }
423     } elsif ($nt == $child->TEXT_NODE) {
424 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
425 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
426 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
427 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
428 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
429 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
430 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
431 wakaba 1.7 $r .= qq[<ul class="attributes">];
432 wakaba 1.27 my $cp = $child->manakai_charset;
433     if (defined $cp) {
434     $r .= qq[<li><code>charset</code> parameter = <code>];
435     $r .= htescape ($cp) . qq[</code></li>];
436     }
437     $r .= qq[<li><code>inputEncoding</code> = ];
438     my $ie = $child->input_encoding;
439     if (defined $ie) {
440     $r .= qq[<code>@{[htescape ($ie)]}</code>];
441     if ($child->manakai_has_bom) {
442     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
443     }
444     } else {
445     $r .= qq[(<code>null</code>)];
446     }
447 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
448     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
449 wakaba 1.9 unless ($child->manakai_is_html) {
450     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
451     if (defined $child->xml_encoding) {
452     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
453     } else {
454     $r .= qq[<li>XML encoding = (null)</li>];
455     }
456     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
457     }
458 wakaba 1.7 $r .= qq[</ul>];
459 wakaba 1.2 if ($child->has_child_nodes) {
460 wakaba 1.7 $r .= '<ol class="children">';
461 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
462 wakaba 1.1 }
463 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
464 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
465     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
466     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
467     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
468 wakaba 1.2 $r .= '</ul></li>';
469     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
470 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
471 wakaba 1.1 } else {
472 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
473 wakaba 1.1 }
474     }
475 wakaba 1.2
476     $r .= '</ol>';
477     print STDOUT $r;
478     } # print_document_tree
479 wakaba 1.1
480 wakaba 1.22 sub print_structure_dump_dom_section ($$) {
481 wakaba 1.18 my ($doc, $el) = @_;
482    
483     print STDOUT qq[
484     <div id="document-tree" class="section">
485     <h2>Document Tree</h2>
486     ];
487     push @nav, ['#document-tree' => 'Tree'];
488    
489     print_document_tree ($el || $doc);
490    
491     print STDOUT qq[</div>];
492 wakaba 1.22 } # print_structure_dump_dom_section
493    
494     sub print_structure_dump_manifest_section ($) {
495     my $manifest = shift;
496    
497     print STDOUT qq[
498     <div id="dump-manifest" class="section">
499     <h2>Cache Manifest</h2>
500     ];
501     push @nav, ['#dump-manifest' => 'Caceh Manifest'];
502    
503     print STDOUT qq[<dl><dt>Explicit entries</dt>];
504     for my $uri (@{$manifest->[0]}) {
505     my $euri = htescape ($uri);
506     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
507     }
508    
509     print STDOUT qq[<dt>Fallback entries</dt><dd>
510     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
511     <th scope=row>Fallback Entry</tr><tbody>];
512     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
513     my $euri = htescape ($uri);
514     my $euri2 = htescape ($manifest->[1]->{$uri});
515     print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
516     <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
517     }
518    
519     print STDOUT qq[</table><dt>Online whitelist</dt>];
520     for my $uri (@{$manifest->[2]}) {
521     my $euri = htescape ($uri);
522     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
523     }
524 wakaba 1.18
525 wakaba 1.22 print STDOUT qq[</dl></div>];
526     } # print_structure_dump_manifest_section
527    
528     sub print_structure_error_dom_section ($$$) {
529 wakaba 1.19 my ($doc, $el, $result) = @_;
530 wakaba 1.18
531     print STDOUT qq[<div id="document-errors" class="section">
532     <h2>Document Errors</h2>
533    
534     <dl>];
535     push @nav, ['#document-errors' => 'Document Error'];
536    
537     require Whatpm::ContentChecker;
538     my $onerror = sub {
539     my %opt = @_;
540     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
541     $type =~ tr/ /-/;
542     $type =~ s/\|/%7C/g;
543     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
544 wakaba 1.22 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
545 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
546     print STDOUT $msg, "</dd>\n";
547 wakaba 1.19 add_error ('structure', \%opt => $result);
548 wakaba 1.18 };
549    
550     my $elements;
551     my $time1 = time;
552     if ($el) {
553     $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
554     } else {
555     $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
556     }
557     $time{check} = time - $time1;
558    
559     print STDOUT qq[</dl></div>];
560    
561     return $elements;
562 wakaba 1.22 } # print_structure_error_dom_section
563    
564     sub print_structure_error_manifest_section ($$$) {
565     my ($manifest, $result) = @_;
566    
567     print STDOUT qq[<div id="document-errors" class="section">
568     <h2>Document Errors</h2>
569    
570     <dl>];
571     push @nav, ['#document-errors' => 'Document Error'];
572    
573     require Whatpm::CacheManifest;
574     Whatpm::CacheManifest->check_manifest ($manifest, sub {
575     my %opt = @_;
576     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
577     $type =~ tr/ /-/;
578     $type =~ s/\|/%7C/g;
579     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
580     print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
581     qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
582     add_error ('structure', \%opt => $result);
583     });
584    
585     print STDOUT qq[</div>];
586     } # print_structure_error_manifest_section
587 wakaba 1.18
588     sub print_table_section ($) {
589     my $tables = shift;
590    
591     push @nav, ['#tables' => 'Tables'];
592     print STDOUT qq[
593     <div id="tables" class="section">
594     <h2>Tables</h2>
595    
596     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
597     <script src="../table-script.js" type="text/javascript"></script>
598     <noscript>
599     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
600     </noscript>
601     ];
602    
603     require JSON;
604    
605     my $i = 0;
606     for my $table_el (@$tables) {
607     $i++;
608     print STDOUT qq[<div class="section" id="table-$i"><h3>] .
609     get_node_link ($table_el) . q[</h3>];
610    
611     ## TODO: Make |ContentChecker| return |form_table| result
612     ## so that this script don't have to run the algorithm twice.
613     my $table = Whatpm::HTMLTable->form_table ($table_el);
614    
615     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
616     next unless $_;
617     delete $_->{element};
618     }
619    
620     for (@{$table->{row_group}}) {
621     next unless $_;
622     next unless $_->{element};
623     $_->{type} = $_->{element}->manakai_local_name;
624     delete $_->{element};
625     }
626    
627     for (@{$table->{cell}}) {
628     next unless $_;
629     for (@{$_}) {
630     next unless $_;
631     for (@$_) {
632     $_->{id} = refaddr $_->{element} if defined $_->{element};
633     delete $_->{element};
634     $_->{is_header} = $_->{is_header} ? 1 : 0;
635     }
636     }
637     }
638    
639     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
640     print STDOUT JSON::objToJson ($table);
641     print STDOUT qq[, document.getElementById ('table-$i'));</script>];
642     }
643    
644     print STDOUT qq[</div>];
645     } # print_table_section
646    
647     sub print_id_section ($) {
648     my $ids = shift;
649    
650     push @nav, ['#identifiers' => 'IDs'];
651     print STDOUT qq[
652     <div id="identifiers" class="section">
653     <h2>Identifiers</h2>
654    
655     <dl>
656     ];
657     for my $id (sort {$a cmp $b} keys %$ids) {
658     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
659     for (@{$ids->{$id}}) {
660     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
661     }
662     }
663     print STDOUT qq[</dl></div>];
664     } # print_id_section
665    
666     sub print_term_section ($) {
667     my $terms = shift;
668    
669     push @nav, ['#terms' => 'Terms'];
670     print STDOUT qq[
671     <div id="terms" class="section">
672     <h2>Terms</h2>
673    
674     <dl>
675     ];
676     for my $term (sort {$a cmp $b} keys %$terms) {
677     print STDOUT qq[<dt>@{[htescape $term]}</dt>];
678     for (@{$terms->{$term}}) {
679     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
680     }
681     }
682     print STDOUT qq[</dl></div>];
683     } # print_term_section
684    
685     sub print_class_section ($) {
686     my $classes = shift;
687    
688     push @nav, ['#classes' => 'Classes'];
689     print STDOUT qq[
690     <div id="classes" class="section">
691     <h2>Classes</h2>
692    
693     <dl>
694     ];
695     for my $class (sort {$a cmp $b} keys %$classes) {
696     print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
697     for (@{$classes->{$class}}) {
698     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
699     }
700     }
701     print STDOUT qq[</dl></div>];
702     } # print_class_section
703    
704 wakaba 1.19 sub print_result_section ($) {
705     my $result = shift;
706    
707     print STDOUT qq[
708     <div id="result-summary" class="section">
709     <h2>Result</h2>];
710    
711 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
712 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
713     checker cannot decide whether the document is conforming or
714     not, since the document contains one or more unsupported
715 wakaba 1.21 features. The document might or might not be conforming.</p>];
716 wakaba 1.19 } elsif ($result->{conforming_min}) {
717     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
718     found in this document.</p>];
719     } elsif ($result->{conforming_max}) {
720     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
721     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
722     it might be conforming.</p>];
723     } else {
724     print STDOUT qq[<p class=FAIL id=result-para>This document is
725     <strong><em>non</em>-conforming</strong>.</p>];
726     }
727    
728     print STDOUT qq[<table>
729     <colgroup><col><colgroup><col><col><col><colgroup><col>
730     <thead>
731 wakaba 1.23 <tr><th scope=col></th>
732     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
733     Errors</a></th>
734     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
735     Errors</a></th>
736     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
737     <th scope=col>Score</th></tr></thead><tbody>];
738 wakaba 1.19
739     my $must_error = 0;
740     my $should_error = 0;
741     my $warning = 0;
742     my $score_min = 0;
743     my $score_max = 0;
744     my $score_base = 20;
745 wakaba 1.21 my $score_unit = $score_base / 100;
746 wakaba 1.19 for (
747     [Transfer => 'transfer', ''],
748     [Character => 'char', ''],
749     [Syntax => 'syntax', '#parse-errors'],
750     [Structure => 'structure', '#document-errors'],
751     ) {
752     $must_error += ($result->{$_->[1]}->{must} += 0);
753     $should_error += ($result->{$_->[1]}->{should} += 0);
754     $warning += ($result->{$_->[1]}->{warning} += 0);
755 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
756     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
757 wakaba 1.19
758     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
759     my $label = $_->[0];
760     if ($result->{$_->[1]}->{must} or
761     $result->{$_->[1]}->{should} or
762     $result->{$_->[1]}->{warning} or
763     $result->{$_->[1]}->{unsupported}) {
764     $label = qq[<a href="$_->[2]">$label</a>];
765     }
766    
767     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
768     if ($uncertain) {
769 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
770 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
771 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
772 wakaba 1.19 } else {
773 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
774 wakaba 1.19 }
775     }
776    
777     $score_max += $score_base;
778    
779     print STDOUT qq[
780     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
781     </tbody>
782 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
783     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
784     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
785     <td>$warning?</td>
786     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
787 wakaba 1.19 </table>
788    
789     <p><strong>Important</strong>: This conformance checking service
790     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
791     </div>];
792     push @nav, ['#result-summary' => 'Result'];
793     } # print_result_section
794    
795 wakaba 1.24 sub print_result_unknown_type_section ($$) {
796     my ($input, $result) = @_;
797 wakaba 1.18
798 wakaba 1.24 my $euri = htescape ($input->{uri});
799 wakaba 1.18 print STDOUT qq[
800 wakaba 1.24 <div id="parse-errors" class="section">
801     <h2>Errors</h2>
802    
803     <dl>
804     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
805     <dd class=unsupported><strong><a href="../error-description#level-u">Not
806     supported</a></strong>:
807     Media type
808     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
809     is not supported.</dd>
810     </dl>
811 wakaba 1.18 </div>
812     ];
813 wakaba 1.24 push @nav, ['#parse-errors' => 'Errors'];
814 wakaba 1.30 add_error (char => {level => 'u'} => $result);
815     add_error (syntax => {level => 'u'} => $result);
816     add_error (structure => {level => 'u'} => $result);
817 wakaba 1.18 } # print_result_unknown_type_section
818    
819     sub print_result_input_error_section ($) {
820     my $input = shift;
821     print STDOUT qq[<div class="section" id="result-summary">
822     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
823     </div>];
824     push @nav, ['#result-summary' => 'Result'];
825     } # print_Result_input_error_section
826    
827 wakaba 1.22 sub get_error_label ($) {
828     my $err = shift;
829    
830     my $r = '';
831    
832     if (defined $err->{line}) {
833     if ($err->{column} > 0) {
834     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
835     } else {
836     $err->{line} = $err->{line} - 1 || 1;
837     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
838     }
839     }
840    
841     if (defined $err->{node}) {
842     $r .= ' ' if length $r;
843 wakaba 1.23 $r = get_node_link ($err->{node});
844 wakaba 1.22 }
845    
846     if (defined $err->{index}) {
847     $r .= ' ' if length $r;
848     $r .= 'Index ' . (0+$err->{index});
849     }
850    
851     if (defined $err->{value}) {
852     $r .= ' ' if length $r;
853     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
854     }
855    
856     return $r;
857     } # get_error_label
858    
859 wakaba 1.23 sub get_error_level_label ($) {
860     my $err = shift;
861    
862     my $r = '';
863    
864     if (not defined $err->{level} or $err->{level} eq 'm') {
865     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
866     error</a></strong>: ];
867     } elsif ($err->{level} eq 's') {
868     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
869     error</a></strong>: ];
870     } elsif ($err->{level} eq 'w') {
871     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
872     ];
873 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
874 wakaba 1.23 $r = qq[<strong><a href="../error-description#level-u">Not
875     supported</a></strong>: ];
876     } else {
877     my $elevel = htescape ($err->{level});
878     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
879     ];
880     }
881    
882     return $r;
883     } # get_error_level_label
884    
885 wakaba 1.1 sub get_node_path ($) {
886     my $node = shift;
887     my @r;
888     while (defined $node) {
889     my $rs;
890     if ($node->node_type == 1) {
891     $rs = $node->manakai_local_name;
892     $node = $node->parent_node;
893     } elsif ($node->node_type == 2) {
894     $rs = '@' . $node->manakai_local_name;
895     $node = $node->owner_element;
896     } elsif ($node->node_type == 3) {
897     $rs = '"' . $node->data . '"';
898     $node = $node->parent_node;
899     } elsif ($node->node_type == 9) {
900 wakaba 1.9 @r = ('') unless @r;
901 wakaba 1.1 $rs = '';
902     $node = $node->parent_node;
903     } else {
904     $rs = '#' . $node->node_type;
905     $node = $node->parent_node;
906     }
907     unshift @r, $rs;
908     }
909     return join '/', @r;
910     } # get_node_path
911    
912 wakaba 1.6 sub get_node_link ($) {
913     return qq[<a href="#node-@{[refaddr $_[0]]}">] .
914     htescape (get_node_path ($_[0])) . qq[</a>];
915     } # get_node_link
916    
917 wakaba 1.7 {
918     my $Msg = {};
919    
920     sub load_text_catalog ($) {
921     my $lang = shift; # MUST be a canonical lang name
922 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
923     or die "$0: cc-msg.$lang.txt: $!";
924 wakaba 1.7 while (<$file>) {
925     if (s/^([^;]+);([^;]*);//) {
926     my ($type, $cls, $msg) = ($1, $2, $_);
927     $msg =~ tr/\x0D\x0A//d;
928     $Msg->{$type} = [$cls, $msg];
929     }
930     }
931     } # load_text_catalog
932    
933     sub get_text ($) {
934 wakaba 1.15 my ($type, $level, $node) = @_;
935 wakaba 1.7 $type = $level . ':' . $type if defined $level;
936 wakaba 1.29 $level = 'm' unless defined $level;
937 wakaba 1.7 my @arg;
938     {
939     if (defined $Msg->{$type}) {
940     my $msg = $Msg->{$type}->[1];
941 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
942     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
943     }ge;
944 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
945     UNIVERSAL::can ($node, 'get_attribute_ns')
946     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
947     }ge;
948     $msg =~ s{<var>{\@}</var>}{
949     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
950     }ge;
951 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
952     UNIVERSAL::can ($node, 'manakai_local_name')
953     ? htescape ($node->manakai_local_name) : ''
954     }ge;
955     $msg =~ s{<var>{element-local-name}</var>}{
956     (UNIVERSAL::can ($node, 'owner_element') and
957     $node->owner_element)
958     ? htescape ($node->owner_element->manakai_local_name)
959     : ''
960     }ge;
961 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
962 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
963     unshift @arg, $1;
964     redo;
965     }
966     }
967 wakaba 1.29 return ($type, 'level-'.$level, htescape ($_[0]));
968 wakaba 1.7 } # get_text
969    
970     }
971    
972 wakaba 1.9 sub get_input_document ($$) {
973     my ($http, $dom) = @_;
974    
975 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
976 wakaba 1.9 my $r = {};
977     if (defined $request_uri and length $request_uri) {
978     my $uri = $dom->create_uri_reference ($request_uri);
979     unless ({
980     http => 1,
981     }->{lc $uri->uri_scheme}) {
982     return {uri => $request_uri, request_uri => $request_uri,
983     error_status_text => 'URI scheme not allowed'};
984     }
985    
986     require Message::Util::HostPermit;
987     my $host_permit = new Message::Util::HostPermit;
988     $host_permit->add_rule (<<EOH);
989     Allow host=suika port=80
990     Deny host=suika
991     Allow host=suika.fam.cx port=80
992     Deny host=suika.fam.cx
993     Deny host=localhost
994     Deny host=*.localdomain
995     Deny ipv4=0.0.0.0/8
996     Deny ipv4=10.0.0.0/8
997     Deny ipv4=127.0.0.0/8
998     Deny ipv4=169.254.0.0/16
999     Deny ipv4=172.0.0.0/11
1000     Deny ipv4=192.0.2.0/24
1001     Deny ipv4=192.88.99.0/24
1002     Deny ipv4=192.168.0.0/16
1003     Deny ipv4=198.18.0.0/15
1004     Deny ipv4=224.0.0.0/4
1005     Deny ipv4=255.255.255.255/32
1006     Deny ipv6=0::0/0
1007     Allow host=*
1008     EOH
1009     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1010     return {uri => $request_uri, request_uri => $request_uri,
1011     error_status_text => 'Connection to the host is forbidden'};
1012     }
1013    
1014     require LWP::UserAgent;
1015     my $ua = WDCC::LWPUA->new;
1016     $ua->{wdcc_dom} = $dom;
1017     $ua->{wdcc_host_permit} = $host_permit;
1018     $ua->agent ('Mozilla'); ## TODO: for now.
1019     $ua->parse_head (0);
1020     $ua->protocols_allowed ([qw/http/]);
1021     $ua->max_size (1000_000);
1022     my $req = HTTP::Request->new (GET => $request_uri);
1023 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1024 wakaba 1.9 my $res = $ua->request ($req);
1025 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1026     if ($res->is_success or $http->get_parameter ('error-page')) {
1027 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1028     $r->{uri} = $res->request->uri;
1029     $r->{request_uri} = $request_uri;
1030    
1031     ## TODO: More strict parsing...
1032     my $ct = $res->header ('Content-Type');
1033 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1034 wakaba 1.9 $r->{charset} = lc $1;
1035     $r->{charset} =~ tr/\\//d;
1036 wakaba 1.26 $r->{official_charset} = $r->{charset};
1037 wakaba 1.9 }
1038    
1039 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1040 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1041     $r->{charset_overridden}
1042     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1043     $r->{charset} = $input_charset;
1044 wakaba 1.25 }
1045    
1046     ## TODO: Support for HTTP Content-Encoding
1047 wakaba 1.9
1048     $r->{s} = ''.$res->content;
1049 wakaba 1.25
1050     require Whatpm::ContentType;
1051     ($r->{official_type}, $r->{media_type})
1052     = Whatpm::ContentType->get_sniffed_type
1053     (get_file_head => sub {
1054     return substr $r->{s}, 0, shift;
1055     },
1056     http_content_type_byte => $ct,
1057     has_http_content_encoding =>
1058     defined $res->header ('Content-Encoding'),
1059     supported_image_types => {});
1060 wakaba 1.9 } else {
1061     $r->{uri} = $res->request->uri;
1062     $r->{request_uri} = $request_uri;
1063     $r->{error_status_text} = $res->status_line;
1064     }
1065    
1066     $r->{header_field} = [];
1067     $res->scan (sub {
1068     push @{$r->{header_field}}, [$_[0], $_[1]];
1069     });
1070     $r->{header_status_code} = $res->code;
1071     $r->{header_status_text} = $res->message;
1072     } else {
1073 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1074 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1075     $r->{request_uri} = q<thismessage:/>;
1076     $r->{base_uri} = q<thismessage:/>;
1077 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1078 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1079     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1080 wakaba 1.26 $r->{official_charset} = $r->{charset};
1081 wakaba 1.9 $r->{header_field} = [];
1082 wakaba 1.25
1083     require Whatpm::ContentType;
1084     ($r->{official_type}, $r->{media_type})
1085     = Whatpm::ContentType->get_sniffed_type
1086     (get_file_head => sub {
1087     return substr $r->{s}, 0, shift;
1088     },
1089     http_content_type_byte => undef,
1090     has_http_content_encoding => 0,
1091     supported_image_types => {});
1092 wakaba 1.9 }
1093    
1094 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1095 wakaba 1.9 if (defined $input_format and length $input_format) {
1096     $r->{media_type_overridden}
1097     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1098     $r->{media_type} = $input_format;
1099     }
1100     if (defined $r->{s} and not defined $r->{media_type}) {
1101     $r->{media_type} = 'text/html';
1102     $r->{media_type_overridden} = 1;
1103     }
1104    
1105     if ($r->{media_type} eq 'text/xml') {
1106     unless (defined $r->{charset}) {
1107     $r->{charset} = 'us-ascii';
1108 wakaba 1.26 $r->{official_charset} = $r->{charset};
1109 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1110     $r->{charset_overridden} = 0;
1111     }
1112     }
1113    
1114     if (length $r->{s} > 1000_000) {
1115     $r->{error_status_text} = 'Entity-body too large';
1116     delete $r->{s};
1117     return $r;
1118     }
1119    
1120     return $r;
1121     } # get_input_document
1122    
1123     package WDCC::LWPUA;
1124     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1125    
1126     sub redirect_ok {
1127     my $ua = shift;
1128     unless ($ua->SUPER::redirect_ok (@_)) {
1129     return 0;
1130     }
1131    
1132     my $uris = $_[1]->header ('Location');
1133     return 0 unless $uris;
1134     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1135     unless ({
1136     http => 1,
1137     }->{lc $uri->uri_scheme}) {
1138     return 0;
1139     }
1140     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1141     return 0;
1142     }
1143     return 1;
1144     } # redirect_ok
1145    
1146 wakaba 1.1 =head1 AUTHOR
1147    
1148     Wakaba <w@suika.fam.cx>.
1149    
1150     =head1 LICENSE
1151    
1152     Copyright 2007 Wakaba <w@suika.fam.cx>
1153    
1154     This library is free software; you can redistribute it
1155     and/or modify it under the same terms as Perl itself.
1156    
1157     =cut
1158    
1159 wakaba 1.31 ## $Date: 2008/02/09 12:22:19 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24