/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.25 - (hide annotations) (download)
Sun Nov 18 05:30:03 2007 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.24: +34 -8 lines
++ ChangeLog	18 Nov 2007 05:29:39 -0000
2007-11-18  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi (get_input_document): Use sniffer to determine
	media type of the entity.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.16 use Message::CGI::HTTP;
24     my $http = Message::CGI::HTTP->new;
25 wakaba 1.1
26 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28     exit;
29     }
30    
31 wakaba 1.12 binmode STDOUT, ':utf8';
32 wakaba 1.14 $| = 1;
33 wakaba 1.12
34 wakaba 1.9 require Message::DOM::DOMImplementation;
35     my $dom = Message::DOM::DOMImplementation->new;
36    
37 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
38    
39 wakaba 1.3 my @nav;
40 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
41    
42     <!DOCTYPE html>
43     <html lang="en">
44     <head>
45     <title>Web Document Conformance Checker (BETA)</title>
46 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 wakaba 1.2 </head>
48     <body>
49 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50     (<em>beta</em>)</h1>
51 wakaba 1.14 ];
52 wakaba 1.2
53 wakaba 1.14 $| = 0;
54     my $input = get_input_document ($http, $dom);
55 wakaba 1.16 my $inner_html_element = $http->get_parameter ('e');
56     my $char_length = 0;
57     my %time;
58 wakaba 1.14
59     print qq[
60 wakaba 1.4 <div id="document-info" class="section">
61 wakaba 1.2 <dl>
62 wakaba 1.9 <dt>Request URI</dt>
63     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
64 wakaba 1.2 <dt>Document URI</dt>
65 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
66     <script>
67     document.title = '<'
68     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
69     + document.title;
70     </script></dd>
71 wakaba 1.2 ]; # no </dl> yet
72 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
73 wakaba 1.1
74 wakaba 1.9 if (defined $input->{s}) {
75 wakaba 1.16 $char_length = length $input->{s};
76 wakaba 1.9
77     print STDOUT qq[
78     <dt>Base URI</dt>
79     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
80     <dt>Internet Media Type</dt>
81     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
82 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
83 wakaba 1.9 <dt>Character Encoding</dt>
84     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
85     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
86 wakaba 1.16 <dt>Length</dt>
87     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
88 wakaba 1.9 </dl>
89     </div>
90     ];
91    
92 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
93 wakaba 1.19 print_http_header_section ($input, $result);
94 wakaba 1.9
95 wakaba 1.1 my $doc;
96     my $el;
97 wakaba 1.22 my $manifest;
98 wakaba 1.1
99 wakaba 1.9 if ($input->{media_type} eq 'text/html') {
100 wakaba 1.19 ($doc, $el) = print_syntax_error_html_section ($input, $result);
101 wakaba 1.25 print_source_string_section
102     (\($input->{s}), $input->{charset} || $doc->input_encoding);
103 wakaba 1.9 } elsif ({
104     'text/xml' => 1,
105 wakaba 1.16 'application/atom+xml' => 1,
106     'application/rss+xml' => 1,
107     'application/svg+xml' => 1,
108 wakaba 1.9 'application/xhtml+xml' => 1,
109     'application/xml' => 1,
110     }->{$input->{media_type}}) {
111 wakaba 1.19 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
112 wakaba 1.9 print_source_string_section (\($input->{s}), $doc->input_encoding);
113 wakaba 1.22 } elsif ($input->{media_type} eq 'text/cache-manifest') {
114     ## TODO: MUST be text/cache-manifest
115     $manifest = print_syntax_error_manifest_section ($input, $result);
116     print_source_string_section (\($input->{s}), 'utf-8');
117 wakaba 1.2 } else {
118 wakaba 1.9 ## TODO: Change HTTP status code??
119 wakaba 1.24 print_result_unknown_type_section ($input, $result);
120 wakaba 1.2 }
121    
122     if (defined $doc or defined $el) {
123 wakaba 1.22 print_structure_dump_dom_section ($doc, $el);
124     my $elements = print_structure_error_dom_section ($doc, $el, $result);
125 wakaba 1.18 print_table_section ($elements->{table}) if @{$elements->{table}};
126     print_id_section ($elements->{id}) if keys %{$elements->{id}};
127     print_term_section ($elements->{term}) if keys %{$elements->{term}};
128     print_class_section ($elements->{class}) if keys %{$elements->{class}};
129 wakaba 1.22 } elsif (defined $manifest) {
130     print_structure_dump_manifest_section ($manifest);
131     print_structure_error_manifest_section ($manifest, $result);
132 wakaba 1.1 }
133    
134 wakaba 1.19 print_result_section ($result);
135 wakaba 1.9 } else {
136 wakaba 1.18 print STDOUT qq[</dl></div>];
137     print_result_input_error_section ($input);
138 wakaba 1.9 }
139 wakaba 1.3
140 wakaba 1.2 print STDOUT qq[
141 wakaba 1.3 <ul class="navigation" id="nav-items">
142     ];
143     for (@nav) {
144     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
145     }
146     print STDOUT qq[
147     </ul>
148 wakaba 1.2 </body>
149     </html>
150     ];
151 wakaba 1.1
152 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
153     check check_manifest/) {
154 wakaba 1.16 next unless defined $time{$_};
155     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
156     print $file $char_length, "\t", $time{$_}, "\n";
157     }
158    
159 wakaba 1.1 exit;
160    
161 wakaba 1.19 sub add_error ($$$) {
162     my ($layer, $err, $result) = @_;
163     if (defined $err->{level}) {
164     if ($err->{level} eq 's') {
165     $result->{$layer}->{should}++;
166     $result->{$layer}->{score_min} -= 2;
167     $result->{conforming_min} = 0;
168     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
169     $result->{$layer}->{warning}++;
170     } elsif ($err->{level} eq 'unsupported') {
171     $result->{$layer}->{unsupported}++;
172     $result->{unsupported} = 1;
173     } else {
174     $result->{$layer}->{must}++;
175     $result->{$layer}->{score_max} -= 2;
176     $result->{$layer}->{score_min} -= 2;
177     $result->{conforming_min} = 0;
178     $result->{conforming_max} = 0;
179     }
180     } else {
181     $result->{$layer}->{must}++;
182     $result->{$layer}->{score_max} -= 2;
183     $result->{$layer}->{score_min} -= 2;
184     $result->{conforming_min} = 0;
185     $result->{conforming_max} = 0;
186     }
187     } # add_error
188    
189     sub print_http_header_section ($$) {
190     my ($input, $result) = @_;
191 wakaba 1.9 return unless defined $input->{header_status_code} or
192     defined $input->{header_status_text} or
193     @{$input->{header_field}};
194    
195     push @nav, ['#source-header' => 'HTTP Header'];
196     print STDOUT qq[<div id="source-header" class="section">
197     <h2>HTTP Header</h2>
198    
199     <p><strong>Note</strong>: Due to the limitation of the
200     network library in use, the content of this section might
201     not be the real header.</p>
202    
203     <table><tbody>
204     ];
205    
206     if (defined $input->{header_status_code}) {
207     print STDOUT qq[<tr><th scope="row">Status code</th>];
208     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
209     }
210     if (defined $input->{header_status_text}) {
211     print STDOUT qq[<tr><th scope="row">Status text</th>];
212     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
213     }
214    
215     for (@{$input->{header_field}}) {
216     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
217     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
218     }
219    
220     print STDOUT qq[</tbody></table></div>];
221     } # print_http_header_section
222    
223 wakaba 1.19 sub print_syntax_error_html_section ($$) {
224     my ($input, $result) = @_;
225 wakaba 1.18
226     require Encode;
227     require Whatpm::HTML;
228    
229     print STDOUT qq[
230     <div id="parse-errors" class="section">
231     <h2>Parse Errors</h2>
232    
233     <dl>];
234     push @nav, ['#parse-errors' => 'Parse Error'];
235    
236     my $onerror = sub {
237     my (%opt) = @_;
238     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
239     if ($opt{column} > 0) {
240     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
241     } else {
242     $opt{line} = $opt{line} - 1 || 1;
243     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
244     }
245     $type =~ tr/ /-/;
246     $type =~ s/\|/%7C/g;
247     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
248 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
249     print STDOUT qq[$msg</dd>\n];
250 wakaba 1.19
251     add_error ('syntax', \%opt => $result);
252 wakaba 1.18 };
253    
254     my $doc = $dom->create_document;
255     my $el;
256     if (defined $inner_html_element and length $inner_html_element) {
257 wakaba 1.24 $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
258     my $time1 = time;
259     my $t = Encode::decode ($input->{charset}, $input->{s});
260     $time{decode} = time - $time1;
261    
262 wakaba 1.18 $el = $doc->create_element_ns
263     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
264 wakaba 1.24 $time1 = time;
265 wakaba 1.18 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
266 wakaba 1.24 $time{parse} = time - $time1;
267 wakaba 1.18 } else {
268 wakaba 1.24 my $time1 = time;
269     Whatpm::HTML->parse_byte_string
270     ($input->{charset}, $input->{s} => $doc, $onerror);
271     $time{parse_html} = time - $time1;
272 wakaba 1.18 }
273 wakaba 1.24
274 wakaba 1.18 print STDOUT qq[</dl></div>];
275    
276     return ($doc, $el);
277     } # print_syntax_error_html_section
278    
279 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
280     my ($input, $result) = @_;
281 wakaba 1.18
282     require Message::DOM::XMLParserTemp;
283    
284     print STDOUT qq[
285     <div id="parse-errors" class="section">
286     <h2>Parse Errors</h2>
287    
288     <dl>];
289     push @nav, ['#parse-errors' => 'Parse Error'];
290    
291     my $onerror = sub {
292     my $err = shift;
293     my $line = $err->location->line_number;
294     print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
295     print STDOUT $err->location->column_number, "</dt><dd>";
296     print STDOUT htescape $err->text, "</dd>\n";
297 wakaba 1.19
298     add_error ('syntax', {type => $err->text,
299     level => [
300     $err->SEVERITY_FATAL_ERROR => 'm',
301     $err->SEVERITY_ERROR => 'm',
302     $err->SEVERITY_WARNING => 's',
303     ]->[$err->severity]} => $result);
304    
305 wakaba 1.18 return 1;
306     };
307    
308     my $time1 = time;
309     open my $fh, '<', \($input->{s});
310     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
311     ($fh => $dom, $onerror, charset => $input->{charset});
312     $time{parse_xml} = time - $time1;
313    
314     print STDOUT qq[</dl></div>];
315    
316     return ($doc, undef);
317     } # print_syntax_error_xml_section
318    
319 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
320     my ($input, $result) = @_;
321    
322     require Whatpm::CacheManifest;
323    
324     print STDOUT qq[
325     <div id="parse-errors" class="section">
326     <h2>Parse Errors</h2>
327    
328     <dl>];
329     push @nav, ['#parse-errors' => 'Parse Error'];
330    
331     my $onerror = sub {
332     my (%opt) = @_;
333     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
334     print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
335     $type =~ tr/ /-/;
336     $type =~ s/\|/%7C/g;
337     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
338 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
339     print STDOUT qq[$msg</dd>\n];
340 wakaba 1.22
341     add_error ('syntax', \%opt => $result);
342     };
343    
344     my $time1 = time;
345     my $manifest = Whatpm::CacheManifest->parse_byte_string
346     ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
347     $time{parse_manifest} = time - $time1;
348    
349     print STDOUT qq[</dl></div>];
350    
351     return $manifest;
352     } # print_syntax_error_manifest_section
353    
354 wakaba 1.9 sub print_source_string_section ($$) {
355     require Encode;
356     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
357     return unless $enc;
358    
359     my $s = \($enc->decode (${$_[0]}));
360     my $i = 1;
361     push @nav, ['#source-string' => 'Source'];
362     print STDOUT qq[<div id="source-string" class="section">
363     <h2>Document Source</h2>
364     <ol lang="">\n];
365 wakaba 1.7 if (length $$s) {
366     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
367     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
368     $i++;
369     }
370     if ($$s =~ /\G([^\x0A]+)/gc) {
371     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
372     }
373     } else {
374     print STDOUT q[<li id="line-1"></li>];
375 wakaba 1.2 }
376 wakaba 1.9 print STDOUT "</ol></div>";
377     } # print_input_string_section
378 wakaba 1.2
379     sub print_document_tree ($) {
380 wakaba 1.1 my $node = shift;
381 wakaba 1.2 my $r = '<ol class="xoxo">';
382 wakaba 1.1
383 wakaba 1.2 my @node = ($node);
384 wakaba 1.1 while (@node) {
385     my $child = shift @node;
386 wakaba 1.2 unless (ref $child) {
387     $r .= $child;
388     next;
389     }
390    
391     my $node_id = 'node-'.refaddr $child;
392     my $nt = $child->node_type;
393     if ($nt == $child->ELEMENT_NODE) {
394 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
395     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
396 wakaba 1.2 '</code>'; ## ISSUE: case
397    
398     if ($child->has_attributes) {
399     $r .= '<ul class="attributes">';
400 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
401 wakaba 1.2 @{$child->attributes}) {
402 wakaba 1.4 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $_->[2] ? $_->[2] : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
403 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
404     }
405     $r .= '</ul>';
406     }
407    
408 wakaba 1.7 if ($child->has_child_nodes) {
409 wakaba 1.2 $r .= '<ol class="children">';
410 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
411     } else {
412     $r .= '</li>';
413 wakaba 1.2 }
414     } elsif ($nt == $child->TEXT_NODE) {
415 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
416 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
417 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
418 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
419 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
420 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
421 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
422 wakaba 1.7 $r .= qq[<ul class="attributes">];
423     $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
424     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
425 wakaba 1.9 unless ($child->manakai_is_html) {
426     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
427     if (defined $child->xml_encoding) {
428     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
429     } else {
430     $r .= qq[<li>XML encoding = (null)</li>];
431     }
432     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
433     }
434 wakaba 1.7 $r .= qq[</ul>];
435 wakaba 1.2 if ($child->has_child_nodes) {
436 wakaba 1.7 $r .= '<ol class="children">';
437 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
438 wakaba 1.1 }
439 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
440 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
441     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
442     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
443     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
444 wakaba 1.2 $r .= '</ul></li>';
445     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
446 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
447 wakaba 1.1 } else {
448 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
449 wakaba 1.1 }
450     }
451 wakaba 1.2
452     $r .= '</ol>';
453     print STDOUT $r;
454     } # print_document_tree
455 wakaba 1.1
456 wakaba 1.22 sub print_structure_dump_dom_section ($$) {
457 wakaba 1.18 my ($doc, $el) = @_;
458    
459     print STDOUT qq[
460     <div id="document-tree" class="section">
461     <h2>Document Tree</h2>
462     ];
463     push @nav, ['#document-tree' => 'Tree'];
464    
465     print_document_tree ($el || $doc);
466    
467     print STDOUT qq[</div>];
468 wakaba 1.22 } # print_structure_dump_dom_section
469    
470     sub print_structure_dump_manifest_section ($) {
471     my $manifest = shift;
472    
473     print STDOUT qq[
474     <div id="dump-manifest" class="section">
475     <h2>Cache Manifest</h2>
476     ];
477     push @nav, ['#dump-manifest' => 'Caceh Manifest'];
478    
479     print STDOUT qq[<dl><dt>Explicit entries</dt>];
480     for my $uri (@{$manifest->[0]}) {
481     my $euri = htescape ($uri);
482     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
483     }
484    
485     print STDOUT qq[<dt>Fallback entries</dt><dd>
486     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
487     <th scope=row>Fallback Entry</tr><tbody>];
488     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
489     my $euri = htescape ($uri);
490     my $euri2 = htescape ($manifest->[1]->{$uri});
491     print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
492     <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
493     }
494    
495     print STDOUT qq[</table><dt>Online whitelist</dt>];
496     for my $uri (@{$manifest->[2]}) {
497     my $euri = htescape ($uri);
498     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
499     }
500 wakaba 1.18
501 wakaba 1.22 print STDOUT qq[</dl></div>];
502     } # print_structure_dump_manifest_section
503    
504     sub print_structure_error_dom_section ($$$) {
505 wakaba 1.19 my ($doc, $el, $result) = @_;
506 wakaba 1.18
507     print STDOUT qq[<div id="document-errors" class="section">
508     <h2>Document Errors</h2>
509    
510     <dl>];
511     push @nav, ['#document-errors' => 'Document Error'];
512    
513     require Whatpm::ContentChecker;
514     my $onerror = sub {
515     my %opt = @_;
516     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
517     $type =~ tr/ /-/;
518     $type =~ s/\|/%7C/g;
519     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
520 wakaba 1.22 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
521 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
522     print STDOUT $msg, "</dd>\n";
523 wakaba 1.19 add_error ('structure', \%opt => $result);
524 wakaba 1.18 };
525    
526     my $elements;
527     my $time1 = time;
528     if ($el) {
529     $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
530     } else {
531     $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
532     }
533     $time{check} = time - $time1;
534    
535     print STDOUT qq[</dl></div>];
536    
537     return $elements;
538 wakaba 1.22 } # print_structure_error_dom_section
539    
540     sub print_structure_error_manifest_section ($$$) {
541     my ($manifest, $result) = @_;
542    
543     print STDOUT qq[<div id="document-errors" class="section">
544     <h2>Document Errors</h2>
545    
546     <dl>];
547     push @nav, ['#document-errors' => 'Document Error'];
548    
549     require Whatpm::CacheManifest;
550     Whatpm::CacheManifest->check_manifest ($manifest, sub {
551     my %opt = @_;
552     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
553     $type =~ tr/ /-/;
554     $type =~ s/\|/%7C/g;
555     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
556     print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
557     qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
558     add_error ('structure', \%opt => $result);
559     });
560    
561     print STDOUT qq[</div>];
562     } # print_structure_error_manifest_section
563 wakaba 1.18
564     sub print_table_section ($) {
565     my $tables = shift;
566    
567     push @nav, ['#tables' => 'Tables'];
568     print STDOUT qq[
569     <div id="tables" class="section">
570     <h2>Tables</h2>
571    
572     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
573     <script src="../table-script.js" type="text/javascript"></script>
574     <noscript>
575     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
576     </noscript>
577     ];
578    
579     require JSON;
580    
581     my $i = 0;
582     for my $table_el (@$tables) {
583     $i++;
584     print STDOUT qq[<div class="section" id="table-$i"><h3>] .
585     get_node_link ($table_el) . q[</h3>];
586    
587     ## TODO: Make |ContentChecker| return |form_table| result
588     ## so that this script don't have to run the algorithm twice.
589     my $table = Whatpm::HTMLTable->form_table ($table_el);
590    
591     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
592     next unless $_;
593     delete $_->{element};
594     }
595    
596     for (@{$table->{row_group}}) {
597     next unless $_;
598     next unless $_->{element};
599     $_->{type} = $_->{element}->manakai_local_name;
600     delete $_->{element};
601     }
602    
603     for (@{$table->{cell}}) {
604     next unless $_;
605     for (@{$_}) {
606     next unless $_;
607     for (@$_) {
608     $_->{id} = refaddr $_->{element} if defined $_->{element};
609     delete $_->{element};
610     $_->{is_header} = $_->{is_header} ? 1 : 0;
611     }
612     }
613     }
614    
615     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
616     print STDOUT JSON::objToJson ($table);
617     print STDOUT qq[, document.getElementById ('table-$i'));</script>];
618     }
619    
620     print STDOUT qq[</div>];
621     } # print_table_section
622    
623     sub print_id_section ($) {
624     my $ids = shift;
625    
626     push @nav, ['#identifiers' => 'IDs'];
627     print STDOUT qq[
628     <div id="identifiers" class="section">
629     <h2>Identifiers</h2>
630    
631     <dl>
632     ];
633     for my $id (sort {$a cmp $b} keys %$ids) {
634     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
635     for (@{$ids->{$id}}) {
636     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
637     }
638     }
639     print STDOUT qq[</dl></div>];
640     } # print_id_section
641    
642     sub print_term_section ($) {
643     my $terms = shift;
644    
645     push @nav, ['#terms' => 'Terms'];
646     print STDOUT qq[
647     <div id="terms" class="section">
648     <h2>Terms</h2>
649    
650     <dl>
651     ];
652     for my $term (sort {$a cmp $b} keys %$terms) {
653     print STDOUT qq[<dt>@{[htescape $term]}</dt>];
654     for (@{$terms->{$term}}) {
655     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
656     }
657     }
658     print STDOUT qq[</dl></div>];
659     } # print_term_section
660    
661     sub print_class_section ($) {
662     my $classes = shift;
663    
664     push @nav, ['#classes' => 'Classes'];
665     print STDOUT qq[
666     <div id="classes" class="section">
667     <h2>Classes</h2>
668    
669     <dl>
670     ];
671     for my $class (sort {$a cmp $b} keys %$classes) {
672     print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
673     for (@{$classes->{$class}}) {
674     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
675     }
676     }
677     print STDOUT qq[</dl></div>];
678     } # print_class_section
679    
680 wakaba 1.19 sub print_result_section ($) {
681     my $result = shift;
682    
683     print STDOUT qq[
684     <div id="result-summary" class="section">
685     <h2>Result</h2>];
686    
687 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
688 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
689     checker cannot decide whether the document is conforming or
690     not, since the document contains one or more unsupported
691 wakaba 1.21 features. The document might or might not be conforming.</p>];
692 wakaba 1.19 } elsif ($result->{conforming_min}) {
693     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
694     found in this document.</p>];
695     } elsif ($result->{conforming_max}) {
696     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
697     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
698     it might be conforming.</p>];
699     } else {
700     print STDOUT qq[<p class=FAIL id=result-para>This document is
701     <strong><em>non</em>-conforming</strong>.</p>];
702     }
703    
704     print STDOUT qq[<table>
705     <colgroup><col><colgroup><col><col><col><colgroup><col>
706     <thead>
707 wakaba 1.23 <tr><th scope=col></th>
708     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
709     Errors</a></th>
710     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
711     Errors</a></th>
712     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
713     <th scope=col>Score</th></tr></thead><tbody>];
714 wakaba 1.19
715     my $must_error = 0;
716     my $should_error = 0;
717     my $warning = 0;
718     my $score_min = 0;
719     my $score_max = 0;
720     my $score_base = 20;
721 wakaba 1.21 my $score_unit = $score_base / 100;
722 wakaba 1.19 for (
723     [Transfer => 'transfer', ''],
724     [Character => 'char', ''],
725     [Syntax => 'syntax', '#parse-errors'],
726     [Structure => 'structure', '#document-errors'],
727     ) {
728     $must_error += ($result->{$_->[1]}->{must} += 0);
729     $should_error += ($result->{$_->[1]}->{should} += 0);
730     $warning += ($result->{$_->[1]}->{warning} += 0);
731 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
732     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
733 wakaba 1.19
734     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
735     my $label = $_->[0];
736     if ($result->{$_->[1]}->{must} or
737     $result->{$_->[1]}->{should} or
738     $result->{$_->[1]}->{warning} or
739     $result->{$_->[1]}->{unsupported}) {
740     $label = qq[<a href="$_->[2]">$label</a>];
741     }
742    
743     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
744     if ($uncertain) {
745 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
746 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
747 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
748 wakaba 1.19 } else {
749 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
750 wakaba 1.19 }
751     }
752    
753     $score_max += $score_base;
754    
755     print STDOUT qq[
756     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
757     </tbody>
758 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
759     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
760     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
761     <td>$warning?</td>
762     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
763 wakaba 1.19 </table>
764    
765     <p><strong>Important</strong>: This conformance checking service
766     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
767     </div>];
768     push @nav, ['#result-summary' => 'Result'];
769     } # print_result_section
770    
771 wakaba 1.24 sub print_result_unknown_type_section ($$) {
772     my ($input, $result) = @_;
773 wakaba 1.18
774 wakaba 1.24 my $euri = htescape ($input->{uri});
775 wakaba 1.18 print STDOUT qq[
776 wakaba 1.24 <div id="parse-errors" class="section">
777     <h2>Errors</h2>
778    
779     <dl>
780     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
781     <dd class=unsupported><strong><a href="../error-description#level-u">Not
782     supported</a></strong>:
783     Media type
784     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
785     is not supported.</dd>
786     </dl>
787 wakaba 1.18 </div>
788     ];
789 wakaba 1.24 push @nav, ['#parse-errors' => 'Errors'];
790     add_error (char => {level => 'unsupported'} => $result);
791     add_error (syntax => {level => 'unsupported'} => $result);
792     add_error (structure => {level => 'unsupported'} => $result);
793 wakaba 1.18 } # print_result_unknown_type_section
794    
795     sub print_result_input_error_section ($) {
796     my $input = shift;
797     print STDOUT qq[<div class="section" id="result-summary">
798     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
799     </div>];
800     push @nav, ['#result-summary' => 'Result'];
801     } # print_Result_input_error_section
802    
803 wakaba 1.22 sub get_error_label ($) {
804     my $err = shift;
805    
806     my $r = '';
807    
808     if (defined $err->{line}) {
809     if ($err->{column} > 0) {
810     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
811     } else {
812     $err->{line} = $err->{line} - 1 || 1;
813     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
814     }
815     }
816    
817     if (defined $err->{node}) {
818     $r .= ' ' if length $r;
819 wakaba 1.23 $r = get_node_link ($err->{node});
820 wakaba 1.22 }
821    
822     if (defined $err->{index}) {
823     $r .= ' ' if length $r;
824     $r .= 'Index ' . (0+$err->{index});
825     }
826    
827     if (defined $err->{value}) {
828     $r .= ' ' if length $r;
829     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
830     }
831    
832     return $r;
833     } # get_error_label
834    
835 wakaba 1.23 sub get_error_level_label ($) {
836     my $err = shift;
837    
838     my $r = '';
839    
840     if (not defined $err->{level} or $err->{level} eq 'm') {
841     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
842     error</a></strong>: ];
843     } elsif ($err->{level} eq 's') {
844     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
845     error</a></strong>: ];
846     } elsif ($err->{level} eq 'w') {
847     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
848     ];
849     } elsif ($err->{level} eq 'unsupported') {
850     $r = qq[<strong><a href="../error-description#level-u">Not
851     supported</a></strong>: ];
852     } else {
853     my $elevel = htescape ($err->{level});
854     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
855     ];
856     }
857    
858     return $r;
859     } # get_error_level_label
860    
861 wakaba 1.1 sub get_node_path ($) {
862     my $node = shift;
863     my @r;
864     while (defined $node) {
865     my $rs;
866     if ($node->node_type == 1) {
867     $rs = $node->manakai_local_name;
868     $node = $node->parent_node;
869     } elsif ($node->node_type == 2) {
870     $rs = '@' . $node->manakai_local_name;
871     $node = $node->owner_element;
872     } elsif ($node->node_type == 3) {
873     $rs = '"' . $node->data . '"';
874     $node = $node->parent_node;
875     } elsif ($node->node_type == 9) {
876 wakaba 1.9 @r = ('') unless @r;
877 wakaba 1.1 $rs = '';
878     $node = $node->parent_node;
879     } else {
880     $rs = '#' . $node->node_type;
881     $node = $node->parent_node;
882     }
883     unshift @r, $rs;
884     }
885     return join '/', @r;
886     } # get_node_path
887    
888 wakaba 1.6 sub get_node_link ($) {
889     return qq[<a href="#node-@{[refaddr $_[0]]}">] .
890     htescape (get_node_path ($_[0])) . qq[</a>];
891     } # get_node_link
892    
893 wakaba 1.7 {
894     my $Msg = {};
895    
896     sub load_text_catalog ($) {
897     my $lang = shift; # MUST be a canonical lang name
898     open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
899     while (<$file>) {
900     if (s/^([^;]+);([^;]*);//) {
901     my ($type, $cls, $msg) = ($1, $2, $_);
902     $msg =~ tr/\x0D\x0A//d;
903     $Msg->{$type} = [$cls, $msg];
904     }
905     }
906     } # load_text_catalog
907    
908     sub get_text ($) {
909 wakaba 1.15 my ($type, $level, $node) = @_;
910 wakaba 1.7 $type = $level . ':' . $type if defined $level;
911     my @arg;
912     {
913     if (defined $Msg->{$type}) {
914     my $msg = $Msg->{$type}->[1];
915 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
916     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
917     }ge;
918 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
919     UNIVERSAL::can ($node, 'get_attribute_ns')
920     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
921     }ge;
922     $msg =~ s{<var>{\@}</var>}{
923     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
924     }ge;
925 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
926     UNIVERSAL::can ($node, 'manakai_local_name')
927     ? htescape ($node->manakai_local_name) : ''
928     }ge;
929     $msg =~ s{<var>{element-local-name}</var>}{
930     (UNIVERSAL::can ($node, 'owner_element') and
931     $node->owner_element)
932     ? htescape ($node->owner_element->manakai_local_name)
933     : ''
934     }ge;
935 wakaba 1.11 return ($type, $Msg->{$type}->[0], $msg);
936 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
937     unshift @arg, $1;
938     redo;
939     }
940     }
941 wakaba 1.11 return ($type, '', htescape ($_[0]));
942 wakaba 1.7 } # get_text
943    
944     }
945    
946 wakaba 1.9 sub get_input_document ($$) {
947     my ($http, $dom) = @_;
948    
949 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
950 wakaba 1.9 my $r = {};
951     if (defined $request_uri and length $request_uri) {
952     my $uri = $dom->create_uri_reference ($request_uri);
953     unless ({
954     http => 1,
955     }->{lc $uri->uri_scheme}) {
956     return {uri => $request_uri, request_uri => $request_uri,
957     error_status_text => 'URI scheme not allowed'};
958     }
959    
960     require Message::Util::HostPermit;
961     my $host_permit = new Message::Util::HostPermit;
962     $host_permit->add_rule (<<EOH);
963     Allow host=suika port=80
964     Deny host=suika
965     Allow host=suika.fam.cx port=80
966     Deny host=suika.fam.cx
967     Deny host=localhost
968     Deny host=*.localdomain
969     Deny ipv4=0.0.0.0/8
970     Deny ipv4=10.0.0.0/8
971     Deny ipv4=127.0.0.0/8
972     Deny ipv4=169.254.0.0/16
973     Deny ipv4=172.0.0.0/11
974     Deny ipv4=192.0.2.0/24
975     Deny ipv4=192.88.99.0/24
976     Deny ipv4=192.168.0.0/16
977     Deny ipv4=198.18.0.0/15
978     Deny ipv4=224.0.0.0/4
979     Deny ipv4=255.255.255.255/32
980     Deny ipv6=0::0/0
981     Allow host=*
982     EOH
983     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
984     return {uri => $request_uri, request_uri => $request_uri,
985     error_status_text => 'Connection to the host is forbidden'};
986     }
987    
988     require LWP::UserAgent;
989     my $ua = WDCC::LWPUA->new;
990     $ua->{wdcc_dom} = $dom;
991     $ua->{wdcc_host_permit} = $host_permit;
992     $ua->agent ('Mozilla'); ## TODO: for now.
993     $ua->parse_head (0);
994     $ua->protocols_allowed ([qw/http/]);
995     $ua->max_size (1000_000);
996     my $req = HTTP::Request->new (GET => $request_uri);
997     my $res = $ua->request ($req);
998 wakaba 1.16 ## TODO: 401 sets |is_success| true.
999     if ($res->is_success or $http->get_parameter ('error-page')) {
1000 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1001     $r->{uri} = $res->request->uri;
1002     $r->{request_uri} = $request_uri;
1003    
1004     ## TODO: More strict parsing...
1005     my $ct = $res->header ('Content-Type');
1006 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1007 wakaba 1.9 $r->{charset} = lc $1;
1008     $r->{charset} =~ tr/\\//d;
1009     }
1010    
1011 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1012 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1013     $r->{charset_overridden}
1014     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1015     $r->{charset} = $input_charset;
1016 wakaba 1.25 }
1017    
1018     ## TODO: Support for HTTP Content-Encoding
1019 wakaba 1.9
1020     $r->{s} = ''.$res->content;
1021 wakaba 1.25
1022     require Whatpm::ContentType;
1023     ($r->{official_type}, $r->{media_type})
1024     = Whatpm::ContentType->get_sniffed_type
1025     (get_file_head => sub {
1026     return substr $r->{s}, 0, shift;
1027     },
1028     http_content_type_byte => $ct,
1029     has_http_content_encoding =>
1030     defined $res->header ('Content-Encoding'),
1031     supported_image_types => {});
1032 wakaba 1.9 } else {
1033     $r->{uri} = $res->request->uri;
1034     $r->{request_uri} = $request_uri;
1035     $r->{error_status_text} = $res->status_line;
1036     }
1037    
1038     $r->{header_field} = [];
1039     $res->scan (sub {
1040     push @{$r->{header_field}}, [$_[0], $_[1]];
1041     });
1042     $r->{header_status_code} = $res->code;
1043     $r->{header_status_text} = $res->message;
1044     } else {
1045 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1046 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1047     $r->{request_uri} = q<thismessage:/>;
1048     $r->{base_uri} = q<thismessage:/>;
1049 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1050 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1051     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1052     $r->{header_field} = [];
1053 wakaba 1.25
1054     require Whatpm::ContentType;
1055     ($r->{official_type}, $r->{media_type})
1056     = Whatpm::ContentType->get_sniffed_type
1057     (get_file_head => sub {
1058     return substr $r->{s}, 0, shift;
1059     },
1060     http_content_type_byte => undef,
1061     has_http_content_encoding => 0,
1062     supported_image_types => {});
1063 wakaba 1.9 }
1064    
1065 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1066 wakaba 1.9 if (defined $input_format and length $input_format) {
1067     $r->{media_type_overridden}
1068     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1069     $r->{media_type} = $input_format;
1070     }
1071     if (defined $r->{s} and not defined $r->{media_type}) {
1072     $r->{media_type} = 'text/html';
1073     $r->{media_type_overridden} = 1;
1074     }
1075    
1076     if ($r->{media_type} eq 'text/xml') {
1077     unless (defined $r->{charset}) {
1078     $r->{charset} = 'us-ascii';
1079     } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1080     $r->{charset_overridden} = 0;
1081     }
1082     }
1083    
1084     if (length $r->{s} > 1000_000) {
1085     $r->{error_status_text} = 'Entity-body too large';
1086     delete $r->{s};
1087     return $r;
1088     }
1089    
1090     return $r;
1091     } # get_input_document
1092    
1093     package WDCC::LWPUA;
1094     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1095    
1096     sub redirect_ok {
1097     my $ua = shift;
1098     unless ($ua->SUPER::redirect_ok (@_)) {
1099     return 0;
1100     }
1101    
1102     my $uris = $_[1]->header ('Location');
1103     return 0 unless $uris;
1104     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1105     unless ({
1106     http => 1,
1107     }->{lc $uri->uri_scheme}) {
1108     return 0;
1109     }
1110     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1111     return 0;
1112     }
1113     return 1;
1114     } # redirect_ok
1115    
1116 wakaba 1.1 =head1 AUTHOR
1117    
1118     Wakaba <w@suika.fam.cx>.
1119    
1120     =head1 LICENSE
1121    
1122     Copyright 2007 Wakaba <w@suika.fam.cx>
1123    
1124     This library is free software; you can redistribute it
1125     and/or modify it under the same terms as Perl itself.
1126    
1127     =cut
1128    
1129 wakaba 1.25 ## $Date: 2007/11/11 06:57:16 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24