/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.28 - (hide annotations) (download)
Fri Nov 23 06:36:19 2007 UTC (16 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.27: +2 -1 lines
++ ChangeLog	23 Nov 2007 06:36:14 -0000
2007-11-23  Wakaba  <wakaba@suika.fam.cx>

	* error-description-source.xml: New error descriptions.

	* cc.cgi: |Accept-Encoding: *; q=0| is a request for server to send
	a 406 page!  |identity| encoding is added.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.16 use Message::CGI::HTTP;
24     my $http = Message::CGI::HTTP->new;
25 wakaba 1.1
26 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28     exit;
29     }
30    
31 wakaba 1.12 binmode STDOUT, ':utf8';
32 wakaba 1.14 $| = 1;
33 wakaba 1.12
34 wakaba 1.9 require Message::DOM::DOMImplementation;
35     my $dom = Message::DOM::DOMImplementation->new;
36    
37 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
38    
39 wakaba 1.3 my @nav;
40 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
41    
42     <!DOCTYPE html>
43     <html lang="en">
44     <head>
45     <title>Web Document Conformance Checker (BETA)</title>
46 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 wakaba 1.2 </head>
48     <body>
49 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50     (<em>beta</em>)</h1>
51 wakaba 1.14 ];
52 wakaba 1.2
53 wakaba 1.14 $| = 0;
54     my $input = get_input_document ($http, $dom);
55 wakaba 1.16 my $char_length = 0;
56     my %time;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89     ];
90    
91 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
92 wakaba 1.19 print_http_header_section ($input, $result);
93 wakaba 1.9
94 wakaba 1.1 my $doc;
95     my $el;
96 wakaba 1.22 my $manifest;
97 wakaba 1.1
98 wakaba 1.9 if ($input->{media_type} eq 'text/html') {
99 wakaba 1.19 ($doc, $el) = print_syntax_error_html_section ($input, $result);
100 wakaba 1.25 print_source_string_section
101     (\($input->{s}), $input->{charset} || $doc->input_encoding);
102 wakaba 1.9 } elsif ({
103     'text/xml' => 1,
104 wakaba 1.16 'application/atom+xml' => 1,
105     'application/rss+xml' => 1,
106     'application/svg+xml' => 1,
107 wakaba 1.9 'application/xhtml+xml' => 1,
108     'application/xml' => 1,
109     }->{$input->{media_type}}) {
110 wakaba 1.19 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
111 wakaba 1.9 print_source_string_section (\($input->{s}), $doc->input_encoding);
112 wakaba 1.22 } elsif ($input->{media_type} eq 'text/cache-manifest') {
113     ## TODO: MUST be text/cache-manifest
114     $manifest = print_syntax_error_manifest_section ($input, $result);
115     print_source_string_section (\($input->{s}), 'utf-8');
116 wakaba 1.2 } else {
117 wakaba 1.9 ## TODO: Change HTTP status code??
118 wakaba 1.24 print_result_unknown_type_section ($input, $result);
119 wakaba 1.2 }
120    
121     if (defined $doc or defined $el) {
122 wakaba 1.22 print_structure_dump_dom_section ($doc, $el);
123     my $elements = print_structure_error_dom_section ($doc, $el, $result);
124 wakaba 1.18 print_table_section ($elements->{table}) if @{$elements->{table}};
125     print_id_section ($elements->{id}) if keys %{$elements->{id}};
126     print_term_section ($elements->{term}) if keys %{$elements->{term}};
127     print_class_section ($elements->{class}) if keys %{$elements->{class}};
128 wakaba 1.22 } elsif (defined $manifest) {
129     print_structure_dump_manifest_section ($manifest);
130     print_structure_error_manifest_section ($manifest, $result);
131 wakaba 1.1 }
132    
133 wakaba 1.19 print_result_section ($result);
134 wakaba 1.9 } else {
135 wakaba 1.18 print STDOUT qq[</dl></div>];
136     print_result_input_error_section ($input);
137 wakaba 1.9 }
138 wakaba 1.3
139 wakaba 1.2 print STDOUT qq[
140 wakaba 1.3 <ul class="navigation" id="nav-items">
141     ];
142     for (@nav) {
143     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
144     }
145     print STDOUT qq[
146     </ul>
147 wakaba 1.2 </body>
148     </html>
149     ];
150 wakaba 1.1
151 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
152     check check_manifest/) {
153 wakaba 1.16 next unless defined $time{$_};
154     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
155     print $file $char_length, "\t", $time{$_}, "\n";
156     }
157    
158 wakaba 1.1 exit;
159    
160 wakaba 1.19 sub add_error ($$$) {
161     my ($layer, $err, $result) = @_;
162     if (defined $err->{level}) {
163     if ($err->{level} eq 's') {
164     $result->{$layer}->{should}++;
165     $result->{$layer}->{score_min} -= 2;
166     $result->{conforming_min} = 0;
167     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
168     $result->{$layer}->{warning}++;
169     } elsif ($err->{level} eq 'unsupported') {
170     $result->{$layer}->{unsupported}++;
171     $result->{unsupported} = 1;
172     } else {
173     $result->{$layer}->{must}++;
174     $result->{$layer}->{score_max} -= 2;
175     $result->{$layer}->{score_min} -= 2;
176     $result->{conforming_min} = 0;
177     $result->{conforming_max} = 0;
178     }
179     } else {
180     $result->{$layer}->{must}++;
181     $result->{$layer}->{score_max} -= 2;
182     $result->{$layer}->{score_min} -= 2;
183     $result->{conforming_min} = 0;
184     $result->{conforming_max} = 0;
185     }
186     } # add_error
187    
188     sub print_http_header_section ($$) {
189     my ($input, $result) = @_;
190 wakaba 1.9 return unless defined $input->{header_status_code} or
191     defined $input->{header_status_text} or
192     @{$input->{header_field}};
193    
194     push @nav, ['#source-header' => 'HTTP Header'];
195     print STDOUT qq[<div id="source-header" class="section">
196     <h2>HTTP Header</h2>
197    
198     <p><strong>Note</strong>: Due to the limitation of the
199     network library in use, the content of this section might
200     not be the real header.</p>
201    
202     <table><tbody>
203     ];
204    
205     if (defined $input->{header_status_code}) {
206     print STDOUT qq[<tr><th scope="row">Status code</th>];
207     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
208     }
209     if (defined $input->{header_status_text}) {
210     print STDOUT qq[<tr><th scope="row">Status text</th>];
211     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
212     }
213    
214     for (@{$input->{header_field}}) {
215     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
216     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
217     }
218    
219     print STDOUT qq[</tbody></table></div>];
220     } # print_http_header_section
221    
222 wakaba 1.19 sub print_syntax_error_html_section ($$) {
223     my ($input, $result) = @_;
224 wakaba 1.18
225     require Encode;
226     require Whatpm::HTML;
227    
228     print STDOUT qq[
229     <div id="parse-errors" class="section">
230     <h2>Parse Errors</h2>
231    
232     <dl>];
233     push @nav, ['#parse-errors' => 'Parse Error'];
234    
235     my $onerror = sub {
236     my (%opt) = @_;
237     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
238     if ($opt{column} > 0) {
239     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
240     } else {
241     $opt{line} = $opt{line} - 1 || 1;
242     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
243     }
244     $type =~ tr/ /-/;
245     $type =~ s/\|/%7C/g;
246     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
247 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
248     print STDOUT qq[$msg</dd>\n];
249 wakaba 1.19
250     add_error ('syntax', \%opt => $result);
251 wakaba 1.18 };
252    
253     my $doc = $dom->create_document;
254     my $el;
255 wakaba 1.26 my $inner_html_element = $http->get_parameter ('e');
256 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
257 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
258 wakaba 1.24 my $time1 = time;
259     my $t = Encode::decode ($input->{charset}, $input->{s});
260     $time{decode} = time - $time1;
261    
262 wakaba 1.18 $el = $doc->create_element_ns
263     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
264 wakaba 1.24 $time1 = time;
265 wakaba 1.18 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
266 wakaba 1.24 $time{parse} = time - $time1;
267 wakaba 1.18 } else {
268 wakaba 1.24 my $time1 = time;
269     Whatpm::HTML->parse_byte_string
270     ($input->{charset}, $input->{s} => $doc, $onerror);
271     $time{parse_html} = time - $time1;
272 wakaba 1.18 }
273 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
274     if defined $input->{official_charset};
275 wakaba 1.24
276 wakaba 1.18 print STDOUT qq[</dl></div>];
277    
278     return ($doc, $el);
279     } # print_syntax_error_html_section
280    
281 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
282     my ($input, $result) = @_;
283 wakaba 1.18
284     require Message::DOM::XMLParserTemp;
285    
286     print STDOUT qq[
287     <div id="parse-errors" class="section">
288     <h2>Parse Errors</h2>
289    
290     <dl>];
291     push @nav, ['#parse-errors' => 'Parse Error'];
292    
293     my $onerror = sub {
294     my $err = shift;
295     my $line = $err->location->line_number;
296     print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
297     print STDOUT $err->location->column_number, "</dt><dd>";
298     print STDOUT htescape $err->text, "</dd>\n";
299 wakaba 1.19
300     add_error ('syntax', {type => $err->text,
301     level => [
302     $err->SEVERITY_FATAL_ERROR => 'm',
303     $err->SEVERITY_ERROR => 'm',
304     $err->SEVERITY_WARNING => 's',
305     ]->[$err->severity]} => $result);
306    
307 wakaba 1.18 return 1;
308     };
309    
310     my $time1 = time;
311     open my $fh, '<', \($input->{s});
312     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
313     ($fh => $dom, $onerror, charset => $input->{charset});
314     $time{parse_xml} = time - $time1;
315 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
316     if defined $input->{official_charset};
317 wakaba 1.18
318     print STDOUT qq[</dl></div>];
319    
320     return ($doc, undef);
321     } # print_syntax_error_xml_section
322    
323 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
324     my ($input, $result) = @_;
325    
326     require Whatpm::CacheManifest;
327    
328     print STDOUT qq[
329     <div id="parse-errors" class="section">
330     <h2>Parse Errors</h2>
331    
332     <dl>];
333     push @nav, ['#parse-errors' => 'Parse Error'];
334    
335     my $onerror = sub {
336     my (%opt) = @_;
337     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
338     print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
339     $type =~ tr/ /-/;
340     $type =~ s/\|/%7C/g;
341     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
342 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
343     print STDOUT qq[$msg</dd>\n];
344 wakaba 1.22
345     add_error ('syntax', \%opt => $result);
346     };
347    
348     my $time1 = time;
349     my $manifest = Whatpm::CacheManifest->parse_byte_string
350     ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
351     $time{parse_manifest} = time - $time1;
352    
353     print STDOUT qq[</dl></div>];
354    
355     return $manifest;
356     } # print_syntax_error_manifest_section
357    
358 wakaba 1.9 sub print_source_string_section ($$) {
359     require Encode;
360     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
361     return unless $enc;
362    
363     my $s = \($enc->decode (${$_[0]}));
364     my $i = 1;
365     push @nav, ['#source-string' => 'Source'];
366     print STDOUT qq[<div id="source-string" class="section">
367     <h2>Document Source</h2>
368     <ol lang="">\n];
369 wakaba 1.7 if (length $$s) {
370     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
371     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
372     $i++;
373     }
374     if ($$s =~ /\G([^\x0A]+)/gc) {
375     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
376     }
377     } else {
378     print STDOUT q[<li id="line-1"></li>];
379 wakaba 1.2 }
380 wakaba 1.9 print STDOUT "</ol></div>";
381     } # print_input_string_section
382 wakaba 1.2
383     sub print_document_tree ($) {
384 wakaba 1.1 my $node = shift;
385 wakaba 1.2 my $r = '<ol class="xoxo">';
386 wakaba 1.1
387 wakaba 1.2 my @node = ($node);
388 wakaba 1.1 while (@node) {
389     my $child = shift @node;
390 wakaba 1.2 unless (ref $child) {
391     $r .= $child;
392     next;
393     }
394    
395     my $node_id = 'node-'.refaddr $child;
396     my $nt = $child->node_type;
397     if ($nt == $child->ELEMENT_NODE) {
398 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
399     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
400 wakaba 1.2 '</code>'; ## ISSUE: case
401    
402     if ($child->has_attributes) {
403     $r .= '<ul class="attributes">';
404 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
405 wakaba 1.2 @{$child->attributes}) {
406 wakaba 1.27 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
407 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
408     }
409     $r .= '</ul>';
410     }
411    
412 wakaba 1.7 if ($child->has_child_nodes) {
413 wakaba 1.2 $r .= '<ol class="children">';
414 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
415     } else {
416     $r .= '</li>';
417 wakaba 1.2 }
418     } elsif ($nt == $child->TEXT_NODE) {
419 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
420 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
421 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
422 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
423 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
424 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
425 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
426 wakaba 1.7 $r .= qq[<ul class="attributes">];
427 wakaba 1.27 my $cp = $child->manakai_charset;
428     if (defined $cp) {
429     $r .= qq[<li><code>charset</code> parameter = <code>];
430     $r .= htescape ($cp) . qq[</code></li>];
431     }
432     $r .= qq[<li><code>inputEncoding</code> = ];
433     my $ie = $child->input_encoding;
434     if (defined $ie) {
435     $r .= qq[<code>@{[htescape ($ie)]}</code>];
436     if ($child->manakai_has_bom) {
437     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
438     }
439     } else {
440     $r .= qq[(<code>null</code>)];
441     }
442 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
443     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
444 wakaba 1.9 unless ($child->manakai_is_html) {
445     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
446     if (defined $child->xml_encoding) {
447     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
448     } else {
449     $r .= qq[<li>XML encoding = (null)</li>];
450     }
451     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
452     }
453 wakaba 1.7 $r .= qq[</ul>];
454 wakaba 1.2 if ($child->has_child_nodes) {
455 wakaba 1.7 $r .= '<ol class="children">';
456 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
457 wakaba 1.1 }
458 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
459 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
460     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
461     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
462     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
463 wakaba 1.2 $r .= '</ul></li>';
464     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
465 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
466 wakaba 1.1 } else {
467 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
468 wakaba 1.1 }
469     }
470 wakaba 1.2
471     $r .= '</ol>';
472     print STDOUT $r;
473     } # print_document_tree
474 wakaba 1.1
475 wakaba 1.22 sub print_structure_dump_dom_section ($$) {
476 wakaba 1.18 my ($doc, $el) = @_;
477    
478     print STDOUT qq[
479     <div id="document-tree" class="section">
480     <h2>Document Tree</h2>
481     ];
482     push @nav, ['#document-tree' => 'Tree'];
483    
484     print_document_tree ($el || $doc);
485    
486     print STDOUT qq[</div>];
487 wakaba 1.22 } # print_structure_dump_dom_section
488    
489     sub print_structure_dump_manifest_section ($) {
490     my $manifest = shift;
491    
492     print STDOUT qq[
493     <div id="dump-manifest" class="section">
494     <h2>Cache Manifest</h2>
495     ];
496     push @nav, ['#dump-manifest' => 'Caceh Manifest'];
497    
498     print STDOUT qq[<dl><dt>Explicit entries</dt>];
499     for my $uri (@{$manifest->[0]}) {
500     my $euri = htescape ($uri);
501     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
502     }
503    
504     print STDOUT qq[<dt>Fallback entries</dt><dd>
505     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
506     <th scope=row>Fallback Entry</tr><tbody>];
507     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
508     my $euri = htescape ($uri);
509     my $euri2 = htescape ($manifest->[1]->{$uri});
510     print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
511     <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
512     }
513    
514     print STDOUT qq[</table><dt>Online whitelist</dt>];
515     for my $uri (@{$manifest->[2]}) {
516     my $euri = htescape ($uri);
517     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
518     }
519 wakaba 1.18
520 wakaba 1.22 print STDOUT qq[</dl></div>];
521     } # print_structure_dump_manifest_section
522    
523     sub print_structure_error_dom_section ($$$) {
524 wakaba 1.19 my ($doc, $el, $result) = @_;
525 wakaba 1.18
526     print STDOUT qq[<div id="document-errors" class="section">
527     <h2>Document Errors</h2>
528    
529     <dl>];
530     push @nav, ['#document-errors' => 'Document Error'];
531    
532     require Whatpm::ContentChecker;
533     my $onerror = sub {
534     my %opt = @_;
535     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
536     $type =~ tr/ /-/;
537     $type =~ s/\|/%7C/g;
538     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
539 wakaba 1.22 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
540 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
541     print STDOUT $msg, "</dd>\n";
542 wakaba 1.19 add_error ('structure', \%opt => $result);
543 wakaba 1.18 };
544    
545     my $elements;
546     my $time1 = time;
547     if ($el) {
548     $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
549     } else {
550     $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
551     }
552     $time{check} = time - $time1;
553    
554     print STDOUT qq[</dl></div>];
555    
556     return $elements;
557 wakaba 1.22 } # print_structure_error_dom_section
558    
559     sub print_structure_error_manifest_section ($$$) {
560     my ($manifest, $result) = @_;
561    
562     print STDOUT qq[<div id="document-errors" class="section">
563     <h2>Document Errors</h2>
564    
565     <dl>];
566     push @nav, ['#document-errors' => 'Document Error'];
567    
568     require Whatpm::CacheManifest;
569     Whatpm::CacheManifest->check_manifest ($manifest, sub {
570     my %opt = @_;
571     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
572     $type =~ tr/ /-/;
573     $type =~ s/\|/%7C/g;
574     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
575     print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
576     qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
577     add_error ('structure', \%opt => $result);
578     });
579    
580     print STDOUT qq[</div>];
581     } # print_structure_error_manifest_section
582 wakaba 1.18
583     sub print_table_section ($) {
584     my $tables = shift;
585    
586     push @nav, ['#tables' => 'Tables'];
587     print STDOUT qq[
588     <div id="tables" class="section">
589     <h2>Tables</h2>
590    
591     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
592     <script src="../table-script.js" type="text/javascript"></script>
593     <noscript>
594     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
595     </noscript>
596     ];
597    
598     require JSON;
599    
600     my $i = 0;
601     for my $table_el (@$tables) {
602     $i++;
603     print STDOUT qq[<div class="section" id="table-$i"><h3>] .
604     get_node_link ($table_el) . q[</h3>];
605    
606     ## TODO: Make |ContentChecker| return |form_table| result
607     ## so that this script don't have to run the algorithm twice.
608     my $table = Whatpm::HTMLTable->form_table ($table_el);
609    
610     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
611     next unless $_;
612     delete $_->{element};
613     }
614    
615     for (@{$table->{row_group}}) {
616     next unless $_;
617     next unless $_->{element};
618     $_->{type} = $_->{element}->manakai_local_name;
619     delete $_->{element};
620     }
621    
622     for (@{$table->{cell}}) {
623     next unless $_;
624     for (@{$_}) {
625     next unless $_;
626     for (@$_) {
627     $_->{id} = refaddr $_->{element} if defined $_->{element};
628     delete $_->{element};
629     $_->{is_header} = $_->{is_header} ? 1 : 0;
630     }
631     }
632     }
633    
634     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
635     print STDOUT JSON::objToJson ($table);
636     print STDOUT qq[, document.getElementById ('table-$i'));</script>];
637     }
638    
639     print STDOUT qq[</div>];
640     } # print_table_section
641    
642     sub print_id_section ($) {
643     my $ids = shift;
644    
645     push @nav, ['#identifiers' => 'IDs'];
646     print STDOUT qq[
647     <div id="identifiers" class="section">
648     <h2>Identifiers</h2>
649    
650     <dl>
651     ];
652     for my $id (sort {$a cmp $b} keys %$ids) {
653     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
654     for (@{$ids->{$id}}) {
655     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
656     }
657     }
658     print STDOUT qq[</dl></div>];
659     } # print_id_section
660    
661     sub print_term_section ($) {
662     my $terms = shift;
663    
664     push @nav, ['#terms' => 'Terms'];
665     print STDOUT qq[
666     <div id="terms" class="section">
667     <h2>Terms</h2>
668    
669     <dl>
670     ];
671     for my $term (sort {$a cmp $b} keys %$terms) {
672     print STDOUT qq[<dt>@{[htescape $term]}</dt>];
673     for (@{$terms->{$term}}) {
674     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
675     }
676     }
677     print STDOUT qq[</dl></div>];
678     } # print_term_section
679    
680     sub print_class_section ($) {
681     my $classes = shift;
682    
683     push @nav, ['#classes' => 'Classes'];
684     print STDOUT qq[
685     <div id="classes" class="section">
686     <h2>Classes</h2>
687    
688     <dl>
689     ];
690     for my $class (sort {$a cmp $b} keys %$classes) {
691     print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
692     for (@{$classes->{$class}}) {
693     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
694     }
695     }
696     print STDOUT qq[</dl></div>];
697     } # print_class_section
698    
699 wakaba 1.19 sub print_result_section ($) {
700     my $result = shift;
701    
702     print STDOUT qq[
703     <div id="result-summary" class="section">
704     <h2>Result</h2>];
705    
706 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
707 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
708     checker cannot decide whether the document is conforming or
709     not, since the document contains one or more unsupported
710 wakaba 1.21 features. The document might or might not be conforming.</p>];
711 wakaba 1.19 } elsif ($result->{conforming_min}) {
712     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
713     found in this document.</p>];
714     } elsif ($result->{conforming_max}) {
715     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
716     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
717     it might be conforming.</p>];
718     } else {
719     print STDOUT qq[<p class=FAIL id=result-para>This document is
720     <strong><em>non</em>-conforming</strong>.</p>];
721     }
722    
723     print STDOUT qq[<table>
724     <colgroup><col><colgroup><col><col><col><colgroup><col>
725     <thead>
726 wakaba 1.23 <tr><th scope=col></th>
727     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
728     Errors</a></th>
729     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
730     Errors</a></th>
731     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
732     <th scope=col>Score</th></tr></thead><tbody>];
733 wakaba 1.19
734     my $must_error = 0;
735     my $should_error = 0;
736     my $warning = 0;
737     my $score_min = 0;
738     my $score_max = 0;
739     my $score_base = 20;
740 wakaba 1.21 my $score_unit = $score_base / 100;
741 wakaba 1.19 for (
742     [Transfer => 'transfer', ''],
743     [Character => 'char', ''],
744     [Syntax => 'syntax', '#parse-errors'],
745     [Structure => 'structure', '#document-errors'],
746     ) {
747     $must_error += ($result->{$_->[1]}->{must} += 0);
748     $should_error += ($result->{$_->[1]}->{should} += 0);
749     $warning += ($result->{$_->[1]}->{warning} += 0);
750 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
751     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
752 wakaba 1.19
753     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
754     my $label = $_->[0];
755     if ($result->{$_->[1]}->{must} or
756     $result->{$_->[1]}->{should} or
757     $result->{$_->[1]}->{warning} or
758     $result->{$_->[1]}->{unsupported}) {
759     $label = qq[<a href="$_->[2]">$label</a>];
760     }
761    
762     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
763     if ($uncertain) {
764 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
765 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
766 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
767 wakaba 1.19 } else {
768 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
769 wakaba 1.19 }
770     }
771    
772     $score_max += $score_base;
773    
774     print STDOUT qq[
775     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
776     </tbody>
777 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
778     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
779     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
780     <td>$warning?</td>
781     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
782 wakaba 1.19 </table>
783    
784     <p><strong>Important</strong>: This conformance checking service
785     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
786     </div>];
787     push @nav, ['#result-summary' => 'Result'];
788     } # print_result_section
789    
790 wakaba 1.24 sub print_result_unknown_type_section ($$) {
791     my ($input, $result) = @_;
792 wakaba 1.18
793 wakaba 1.24 my $euri = htescape ($input->{uri});
794 wakaba 1.18 print STDOUT qq[
795 wakaba 1.24 <div id="parse-errors" class="section">
796     <h2>Errors</h2>
797    
798     <dl>
799     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
800     <dd class=unsupported><strong><a href="../error-description#level-u">Not
801     supported</a></strong>:
802     Media type
803     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
804     is not supported.</dd>
805     </dl>
806 wakaba 1.18 </div>
807     ];
808 wakaba 1.24 push @nav, ['#parse-errors' => 'Errors'];
809     add_error (char => {level => 'unsupported'} => $result);
810     add_error (syntax => {level => 'unsupported'} => $result);
811     add_error (structure => {level => 'unsupported'} => $result);
812 wakaba 1.18 } # print_result_unknown_type_section
813    
814     sub print_result_input_error_section ($) {
815     my $input = shift;
816     print STDOUT qq[<div class="section" id="result-summary">
817     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
818     </div>];
819     push @nav, ['#result-summary' => 'Result'];
820     } # print_Result_input_error_section
821    
822 wakaba 1.22 sub get_error_label ($) {
823     my $err = shift;
824    
825     my $r = '';
826    
827     if (defined $err->{line}) {
828     if ($err->{column} > 0) {
829     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
830     } else {
831     $err->{line} = $err->{line} - 1 || 1;
832     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
833     }
834     }
835    
836     if (defined $err->{node}) {
837     $r .= ' ' if length $r;
838 wakaba 1.23 $r = get_node_link ($err->{node});
839 wakaba 1.22 }
840    
841     if (defined $err->{index}) {
842     $r .= ' ' if length $r;
843     $r .= 'Index ' . (0+$err->{index});
844     }
845    
846     if (defined $err->{value}) {
847     $r .= ' ' if length $r;
848     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
849     }
850    
851     return $r;
852     } # get_error_label
853    
854 wakaba 1.23 sub get_error_level_label ($) {
855     my $err = shift;
856    
857     my $r = '';
858    
859     if (not defined $err->{level} or $err->{level} eq 'm') {
860     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
861     error</a></strong>: ];
862     } elsif ($err->{level} eq 's') {
863     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
864     error</a></strong>: ];
865     } elsif ($err->{level} eq 'w') {
866     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
867     ];
868     } elsif ($err->{level} eq 'unsupported') {
869     $r = qq[<strong><a href="../error-description#level-u">Not
870     supported</a></strong>: ];
871     } else {
872     my $elevel = htescape ($err->{level});
873     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
874     ];
875     }
876    
877     return $r;
878     } # get_error_level_label
879    
880 wakaba 1.1 sub get_node_path ($) {
881     my $node = shift;
882     my @r;
883     while (defined $node) {
884     my $rs;
885     if ($node->node_type == 1) {
886     $rs = $node->manakai_local_name;
887     $node = $node->parent_node;
888     } elsif ($node->node_type == 2) {
889     $rs = '@' . $node->manakai_local_name;
890     $node = $node->owner_element;
891     } elsif ($node->node_type == 3) {
892     $rs = '"' . $node->data . '"';
893     $node = $node->parent_node;
894     } elsif ($node->node_type == 9) {
895 wakaba 1.9 @r = ('') unless @r;
896 wakaba 1.1 $rs = '';
897     $node = $node->parent_node;
898     } else {
899     $rs = '#' . $node->node_type;
900     $node = $node->parent_node;
901     }
902     unshift @r, $rs;
903     }
904     return join '/', @r;
905     } # get_node_path
906    
907 wakaba 1.6 sub get_node_link ($) {
908     return qq[<a href="#node-@{[refaddr $_[0]]}">] .
909     htescape (get_node_path ($_[0])) . qq[</a>];
910     } # get_node_link
911    
912 wakaba 1.7 {
913     my $Msg = {};
914    
915     sub load_text_catalog ($) {
916     my $lang = shift; # MUST be a canonical lang name
917 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
918     or die "$0: cc-msg.$lang.txt: $!";
919 wakaba 1.7 while (<$file>) {
920     if (s/^([^;]+);([^;]*);//) {
921     my ($type, $cls, $msg) = ($1, $2, $_);
922     $msg =~ tr/\x0D\x0A//d;
923     $Msg->{$type} = [$cls, $msg];
924     }
925     }
926     } # load_text_catalog
927    
928     sub get_text ($) {
929 wakaba 1.15 my ($type, $level, $node) = @_;
930 wakaba 1.7 $type = $level . ':' . $type if defined $level;
931     my @arg;
932     {
933     if (defined $Msg->{$type}) {
934     my $msg = $Msg->{$type}->[1];
935 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
936     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
937     }ge;
938 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
939     UNIVERSAL::can ($node, 'get_attribute_ns')
940     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
941     }ge;
942     $msg =~ s{<var>{\@}</var>}{
943     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
944     }ge;
945 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
946     UNIVERSAL::can ($node, 'manakai_local_name')
947     ? htescape ($node->manakai_local_name) : ''
948     }ge;
949     $msg =~ s{<var>{element-local-name}</var>}{
950     (UNIVERSAL::can ($node, 'owner_element') and
951     $node->owner_element)
952     ? htescape ($node->owner_element->manakai_local_name)
953     : ''
954     }ge;
955 wakaba 1.11 return ($type, $Msg->{$type}->[0], $msg);
956 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
957     unshift @arg, $1;
958     redo;
959     }
960     }
961 wakaba 1.11 return ($type, '', htescape ($_[0]));
962 wakaba 1.7 } # get_text
963    
964     }
965    
966 wakaba 1.9 sub get_input_document ($$) {
967     my ($http, $dom) = @_;
968    
969 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
970 wakaba 1.9 my $r = {};
971     if (defined $request_uri and length $request_uri) {
972     my $uri = $dom->create_uri_reference ($request_uri);
973     unless ({
974     http => 1,
975     }->{lc $uri->uri_scheme}) {
976     return {uri => $request_uri, request_uri => $request_uri,
977     error_status_text => 'URI scheme not allowed'};
978     }
979    
980     require Message::Util::HostPermit;
981     my $host_permit = new Message::Util::HostPermit;
982     $host_permit->add_rule (<<EOH);
983     Allow host=suika port=80
984     Deny host=suika
985     Allow host=suika.fam.cx port=80
986     Deny host=suika.fam.cx
987     Deny host=localhost
988     Deny host=*.localdomain
989     Deny ipv4=0.0.0.0/8
990     Deny ipv4=10.0.0.0/8
991     Deny ipv4=127.0.0.0/8
992     Deny ipv4=169.254.0.0/16
993     Deny ipv4=172.0.0.0/11
994     Deny ipv4=192.0.2.0/24
995     Deny ipv4=192.88.99.0/24
996     Deny ipv4=192.168.0.0/16
997     Deny ipv4=198.18.0.0/15
998     Deny ipv4=224.0.0.0/4
999     Deny ipv4=255.255.255.255/32
1000     Deny ipv6=0::0/0
1001     Allow host=*
1002     EOH
1003     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1004     return {uri => $request_uri, request_uri => $request_uri,
1005     error_status_text => 'Connection to the host is forbidden'};
1006     }
1007    
1008     require LWP::UserAgent;
1009     my $ua = WDCC::LWPUA->new;
1010     $ua->{wdcc_dom} = $dom;
1011     $ua->{wdcc_host_permit} = $host_permit;
1012     $ua->agent ('Mozilla'); ## TODO: for now.
1013     $ua->parse_head (0);
1014     $ua->protocols_allowed ([qw/http/]);
1015     $ua->max_size (1000_000);
1016     my $req = HTTP::Request->new (GET => $request_uri);
1017 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1018 wakaba 1.9 my $res = $ua->request ($req);
1019 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1020     if ($res->is_success or $http->get_parameter ('error-page')) {
1021 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1022     $r->{uri} = $res->request->uri;
1023     $r->{request_uri} = $request_uri;
1024    
1025     ## TODO: More strict parsing...
1026     my $ct = $res->header ('Content-Type');
1027 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1028 wakaba 1.9 $r->{charset} = lc $1;
1029     $r->{charset} =~ tr/\\//d;
1030 wakaba 1.26 $r->{official_charset} = $r->{charset};
1031 wakaba 1.9 }
1032    
1033 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1034 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1035     $r->{charset_overridden}
1036     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1037     $r->{charset} = $input_charset;
1038 wakaba 1.25 }
1039    
1040     ## TODO: Support for HTTP Content-Encoding
1041 wakaba 1.9
1042     $r->{s} = ''.$res->content;
1043 wakaba 1.25
1044     require Whatpm::ContentType;
1045     ($r->{official_type}, $r->{media_type})
1046     = Whatpm::ContentType->get_sniffed_type
1047     (get_file_head => sub {
1048     return substr $r->{s}, 0, shift;
1049     },
1050     http_content_type_byte => $ct,
1051     has_http_content_encoding =>
1052     defined $res->header ('Content-Encoding'),
1053     supported_image_types => {});
1054 wakaba 1.9 } else {
1055     $r->{uri} = $res->request->uri;
1056     $r->{request_uri} = $request_uri;
1057     $r->{error_status_text} = $res->status_line;
1058     }
1059    
1060     $r->{header_field} = [];
1061     $res->scan (sub {
1062     push @{$r->{header_field}}, [$_[0], $_[1]];
1063     });
1064     $r->{header_status_code} = $res->code;
1065     $r->{header_status_text} = $res->message;
1066     } else {
1067 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1068 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1069     $r->{request_uri} = q<thismessage:/>;
1070     $r->{base_uri} = q<thismessage:/>;
1071 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1072 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1073     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1074 wakaba 1.26 $r->{official_charset} = $r->{charset};
1075 wakaba 1.9 $r->{header_field} = [];
1076 wakaba 1.25
1077     require Whatpm::ContentType;
1078     ($r->{official_type}, $r->{media_type})
1079     = Whatpm::ContentType->get_sniffed_type
1080     (get_file_head => sub {
1081     return substr $r->{s}, 0, shift;
1082     },
1083     http_content_type_byte => undef,
1084     has_http_content_encoding => 0,
1085     supported_image_types => {});
1086 wakaba 1.9 }
1087    
1088 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1089 wakaba 1.9 if (defined $input_format and length $input_format) {
1090     $r->{media_type_overridden}
1091     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1092     $r->{media_type} = $input_format;
1093     }
1094     if (defined $r->{s} and not defined $r->{media_type}) {
1095     $r->{media_type} = 'text/html';
1096     $r->{media_type_overridden} = 1;
1097     }
1098    
1099     if ($r->{media_type} eq 'text/xml') {
1100     unless (defined $r->{charset}) {
1101     $r->{charset} = 'us-ascii';
1102 wakaba 1.26 $r->{official_charset} = $r->{charset};
1103 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1104     $r->{charset_overridden} = 0;
1105     }
1106     }
1107    
1108     if (length $r->{s} > 1000_000) {
1109     $r->{error_status_text} = 'Entity-body too large';
1110     delete $r->{s};
1111     return $r;
1112     }
1113    
1114     return $r;
1115     } # get_input_document
1116    
1117     package WDCC::LWPUA;
1118     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1119    
1120     sub redirect_ok {
1121     my $ua = shift;
1122     unless ($ua->SUPER::redirect_ok (@_)) {
1123     return 0;
1124     }
1125    
1126     my $uris = $_[1]->header ('Location');
1127     return 0 unless $uris;
1128     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1129     unless ({
1130     http => 1,
1131     }->{lc $uri->uri_scheme}) {
1132     return 0;
1133     }
1134     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1135     return 0;
1136     }
1137     return 1;
1138     } # redirect_ok
1139    
1140 wakaba 1.1 =head1 AUTHOR
1141    
1142     Wakaba <w@suika.fam.cx>.
1143    
1144     =head1 LICENSE
1145    
1146     Copyright 2007 Wakaba <w@suika.fam.cx>
1147    
1148     This library is free software; you can redistribute it
1149     and/or modify it under the same terms as Perl itself.
1150    
1151     =cut
1152    
1153 wakaba 1.28 ## $Date: 2007/11/19 12:20:14 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24