/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.30 - (hide annotations) (download)
Sat Feb 9 12:22:19 2008 UTC (16 years, 9 months ago) by wakaba
Branch: MAIN
Changes since 1.29: +6 -6 lines
++ ChangeLog	9 Feb 2008 12:22:16 -0000
	* cc-style.css: Rules for s/unsupported/u/'ed version added.

	* cc.cgi: Support for 'u' level.

	* error-description-source.en.xml: s/unsupported/u/;

	* standards.en.html: CSS added.

2008-02-09  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.16 use Message::CGI::HTTP;
24     my $http = Message::CGI::HTTP->new;
25 wakaba 1.1
26 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
27 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
28     exit;
29     }
30    
31 wakaba 1.12 binmode STDOUT, ':utf8';
32 wakaba 1.14 $| = 1;
33 wakaba 1.12
34 wakaba 1.9 require Message::DOM::DOMImplementation;
35     my $dom = Message::DOM::DOMImplementation->new;
36    
37 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
38    
39 wakaba 1.3 my @nav;
40 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
41    
42     <!DOCTYPE html>
43     <html lang="en">
44     <head>
45     <title>Web Document Conformance Checker (BETA)</title>
46 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
47 wakaba 1.2 </head>
48     <body>
49 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
50     (<em>beta</em>)</h1>
51 wakaba 1.14 ];
52 wakaba 1.2
53 wakaba 1.14 $| = 0;
54     my $input = get_input_document ($http, $dom);
55 wakaba 1.16 my $char_length = 0;
56     my %time;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89     ];
90    
91 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
92 wakaba 1.19 print_http_header_section ($input, $result);
93 wakaba 1.9
94 wakaba 1.1 my $doc;
95     my $el;
96 wakaba 1.22 my $manifest;
97 wakaba 1.1
98 wakaba 1.9 if ($input->{media_type} eq 'text/html') {
99 wakaba 1.19 ($doc, $el) = print_syntax_error_html_section ($input, $result);
100 wakaba 1.25 print_source_string_section
101     (\($input->{s}), $input->{charset} || $doc->input_encoding);
102 wakaba 1.9 } elsif ({
103     'text/xml' => 1,
104 wakaba 1.16 'application/atom+xml' => 1,
105     'application/rss+xml' => 1,
106     'application/svg+xml' => 1,
107 wakaba 1.9 'application/xhtml+xml' => 1,
108     'application/xml' => 1,
109     }->{$input->{media_type}}) {
110 wakaba 1.19 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
111 wakaba 1.9 print_source_string_section (\($input->{s}), $doc->input_encoding);
112 wakaba 1.22 } elsif ($input->{media_type} eq 'text/cache-manifest') {
113     ## TODO: MUST be text/cache-manifest
114     $manifest = print_syntax_error_manifest_section ($input, $result);
115     print_source_string_section (\($input->{s}), 'utf-8');
116 wakaba 1.2 } else {
117 wakaba 1.9 ## TODO: Change HTTP status code??
118 wakaba 1.24 print_result_unknown_type_section ($input, $result);
119 wakaba 1.2 }
120    
121     if (defined $doc or defined $el) {
122 wakaba 1.22 print_structure_dump_dom_section ($doc, $el);
123     my $elements = print_structure_error_dom_section ($doc, $el, $result);
124 wakaba 1.18 print_table_section ($elements->{table}) if @{$elements->{table}};
125     print_id_section ($elements->{id}) if keys %{$elements->{id}};
126     print_term_section ($elements->{term}) if keys %{$elements->{term}};
127     print_class_section ($elements->{class}) if keys %{$elements->{class}};
128 wakaba 1.22 } elsif (defined $manifest) {
129     print_structure_dump_manifest_section ($manifest);
130     print_structure_error_manifest_section ($manifest, $result);
131 wakaba 1.1 }
132    
133 wakaba 1.19 print_result_section ($result);
134 wakaba 1.9 } else {
135 wakaba 1.18 print STDOUT qq[</dl></div>];
136     print_result_input_error_section ($input);
137 wakaba 1.9 }
138 wakaba 1.3
139 wakaba 1.2 print STDOUT qq[
140 wakaba 1.3 <ul class="navigation" id="nav-items">
141     ];
142     for (@nav) {
143     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
144     }
145     print STDOUT qq[
146     </ul>
147 wakaba 1.2 </body>
148     </html>
149     ];
150 wakaba 1.1
151 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
152     check check_manifest/) {
153 wakaba 1.16 next unless defined $time{$_};
154     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
155     print $file $char_length, "\t", $time{$_}, "\n";
156     }
157    
158 wakaba 1.1 exit;
159    
160 wakaba 1.19 sub add_error ($$$) {
161     my ($layer, $err, $result) = @_;
162     if (defined $err->{level}) {
163     if ($err->{level} eq 's') {
164     $result->{$layer}->{should}++;
165     $result->{$layer}->{score_min} -= 2;
166     $result->{conforming_min} = 0;
167     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
168     $result->{$layer}->{warning}++;
169 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
170 wakaba 1.19 $result->{$layer}->{unsupported}++;
171     $result->{unsupported} = 1;
172     } else {
173     $result->{$layer}->{must}++;
174     $result->{$layer}->{score_max} -= 2;
175     $result->{$layer}->{score_min} -= 2;
176     $result->{conforming_min} = 0;
177     $result->{conforming_max} = 0;
178     }
179     } else {
180     $result->{$layer}->{must}++;
181     $result->{$layer}->{score_max} -= 2;
182     $result->{$layer}->{score_min} -= 2;
183     $result->{conforming_min} = 0;
184     $result->{conforming_max} = 0;
185     }
186     } # add_error
187    
188     sub print_http_header_section ($$) {
189     my ($input, $result) = @_;
190 wakaba 1.9 return unless defined $input->{header_status_code} or
191     defined $input->{header_status_text} or
192     @{$input->{header_field}};
193    
194     push @nav, ['#source-header' => 'HTTP Header'];
195     print STDOUT qq[<div id="source-header" class="section">
196     <h2>HTTP Header</h2>
197    
198     <p><strong>Note</strong>: Due to the limitation of the
199     network library in use, the content of this section might
200     not be the real header.</p>
201    
202     <table><tbody>
203     ];
204    
205     if (defined $input->{header_status_code}) {
206     print STDOUT qq[<tr><th scope="row">Status code</th>];
207     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
208     }
209     if (defined $input->{header_status_text}) {
210     print STDOUT qq[<tr><th scope="row">Status text</th>];
211     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
212     }
213    
214     for (@{$input->{header_field}}) {
215     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
216     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
217     }
218    
219     print STDOUT qq[</tbody></table></div>];
220     } # print_http_header_section
221    
222 wakaba 1.19 sub print_syntax_error_html_section ($$) {
223     my ($input, $result) = @_;
224 wakaba 1.18
225     require Encode;
226     require Whatpm::HTML;
227    
228     print STDOUT qq[
229     <div id="parse-errors" class="section">
230     <h2>Parse Errors</h2>
231    
232     <dl>];
233     push @nav, ['#parse-errors' => 'Parse Error'];
234    
235     my $onerror = sub {
236     my (%opt) = @_;
237     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
238     if ($opt{column} > 0) {
239     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a> column $opt{column}</dt>\n];
240     } else {
241     $opt{line} = $opt{line} - 1 || 1;
242     print STDOUT qq[<dt class="$cls"><a href="#line-$opt{line}">Line $opt{line}</a></dt>\n];
243     }
244     $type =~ tr/ /-/;
245     $type =~ s/\|/%7C/g;
246     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
247 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
248     print STDOUT qq[$msg</dd>\n];
249 wakaba 1.19
250     add_error ('syntax', \%opt => $result);
251 wakaba 1.18 };
252    
253     my $doc = $dom->create_document;
254     my $el;
255 wakaba 1.26 my $inner_html_element = $http->get_parameter ('e');
256 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
257 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
258 wakaba 1.24 my $time1 = time;
259     my $t = Encode::decode ($input->{charset}, $input->{s});
260     $time{decode} = time - $time1;
261    
262 wakaba 1.18 $el = $doc->create_element_ns
263     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
264 wakaba 1.24 $time1 = time;
265 wakaba 1.18 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
266 wakaba 1.24 $time{parse} = time - $time1;
267 wakaba 1.18 } else {
268 wakaba 1.24 my $time1 = time;
269     Whatpm::HTML->parse_byte_string
270     ($input->{charset}, $input->{s} => $doc, $onerror);
271     $time{parse_html} = time - $time1;
272 wakaba 1.18 }
273 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
274     if defined $input->{official_charset};
275 wakaba 1.24
276 wakaba 1.18 print STDOUT qq[</dl></div>];
277    
278     return ($doc, $el);
279     } # print_syntax_error_html_section
280    
281 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
282     my ($input, $result) = @_;
283 wakaba 1.18
284     require Message::DOM::XMLParserTemp;
285    
286     print STDOUT qq[
287     <div id="parse-errors" class="section">
288     <h2>Parse Errors</h2>
289    
290     <dl>];
291     push @nav, ['#parse-errors' => 'Parse Error'];
292    
293     my $onerror = sub {
294     my $err = shift;
295     my $line = $err->location->line_number;
296     print STDOUT qq[<dt><a href="#line-$line">Line $line</a> column ];
297     print STDOUT $err->location->column_number, "</dt><dd>";
298     print STDOUT htescape $err->text, "</dd>\n";
299 wakaba 1.19
300     add_error ('syntax', {type => $err->text,
301     level => [
302     $err->SEVERITY_FATAL_ERROR => 'm',
303     $err->SEVERITY_ERROR => 'm',
304     $err->SEVERITY_WARNING => 's',
305     ]->[$err->severity]} => $result);
306    
307 wakaba 1.18 return 1;
308     };
309    
310     my $time1 = time;
311     open my $fh, '<', \($input->{s});
312     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
313     ($fh => $dom, $onerror, charset => $input->{charset});
314     $time{parse_xml} = time - $time1;
315 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
316     if defined $input->{official_charset};
317 wakaba 1.18
318     print STDOUT qq[</dl></div>];
319    
320     return ($doc, undef);
321     } # print_syntax_error_xml_section
322    
323 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
324     my ($input, $result) = @_;
325    
326     require Whatpm::CacheManifest;
327    
328     print STDOUT qq[
329     <div id="parse-errors" class="section">
330     <h2>Parse Errors</h2>
331    
332     <dl>];
333     push @nav, ['#parse-errors' => 'Parse Error'];
334    
335     my $onerror = sub {
336     my (%opt) = @_;
337     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
338     print STDOUT qq[<dt class="$cls">], get_error_label (\%opt), qq[</dt>];
339     $type =~ tr/ /-/;
340     $type =~ s/\|/%7C/g;
341     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
342 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
343     print STDOUT qq[$msg</dd>\n];
344 wakaba 1.22
345     add_error ('syntax', \%opt => $result);
346     };
347    
348     my $time1 = time;
349     my $manifest = Whatpm::CacheManifest->parse_byte_string
350     ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
351     $time{parse_manifest} = time - $time1;
352    
353     print STDOUT qq[</dl></div>];
354    
355     return $manifest;
356     } # print_syntax_error_manifest_section
357    
358 wakaba 1.9 sub print_source_string_section ($$) {
359     require Encode;
360     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
361     return unless $enc;
362    
363     my $s = \($enc->decode (${$_[0]}));
364     my $i = 1;
365     push @nav, ['#source-string' => 'Source'];
366     print STDOUT qq[<div id="source-string" class="section">
367     <h2>Document Source</h2>
368     <ol lang="">\n];
369 wakaba 1.7 if (length $$s) {
370     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
371     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
372     $i++;
373     }
374     if ($$s =~ /\G([^\x0A]+)/gc) {
375     print STDOUT qq[<li id="line-$i">], htescape $1, "</li>\n";
376     }
377     } else {
378     print STDOUT q[<li id="line-1"></li>];
379 wakaba 1.2 }
380 wakaba 1.9 print STDOUT "</ol></div>";
381     } # print_input_string_section
382 wakaba 1.2
383     sub print_document_tree ($) {
384 wakaba 1.1 my $node = shift;
385 wakaba 1.2 my $r = '<ol class="xoxo">';
386 wakaba 1.1
387 wakaba 1.2 my @node = ($node);
388 wakaba 1.1 while (@node) {
389     my $child = shift @node;
390 wakaba 1.2 unless (ref $child) {
391     $r .= $child;
392     next;
393     }
394    
395     my $node_id = 'node-'.refaddr $child;
396     my $nt = $child->node_type;
397     if ($nt == $child->ELEMENT_NODE) {
398 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
399     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
400 wakaba 1.2 '</code>'; ## ISSUE: case
401    
402     if ($child->has_attributes) {
403     $r .= '<ul class="attributes">';
404 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
405 wakaba 1.2 @{$child->attributes}) {
406 wakaba 1.27 $r .= qq[<li id="$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
407 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
408     }
409     $r .= '</ul>';
410     }
411    
412 wakaba 1.7 if ($child->has_child_nodes) {
413 wakaba 1.2 $r .= '<ol class="children">';
414 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
415     } else {
416     $r .= '</li>';
417 wakaba 1.2 }
418     } elsif ($nt == $child->TEXT_NODE) {
419 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
420 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
421 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
422 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
423 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
424 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
425 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
426 wakaba 1.7 $r .= qq[<ul class="attributes">];
427 wakaba 1.27 my $cp = $child->manakai_charset;
428     if (defined $cp) {
429     $r .= qq[<li><code>charset</code> parameter = <code>];
430     $r .= htescape ($cp) . qq[</code></li>];
431     }
432     $r .= qq[<li><code>inputEncoding</code> = ];
433     my $ie = $child->input_encoding;
434     if (defined $ie) {
435     $r .= qq[<code>@{[htescape ($ie)]}</code>];
436     if ($child->manakai_has_bom) {
437     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
438     }
439     } else {
440     $r .= qq[(<code>null</code>)];
441     }
442 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
443     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
444 wakaba 1.9 unless ($child->manakai_is_html) {
445     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
446     if (defined $child->xml_encoding) {
447     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
448     } else {
449     $r .= qq[<li>XML encoding = (null)</li>];
450     }
451     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
452     }
453 wakaba 1.7 $r .= qq[</ul>];
454 wakaba 1.2 if ($child->has_child_nodes) {
455 wakaba 1.7 $r .= '<ol class="children">';
456 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
457 wakaba 1.1 }
458 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
459 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
460     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
461     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
462     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
463 wakaba 1.2 $r .= '</ul></li>';
464     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
465 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
466 wakaba 1.1 } else {
467 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
468 wakaba 1.1 }
469     }
470 wakaba 1.2
471     $r .= '</ol>';
472     print STDOUT $r;
473     } # print_document_tree
474 wakaba 1.1
475 wakaba 1.22 sub print_structure_dump_dom_section ($$) {
476 wakaba 1.18 my ($doc, $el) = @_;
477    
478     print STDOUT qq[
479     <div id="document-tree" class="section">
480     <h2>Document Tree</h2>
481     ];
482     push @nav, ['#document-tree' => 'Tree'];
483    
484     print_document_tree ($el || $doc);
485    
486     print STDOUT qq[</div>];
487 wakaba 1.22 } # print_structure_dump_dom_section
488    
489     sub print_structure_dump_manifest_section ($) {
490     my $manifest = shift;
491    
492     print STDOUT qq[
493     <div id="dump-manifest" class="section">
494     <h2>Cache Manifest</h2>
495     ];
496     push @nav, ['#dump-manifest' => 'Caceh Manifest'];
497    
498     print STDOUT qq[<dl><dt>Explicit entries</dt>];
499     for my $uri (@{$manifest->[0]}) {
500     my $euri = htescape ($uri);
501     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
502     }
503    
504     print STDOUT qq[<dt>Fallback entries</dt><dd>
505     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
506     <th scope=row>Fallback Entry</tr><tbody>];
507     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
508     my $euri = htescape ($uri);
509     my $euri2 = htescape ($manifest->[1]->{$uri});
510     print STDOUT qq[<tr><td><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
511     <td><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
512     }
513    
514     print STDOUT qq[</table><dt>Online whitelist</dt>];
515     for my $uri (@{$manifest->[2]}) {
516     my $euri = htescape ($uri);
517     print STDOUT qq[<dd><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
518     }
519 wakaba 1.18
520 wakaba 1.22 print STDOUT qq[</dl></div>];
521     } # print_structure_dump_manifest_section
522    
523     sub print_structure_error_dom_section ($$$) {
524 wakaba 1.19 my ($doc, $el, $result) = @_;
525 wakaba 1.18
526     print STDOUT qq[<div id="document-errors" class="section">
527     <h2>Document Errors</h2>
528    
529     <dl>];
530     push @nav, ['#document-errors' => 'Document Error'];
531    
532     require Whatpm::ContentChecker;
533     my $onerror = sub {
534     my %opt = @_;
535     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
536     $type =~ tr/ /-/;
537     $type =~ s/\|/%7C/g;
538     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
539 wakaba 1.22 print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
540 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
541     print STDOUT $msg, "</dd>\n";
542 wakaba 1.19 add_error ('structure', \%opt => $result);
543 wakaba 1.18 };
544    
545     my $elements;
546     my $time1 = time;
547     if ($el) {
548     $elements = Whatpm::ContentChecker->check_element ($el, $onerror);
549     } else {
550     $elements = Whatpm::ContentChecker->check_document ($doc, $onerror);
551     }
552     $time{check} = time - $time1;
553    
554     print STDOUT qq[</dl></div>];
555    
556     return $elements;
557 wakaba 1.22 } # print_structure_error_dom_section
558    
559     sub print_structure_error_manifest_section ($$$) {
560     my ($manifest, $result) = @_;
561    
562     print STDOUT qq[<div id="document-errors" class="section">
563     <h2>Document Errors</h2>
564    
565     <dl>];
566     push @nav, ['#document-errors' => 'Document Error'];
567    
568     require Whatpm::CacheManifest;
569     Whatpm::CacheManifest->check_manifest ($manifest, sub {
570     my %opt = @_;
571     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
572     $type =~ tr/ /-/;
573     $type =~ s/\|/%7C/g;
574     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
575     print STDOUT qq[<dt class="$cls">] . get_error_label (\%opt) .
576     qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
577     add_error ('structure', \%opt => $result);
578     });
579    
580     print STDOUT qq[</div>];
581     } # print_structure_error_manifest_section
582 wakaba 1.18
583     sub print_table_section ($) {
584     my $tables = shift;
585    
586     push @nav, ['#tables' => 'Tables'];
587     print STDOUT qq[
588     <div id="tables" class="section">
589     <h2>Tables</h2>
590    
591     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
592     <script src="../table-script.js" type="text/javascript"></script>
593     <noscript>
594     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
595     </noscript>
596     ];
597    
598     require JSON;
599    
600     my $i = 0;
601     for my $table_el (@$tables) {
602     $i++;
603     print STDOUT qq[<div class="section" id="table-$i"><h3>] .
604     get_node_link ($table_el) . q[</h3>];
605    
606     ## TODO: Make |ContentChecker| return |form_table| result
607     ## so that this script don't have to run the algorithm twice.
608     my $table = Whatpm::HTMLTable->form_table ($table_el);
609    
610     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
611     next unless $_;
612     delete $_->{element};
613     }
614    
615     for (@{$table->{row_group}}) {
616     next unless $_;
617     next unless $_->{element};
618     $_->{type} = $_->{element}->manakai_local_name;
619     delete $_->{element};
620     }
621    
622     for (@{$table->{cell}}) {
623     next unless $_;
624     for (@{$_}) {
625     next unless $_;
626     for (@$_) {
627     $_->{id} = refaddr $_->{element} if defined $_->{element};
628     delete $_->{element};
629     $_->{is_header} = $_->{is_header} ? 1 : 0;
630     }
631     }
632     }
633    
634     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
635     print STDOUT JSON::objToJson ($table);
636     print STDOUT qq[, document.getElementById ('table-$i'));</script>];
637     }
638    
639     print STDOUT qq[</div>];
640     } # print_table_section
641    
642     sub print_id_section ($) {
643     my $ids = shift;
644    
645     push @nav, ['#identifiers' => 'IDs'];
646     print STDOUT qq[
647     <div id="identifiers" class="section">
648     <h2>Identifiers</h2>
649    
650     <dl>
651     ];
652     for my $id (sort {$a cmp $b} keys %$ids) {
653     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
654     for (@{$ids->{$id}}) {
655     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
656     }
657     }
658     print STDOUT qq[</dl></div>];
659     } # print_id_section
660    
661     sub print_term_section ($) {
662     my $terms = shift;
663    
664     push @nav, ['#terms' => 'Terms'];
665     print STDOUT qq[
666     <div id="terms" class="section">
667     <h2>Terms</h2>
668    
669     <dl>
670     ];
671     for my $term (sort {$a cmp $b} keys %$terms) {
672     print STDOUT qq[<dt>@{[htescape $term]}</dt>];
673     for (@{$terms->{$term}}) {
674     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
675     }
676     }
677     print STDOUT qq[</dl></div>];
678     } # print_term_section
679    
680     sub print_class_section ($) {
681     my $classes = shift;
682    
683     push @nav, ['#classes' => 'Classes'];
684     print STDOUT qq[
685     <div id="classes" class="section">
686     <h2>Classes</h2>
687    
688     <dl>
689     ];
690     for my $class (sort {$a cmp $b} keys %$classes) {
691     print STDOUT qq[<dt><code>@{[htescape $class]}</code></dt>];
692     for (@{$classes->{$class}}) {
693     print STDOUT qq[<dd>].get_node_link ($_).qq[</dd>];
694     }
695     }
696     print STDOUT qq[</dl></div>];
697     } # print_class_section
698    
699 wakaba 1.19 sub print_result_section ($) {
700     my $result = shift;
701    
702     print STDOUT qq[
703     <div id="result-summary" class="section">
704     <h2>Result</h2>];
705    
706 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
707 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
708     checker cannot decide whether the document is conforming or
709     not, since the document contains one or more unsupported
710 wakaba 1.21 features. The document might or might not be conforming.</p>];
711 wakaba 1.19 } elsif ($result->{conforming_min}) {
712     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
713     found in this document.</p>];
714     } elsif ($result->{conforming_max}) {
715     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
716     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
717     it might be conforming.</p>];
718     } else {
719     print STDOUT qq[<p class=FAIL id=result-para>This document is
720     <strong><em>non</em>-conforming</strong>.</p>];
721     }
722    
723     print STDOUT qq[<table>
724     <colgroup><col><colgroup><col><col><col><colgroup><col>
725     <thead>
726 wakaba 1.23 <tr><th scope=col></th>
727     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
728     Errors</a></th>
729     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
730     Errors</a></th>
731     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
732     <th scope=col>Score</th></tr></thead><tbody>];
733 wakaba 1.19
734     my $must_error = 0;
735     my $should_error = 0;
736     my $warning = 0;
737     my $score_min = 0;
738     my $score_max = 0;
739     my $score_base = 20;
740 wakaba 1.21 my $score_unit = $score_base / 100;
741 wakaba 1.19 for (
742     [Transfer => 'transfer', ''],
743     [Character => 'char', ''],
744     [Syntax => 'syntax', '#parse-errors'],
745     [Structure => 'structure', '#document-errors'],
746     ) {
747     $must_error += ($result->{$_->[1]}->{must} += 0);
748     $should_error += ($result->{$_->[1]}->{should} += 0);
749     $warning += ($result->{$_->[1]}->{warning} += 0);
750 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
751     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
752 wakaba 1.19
753     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
754     my $label = $_->[0];
755     if ($result->{$_->[1]}->{must} or
756     $result->{$_->[1]}->{should} or
757     $result->{$_->[1]}->{warning} or
758     $result->{$_->[1]}->{unsupported}) {
759     $label = qq[<a href="$_->[2]">$label</a>];
760     }
761    
762     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
763     if ($uncertain) {
764 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
765 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
766 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
767 wakaba 1.19 } else {
768 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
769 wakaba 1.19 }
770     }
771    
772     $score_max += $score_base;
773    
774     print STDOUT qq[
775     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
776     </tbody>
777 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
778     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
779     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
780     <td>$warning?</td>
781     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
782 wakaba 1.19 </table>
783    
784     <p><strong>Important</strong>: This conformance checking service
785     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
786     </div>];
787     push @nav, ['#result-summary' => 'Result'];
788     } # print_result_section
789    
790 wakaba 1.24 sub print_result_unknown_type_section ($$) {
791     my ($input, $result) = @_;
792 wakaba 1.18
793 wakaba 1.24 my $euri = htescape ($input->{uri});
794 wakaba 1.18 print STDOUT qq[
795 wakaba 1.24 <div id="parse-errors" class="section">
796     <h2>Errors</h2>
797    
798     <dl>
799     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
800     <dd class=unsupported><strong><a href="../error-description#level-u">Not
801     supported</a></strong>:
802     Media type
803     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
804     is not supported.</dd>
805     </dl>
806 wakaba 1.18 </div>
807     ];
808 wakaba 1.24 push @nav, ['#parse-errors' => 'Errors'];
809 wakaba 1.30 add_error (char => {level => 'u'} => $result);
810     add_error (syntax => {level => 'u'} => $result);
811     add_error (structure => {level => 'u'} => $result);
812 wakaba 1.18 } # print_result_unknown_type_section
813    
814     sub print_result_input_error_section ($) {
815     my $input = shift;
816     print STDOUT qq[<div class="section" id="result-summary">
817     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
818     </div>];
819     push @nav, ['#result-summary' => 'Result'];
820     } # print_Result_input_error_section
821    
822 wakaba 1.22 sub get_error_label ($) {
823     my $err = shift;
824    
825     my $r = '';
826    
827     if (defined $err->{line}) {
828     if ($err->{column} > 0) {
829     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a> column $err->{column}];
830     } else {
831     $err->{line} = $err->{line} - 1 || 1;
832     $r = qq[<a href="#line-$err->{line}">Line $err->{line}</a>];
833     }
834     }
835    
836     if (defined $err->{node}) {
837     $r .= ' ' if length $r;
838 wakaba 1.23 $r = get_node_link ($err->{node});
839 wakaba 1.22 }
840    
841     if (defined $err->{index}) {
842     $r .= ' ' if length $r;
843     $r .= 'Index ' . (0+$err->{index});
844     }
845    
846     if (defined $err->{value}) {
847     $r .= ' ' if length $r;
848     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
849     }
850    
851     return $r;
852     } # get_error_label
853    
854 wakaba 1.23 sub get_error_level_label ($) {
855     my $err = shift;
856    
857     my $r = '';
858    
859     if (not defined $err->{level} or $err->{level} eq 'm') {
860     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
861     error</a></strong>: ];
862     } elsif ($err->{level} eq 's') {
863     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
864     error</a></strong>: ];
865     } elsif ($err->{level} eq 'w') {
866     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
867     ];
868 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
869 wakaba 1.23 $r = qq[<strong><a href="../error-description#level-u">Not
870     supported</a></strong>: ];
871     } else {
872     my $elevel = htescape ($err->{level});
873     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
874     ];
875     }
876    
877     return $r;
878     } # get_error_level_label
879    
880 wakaba 1.1 sub get_node_path ($) {
881     my $node = shift;
882     my @r;
883     while (defined $node) {
884     my $rs;
885     if ($node->node_type == 1) {
886     $rs = $node->manakai_local_name;
887     $node = $node->parent_node;
888     } elsif ($node->node_type == 2) {
889     $rs = '@' . $node->manakai_local_name;
890     $node = $node->owner_element;
891     } elsif ($node->node_type == 3) {
892     $rs = '"' . $node->data . '"';
893     $node = $node->parent_node;
894     } elsif ($node->node_type == 9) {
895 wakaba 1.9 @r = ('') unless @r;
896 wakaba 1.1 $rs = '';
897     $node = $node->parent_node;
898     } else {
899     $rs = '#' . $node->node_type;
900     $node = $node->parent_node;
901     }
902     unshift @r, $rs;
903     }
904     return join '/', @r;
905     } # get_node_path
906    
907 wakaba 1.6 sub get_node_link ($) {
908     return qq[<a href="#node-@{[refaddr $_[0]]}">] .
909     htescape (get_node_path ($_[0])) . qq[</a>];
910     } # get_node_link
911    
912 wakaba 1.7 {
913     my $Msg = {};
914    
915     sub load_text_catalog ($) {
916     my $lang = shift; # MUST be a canonical lang name
917 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
918     or die "$0: cc-msg.$lang.txt: $!";
919 wakaba 1.7 while (<$file>) {
920     if (s/^([^;]+);([^;]*);//) {
921     my ($type, $cls, $msg) = ($1, $2, $_);
922     $msg =~ tr/\x0D\x0A//d;
923     $Msg->{$type} = [$cls, $msg];
924     }
925     }
926     } # load_text_catalog
927    
928     sub get_text ($) {
929 wakaba 1.15 my ($type, $level, $node) = @_;
930 wakaba 1.7 $type = $level . ':' . $type if defined $level;
931 wakaba 1.29 $level = 'm' unless defined $level;
932 wakaba 1.7 my @arg;
933     {
934     if (defined $Msg->{$type}) {
935     my $msg = $Msg->{$type}->[1];
936 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
937     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
938     }ge;
939 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
940     UNIVERSAL::can ($node, 'get_attribute_ns')
941     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
942     }ge;
943     $msg =~ s{<var>{\@}</var>}{
944     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
945     }ge;
946 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
947     UNIVERSAL::can ($node, 'manakai_local_name')
948     ? htescape ($node->manakai_local_name) : ''
949     }ge;
950     $msg =~ s{<var>{element-local-name}</var>}{
951     (UNIVERSAL::can ($node, 'owner_element') and
952     $node->owner_element)
953     ? htescape ($node->owner_element->manakai_local_name)
954     : ''
955     }ge;
956 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
957 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
958     unshift @arg, $1;
959     redo;
960     }
961     }
962 wakaba 1.29 return ($type, 'level-'.$level, htescape ($_[0]));
963 wakaba 1.7 } # get_text
964    
965     }
966    
967 wakaba 1.9 sub get_input_document ($$) {
968     my ($http, $dom) = @_;
969    
970 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
971 wakaba 1.9 my $r = {};
972     if (defined $request_uri and length $request_uri) {
973     my $uri = $dom->create_uri_reference ($request_uri);
974     unless ({
975     http => 1,
976     }->{lc $uri->uri_scheme}) {
977     return {uri => $request_uri, request_uri => $request_uri,
978     error_status_text => 'URI scheme not allowed'};
979     }
980    
981     require Message::Util::HostPermit;
982     my $host_permit = new Message::Util::HostPermit;
983     $host_permit->add_rule (<<EOH);
984     Allow host=suika port=80
985     Deny host=suika
986     Allow host=suika.fam.cx port=80
987     Deny host=suika.fam.cx
988     Deny host=localhost
989     Deny host=*.localdomain
990     Deny ipv4=0.0.0.0/8
991     Deny ipv4=10.0.0.0/8
992     Deny ipv4=127.0.0.0/8
993     Deny ipv4=169.254.0.0/16
994     Deny ipv4=172.0.0.0/11
995     Deny ipv4=192.0.2.0/24
996     Deny ipv4=192.88.99.0/24
997     Deny ipv4=192.168.0.0/16
998     Deny ipv4=198.18.0.0/15
999     Deny ipv4=224.0.0.0/4
1000     Deny ipv4=255.255.255.255/32
1001     Deny ipv6=0::0/0
1002     Allow host=*
1003     EOH
1004     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1005     return {uri => $request_uri, request_uri => $request_uri,
1006     error_status_text => 'Connection to the host is forbidden'};
1007     }
1008    
1009     require LWP::UserAgent;
1010     my $ua = WDCC::LWPUA->new;
1011     $ua->{wdcc_dom} = $dom;
1012     $ua->{wdcc_host_permit} = $host_permit;
1013     $ua->agent ('Mozilla'); ## TODO: for now.
1014     $ua->parse_head (0);
1015     $ua->protocols_allowed ([qw/http/]);
1016     $ua->max_size (1000_000);
1017     my $req = HTTP::Request->new (GET => $request_uri);
1018 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1019 wakaba 1.9 my $res = $ua->request ($req);
1020 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1021     if ($res->is_success or $http->get_parameter ('error-page')) {
1022 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1023     $r->{uri} = $res->request->uri;
1024     $r->{request_uri} = $request_uri;
1025    
1026     ## TODO: More strict parsing...
1027     my $ct = $res->header ('Content-Type');
1028 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1029 wakaba 1.9 $r->{charset} = lc $1;
1030     $r->{charset} =~ tr/\\//d;
1031 wakaba 1.26 $r->{official_charset} = $r->{charset};
1032 wakaba 1.9 }
1033    
1034 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1035 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1036     $r->{charset_overridden}
1037     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1038     $r->{charset} = $input_charset;
1039 wakaba 1.25 }
1040    
1041     ## TODO: Support for HTTP Content-Encoding
1042 wakaba 1.9
1043     $r->{s} = ''.$res->content;
1044 wakaba 1.25
1045     require Whatpm::ContentType;
1046     ($r->{official_type}, $r->{media_type})
1047     = Whatpm::ContentType->get_sniffed_type
1048     (get_file_head => sub {
1049     return substr $r->{s}, 0, shift;
1050     },
1051     http_content_type_byte => $ct,
1052     has_http_content_encoding =>
1053     defined $res->header ('Content-Encoding'),
1054     supported_image_types => {});
1055 wakaba 1.9 } else {
1056     $r->{uri} = $res->request->uri;
1057     $r->{request_uri} = $request_uri;
1058     $r->{error_status_text} = $res->status_line;
1059     }
1060    
1061     $r->{header_field} = [];
1062     $res->scan (sub {
1063     push @{$r->{header_field}}, [$_[0], $_[1]];
1064     });
1065     $r->{header_status_code} = $res->code;
1066     $r->{header_status_text} = $res->message;
1067     } else {
1068 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1069 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1070     $r->{request_uri} = q<thismessage:/>;
1071     $r->{base_uri} = q<thismessage:/>;
1072 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1073 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1074     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1075 wakaba 1.26 $r->{official_charset} = $r->{charset};
1076 wakaba 1.9 $r->{header_field} = [];
1077 wakaba 1.25
1078     require Whatpm::ContentType;
1079     ($r->{official_type}, $r->{media_type})
1080     = Whatpm::ContentType->get_sniffed_type
1081     (get_file_head => sub {
1082     return substr $r->{s}, 0, shift;
1083     },
1084     http_content_type_byte => undef,
1085     has_http_content_encoding => 0,
1086     supported_image_types => {});
1087 wakaba 1.9 }
1088    
1089 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1090 wakaba 1.9 if (defined $input_format and length $input_format) {
1091     $r->{media_type_overridden}
1092     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1093     $r->{media_type} = $input_format;
1094     }
1095     if (defined $r->{s} and not defined $r->{media_type}) {
1096     $r->{media_type} = 'text/html';
1097     $r->{media_type_overridden} = 1;
1098     }
1099    
1100     if ($r->{media_type} eq 'text/xml') {
1101     unless (defined $r->{charset}) {
1102     $r->{charset} = 'us-ascii';
1103 wakaba 1.26 $r->{official_charset} = $r->{charset};
1104 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1105     $r->{charset_overridden} = 0;
1106     }
1107     }
1108    
1109     if (length $r->{s} > 1000_000) {
1110     $r->{error_status_text} = 'Entity-body too large';
1111     delete $r->{s};
1112     return $r;
1113     }
1114    
1115     return $r;
1116     } # get_input_document
1117    
1118     package WDCC::LWPUA;
1119     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1120    
1121     sub redirect_ok {
1122     my $ua = shift;
1123     unless ($ua->SUPER::redirect_ok (@_)) {
1124     return 0;
1125     }
1126    
1127     my $uris = $_[1]->header ('Location');
1128     return 0 unless $uris;
1129     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1130     unless ({
1131     http => 1,
1132     }->{lc $uri->uri_scheme}) {
1133     return 0;
1134     }
1135     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1136     return 0;
1137     }
1138     return 1;
1139     } # redirect_ok
1140    
1141 wakaba 1.1 =head1 AUTHOR
1142    
1143     Wakaba <w@suika.fam.cx>.
1144    
1145     =head1 LICENSE
1146    
1147     Copyright 2007 Wakaba <w@suika.fam.cx>
1148    
1149     This library is free software; you can redistribute it
1150     and/or modify it under the same terms as Perl itself.
1151    
1152     =cut
1153    
1154 wakaba 1.30 ## $Date: 2007/11/23 12:08:32 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24