/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.48 - (hide annotations) (download)
Sat Apr 12 15:57:56 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.47: +79 -7 lines
++ ChangeLog	12 Apr 2008 15:57:44 -0000
2008-04-12  Wakaba  <wakaba@suika.fam.cx>

	* parser-manakai.cgi, parser-manakai-interface.en.html: The |innerHTML|
	output mode is split into "|innerHTML| (HTML)" and "|innerHTML| (XML)"
	output modes.

2008-03-29  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi: New "URI" section is implemented.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.35 my @nav;
24     my %time;
25     require Message::DOM::DOMImplementation;
26     my $dom = Message::DOM::DOMImplementation->new;
27     {
28 wakaba 1.16 use Message::CGI::HTTP;
29     my $http = Message::CGI::HTTP->new;
30 wakaba 1.1
31 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33     exit;
34     }
35    
36 wakaba 1.12 binmode STDOUT, ':utf8';
37 wakaba 1.14 $| = 1;
38 wakaba 1.12
39 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
40    
41 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
42    
43     <!DOCTYPE html>
44     <html lang="en">
45     <head>
46     <title>Web Document Conformance Checker (BETA)</title>
47 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 wakaba 1.2 </head>
49     <body>
50 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51     (<em>beta</em>)</h1>
52 wakaba 1.14 ];
53 wakaba 1.2
54 wakaba 1.14 $| = 0;
55     my $input = get_input_document ($http, $dom);
56 wakaba 1.16 my $char_length = 0;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89 wakaba 1.39
90     <script src="../cc-script.js"></script>
91 wakaba 1.9 ];
92    
93 wakaba 1.35 $input->{id_prefix} = '';
94     #$input->{nested} = 0;
95 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
96 wakaba 1.31 check_and_print ($input => $result);
97 wakaba 1.19 print_result_section ($result);
98 wakaba 1.9 } else {
99 wakaba 1.18 print STDOUT qq[</dl></div>];
100     print_result_input_error_section ($input);
101 wakaba 1.9 }
102 wakaba 1.3
103 wakaba 1.2 print STDOUT qq[
104 wakaba 1.3 <ul class="navigation" id="nav-items">
105     ];
106     for (@nav) {
107     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108     }
109     print STDOUT qq[
110     </ul>
111 wakaba 1.2 </body>
112     </html>
113     ];
114 wakaba 1.1
115 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
116     check check_manifest/) {
117 wakaba 1.16 next unless defined $time{$_};
118     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119     print $file $char_length, "\t", $time{$_}, "\n";
120     }
121    
122 wakaba 1.1 exit;
123 wakaba 1.35 }
124 wakaba 1.1
125 wakaba 1.19 sub add_error ($$$) {
126     my ($layer, $err, $result) = @_;
127     if (defined $err->{level}) {
128     if ($err->{level} eq 's') {
129     $result->{$layer}->{should}++;
130     $result->{$layer}->{score_min} -= 2;
131     $result->{conforming_min} = 0;
132     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133     $result->{$layer}->{warning}++;
134 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 wakaba 1.19 $result->{$layer}->{unsupported}++;
136     $result->{unsupported} = 1;
137 wakaba 1.37 } elsif ($err->{level} eq 'i') {
138     #
139 wakaba 1.19 } else {
140     $result->{$layer}->{must}++;
141     $result->{$layer}->{score_max} -= 2;
142     $result->{$layer}->{score_min} -= 2;
143     $result->{conforming_min} = 0;
144     $result->{conforming_max} = 0;
145     }
146     } else {
147     $result->{$layer}->{must}++;
148     $result->{$layer}->{score_max} -= 2;
149     $result->{$layer}->{score_min} -= 2;
150     $result->{conforming_min} = 0;
151     $result->{conforming_max} = 0;
152     }
153     } # add_error
154    
155 wakaba 1.31 sub check_and_print ($$) {
156     my ($input, $result) = @_;
157    
158     print_http_header_section ($input, $result);
159    
160     my $doc;
161     my $el;
162 wakaba 1.35 my $cssom;
163 wakaba 1.31 my $manifest;
164 wakaba 1.34 my @subdoc;
165 wakaba 1.31
166     if ($input->{media_type} eq 'text/html') {
167     ($doc, $el) = print_syntax_error_html_section ($input, $result);
168     print_source_string_section
169 wakaba 1.35 ($input,
170     \($input->{s}),
171     $input->{charset} || $doc->input_encoding);
172 wakaba 1.31 } elsif ({
173     'text/xml' => 1,
174     'application/atom+xml' => 1,
175     'application/rss+xml' => 1,
176 wakaba 1.45 'image/svg+xml' => 1,
177 wakaba 1.31 'application/xhtml+xml' => 1,
178     'application/xml' => 1,
179 wakaba 1.45 ## TODO: Should we make all XML MIME Types fall
180     ## into this category?
181    
182     'application/rdf+xml' => 1, ## NOTE: This type has different model.
183 wakaba 1.31 }->{$input->{media_type}}) {
184     ($doc, $el) = print_syntax_error_xml_section ($input, $result);
185 wakaba 1.35 print_source_string_section ($input,
186     \($input->{s}),
187     $doc->input_encoding);
188     } elsif ($input->{media_type} eq 'text/css') {
189     $cssom = print_syntax_error_css_section ($input, $result);
190     print_source_string_section
191     ($input, \($input->{s}),
192     $cssom->manakai_input_encoding);
193 wakaba 1.31 } elsif ($input->{media_type} eq 'text/cache-manifest') {
194     ## TODO: MUST be text/cache-manifest
195     $manifest = print_syntax_error_manifest_section ($input, $result);
196 wakaba 1.35 print_source_string_section ($input, \($input->{s}),
197     'utf-8');
198 wakaba 1.31 } else {
199     ## TODO: Change HTTP status code??
200     print_result_unknown_type_section ($input, $result);
201     }
202    
203     if (defined $doc or defined $el) {
204 wakaba 1.34 $doc->document_uri ($input->{uri});
205     $doc->manakai_entity_base_uri ($input->{base_uri});
206 wakaba 1.32 print_structure_dump_dom_section ($input, $doc, $el);
207     my $elements = print_structure_error_dom_section
208 wakaba 1.34 ($input, $doc, $el, $result, sub {
209     push @subdoc, shift;
210     });
211 wakaba 1.32 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
212 wakaba 1.33 print_listing_section ({
213     id => 'identifiers', label => 'IDs', heading => 'Identifiers',
214     }, $input, $elements->{id}) if keys %{$elements->{id}};
215     print_listing_section ({
216     id => 'terms', label => 'Terms', heading => 'Terms',
217     }, $input, $elements->{term}) if keys %{$elements->{term}};
218     print_listing_section ({
219     id => 'classes', label => 'Classes', heading => 'Classes',
220     }, $input, $elements->{class}) if keys %{$elements->{class}};
221 wakaba 1.48 print_uri_section ($input, $elements->{uri}) if keys %{$elements->{uri}};
222 wakaba 1.45 print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
223 wakaba 1.35 } elsif (defined $cssom) {
224     print_structure_dump_cssom_section ($input, $cssom);
225     ## TODO: CSSOM validation
226 wakaba 1.36 add_error ('structure', {level => 'u'} => $result);
227 wakaba 1.31 } elsif (defined $manifest) {
228 wakaba 1.32 print_structure_dump_manifest_section ($input, $manifest);
229     print_structure_error_manifest_section ($input, $manifest, $result);
230 wakaba 1.31 }
231 wakaba 1.34
232     my $id_prefix = 0;
233     for my $subinput (@subdoc) {
234     $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
235     $subinput->{nested} = 1;
236     $subinput->{base_uri} = $subinput->{container_node}->base_uri
237     unless defined $subinput->{base_uri};
238     my $ebaseuri = htescape ($subinput->{base_uri});
239     push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
240     print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
241     <h2>Subdocument #$id_prefix</h2>
242    
243     <dl>
244     <dt>Internet Media Type</dt>
245     <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
246     <dt>Container Node</dt>
247     <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
248     <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
249     <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
250     </dl>];
251    
252 wakaba 1.35 $subinput->{id_prefix} .= '-';
253 wakaba 1.34 check_and_print ($subinput => $result);
254    
255     print STDOUT qq[</div>];
256     }
257 wakaba 1.31 } # check_and_print
258    
259 wakaba 1.19 sub print_http_header_section ($$) {
260     my ($input, $result) = @_;
261 wakaba 1.9 return unless defined $input->{header_status_code} or
262     defined $input->{header_status_text} or
263 wakaba 1.34 @{$input->{header_field} or []};
264 wakaba 1.9
265 wakaba 1.32 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
266     print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
267 wakaba 1.9 <h2>HTTP Header</h2>
268    
269     <p><strong>Note</strong>: Due to the limitation of the
270     network library in use, the content of this section might
271     not be the real header.</p>
272    
273     <table><tbody>
274     ];
275    
276     if (defined $input->{header_status_code}) {
277     print STDOUT qq[<tr><th scope="row">Status code</th>];
278     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
279     }
280     if (defined $input->{header_status_text}) {
281     print STDOUT qq[<tr><th scope="row">Status text</th>];
282     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
283     }
284    
285     for (@{$input->{header_field}}) {
286     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
287     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
288     }
289    
290     print STDOUT qq[</tbody></table></div>];
291     } # print_http_header_section
292    
293 wakaba 1.19 sub print_syntax_error_html_section ($$) {
294     my ($input, $result) = @_;
295 wakaba 1.18
296     require Encode;
297     require Whatpm::HTML;
298    
299     print STDOUT qq[
300 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
301 wakaba 1.18 <h2>Parse Errors</h2>
302    
303 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
304 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
305 wakaba 1.18
306     my $onerror = sub {
307     my (%opt) = @_;
308     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
309 wakaba 1.38 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
310     qq[</dt>];
311 wakaba 1.18 $type =~ tr/ /-/;
312     $type =~ s/\|/%7C/g;
313     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
314 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
315     print STDOUT qq[$msg</dd>\n];
316 wakaba 1.19
317     add_error ('syntax', \%opt => $result);
318 wakaba 1.18 };
319    
320     my $doc = $dom->create_document;
321     my $el;
322 wakaba 1.35 my $inner_html_element = $input->{inner_html_element};
323 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
324 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
325 wakaba 1.24 my $time1 = time;
326 wakaba 1.48 my $t = \($input->{s});
327     unless ($input->{is_char_string}) {
328     $t = \(Encode::decode ($input->{charset}, $$t));
329     }
330 wakaba 1.24 $time{decode} = time - $time1;
331    
332 wakaba 1.18 $el = $doc->create_element_ns
333     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
334 wakaba 1.24 $time1 = time;
335 wakaba 1.48 Whatpm::HTML->set_inner_html ($el, $$t, $onerror);
336 wakaba 1.24 $time{parse} = time - $time1;
337 wakaba 1.18 } else {
338 wakaba 1.24 my $time1 = time;
339 wakaba 1.48 if ($input->{is_char_string}) {
340     Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror);
341     } else {
342     Whatpm::HTML->parse_byte_string
343     ($input->{charset}, $input->{s} => $doc, $onerror);
344     }
345 wakaba 1.24 $time{parse_html} = time - $time1;
346 wakaba 1.18 }
347 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
348     if defined $input->{official_charset};
349 wakaba 1.24
350 wakaba 1.18 print STDOUT qq[</dl></div>];
351    
352     return ($doc, $el);
353     } # print_syntax_error_html_section
354    
355 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
356     my ($input, $result) = @_;
357 wakaba 1.18
358     require Message::DOM::XMLParserTemp;
359    
360     print STDOUT qq[
361 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
362 wakaba 1.18 <h2>Parse Errors</h2>
363    
364 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
365 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
366 wakaba 1.18
367     my $onerror = sub {
368     my $err = shift;
369     my $line = $err->location->line_number;
370 wakaba 1.35 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
371 wakaba 1.18 print STDOUT $err->location->column_number, "</dt><dd>";
372     print STDOUT htescape $err->text, "</dd>\n";
373 wakaba 1.19
374     add_error ('syntax', {type => $err->text,
375     level => [
376     $err->SEVERITY_FATAL_ERROR => 'm',
377     $err->SEVERITY_ERROR => 'm',
378     $err->SEVERITY_WARNING => 's',
379     ]->[$err->severity]} => $result);
380    
381 wakaba 1.18 return 1;
382     };
383    
384 wakaba 1.48 my $t = \($input->{s});
385     if ($input->{is_char_string}) {
386     require Encode;
387     $t = \(Encode::encode ('utf8', $$t));
388     $input->{charset} = 'utf-8';
389     }
390    
391 wakaba 1.18 my $time1 = time;
392 wakaba 1.48 open my $fh, '<', $t;
393 wakaba 1.18 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
394     ($fh => $dom, $onerror, charset => $input->{charset});
395     $time{parse_xml} = time - $time1;
396 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
397     if defined $input->{official_charset};
398 wakaba 1.18
399     print STDOUT qq[</dl></div>];
400    
401     return ($doc, undef);
402     } # print_syntax_error_xml_section
403    
404 wakaba 1.35 sub get_css_parser () {
405     our $CSSParser;
406     return $CSSParser if $CSSParser;
407    
408     require Whatpm::CSS::Parser;
409     my $p = Whatpm::CSS::Parser->new;
410    
411     $p->{prop}->{$_} = 1 for qw/
412 wakaba 1.37 alignment-baseline
413 wakaba 1.35 background background-attachment background-color background-image
414     background-position background-position-x background-position-y
415     background-repeat border border-bottom border-bottom-color
416     border-bottom-style border-bottom-width border-collapse border-color
417     border-left border-left-color
418     border-left-style border-left-width border-right border-right-color
419     border-right-style border-right-width
420     border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
421     border-style border-top border-top-color border-top-style border-top-width
422     border-width bottom
423     caption-side clear clip color content counter-increment counter-reset
424 wakaba 1.37 cursor direction display dominant-baseline empty-cells float font
425 wakaba 1.35 font-family font-size font-size-adjust font-stretch
426     font-style font-variant font-weight height left
427     letter-spacing line-height
428     list-style list-style-image list-style-position list-style-type
429     margin margin-bottom margin-left margin-right margin-top marker-offset
430     marks max-height max-width min-height min-width opacity -moz-opacity
431     orphans outline outline-color outline-style outline-width overflow
432     overflow-x overflow-y
433     padding padding-bottom padding-left padding-right padding-top
434     page page-break-after page-break-before page-break-inside
435     position quotes right size table-layout
436 wakaba 1.37 text-align text-anchor text-decoration text-indent text-transform
437 wakaba 1.35 top unicode-bidi vertical-align visibility white-space width widows
438 wakaba 1.37 word-spacing writing-mode z-index
439 wakaba 1.35 /;
440     $p->{prop_value}->{display}->{$_} = 1 for qw/
441     block clip inline inline-block inline-table list-item none
442     table table-caption table-cell table-column table-column-group
443     table-header-group table-footer-group table-row table-row-group
444     compact marker
445     /;
446     $p->{prop_value}->{position}->{$_} = 1 for qw/
447     absolute fixed relative static
448     /;
449     $p->{prop_value}->{float}->{$_} = 1 for qw/
450     left right none
451     /;
452     $p->{prop_value}->{clear}->{$_} = 1 for qw/
453     left right none both
454     /;
455     $p->{prop_value}->{direction}->{ltr} = 1;
456     $p->{prop_value}->{direction}->{rtl} = 1;
457     $p->{prop_value}->{marks}->{crop} = 1;
458     $p->{prop_value}->{marks}->{cross} = 1;
459     $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
460     normal bidi-override embed
461     /;
462     for my $prop_name (qw/overflow overflow-x overflow-y/) {
463     $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
464     visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
465     /;
466     }
467     $p->{prop_value}->{visibility}->{$_} = 1 for qw/
468     visible hidden collapse
469     /;
470     $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
471     disc circle square decimal decimal-leading-zero
472     lower-roman upper-roman lower-greek lower-latin
473     upper-latin armenian georgian lower-alpha upper-alpha none
474     hebrew cjk-ideographic hiragana katakana hiragana-iroha
475     katakana-iroha
476     /;
477     $p->{prop_value}->{'list-style-position'}->{outside} = 1;
478     $p->{prop_value}->{'list-style-position'}->{inside} = 1;
479     $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
480     auto always avoid left right
481     /;
482     $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
483     auto always avoid left right
484     /;
485     $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
486     $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
487     $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
488     repeat repeat-x repeat-y no-repeat
489     /;
490     $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
491     $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
492     $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
493     xx-small x-small small medium large x-large xx-large
494     -manakai-xxx-large -webkit-xxx-large
495     larger smaller
496     /;
497     $p->{prop_value}->{'font-style'}->{normal} = 1;
498     $p->{prop_value}->{'font-style'}->{italic} = 1;
499     $p->{prop_value}->{'font-style'}->{oblique} = 1;
500     $p->{prop_value}->{'font-variant'}->{normal} = 1;
501     $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
502     $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
503     qw/normal wider narrower ultra-condensed extra-condensed
504     condensed semi-condensed semi-expanded expanded
505     extra-expanded ultra-expanded/;
506     $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
507     left right center justify begin end
508     /;
509     $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
510     capitalize uppercase lowercase none
511     /;
512     $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
513 wakaba 1.36 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
514 wakaba 1.35 /;
515 wakaba 1.37 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
516     lr rl tb lr-tb rl-tb tb-rl
517     /;
518     $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
519     start middle end
520     /;
521     $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
522     auto use-script no-change reset-size ideographic alphabetic
523     hanging mathematical central middle text-after-edge text-before-edge
524     /;
525     $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
526     auto baseline before-edge text-before-edge middle central
527     after-edge text-after-edge ideographic alphabetic hanging
528     mathematical
529     /;
530 wakaba 1.35 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
531     none blink underline overline line-through
532     /;
533     $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
534     top bottom left right
535     /;
536     $p->{prop_value}->{'table-layout'}->{auto} = 1;
537     $p->{prop_value}->{'table-layout'}->{fixed} = 1;
538 wakaba 1.36 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
539 wakaba 1.35 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
540     $p->{prop_value}->{'empty-cells'}->{show} = 1;
541     $p->{prop_value}->{'empty-cells'}->{hide} = 1;
542     $p->{prop_value}->{cursor}->{$_} = 1 for qw/
543     auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
544     se-resize sw-resize s-resize w-resize text wait help progress
545     /;
546     for my $prop (qw/border-top-style border-left-style
547     border-bottom-style border-right-style outline-style/) {
548     $p->{prop_value}->{$prop}->{$_} = 1 for qw/
549     none hidden dotted dashed solid double groove ridge inset outset
550     /;
551     }
552     for my $prop (qw/color background-color
553     border-bottom-color border-left-color border-right-color
554     border-top-color border-color/) {
555     $p->{prop_value}->{$prop}->{transparent} = 1;
556     $p->{prop_value}->{$prop}->{flavor} = 1;
557     $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
558     }
559     $p->{prop_value}->{'outline-color'}->{invert} = 1;
560     $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
561     $p->{pseudo_class}->{$_} = 1 for qw/
562     active checked disabled empty enabled first-child first-of-type
563     focus hover indeterminate last-child last-of-type link only-child
564     only-of-type root target visited
565     lang nth-child nth-last-child nth-of-type nth-last-of-type not
566     -manakai-contains -manakai-current
567     /;
568     $p->{pseudo_element}->{$_} = 1 for qw/
569     after before first-letter first-line
570     /;
571    
572     return $CSSParser = $p;
573     } # get_css_parser
574    
575     sub print_syntax_error_css_section ($$) {
576     my ($input, $result) = @_;
577    
578     print STDOUT qq[
579     <div id="$input->{id_prefix}parse-errors" class="section">
580     <h2>Parse Errors</h2>
581    
582 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
583 wakaba 1.35 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
584    
585     my $p = get_css_parser ();
586 wakaba 1.37 $p->init;
587 wakaba 1.35 $p->{onerror} = sub {
588     my (%opt) = @_;
589     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
590     if ($opt{token}) {
591     print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
592     } else {
593     print STDOUT qq[<dt class="$cls">Unknown location];
594     }
595     if (defined $opt{value}) {
596     print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
597     } elsif (defined $opt{token}) {
598     print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
599     }
600     $type =~ tr/ /-/;
601     $type =~ s/\|/%7C/g;
602     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
603     print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
604     print STDOUT qq[$msg</dd>\n];
605    
606     add_error ('syntax', \%opt => $result);
607     };
608     $p->{href} = $input->{uri};
609     $p->{base_uri} = $input->{base_uri};
610    
611 wakaba 1.37 # if ($parse_mode eq 'q') {
612     # $p->{unitless_px} = 1;
613     # $p->{hashless_color} = 1;
614     # }
615    
616     ## TODO: Make $input->{s} a ref.
617    
618 wakaba 1.35 my $s = \$input->{s};
619     my $charset;
620     unless ($input->{is_char_string}) {
621     require Encode;
622     if (defined $input->{charset}) {## TODO: IANA->Perl
623     $charset = $input->{charset};
624     $s = \(Encode::decode ($input->{charset}, $$s));
625     } else {
626     ## TODO: charset detection
627     $s = \(Encode::decode ($charset = 'utf-8', $$s));
628     }
629     }
630    
631     my $cssom = $p->parse_char_string ($$s);
632     $cssom->manakai_input_encoding ($charset) if defined $charset;
633    
634     print STDOUT qq[</dl></div>];
635    
636     return $cssom;
637     } # print_syntax_error_css_section
638    
639 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
640     my ($input, $result) = @_;
641    
642     require Whatpm::CacheManifest;
643    
644     print STDOUT qq[
645 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
646 wakaba 1.22 <h2>Parse Errors</h2>
647    
648 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
649 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
650 wakaba 1.22
651     my $onerror = sub {
652     my (%opt) = @_;
653     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
654 wakaba 1.32 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
655     qq[</dt>];
656 wakaba 1.22 $type =~ tr/ /-/;
657     $type =~ s/\|/%7C/g;
658     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
659 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
660     print STDOUT qq[$msg</dd>\n];
661 wakaba 1.22
662     add_error ('syntax', \%opt => $result);
663     };
664    
665 wakaba 1.48 my $m = $input->{is_char_string} ? 'parse_char_string' : 'parse_byte_string';
666 wakaba 1.22 my $time1 = time;
667 wakaba 1.48 my $manifest = Whatpm::CacheManifest->$m
668 wakaba 1.22 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
669     $time{parse_manifest} = time - $time1;
670    
671     print STDOUT qq[</dl></div>];
672    
673     return $manifest;
674     } # print_syntax_error_manifest_section
675    
676 wakaba 1.35 sub print_source_string_section ($$$) {
677     my $input = shift;
678     my $s;
679     unless ($input->{is_char_string}) {
680     require Encode;
681     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
682     return unless $enc;
683    
684     $s = \($enc->decode (${$_[0]}));
685     } else {
686     $s = $_[0];
687     }
688 wakaba 1.9
689     my $i = 1;
690 wakaba 1.32 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
691     print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
692 wakaba 1.9 <h2>Document Source</h2>
693     <ol lang="">\n];
694 wakaba 1.7 if (length $$s) {
695 wakaba 1.41 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
696 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
697     "</li>\n";
698 wakaba 1.7 $i++;
699     }
700 wakaba 1.41 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
701 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
702     "</li>\n";
703 wakaba 1.7 }
704     } else {
705 wakaba 1.32 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
706 wakaba 1.2 }
707 wakaba 1.39 print STDOUT "</ol></div>
708 wakaba 1.42 <script>
709     addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
710     </script>";
711 wakaba 1.9 } # print_input_string_section
712 wakaba 1.2
713 wakaba 1.35 sub print_document_tree ($$) {
714     my ($input, $node) = @_;
715    
716 wakaba 1.2 my $r = '<ol class="xoxo">';
717 wakaba 1.1
718 wakaba 1.2 my @node = ($node);
719 wakaba 1.1 while (@node) {
720     my $child = shift @node;
721 wakaba 1.2 unless (ref $child) {
722     $r .= $child;
723     next;
724     }
725    
726 wakaba 1.32 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
727 wakaba 1.2 my $nt = $child->node_type;
728     if ($nt == $child->ELEMENT_NODE) {
729 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
730     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
731 wakaba 1.2 '</code>'; ## ISSUE: case
732    
733     if ($child->has_attributes) {
734     $r .= '<ul class="attributes">';
735 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
736 wakaba 1.2 @{$child->attributes}) {
737 wakaba 1.32 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
738 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
739     }
740     $r .= '</ul>';
741     }
742    
743 wakaba 1.7 if ($child->has_child_nodes) {
744 wakaba 1.2 $r .= '<ol class="children">';
745 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
746     } else {
747     $r .= '</li>';
748 wakaba 1.2 }
749     } elsif ($nt == $child->TEXT_NODE) {
750 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
751 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
752 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
753 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
754 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
755 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
756 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
757 wakaba 1.7 $r .= qq[<ul class="attributes">];
758 wakaba 1.27 my $cp = $child->manakai_charset;
759     if (defined $cp) {
760     $r .= qq[<li><code>charset</code> parameter = <code>];
761     $r .= htescape ($cp) . qq[</code></li>];
762     }
763     $r .= qq[<li><code>inputEncoding</code> = ];
764     my $ie = $child->input_encoding;
765     if (defined $ie) {
766     $r .= qq[<code>@{[htescape ($ie)]}</code>];
767     if ($child->manakai_has_bom) {
768     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
769     }
770     } else {
771     $r .= qq[(<code>null</code>)];
772     }
773 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
774     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
775 wakaba 1.9 unless ($child->manakai_is_html) {
776     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
777     if (defined $child->xml_encoding) {
778     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
779     } else {
780     $r .= qq[<li>XML encoding = (null)</li>];
781     }
782     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
783     }
784 wakaba 1.7 $r .= qq[</ul>];
785 wakaba 1.2 if ($child->has_child_nodes) {
786 wakaba 1.7 $r .= '<ol class="children">';
787 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
788 wakaba 1.1 }
789 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
790 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
791     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
792     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
793     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
794 wakaba 1.2 $r .= '</ul></li>';
795     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
796 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
797 wakaba 1.1 } else {
798 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
799 wakaba 1.1 }
800     }
801 wakaba 1.2
802     $r .= '</ol>';
803     print STDOUT $r;
804     } # print_document_tree
805 wakaba 1.1
806 wakaba 1.32 sub print_structure_dump_dom_section ($$$) {
807     my ($input, $doc, $el) = @_;
808 wakaba 1.18
809     print STDOUT qq[
810 wakaba 1.32 <div id="$input->{id_prefix}document-tree" class="section">
811 wakaba 1.18 <h2>Document Tree</h2>
812     ];
813 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
814     unless $input->{nested};
815 wakaba 1.18
816 wakaba 1.35 print_document_tree ($input, $el || $doc);
817 wakaba 1.18
818     print STDOUT qq[</div>];
819 wakaba 1.22 } # print_structure_dump_dom_section
820    
821 wakaba 1.35 sub print_structure_dump_cssom_section ($$) {
822     my ($input, $cssom) = @_;
823    
824     print STDOUT qq[
825     <div id="$input->{id_prefix}document-tree" class="section">
826     <h2>Document Tree</h2>
827     ];
828     push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
829     unless $input->{nested};
830    
831     ## TODO:
832     print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
833    
834     print STDOUT qq[</div>];
835     } # print_structure_dump_cssom_section
836    
837 wakaba 1.32 sub print_structure_dump_manifest_section ($$) {
838     my ($input, $manifest) = @_;
839 wakaba 1.22
840     print STDOUT qq[
841 wakaba 1.32 <div id="$input->{id_prefix}dump-manifest" class="section">
842 wakaba 1.22 <h2>Cache Manifest</h2>
843     ];
844 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
845     unless $input->{nested};
846 wakaba 1.22
847     print STDOUT qq[<dl><dt>Explicit entries</dt>];
848 wakaba 1.37 my $i = 0;
849 wakaba 1.22 for my $uri (@{$manifest->[0]}) {
850     my $euri = htescape ($uri);
851 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
852 wakaba 1.22 }
853    
854     print STDOUT qq[<dt>Fallback entries</dt><dd>
855     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
856     <th scope=row>Fallback Entry</tr><tbody>];
857     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
858     my $euri = htescape ($uri);
859     my $euri2 = htescape ($manifest->[1]->{$uri});
860 wakaba 1.37 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
861     <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
862 wakaba 1.22 }
863    
864     print STDOUT qq[</table><dt>Online whitelist</dt>];
865     for my $uri (@{$manifest->[2]}) {
866     my $euri = htescape ($uri);
867 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
868 wakaba 1.22 }
869 wakaba 1.18
870 wakaba 1.22 print STDOUT qq[</dl></div>];
871     } # print_structure_dump_manifest_section
872    
873 wakaba 1.34 sub print_structure_error_dom_section ($$$$$) {
874     my ($input, $doc, $el, $result, $onsubdoc) = @_;
875 wakaba 1.18
876 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
877 wakaba 1.18 <h2>Document Errors</h2>
878    
879 wakaba 1.42 <dl id=document-errors-list>];
880 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
881     unless $input->{nested};
882 wakaba 1.18
883     require Whatpm::ContentChecker;
884     my $onerror = sub {
885     my %opt = @_;
886     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
887     $type =~ tr/ /-/;
888     $type =~ s/\|/%7C/g;
889     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
890 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
891 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
892     print STDOUT $msg, "</dd>\n";
893 wakaba 1.19 add_error ('structure', \%opt => $result);
894 wakaba 1.18 };
895    
896     my $elements;
897     my $time1 = time;
898     if ($el) {
899 wakaba 1.34 $elements = Whatpm::ContentChecker->check_element
900     ($el, $onerror, $onsubdoc);
901 wakaba 1.18 } else {
902 wakaba 1.34 $elements = Whatpm::ContentChecker->check_document
903     ($doc, $onerror, $onsubdoc);
904 wakaba 1.18 }
905     $time{check} = time - $time1;
906    
907 wakaba 1.42 print STDOUT qq[</dl>
908     <script>
909     addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
910     </script></div>];
911 wakaba 1.18
912     return $elements;
913 wakaba 1.22 } # print_structure_error_dom_section
914    
915     sub print_structure_error_manifest_section ($$$) {
916 wakaba 1.32 my ($input, $manifest, $result) = @_;
917 wakaba 1.22
918 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
919 wakaba 1.22 <h2>Document Errors</h2>
920    
921     <dl>];
922 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
923     unless $input->{nested};
924 wakaba 1.22
925     require Whatpm::CacheManifest;
926     Whatpm::CacheManifest->check_manifest ($manifest, sub {
927     my %opt = @_;
928     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
929     $type =~ tr/ /-/;
930     $type =~ s/\|/%7C/g;
931     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
932 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
933 wakaba 1.22 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
934     add_error ('structure', \%opt => $result);
935     });
936    
937     print STDOUT qq[</div>];
938     } # print_structure_error_manifest_section
939 wakaba 1.18
940 wakaba 1.32 sub print_table_section ($$) {
941     my ($input, $tables) = @_;
942 wakaba 1.18
943 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
944     unless $input->{nested};
945 wakaba 1.18 print STDOUT qq[
946 wakaba 1.32 <div id="$input->{id_prefix}tables" class="section">
947 wakaba 1.18 <h2>Tables</h2>
948    
949     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
950     <script src="../table-script.js" type="text/javascript"></script>
951     <noscript>
952     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
953     </noscript>
954     ];
955    
956     require JSON;
957    
958     my $i = 0;
959     for my $table_el (@$tables) {
960     $i++;
961 wakaba 1.32 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
962     get_node_link ($input, $table_el) . q[</h3>];
963 wakaba 1.18
964     ## TODO: Make |ContentChecker| return |form_table| result
965     ## so that this script don't have to run the algorithm twice.
966     my $table = Whatpm::HTMLTable->form_table ($table_el);
967    
968     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
969     next unless $_;
970     delete $_->{element};
971     }
972    
973     for (@{$table->{row_group}}) {
974     next unless $_;
975     next unless $_->{element};
976     $_->{type} = $_->{element}->manakai_local_name;
977     delete $_->{element};
978     }
979    
980     for (@{$table->{cell}}) {
981     next unless $_;
982     for (@{$_}) {
983     next unless $_;
984     for (@$_) {
985     $_->{id} = refaddr $_->{element} if defined $_->{element};
986     delete $_->{element};
987     $_->{is_header} = $_->{is_header} ? 1 : 0;
988     }
989     }
990     }
991    
992     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
993     print STDOUT JSON::objToJson ($table);
994 wakaba 1.32 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
995     print STDOUT qq[, '$input->{id_prefix}');</script>];
996 wakaba 1.18 }
997    
998     print STDOUT qq[</div>];
999     } # print_table_section
1000    
1001 wakaba 1.33 sub print_listing_section ($$$) {
1002     my ($opt, $input, $ids) = @_;
1003 wakaba 1.18
1004 wakaba 1.35 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
1005     unless $input->{nested};
1006 wakaba 1.18 print STDOUT qq[
1007 wakaba 1.33 <div id="$input->{id_prefix}$opt->{id}" class="section">
1008     <h2>$opt->{heading}</h2>
1009 wakaba 1.18
1010     <dl>
1011     ];
1012     for my $id (sort {$a cmp $b} keys %$ids) {
1013     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
1014     for (@{$ids->{$id}}) {
1015 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
1016 wakaba 1.18 }
1017     }
1018     print STDOUT qq[</dl></div>];
1019 wakaba 1.33 } # print_listing_section
1020 wakaba 1.18
1021 wakaba 1.48 sub print_uri_section ($$$) {
1022     my ($input, $uris) = @_;
1023    
1024     ## NOTE: URIs contained in the DOM (i.e. in HTML or XML documents),
1025     ## except for those in RDF triples.
1026     ## TODO: URIs in CSS
1027    
1028     push @nav, ['#' . $input->{id_prefix} . 'uris' => 'URIs']
1029     unless $input->{nested};
1030     print STDOUT qq[
1031     <div id="$input->{id_prefix}uris" class="section">
1032     <h2>URIs</h2>
1033    
1034     <dl>];
1035     for my $uri (sort {$a cmp $b} keys %$uris) {
1036     my $euri = htescape ($uri);
1037     print STDOUT qq[<dt><code class=uri>&lt;<a href="$euri">$euri</a>></code>];
1038     my $eccuri = htescape (get_cc_uri ($uri));
1039     print STDOUT qq[<dd><a href="$eccuri">Check conformance of this document</a>];
1040     print STDOUT qq[<dd>Found at: <ul>];
1041     for my $entry (@{$uris->{$uri}}) {
1042     print STDOUT qq[<li>], get_node_link ($input, $entry->{node});
1043     if (keys %{$entry->{type} or {}}) {
1044     print STDOUT ' (';
1045     print STDOUT join ', ', map {
1046     {
1047     hyperlink => 'Hyperlink',
1048     resource => 'Link to an external resource',
1049     namespace => 'Namespace URI',
1050     cite => 'Citation or link to a long description',
1051     embedded => 'Link to an embedded content',
1052     base => 'Base URI',
1053     action => 'Submission URI',
1054     }->{$_}
1055     or
1056     htescape ($_)
1057     } keys %{$entry->{type}};
1058     print STDOUT ')';
1059     }
1060     }
1061     print STDOUT qq[</ul>];
1062     }
1063     print STDOUT qq[</dl></div>];
1064     } # print_uri_section
1065    
1066 wakaba 1.45 sub print_rdf_section ($$$) {
1067     my ($input, $rdfs) = @_;
1068    
1069     push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
1070     unless $input->{nested};
1071     print STDOUT qq[
1072     <div id="$input->{id_prefix}rdf" class="section">
1073     <h2>RDF Triples</h2>
1074    
1075     <dl>];
1076     my $i = 0;
1077     for my $rdf (@$rdfs) {
1078     print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
1079     print STDOUT get_node_link ($input, $rdf->[0]);
1080     print STDOUT qq[<dd><dl>];
1081     for my $triple (@{$rdf->[1]}) {
1082     print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
1083     print STDOUT get_rdf_resource_html ($triple->[1]);
1084     print STDOUT ' ';
1085     print STDOUT get_rdf_resource_html ($triple->[2]);
1086     print STDOUT ' ';
1087     print STDOUT get_rdf_resource_html ($triple->[3]);
1088     }
1089     print STDOUT qq[</dl>];
1090     }
1091     print STDOUT qq[</dl></div>];
1092     } # print_rdf_section
1093    
1094     sub get_rdf_resource_html ($) {
1095     my $resource = shift;
1096 wakaba 1.46 if (defined $resource->{uri}) {
1097 wakaba 1.45 my $euri = htescape ($resource->{uri});
1098     return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1099     '</a>></code>';
1100 wakaba 1.46 } elsif (defined $resource->{bnodeid}) {
1101 wakaba 1.45 return htescape ('_:' . $resource->{bnodeid});
1102     } elsif ($resource->{nodes}) {
1103     return '(rdf:XMLLiteral)';
1104     } elsif (defined $resource->{value}) {
1105     my $elang = htescape (defined $resource->{language}
1106     ? $resource->{language} : '');
1107     my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
1108     if (defined $resource->{datatype}) {
1109     my $euri = htescape ($resource->{datatype});
1110     $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1111     '</a>></code>';
1112     } elsif (length $resource->{language}) {
1113     $r .= '@' . htescape ($resource->{language});
1114     }
1115     return $r;
1116     } else {
1117     return '??';
1118     }
1119     } # get_rdf_resource_html
1120    
1121 wakaba 1.19 sub print_result_section ($) {
1122     my $result = shift;
1123    
1124     print STDOUT qq[
1125     <div id="result-summary" class="section">
1126     <h2>Result</h2>];
1127    
1128 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
1129 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1130     checker cannot decide whether the document is conforming or
1131     not, since the document contains one or more unsupported
1132 wakaba 1.21 features. The document might or might not be conforming.</p>];
1133 wakaba 1.19 } elsif ($result->{conforming_min}) {
1134     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1135     found in this document.</p>];
1136     } elsif ($result->{conforming_max}) {
1137     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1138     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1139     it might be conforming.</p>];
1140     } else {
1141     print STDOUT qq[<p class=FAIL id=result-para>This document is
1142     <strong><em>non</em>-conforming</strong>.</p>];
1143     }
1144    
1145     print STDOUT qq[<table>
1146     <colgroup><col><colgroup><col><col><col><colgroup><col>
1147     <thead>
1148 wakaba 1.23 <tr><th scope=col></th>
1149     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1150     Errors</a></th>
1151     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1152     Errors</a></th>
1153     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1154     <th scope=col>Score</th></tr></thead><tbody>];
1155 wakaba 1.19
1156     my $must_error = 0;
1157     my $should_error = 0;
1158     my $warning = 0;
1159     my $score_min = 0;
1160     my $score_max = 0;
1161     my $score_base = 20;
1162 wakaba 1.21 my $score_unit = $score_base / 100;
1163 wakaba 1.19 for (
1164     [Transfer => 'transfer', ''],
1165     [Character => 'char', ''],
1166     [Syntax => 'syntax', '#parse-errors'],
1167     [Structure => 'structure', '#document-errors'],
1168     ) {
1169     $must_error += ($result->{$_->[1]}->{must} += 0);
1170     $should_error += ($result->{$_->[1]}->{should} += 0);
1171     $warning += ($result->{$_->[1]}->{warning} += 0);
1172 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1173     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1174 wakaba 1.19
1175     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1176     my $label = $_->[0];
1177     if ($result->{$_->[1]}->{must} or
1178     $result->{$_->[1]}->{should} or
1179     $result->{$_->[1]}->{warning} or
1180     $result->{$_->[1]}->{unsupported}) {
1181     $label = qq[<a href="$_->[2]">$label</a>];
1182     }
1183    
1184     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1185     if ($uncertain) {
1186 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1187 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1188 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1189 wakaba 1.19 } else {
1190 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1191 wakaba 1.19 }
1192     }
1193    
1194     $score_max += $score_base;
1195    
1196     print STDOUT qq[
1197     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1198     </tbody>
1199 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
1200     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1201     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1202     <td>$warning?</td>
1203     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1204 wakaba 1.19 </table>
1205    
1206     <p><strong>Important</strong>: This conformance checking service
1207     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1208     </div>];
1209     push @nav, ['#result-summary' => 'Result'];
1210     } # print_result_section
1211    
1212 wakaba 1.24 sub print_result_unknown_type_section ($$) {
1213     my ($input, $result) = @_;
1214 wakaba 1.18
1215 wakaba 1.24 my $euri = htescape ($input->{uri});
1216 wakaba 1.18 print STDOUT qq[
1217 wakaba 1.35 <div id="$input->{id_prefix}parse-errors" class="section">
1218 wakaba 1.24 <h2>Errors</h2>
1219    
1220     <dl>
1221     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1222     <dd class=unsupported><strong><a href="../error-description#level-u">Not
1223     supported</a></strong>:
1224     Media type
1225     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1226     is not supported.</dd>
1227     </dl>
1228 wakaba 1.18 </div>
1229     ];
1230 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1231     unless $input->{nested};
1232 wakaba 1.30 add_error (char => {level => 'u'} => $result);
1233     add_error (syntax => {level => 'u'} => $result);
1234     add_error (structure => {level => 'u'} => $result);
1235 wakaba 1.18 } # print_result_unknown_type_section
1236    
1237     sub print_result_input_error_section ($) {
1238     my $input = shift;
1239     print STDOUT qq[<div class="section" id="result-summary">
1240     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1241     </div>];
1242     push @nav, ['#result-summary' => 'Result'];
1243 wakaba 1.32 } # print_result_input_error_section
1244 wakaba 1.18
1245 wakaba 1.32 sub get_error_label ($$) {
1246     my ($input, $err) = @_;
1247 wakaba 1.22
1248     my $r = '';
1249    
1250 wakaba 1.42 my $line;
1251     my $column;
1252    
1253     if (defined $err->{node}) {
1254     $line = $err->{node}->get_user_data ('manakai_source_line');
1255     if (defined $line) {
1256     $column = $err->{node}->get_user_data ('manakai_source_column');
1257 wakaba 1.40 } else {
1258 wakaba 1.42 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1259     my $owner = $err->{node}->owner_element;
1260     $line = $owner->get_user_data ('manakai_source_line');
1261     $column = $owner->get_user_data ('manakai_source_column');
1262 wakaba 1.43 } else {
1263     my $parent = $err->{node}->parent_node;
1264 wakaba 1.44 if ($parent) {
1265     $line = $parent->get_user_data ('manakai_source_line');
1266     $column = $parent->get_user_data ('manakai_source_column');
1267     }
1268 wakaba 1.42 }
1269     }
1270     }
1271     unless (defined $line) {
1272     if (defined $err->{token} and defined $err->{token}->{line}) {
1273     $line = $err->{token}->{line};
1274     $column = $err->{token}->{column};
1275     } elsif (defined $err->{line}) {
1276     $line = $err->{line};
1277     $column = $err->{column};
1278 wakaba 1.40 }
1279 wakaba 1.42 }
1280    
1281     if (defined $line) {
1282     if (defined $column and $column > 0) {
1283     $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1284 wakaba 1.22 } else {
1285 wakaba 1.42 $line = $line - 1 || 1;
1286     $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1287 wakaba 1.22 }
1288     }
1289    
1290     if (defined $err->{node}) {
1291     $r .= ' ' if length $r;
1292 wakaba 1.42 $r .= get_node_link ($input, $err->{node});
1293 wakaba 1.22 }
1294    
1295     if (defined $err->{index}) {
1296 wakaba 1.37 if (length $r) {
1297     $r .= ', Index ' . (0+$err->{index});
1298     } else {
1299     $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1300     . (0+$err->{index}) . '</a>';
1301     }
1302 wakaba 1.22 }
1303    
1304     if (defined $err->{value}) {
1305     $r .= ' ' if length $r;
1306     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1307     }
1308    
1309     return $r;
1310     } # get_error_label
1311    
1312 wakaba 1.23 sub get_error_level_label ($) {
1313     my $err = shift;
1314    
1315     my $r = '';
1316    
1317     if (not defined $err->{level} or $err->{level} eq 'm') {
1318     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1319     error</a></strong>: ];
1320     } elsif ($err->{level} eq 's') {
1321     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1322     error</a></strong>: ];
1323     } elsif ($err->{level} eq 'w') {
1324     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1325     ];
1326 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1327 wakaba 1.23 $r = qq[<strong><a href="../error-description#level-u">Not
1328     supported</a></strong>: ];
1329 wakaba 1.37 } elsif ($err->{level} eq 'i') {
1330     $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1331 wakaba 1.23 } else {
1332     my $elevel = htescape ($err->{level});
1333     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1334     ];
1335     }
1336    
1337     return $r;
1338     } # get_error_level_label
1339    
1340 wakaba 1.1 sub get_node_path ($) {
1341     my $node = shift;
1342     my @r;
1343     while (defined $node) {
1344     my $rs;
1345     if ($node->node_type == 1) {
1346 wakaba 1.47 $rs = $node->node_name;
1347 wakaba 1.1 $node = $node->parent_node;
1348     } elsif ($node->node_type == 2) {
1349 wakaba 1.47 $rs = '@' . $node->node_name;
1350 wakaba 1.1 $node = $node->owner_element;
1351     } elsif ($node->node_type == 3) {
1352     $rs = '"' . $node->data . '"';
1353     $node = $node->parent_node;
1354     } elsif ($node->node_type == 9) {
1355 wakaba 1.9 @r = ('') unless @r;
1356 wakaba 1.1 $rs = '';
1357     $node = $node->parent_node;
1358     } else {
1359     $rs = '#' . $node->node_type;
1360     $node = $node->parent_node;
1361     }
1362     unshift @r, $rs;
1363     }
1364     return join '/', @r;
1365     } # get_node_path
1366    
1367 wakaba 1.32 sub get_node_link ($$) {
1368     return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1369     htescape (get_node_path ($_[1])) . qq[</a>];
1370 wakaba 1.6 } # get_node_link
1371    
1372 wakaba 1.7 {
1373     my $Msg = {};
1374    
1375     sub load_text_catalog ($) {
1376     my $lang = shift; # MUST be a canonical lang name
1377 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
1378     or die "$0: cc-msg.$lang.txt: $!";
1379 wakaba 1.7 while (<$file>) {
1380     if (s/^([^;]+);([^;]*);//) {
1381     my ($type, $cls, $msg) = ($1, $2, $_);
1382     $msg =~ tr/\x0D\x0A//d;
1383     $Msg->{$type} = [$cls, $msg];
1384     }
1385     }
1386     } # load_text_catalog
1387    
1388     sub get_text ($) {
1389 wakaba 1.15 my ($type, $level, $node) = @_;
1390 wakaba 1.7 $type = $level . ':' . $type if defined $level;
1391 wakaba 1.29 $level = 'm' unless defined $level;
1392 wakaba 1.7 my @arg;
1393     {
1394     if (defined $Msg->{$type}) {
1395     my $msg = $Msg->{$type}->[1];
1396 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
1397     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1398     }ge;
1399 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1400     UNIVERSAL::can ($node, 'get_attribute_ns')
1401     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1402     }ge;
1403     $msg =~ s{<var>{\@}</var>}{
1404     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1405     }ge;
1406 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
1407     UNIVERSAL::can ($node, 'manakai_local_name')
1408     ? htescape ($node->manakai_local_name) : ''
1409     }ge;
1410     $msg =~ s{<var>{element-local-name}</var>}{
1411     (UNIVERSAL::can ($node, 'owner_element') and
1412     $node->owner_element)
1413     ? htescape ($node->owner_element->manakai_local_name)
1414     : ''
1415     }ge;
1416 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1417 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
1418     unshift @arg, $1;
1419     redo;
1420     }
1421     }
1422 wakaba 1.29 return ($type, 'level-'.$level, htescape ($_[0]));
1423 wakaba 1.7 } # get_text
1424    
1425     }
1426    
1427 wakaba 1.48 sub encode_uri_component ($) {
1428     require Encode;
1429     my $s = Encode::encode ('utf8', shift);
1430     $s =~ s/([^0-9A-Za-z_.~-])/sprintf '%%%02X', ord $1/ge;
1431     return $s;
1432     } # encode_uri_component
1433    
1434     sub get_cc_uri ($) {
1435     return './?uri=' . encode_uri_component ($_[0]);
1436     } # get_cc_uri
1437    
1438 wakaba 1.9 sub get_input_document ($$) {
1439     my ($http, $dom) = @_;
1440    
1441 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
1442 wakaba 1.9 my $r = {};
1443     if (defined $request_uri and length $request_uri) {
1444     my $uri = $dom->create_uri_reference ($request_uri);
1445     unless ({
1446     http => 1,
1447     }->{lc $uri->uri_scheme}) {
1448     return {uri => $request_uri, request_uri => $request_uri,
1449     error_status_text => 'URI scheme not allowed'};
1450     }
1451    
1452     require Message::Util::HostPermit;
1453     my $host_permit = new Message::Util::HostPermit;
1454     $host_permit->add_rule (<<EOH);
1455     Allow host=suika port=80
1456     Deny host=suika
1457     Allow host=suika.fam.cx port=80
1458     Deny host=suika.fam.cx
1459     Deny host=localhost
1460     Deny host=*.localdomain
1461     Deny ipv4=0.0.0.0/8
1462     Deny ipv4=10.0.0.0/8
1463     Deny ipv4=127.0.0.0/8
1464     Deny ipv4=169.254.0.0/16
1465     Deny ipv4=172.0.0.0/11
1466     Deny ipv4=192.0.2.0/24
1467     Deny ipv4=192.88.99.0/24
1468     Deny ipv4=192.168.0.0/16
1469     Deny ipv4=198.18.0.0/15
1470     Deny ipv4=224.0.0.0/4
1471     Deny ipv4=255.255.255.255/32
1472     Deny ipv6=0::0/0
1473     Allow host=*
1474     EOH
1475     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1476     return {uri => $request_uri, request_uri => $request_uri,
1477     error_status_text => 'Connection to the host is forbidden'};
1478     }
1479    
1480     require LWP::UserAgent;
1481     my $ua = WDCC::LWPUA->new;
1482     $ua->{wdcc_dom} = $dom;
1483     $ua->{wdcc_host_permit} = $host_permit;
1484     $ua->agent ('Mozilla'); ## TODO: for now.
1485     $ua->parse_head (0);
1486     $ua->protocols_allowed ([qw/http/]);
1487     $ua->max_size (1000_000);
1488     my $req = HTTP::Request->new (GET => $request_uri);
1489 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1490 wakaba 1.9 my $res = $ua->request ($req);
1491 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1492     if ($res->is_success or $http->get_parameter ('error-page')) {
1493 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1494     $r->{uri} = $res->request->uri;
1495     $r->{request_uri} = $request_uri;
1496    
1497     ## TODO: More strict parsing...
1498     my $ct = $res->header ('Content-Type');
1499 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1500 wakaba 1.9 $r->{charset} = lc $1;
1501     $r->{charset} =~ tr/\\//d;
1502 wakaba 1.26 $r->{official_charset} = $r->{charset};
1503 wakaba 1.9 }
1504    
1505 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1506 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1507     $r->{charset_overridden}
1508     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1509     $r->{charset} = $input_charset;
1510 wakaba 1.25 }
1511    
1512     ## TODO: Support for HTTP Content-Encoding
1513 wakaba 1.9
1514     $r->{s} = ''.$res->content;
1515 wakaba 1.25
1516     require Whatpm::ContentType;
1517     ($r->{official_type}, $r->{media_type})
1518     = Whatpm::ContentType->get_sniffed_type
1519     (get_file_head => sub {
1520     return substr $r->{s}, 0, shift;
1521     },
1522     http_content_type_byte => $ct,
1523     has_http_content_encoding =>
1524     defined $res->header ('Content-Encoding'),
1525     supported_image_types => {});
1526 wakaba 1.9 } else {
1527     $r->{uri} = $res->request->uri;
1528     $r->{request_uri} = $request_uri;
1529     $r->{error_status_text} = $res->status_line;
1530     }
1531    
1532     $r->{header_field} = [];
1533     $res->scan (sub {
1534     push @{$r->{header_field}}, [$_[0], $_[1]];
1535     });
1536     $r->{header_status_code} = $res->code;
1537     $r->{header_status_text} = $res->message;
1538     } else {
1539 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1540 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1541     $r->{request_uri} = q<thismessage:/>;
1542     $r->{base_uri} = q<thismessage:/>;
1543 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1544 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1545     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1546 wakaba 1.26 $r->{official_charset} = $r->{charset};
1547 wakaba 1.9 $r->{header_field} = [];
1548 wakaba 1.25
1549     require Whatpm::ContentType;
1550     ($r->{official_type}, $r->{media_type})
1551     = Whatpm::ContentType->get_sniffed_type
1552     (get_file_head => sub {
1553     return substr $r->{s}, 0, shift;
1554     },
1555     http_content_type_byte => undef,
1556     has_http_content_encoding => 0,
1557     supported_image_types => {});
1558 wakaba 1.9 }
1559    
1560 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1561 wakaba 1.9 if (defined $input_format and length $input_format) {
1562     $r->{media_type_overridden}
1563     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1564     $r->{media_type} = $input_format;
1565     }
1566     if (defined $r->{s} and not defined $r->{media_type}) {
1567     $r->{media_type} = 'text/html';
1568     $r->{media_type_overridden} = 1;
1569     }
1570    
1571     if ($r->{media_type} eq 'text/xml') {
1572     unless (defined $r->{charset}) {
1573     $r->{charset} = 'us-ascii';
1574 wakaba 1.26 $r->{official_charset} = $r->{charset};
1575 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1576     $r->{charset_overridden} = 0;
1577     }
1578     }
1579    
1580     if (length $r->{s} > 1000_000) {
1581     $r->{error_status_text} = 'Entity-body too large';
1582     delete $r->{s};
1583     return $r;
1584     }
1585    
1586 wakaba 1.35 $r->{inner_html_element} = $http->get_parameter ('e');
1587    
1588 wakaba 1.9 return $r;
1589     } # get_input_document
1590    
1591     package WDCC::LWPUA;
1592     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1593    
1594     sub redirect_ok {
1595     my $ua = shift;
1596     unless ($ua->SUPER::redirect_ok (@_)) {
1597     return 0;
1598     }
1599    
1600     my $uris = $_[1]->header ('Location');
1601     return 0 unless $uris;
1602     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1603     unless ({
1604     http => 1,
1605     }->{lc $uri->uri_scheme}) {
1606     return 0;
1607     }
1608     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1609     return 0;
1610     }
1611     return 1;
1612     } # redirect_ok
1613    
1614 wakaba 1.1 =head1 AUTHOR
1615    
1616     Wakaba <w@suika.fam.cx>.
1617    
1618     =head1 LICENSE
1619    
1620 wakaba 1.35 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1621 wakaba 1.1
1622     This library is free software; you can redistribute it
1623     and/or modify it under the same terms as Perl itself.
1624    
1625     =cut
1626    
1627 wakaba 1.48 ## $Date: 2008/03/21 11:17:00 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24