/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.51 - (hide annotations) (download)
Sun May 18 03:47:56 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.50: +28 -10 lines
++ ChangeLog	18 May 2008 03:47:40 -0000
2008-05-18  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi (print_source_string_section): Use new Message::Charset::Info
	interface to decode source code, otherwise the Perl native Encode
	module might decode the source code into different character
	string with the Info's.

2008-05-10  Wakaba  <wakaba@suika.fam.cx>

	* standards.en.html (requirements): Remove a requirement
	for an HTML element's allowed context (it is covered by HTML5
	spec since r1583).

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.35 my @nav;
24     my %time;
25     require Message::DOM::DOMImplementation;
26     my $dom = Message::DOM::DOMImplementation->new;
27     {
28 wakaba 1.16 use Message::CGI::HTTP;
29     my $http = Message::CGI::HTTP->new;
30 wakaba 1.1
31 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33     exit;
34     }
35    
36 wakaba 1.12 binmode STDOUT, ':utf8';
37 wakaba 1.14 $| = 1;
38 wakaba 1.12
39 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
40    
41 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
42    
43     <!DOCTYPE html>
44     <html lang="en">
45     <head>
46     <title>Web Document Conformance Checker (BETA)</title>
47 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 wakaba 1.2 </head>
49     <body>
50 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51     (<em>beta</em>)</h1>
52 wakaba 1.14 ];
53 wakaba 1.2
54 wakaba 1.14 $| = 0;
55     my $input = get_input_document ($http, $dom);
56 wakaba 1.16 my $char_length = 0;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89 wakaba 1.39
90     <script src="../cc-script.js"></script>
91 wakaba 1.9 ];
92    
93 wakaba 1.35 $input->{id_prefix} = '';
94     #$input->{nested} = 0;
95 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
96 wakaba 1.31 check_and_print ($input => $result);
97 wakaba 1.19 print_result_section ($result);
98 wakaba 1.9 } else {
99 wakaba 1.18 print STDOUT qq[</dl></div>];
100     print_result_input_error_section ($input);
101 wakaba 1.9 }
102 wakaba 1.3
103 wakaba 1.2 print STDOUT qq[
104 wakaba 1.3 <ul class="navigation" id="nav-items">
105     ];
106     for (@nav) {
107     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108     }
109     print STDOUT qq[
110     </ul>
111 wakaba 1.2 </body>
112     </html>
113     ];
114 wakaba 1.1
115 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
116     check check_manifest/) {
117 wakaba 1.16 next unless defined $time{$_};
118     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119     print $file $char_length, "\t", $time{$_}, "\n";
120     }
121    
122 wakaba 1.1 exit;
123 wakaba 1.35 }
124 wakaba 1.1
125 wakaba 1.19 sub add_error ($$$) {
126     my ($layer, $err, $result) = @_;
127     if (defined $err->{level}) {
128     if ($err->{level} eq 's') {
129     $result->{$layer}->{should}++;
130     $result->{$layer}->{score_min} -= 2;
131     $result->{conforming_min} = 0;
132     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133     $result->{$layer}->{warning}++;
134 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 wakaba 1.19 $result->{$layer}->{unsupported}++;
136     $result->{unsupported} = 1;
137 wakaba 1.37 } elsif ($err->{level} eq 'i') {
138     #
139 wakaba 1.19 } else {
140     $result->{$layer}->{must}++;
141     $result->{$layer}->{score_max} -= 2;
142     $result->{$layer}->{score_min} -= 2;
143     $result->{conforming_min} = 0;
144     $result->{conforming_max} = 0;
145     }
146     } else {
147     $result->{$layer}->{must}++;
148     $result->{$layer}->{score_max} -= 2;
149     $result->{$layer}->{score_min} -= 2;
150     $result->{conforming_min} = 0;
151     $result->{conforming_max} = 0;
152     }
153     } # add_error
154    
155 wakaba 1.31 sub check_and_print ($$) {
156     my ($input, $result) = @_;
157    
158     print_http_header_section ($input, $result);
159    
160     my $doc;
161     my $el;
162 wakaba 1.35 my $cssom;
163 wakaba 1.31 my $manifest;
164 wakaba 1.34 my @subdoc;
165 wakaba 1.31
166     if ($input->{media_type} eq 'text/html') {
167     ($doc, $el) = print_syntax_error_html_section ($input, $result);
168     print_source_string_section
169 wakaba 1.35 ($input,
170     \($input->{s}),
171     $input->{charset} || $doc->input_encoding);
172 wakaba 1.31 } elsif ({
173     'text/xml' => 1,
174     'application/atom+xml' => 1,
175     'application/rss+xml' => 1,
176 wakaba 1.45 'image/svg+xml' => 1,
177 wakaba 1.31 'application/xhtml+xml' => 1,
178     'application/xml' => 1,
179 wakaba 1.45 ## TODO: Should we make all XML MIME Types fall
180     ## into this category?
181    
182     'application/rdf+xml' => 1, ## NOTE: This type has different model.
183 wakaba 1.31 }->{$input->{media_type}}) {
184     ($doc, $el) = print_syntax_error_xml_section ($input, $result);
185 wakaba 1.35 print_source_string_section ($input,
186     \($input->{s}),
187     $doc->input_encoding);
188     } elsif ($input->{media_type} eq 'text/css') {
189     $cssom = print_syntax_error_css_section ($input, $result);
190     print_source_string_section
191     ($input, \($input->{s}),
192     $cssom->manakai_input_encoding);
193 wakaba 1.31 } elsif ($input->{media_type} eq 'text/cache-manifest') {
194     ## TODO: MUST be text/cache-manifest
195     $manifest = print_syntax_error_manifest_section ($input, $result);
196 wakaba 1.35 print_source_string_section ($input, \($input->{s}),
197     'utf-8');
198 wakaba 1.31 } else {
199     ## TODO: Change HTTP status code??
200     print_result_unknown_type_section ($input, $result);
201     }
202    
203     if (defined $doc or defined $el) {
204 wakaba 1.34 $doc->document_uri ($input->{uri});
205     $doc->manakai_entity_base_uri ($input->{base_uri});
206 wakaba 1.32 print_structure_dump_dom_section ($input, $doc, $el);
207     my $elements = print_structure_error_dom_section
208 wakaba 1.34 ($input, $doc, $el, $result, sub {
209     push @subdoc, shift;
210     });
211 wakaba 1.32 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
212 wakaba 1.33 print_listing_section ({
213     id => 'identifiers', label => 'IDs', heading => 'Identifiers',
214     }, $input, $elements->{id}) if keys %{$elements->{id}};
215     print_listing_section ({
216     id => 'terms', label => 'Terms', heading => 'Terms',
217     }, $input, $elements->{term}) if keys %{$elements->{term}};
218     print_listing_section ({
219     id => 'classes', label => 'Classes', heading => 'Classes',
220     }, $input, $elements->{class}) if keys %{$elements->{class}};
221 wakaba 1.48 print_uri_section ($input, $elements->{uri}) if keys %{$elements->{uri}};
222 wakaba 1.45 print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
223 wakaba 1.35 } elsif (defined $cssom) {
224     print_structure_dump_cssom_section ($input, $cssom);
225     ## TODO: CSSOM validation
226 wakaba 1.36 add_error ('structure', {level => 'u'} => $result);
227 wakaba 1.31 } elsif (defined $manifest) {
228 wakaba 1.32 print_structure_dump_manifest_section ($input, $manifest);
229     print_structure_error_manifest_section ($input, $manifest, $result);
230 wakaba 1.31 }
231 wakaba 1.34
232     my $id_prefix = 0;
233     for my $subinput (@subdoc) {
234     $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
235     $subinput->{nested} = 1;
236     $subinput->{base_uri} = $subinput->{container_node}->base_uri
237     unless defined $subinput->{base_uri};
238     my $ebaseuri = htescape ($subinput->{base_uri});
239     push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
240     print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
241     <h2>Subdocument #$id_prefix</h2>
242    
243     <dl>
244     <dt>Internet Media Type</dt>
245     <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
246     <dt>Container Node</dt>
247     <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
248     <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
249     <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
250     </dl>];
251    
252 wakaba 1.35 $subinput->{id_prefix} .= '-';
253 wakaba 1.34 check_and_print ($subinput => $result);
254    
255     print STDOUT qq[</div>];
256     }
257 wakaba 1.31 } # check_and_print
258    
259 wakaba 1.19 sub print_http_header_section ($$) {
260     my ($input, $result) = @_;
261 wakaba 1.9 return unless defined $input->{header_status_code} or
262     defined $input->{header_status_text} or
263 wakaba 1.34 @{$input->{header_field} or []};
264 wakaba 1.9
265 wakaba 1.32 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
266     print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
267 wakaba 1.9 <h2>HTTP Header</h2>
268    
269     <p><strong>Note</strong>: Due to the limitation of the
270     network library in use, the content of this section might
271     not be the real header.</p>
272    
273     <table><tbody>
274     ];
275    
276     if (defined $input->{header_status_code}) {
277     print STDOUT qq[<tr><th scope="row">Status code</th>];
278     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
279     }
280     if (defined $input->{header_status_text}) {
281     print STDOUT qq[<tr><th scope="row">Status text</th>];
282     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
283     }
284    
285     for (@{$input->{header_field}}) {
286     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
287     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
288     }
289    
290     print STDOUT qq[</tbody></table></div>];
291     } # print_http_header_section
292    
293 wakaba 1.19 sub print_syntax_error_html_section ($$) {
294     my ($input, $result) = @_;
295 wakaba 1.18
296     require Encode;
297     require Whatpm::HTML;
298    
299     print STDOUT qq[
300 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
301 wakaba 1.18 <h2>Parse Errors</h2>
302    
303 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
304 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
305 wakaba 1.18
306     my $onerror = sub {
307     my (%opt) = @_;
308     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
309 wakaba 1.38 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
310     qq[</dt>];
311 wakaba 1.18 $type =~ tr/ /-/;
312     $type =~ s/\|/%7C/g;
313     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
314 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
315     print STDOUT qq[$msg</dd>\n];
316 wakaba 1.19
317     add_error ('syntax', \%opt => $result);
318 wakaba 1.18 };
319    
320     my $doc = $dom->create_document;
321     my $el;
322 wakaba 1.35 my $inner_html_element = $input->{inner_html_element};
323 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
324 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
325 wakaba 1.24 my $time1 = time;
326 wakaba 1.48 my $t = \($input->{s});
327     unless ($input->{is_char_string}) {
328     $t = \(Encode::decode ($input->{charset}, $$t));
329     }
330 wakaba 1.24 $time{decode} = time - $time1;
331    
332 wakaba 1.18 $el = $doc->create_element_ns
333     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
334 wakaba 1.24 $time1 = time;
335 wakaba 1.48 Whatpm::HTML->set_inner_html ($el, $$t, $onerror);
336 wakaba 1.24 $time{parse} = time - $time1;
337 wakaba 1.18 } else {
338 wakaba 1.24 my $time1 = time;
339 wakaba 1.48 if ($input->{is_char_string}) {
340     Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror);
341     } else {
342     Whatpm::HTML->parse_byte_string
343     ($input->{charset}, $input->{s} => $doc, $onerror);
344     }
345 wakaba 1.24 $time{parse_html} = time - $time1;
346 wakaba 1.18 }
347 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
348     if defined $input->{official_charset};
349 wakaba 1.24
350 wakaba 1.18 print STDOUT qq[</dl></div>];
351    
352     return ($doc, $el);
353     } # print_syntax_error_html_section
354    
355 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
356     my ($input, $result) = @_;
357 wakaba 1.18
358     require Message::DOM::XMLParserTemp;
359    
360     print STDOUT qq[
361 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
362 wakaba 1.18 <h2>Parse Errors</h2>
363    
364 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
365 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
366 wakaba 1.18
367     my $onerror = sub {
368     my $err = shift;
369     my $line = $err->location->line_number;
370 wakaba 1.35 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
371 wakaba 1.18 print STDOUT $err->location->column_number, "</dt><dd>";
372     print STDOUT htescape $err->text, "</dd>\n";
373 wakaba 1.19
374     add_error ('syntax', {type => $err->text,
375     level => [
376     $err->SEVERITY_FATAL_ERROR => 'm',
377     $err->SEVERITY_ERROR => 'm',
378     $err->SEVERITY_WARNING => 's',
379     ]->[$err->severity]} => $result);
380    
381 wakaba 1.18 return 1;
382     };
383    
384 wakaba 1.48 my $t = \($input->{s});
385     if ($input->{is_char_string}) {
386     require Encode;
387     $t = \(Encode::encode ('utf8', $$t));
388     $input->{charset} = 'utf-8';
389     }
390    
391 wakaba 1.18 my $time1 = time;
392 wakaba 1.48 open my $fh, '<', $t;
393 wakaba 1.18 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
394     ($fh => $dom, $onerror, charset => $input->{charset});
395     $time{parse_xml} = time - $time1;
396 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
397     if defined $input->{official_charset};
398 wakaba 1.18
399     print STDOUT qq[</dl></div>];
400    
401     return ($doc, undef);
402     } # print_syntax_error_xml_section
403    
404 wakaba 1.35 sub get_css_parser () {
405     our $CSSParser;
406     return $CSSParser if $CSSParser;
407    
408     require Whatpm::CSS::Parser;
409     my $p = Whatpm::CSS::Parser->new;
410    
411     $p->{prop}->{$_} = 1 for qw/
412 wakaba 1.37 alignment-baseline
413 wakaba 1.35 background background-attachment background-color background-image
414     background-position background-position-x background-position-y
415     background-repeat border border-bottom border-bottom-color
416     border-bottom-style border-bottom-width border-collapse border-color
417     border-left border-left-color
418     border-left-style border-left-width border-right border-right-color
419     border-right-style border-right-width
420     border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
421     border-style border-top border-top-color border-top-style border-top-width
422     border-width bottom
423     caption-side clear clip color content counter-increment counter-reset
424 wakaba 1.37 cursor direction display dominant-baseline empty-cells float font
425 wakaba 1.35 font-family font-size font-size-adjust font-stretch
426     font-style font-variant font-weight height left
427     letter-spacing line-height
428     list-style list-style-image list-style-position list-style-type
429     margin margin-bottom margin-left margin-right margin-top marker-offset
430     marks max-height max-width min-height min-width opacity -moz-opacity
431     orphans outline outline-color outline-style outline-width overflow
432     overflow-x overflow-y
433     padding padding-bottom padding-left padding-right padding-top
434     page page-break-after page-break-before page-break-inside
435     position quotes right size table-layout
436 wakaba 1.37 text-align text-anchor text-decoration text-indent text-transform
437 wakaba 1.35 top unicode-bidi vertical-align visibility white-space width widows
438 wakaba 1.37 word-spacing writing-mode z-index
439 wakaba 1.35 /;
440     $p->{prop_value}->{display}->{$_} = 1 for qw/
441     block clip inline inline-block inline-table list-item none
442     table table-caption table-cell table-column table-column-group
443     table-header-group table-footer-group table-row table-row-group
444     compact marker
445     /;
446     $p->{prop_value}->{position}->{$_} = 1 for qw/
447     absolute fixed relative static
448     /;
449     $p->{prop_value}->{float}->{$_} = 1 for qw/
450     left right none
451     /;
452     $p->{prop_value}->{clear}->{$_} = 1 for qw/
453     left right none both
454     /;
455     $p->{prop_value}->{direction}->{ltr} = 1;
456     $p->{prop_value}->{direction}->{rtl} = 1;
457     $p->{prop_value}->{marks}->{crop} = 1;
458     $p->{prop_value}->{marks}->{cross} = 1;
459     $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
460     normal bidi-override embed
461     /;
462     for my $prop_name (qw/overflow overflow-x overflow-y/) {
463     $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
464     visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
465     /;
466     }
467     $p->{prop_value}->{visibility}->{$_} = 1 for qw/
468     visible hidden collapse
469     /;
470     $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
471     disc circle square decimal decimal-leading-zero
472     lower-roman upper-roman lower-greek lower-latin
473     upper-latin armenian georgian lower-alpha upper-alpha none
474     hebrew cjk-ideographic hiragana katakana hiragana-iroha
475     katakana-iroha
476     /;
477     $p->{prop_value}->{'list-style-position'}->{outside} = 1;
478     $p->{prop_value}->{'list-style-position'}->{inside} = 1;
479     $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
480     auto always avoid left right
481     /;
482     $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
483     auto always avoid left right
484     /;
485     $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
486     $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
487     $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
488     repeat repeat-x repeat-y no-repeat
489     /;
490     $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
491     $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
492     $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
493     xx-small x-small small medium large x-large xx-large
494     -manakai-xxx-large -webkit-xxx-large
495     larger smaller
496     /;
497     $p->{prop_value}->{'font-style'}->{normal} = 1;
498     $p->{prop_value}->{'font-style'}->{italic} = 1;
499     $p->{prop_value}->{'font-style'}->{oblique} = 1;
500     $p->{prop_value}->{'font-variant'}->{normal} = 1;
501     $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
502     $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
503     qw/normal wider narrower ultra-condensed extra-condensed
504     condensed semi-condensed semi-expanded expanded
505     extra-expanded ultra-expanded/;
506     $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
507     left right center justify begin end
508     /;
509     $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
510     capitalize uppercase lowercase none
511     /;
512     $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
513 wakaba 1.36 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
514 wakaba 1.35 /;
515 wakaba 1.37 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
516     lr rl tb lr-tb rl-tb tb-rl
517     /;
518     $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
519     start middle end
520     /;
521     $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
522     auto use-script no-change reset-size ideographic alphabetic
523     hanging mathematical central middle text-after-edge text-before-edge
524     /;
525     $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
526     auto baseline before-edge text-before-edge middle central
527     after-edge text-after-edge ideographic alphabetic hanging
528     mathematical
529     /;
530 wakaba 1.35 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
531     none blink underline overline line-through
532     /;
533     $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
534     top bottom left right
535     /;
536     $p->{prop_value}->{'table-layout'}->{auto} = 1;
537     $p->{prop_value}->{'table-layout'}->{fixed} = 1;
538 wakaba 1.36 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
539 wakaba 1.35 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
540     $p->{prop_value}->{'empty-cells'}->{show} = 1;
541     $p->{prop_value}->{'empty-cells'}->{hide} = 1;
542     $p->{prop_value}->{cursor}->{$_} = 1 for qw/
543     auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
544     se-resize sw-resize s-resize w-resize text wait help progress
545     /;
546     for my $prop (qw/border-top-style border-left-style
547     border-bottom-style border-right-style outline-style/) {
548     $p->{prop_value}->{$prop}->{$_} = 1 for qw/
549     none hidden dotted dashed solid double groove ridge inset outset
550     /;
551     }
552     for my $prop (qw/color background-color
553     border-bottom-color border-left-color border-right-color
554     border-top-color border-color/) {
555     $p->{prop_value}->{$prop}->{transparent} = 1;
556     $p->{prop_value}->{$prop}->{flavor} = 1;
557     $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
558     }
559     $p->{prop_value}->{'outline-color'}->{invert} = 1;
560     $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
561     $p->{pseudo_class}->{$_} = 1 for qw/
562     active checked disabled empty enabled first-child first-of-type
563     focus hover indeterminate last-child last-of-type link only-child
564     only-of-type root target visited
565     lang nth-child nth-last-child nth-of-type nth-last-of-type not
566     -manakai-contains -manakai-current
567     /;
568     $p->{pseudo_element}->{$_} = 1 for qw/
569     after before first-letter first-line
570     /;
571    
572     return $CSSParser = $p;
573     } # get_css_parser
574    
575     sub print_syntax_error_css_section ($$) {
576     my ($input, $result) = @_;
577    
578     print STDOUT qq[
579     <div id="$input->{id_prefix}parse-errors" class="section">
580     <h2>Parse Errors</h2>
581    
582 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
583 wakaba 1.35 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
584    
585     my $p = get_css_parser ();
586 wakaba 1.37 $p->init;
587 wakaba 1.35 $p->{onerror} = sub {
588     my (%opt) = @_;
589     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
590     if ($opt{token}) {
591     print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
592     } else {
593     print STDOUT qq[<dt class="$cls">Unknown location];
594     }
595     if (defined $opt{value}) {
596     print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
597     } elsif (defined $opt{token}) {
598     print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
599     }
600     $type =~ tr/ /-/;
601     $type =~ s/\|/%7C/g;
602     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
603     print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
604     print STDOUT qq[$msg</dd>\n];
605    
606     add_error ('syntax', \%opt => $result);
607     };
608     $p->{href} = $input->{uri};
609     $p->{base_uri} = $input->{base_uri};
610    
611 wakaba 1.37 # if ($parse_mode eq 'q') {
612     # $p->{unitless_px} = 1;
613     # $p->{hashless_color} = 1;
614     # }
615    
616     ## TODO: Make $input->{s} a ref.
617    
618 wakaba 1.35 my $s = \$input->{s};
619     my $charset;
620     unless ($input->{is_char_string}) {
621     require Encode;
622     if (defined $input->{charset}) {## TODO: IANA->Perl
623     $charset = $input->{charset};
624     $s = \(Encode::decode ($input->{charset}, $$s));
625     } else {
626     ## TODO: charset detection
627     $s = \(Encode::decode ($charset = 'utf-8', $$s));
628     }
629     }
630    
631     my $cssom = $p->parse_char_string ($$s);
632     $cssom->manakai_input_encoding ($charset) if defined $charset;
633    
634     print STDOUT qq[</dl></div>];
635    
636     return $cssom;
637     } # print_syntax_error_css_section
638    
639 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
640     my ($input, $result) = @_;
641    
642     require Whatpm::CacheManifest;
643    
644     print STDOUT qq[
645 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
646 wakaba 1.22 <h2>Parse Errors</h2>
647    
648 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
649 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
650 wakaba 1.22
651     my $onerror = sub {
652     my (%opt) = @_;
653     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
654 wakaba 1.32 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
655     qq[</dt>];
656 wakaba 1.22 $type =~ tr/ /-/;
657     $type =~ s/\|/%7C/g;
658     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
659 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
660     print STDOUT qq[$msg</dd>\n];
661 wakaba 1.22
662     add_error ('syntax', \%opt => $result);
663     };
664    
665 wakaba 1.48 my $m = $input->{is_char_string} ? 'parse_char_string' : 'parse_byte_string';
666 wakaba 1.22 my $time1 = time;
667 wakaba 1.48 my $manifest = Whatpm::CacheManifest->$m
668 wakaba 1.22 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
669     $time{parse_manifest} = time - $time1;
670    
671     print STDOUT qq[</dl></div>];
672    
673     return $manifest;
674     } # print_syntax_error_manifest_section
675    
676 wakaba 1.35 sub print_source_string_section ($$$) {
677     my $input = shift;
678     my $s;
679     unless ($input->{is_char_string}) {
680 wakaba 1.51 open my $byte_stream, '<', $_[0];
681     require Message::Charset::Info;
682     my $charset = Message::Charset::Info->get_by_iana_name ($_[1]);
683     my ($char_stream, $e_status) = $charset->get_decode_handle
684     ($byte_stream, allow_error_reporting => 1, allow_fallback => 1);
685     return unless $char_stream;
686    
687     $char_stream->onerror (sub {
688     my (undef, $type, %opt) = @_;
689     if ($opt{octets}) {
690     ${$opt{octets}} = "\x{FFFD}";
691     }
692     });
693 wakaba 1.35
694 wakaba 1.51 my $t = '';
695     while (1) {
696     my $c = $char_stream->getc;
697     last unless defined $c;
698     $t .= $c;
699     }
700     $s = \$t;
701     ## TODO: Output for each line, don't concat all of lines.
702 wakaba 1.35 } else {
703     $s = $_[0];
704     }
705 wakaba 1.9
706     my $i = 1;
707 wakaba 1.32 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
708     print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
709 wakaba 1.9 <h2>Document Source</h2>
710     <ol lang="">\n];
711 wakaba 1.7 if (length $$s) {
712 wakaba 1.41 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
713 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
714     "</li>\n";
715 wakaba 1.7 $i++;
716     }
717 wakaba 1.41 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
718 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
719     "</li>\n";
720 wakaba 1.7 }
721     } else {
722 wakaba 1.32 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
723 wakaba 1.2 }
724 wakaba 1.39 print STDOUT "</ol></div>
725 wakaba 1.42 <script>
726     addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
727     </script>";
728 wakaba 1.9 } # print_input_string_section
729 wakaba 1.2
730 wakaba 1.35 sub print_document_tree ($$) {
731     my ($input, $node) = @_;
732    
733 wakaba 1.2 my $r = '<ol class="xoxo">';
734 wakaba 1.1
735 wakaba 1.2 my @node = ($node);
736 wakaba 1.1 while (@node) {
737     my $child = shift @node;
738 wakaba 1.2 unless (ref $child) {
739     $r .= $child;
740     next;
741     }
742    
743 wakaba 1.32 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
744 wakaba 1.2 my $nt = $child->node_type;
745     if ($nt == $child->ELEMENT_NODE) {
746 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
747     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
748 wakaba 1.2 '</code>'; ## ISSUE: case
749    
750     if ($child->has_attributes) {
751     $r .= '<ul class="attributes">';
752 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
753 wakaba 1.2 @{$child->attributes}) {
754 wakaba 1.32 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
755 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
756     }
757     $r .= '</ul>';
758     }
759    
760 wakaba 1.7 if ($child->has_child_nodes) {
761 wakaba 1.2 $r .= '<ol class="children">';
762 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
763     } else {
764     $r .= '</li>';
765 wakaba 1.2 }
766     } elsif ($nt == $child->TEXT_NODE) {
767 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
768 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
769 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
770 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
771 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
772 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
773 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
774 wakaba 1.7 $r .= qq[<ul class="attributes">];
775 wakaba 1.27 my $cp = $child->manakai_charset;
776     if (defined $cp) {
777     $r .= qq[<li><code>charset</code> parameter = <code>];
778     $r .= htescape ($cp) . qq[</code></li>];
779     }
780     $r .= qq[<li><code>inputEncoding</code> = ];
781     my $ie = $child->input_encoding;
782     if (defined $ie) {
783     $r .= qq[<code>@{[htescape ($ie)]}</code>];
784     if ($child->manakai_has_bom) {
785     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
786     }
787     } else {
788     $r .= qq[(<code>null</code>)];
789     }
790 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
791     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
792 wakaba 1.9 unless ($child->manakai_is_html) {
793     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
794     if (defined $child->xml_encoding) {
795     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
796     } else {
797     $r .= qq[<li>XML encoding = (null)</li>];
798     }
799     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
800     }
801 wakaba 1.7 $r .= qq[</ul>];
802 wakaba 1.2 if ($child->has_child_nodes) {
803 wakaba 1.7 $r .= '<ol class="children">';
804 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
805 wakaba 1.1 }
806 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
807 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
808     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
809     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
810     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
811 wakaba 1.2 $r .= '</ul></li>';
812     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
813 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
814 wakaba 1.1 } else {
815 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
816 wakaba 1.1 }
817     }
818 wakaba 1.2
819     $r .= '</ol>';
820     print STDOUT $r;
821     } # print_document_tree
822 wakaba 1.1
823 wakaba 1.32 sub print_structure_dump_dom_section ($$$) {
824     my ($input, $doc, $el) = @_;
825 wakaba 1.18
826     print STDOUT qq[
827 wakaba 1.32 <div id="$input->{id_prefix}document-tree" class="section">
828 wakaba 1.18 <h2>Document Tree</h2>
829     ];
830 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
831     unless $input->{nested};
832 wakaba 1.18
833 wakaba 1.35 print_document_tree ($input, $el || $doc);
834 wakaba 1.18
835     print STDOUT qq[</div>];
836 wakaba 1.22 } # print_structure_dump_dom_section
837    
838 wakaba 1.35 sub print_structure_dump_cssom_section ($$) {
839     my ($input, $cssom) = @_;
840    
841     print STDOUT qq[
842     <div id="$input->{id_prefix}document-tree" class="section">
843     <h2>Document Tree</h2>
844     ];
845     push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
846     unless $input->{nested};
847    
848     ## TODO:
849     print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
850    
851     print STDOUT qq[</div>];
852     } # print_structure_dump_cssom_section
853    
854 wakaba 1.32 sub print_structure_dump_manifest_section ($$) {
855     my ($input, $manifest) = @_;
856 wakaba 1.22
857     print STDOUT qq[
858 wakaba 1.32 <div id="$input->{id_prefix}dump-manifest" class="section">
859 wakaba 1.22 <h2>Cache Manifest</h2>
860     ];
861 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
862     unless $input->{nested};
863 wakaba 1.22
864     print STDOUT qq[<dl><dt>Explicit entries</dt>];
865 wakaba 1.37 my $i = 0;
866 wakaba 1.22 for my $uri (@{$manifest->[0]}) {
867     my $euri = htescape ($uri);
868 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
869 wakaba 1.22 }
870    
871     print STDOUT qq[<dt>Fallback entries</dt><dd>
872     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
873     <th scope=row>Fallback Entry</tr><tbody>];
874     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
875     my $euri = htescape ($uri);
876     my $euri2 = htescape ($manifest->[1]->{$uri});
877 wakaba 1.37 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
878     <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
879 wakaba 1.22 }
880    
881     print STDOUT qq[</table><dt>Online whitelist</dt>];
882     for my $uri (@{$manifest->[2]}) {
883     my $euri = htescape ($uri);
884 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
885 wakaba 1.22 }
886 wakaba 1.18
887 wakaba 1.22 print STDOUT qq[</dl></div>];
888     } # print_structure_dump_manifest_section
889    
890 wakaba 1.34 sub print_structure_error_dom_section ($$$$$) {
891     my ($input, $doc, $el, $result, $onsubdoc) = @_;
892 wakaba 1.18
893 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
894 wakaba 1.18 <h2>Document Errors</h2>
895    
896 wakaba 1.42 <dl id=document-errors-list>];
897 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
898     unless $input->{nested};
899 wakaba 1.18
900     require Whatpm::ContentChecker;
901     my $onerror = sub {
902     my %opt = @_;
903     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
904     $type =~ tr/ /-/;
905     $type =~ s/\|/%7C/g;
906     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
907 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
908 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
909     print STDOUT $msg, "</dd>\n";
910 wakaba 1.19 add_error ('structure', \%opt => $result);
911 wakaba 1.18 };
912    
913     my $elements;
914     my $time1 = time;
915     if ($el) {
916 wakaba 1.34 $elements = Whatpm::ContentChecker->check_element
917     ($el, $onerror, $onsubdoc);
918 wakaba 1.18 } else {
919 wakaba 1.34 $elements = Whatpm::ContentChecker->check_document
920     ($doc, $onerror, $onsubdoc);
921 wakaba 1.18 }
922     $time{check} = time - $time1;
923    
924 wakaba 1.42 print STDOUT qq[</dl>
925     <script>
926     addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
927     </script></div>];
928 wakaba 1.18
929     return $elements;
930 wakaba 1.22 } # print_structure_error_dom_section
931    
932     sub print_structure_error_manifest_section ($$$) {
933 wakaba 1.32 my ($input, $manifest, $result) = @_;
934 wakaba 1.22
935 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
936 wakaba 1.22 <h2>Document Errors</h2>
937    
938     <dl>];
939 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
940     unless $input->{nested};
941 wakaba 1.22
942     require Whatpm::CacheManifest;
943     Whatpm::CacheManifest->check_manifest ($manifest, sub {
944     my %opt = @_;
945     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
946     $type =~ tr/ /-/;
947     $type =~ s/\|/%7C/g;
948     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
949 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
950 wakaba 1.22 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
951     add_error ('structure', \%opt => $result);
952     });
953    
954     print STDOUT qq[</div>];
955     } # print_structure_error_manifest_section
956 wakaba 1.18
957 wakaba 1.32 sub print_table_section ($$) {
958     my ($input, $tables) = @_;
959 wakaba 1.18
960 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
961     unless $input->{nested};
962 wakaba 1.18 print STDOUT qq[
963 wakaba 1.32 <div id="$input->{id_prefix}tables" class="section">
964 wakaba 1.18 <h2>Tables</h2>
965    
966     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
967     <script src="../table-script.js" type="text/javascript"></script>
968     <noscript>
969     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
970     </noscript>
971     ];
972    
973     require JSON;
974    
975     my $i = 0;
976 wakaba 1.50 for my $table (@$tables) {
977 wakaba 1.18 $i++;
978 wakaba 1.32 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
979 wakaba 1.50 get_node_link ($input, $table->{element}) . q[</h3>];
980    
981     delete $table->{element};
982 wakaba 1.18
983 wakaba 1.49 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption},
984     @{$table->{row}}) {
985 wakaba 1.18 next unless $_;
986     delete $_->{element};
987     }
988    
989     for (@{$table->{row_group}}) {
990     next unless $_;
991     next unless $_->{element};
992     $_->{type} = $_->{element}->manakai_local_name;
993     delete $_->{element};
994     }
995    
996     for (@{$table->{cell}}) {
997     next unless $_;
998     for (@{$_}) {
999     next unless $_;
1000     for (@$_) {
1001     $_->{id} = refaddr $_->{element} if defined $_->{element};
1002     delete $_->{element};
1003     $_->{is_header} = $_->{is_header} ? 1 : 0;
1004     }
1005     }
1006     }
1007    
1008     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
1009     print STDOUT JSON::objToJson ($table);
1010 wakaba 1.32 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
1011     print STDOUT qq[, '$input->{id_prefix}');</script>];
1012 wakaba 1.18 }
1013    
1014     print STDOUT qq[</div>];
1015     } # print_table_section
1016    
1017 wakaba 1.33 sub print_listing_section ($$$) {
1018     my ($opt, $input, $ids) = @_;
1019 wakaba 1.18
1020 wakaba 1.35 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
1021     unless $input->{nested};
1022 wakaba 1.18 print STDOUT qq[
1023 wakaba 1.33 <div id="$input->{id_prefix}$opt->{id}" class="section">
1024     <h2>$opt->{heading}</h2>
1025 wakaba 1.18
1026     <dl>
1027     ];
1028     for my $id (sort {$a cmp $b} keys %$ids) {
1029     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
1030     for (@{$ids->{$id}}) {
1031 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
1032 wakaba 1.18 }
1033     }
1034     print STDOUT qq[</dl></div>];
1035 wakaba 1.33 } # print_listing_section
1036 wakaba 1.18
1037 wakaba 1.48 sub print_uri_section ($$$) {
1038     my ($input, $uris) = @_;
1039    
1040     ## NOTE: URIs contained in the DOM (i.e. in HTML or XML documents),
1041     ## except for those in RDF triples.
1042     ## TODO: URIs in CSS
1043    
1044     push @nav, ['#' . $input->{id_prefix} . 'uris' => 'URIs']
1045     unless $input->{nested};
1046     print STDOUT qq[
1047     <div id="$input->{id_prefix}uris" class="section">
1048     <h2>URIs</h2>
1049    
1050     <dl>];
1051     for my $uri (sort {$a cmp $b} keys %$uris) {
1052     my $euri = htescape ($uri);
1053     print STDOUT qq[<dt><code class=uri>&lt;<a href="$euri">$euri</a>></code>];
1054     my $eccuri = htescape (get_cc_uri ($uri));
1055     print STDOUT qq[<dd><a href="$eccuri">Check conformance of this document</a>];
1056     print STDOUT qq[<dd>Found at: <ul>];
1057     for my $entry (@{$uris->{$uri}}) {
1058     print STDOUT qq[<li>], get_node_link ($input, $entry->{node});
1059     if (keys %{$entry->{type} or {}}) {
1060     print STDOUT ' (';
1061     print STDOUT join ', ', map {
1062     {
1063     hyperlink => 'Hyperlink',
1064     resource => 'Link to an external resource',
1065     namespace => 'Namespace URI',
1066     cite => 'Citation or link to a long description',
1067     embedded => 'Link to an embedded content',
1068     base => 'Base URI',
1069     action => 'Submission URI',
1070     }->{$_}
1071     or
1072     htescape ($_)
1073     } keys %{$entry->{type}};
1074     print STDOUT ')';
1075     }
1076     }
1077     print STDOUT qq[</ul>];
1078     }
1079     print STDOUT qq[</dl></div>];
1080     } # print_uri_section
1081    
1082 wakaba 1.45 sub print_rdf_section ($$$) {
1083     my ($input, $rdfs) = @_;
1084    
1085     push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
1086     unless $input->{nested};
1087     print STDOUT qq[
1088     <div id="$input->{id_prefix}rdf" class="section">
1089     <h2>RDF Triples</h2>
1090    
1091     <dl>];
1092     my $i = 0;
1093     for my $rdf (@$rdfs) {
1094     print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
1095     print STDOUT get_node_link ($input, $rdf->[0]);
1096     print STDOUT qq[<dd><dl>];
1097     for my $triple (@{$rdf->[1]}) {
1098     print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
1099     print STDOUT get_rdf_resource_html ($triple->[1]);
1100     print STDOUT ' ';
1101     print STDOUT get_rdf_resource_html ($triple->[2]);
1102     print STDOUT ' ';
1103     print STDOUT get_rdf_resource_html ($triple->[3]);
1104     }
1105     print STDOUT qq[</dl>];
1106     }
1107     print STDOUT qq[</dl></div>];
1108     } # print_rdf_section
1109    
1110     sub get_rdf_resource_html ($) {
1111     my $resource = shift;
1112 wakaba 1.46 if (defined $resource->{uri}) {
1113 wakaba 1.45 my $euri = htescape ($resource->{uri});
1114     return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1115     '</a>></code>';
1116 wakaba 1.46 } elsif (defined $resource->{bnodeid}) {
1117 wakaba 1.45 return htescape ('_:' . $resource->{bnodeid});
1118     } elsif ($resource->{nodes}) {
1119     return '(rdf:XMLLiteral)';
1120     } elsif (defined $resource->{value}) {
1121     my $elang = htescape (defined $resource->{language}
1122     ? $resource->{language} : '');
1123     my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
1124     if (defined $resource->{datatype}) {
1125     my $euri = htescape ($resource->{datatype});
1126     $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1127     '</a>></code>';
1128     } elsif (length $resource->{language}) {
1129     $r .= '@' . htescape ($resource->{language});
1130     }
1131     return $r;
1132     } else {
1133     return '??';
1134     }
1135     } # get_rdf_resource_html
1136    
1137 wakaba 1.19 sub print_result_section ($) {
1138     my $result = shift;
1139    
1140     print STDOUT qq[
1141     <div id="result-summary" class="section">
1142     <h2>Result</h2>];
1143    
1144 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
1145 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1146     checker cannot decide whether the document is conforming or
1147     not, since the document contains one or more unsupported
1148 wakaba 1.21 features. The document might or might not be conforming.</p>];
1149 wakaba 1.19 } elsif ($result->{conforming_min}) {
1150     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1151     found in this document.</p>];
1152     } elsif ($result->{conforming_max}) {
1153     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1154     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1155     it might be conforming.</p>];
1156     } else {
1157     print STDOUT qq[<p class=FAIL id=result-para>This document is
1158     <strong><em>non</em>-conforming</strong>.</p>];
1159     }
1160    
1161     print STDOUT qq[<table>
1162     <colgroup><col><colgroup><col><col><col><colgroup><col>
1163     <thead>
1164 wakaba 1.23 <tr><th scope=col></th>
1165     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1166     Errors</a></th>
1167     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1168     Errors</a></th>
1169     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1170     <th scope=col>Score</th></tr></thead><tbody>];
1171 wakaba 1.19
1172     my $must_error = 0;
1173     my $should_error = 0;
1174     my $warning = 0;
1175     my $score_min = 0;
1176     my $score_max = 0;
1177     my $score_base = 20;
1178 wakaba 1.21 my $score_unit = $score_base / 100;
1179 wakaba 1.19 for (
1180     [Transfer => 'transfer', ''],
1181     [Character => 'char', ''],
1182     [Syntax => 'syntax', '#parse-errors'],
1183     [Structure => 'structure', '#document-errors'],
1184     ) {
1185     $must_error += ($result->{$_->[1]}->{must} += 0);
1186     $should_error += ($result->{$_->[1]}->{should} += 0);
1187     $warning += ($result->{$_->[1]}->{warning} += 0);
1188 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1189     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1190 wakaba 1.19
1191     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1192     my $label = $_->[0];
1193     if ($result->{$_->[1]}->{must} or
1194     $result->{$_->[1]}->{should} or
1195     $result->{$_->[1]}->{warning} or
1196     $result->{$_->[1]}->{unsupported}) {
1197     $label = qq[<a href="$_->[2]">$label</a>];
1198     }
1199    
1200     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1201     if ($uncertain) {
1202 wakaba 1.51 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}];
1203 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1204 wakaba 1.51 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}];
1205 wakaba 1.19 } else {
1206 wakaba 1.51 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}];
1207 wakaba 1.19 }
1208 wakaba 1.51 print qq[ / 20];
1209 wakaba 1.19 }
1210    
1211     $score_max += $score_base;
1212    
1213     print STDOUT qq[
1214 wakaba 1.51 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base / 20
1215 wakaba 1.19 </tbody>
1216 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
1217     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1218     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1219     <td>$warning?</td>
1220 wakaba 1.51 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong> / 100
1221 wakaba 1.19 </table>
1222    
1223     <p><strong>Important</strong>: This conformance checking service
1224     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1225     </div>];
1226     push @nav, ['#result-summary' => 'Result'];
1227     } # print_result_section
1228    
1229 wakaba 1.24 sub print_result_unknown_type_section ($$) {
1230     my ($input, $result) = @_;
1231 wakaba 1.18
1232 wakaba 1.24 my $euri = htescape ($input->{uri});
1233 wakaba 1.18 print STDOUT qq[
1234 wakaba 1.35 <div id="$input->{id_prefix}parse-errors" class="section">
1235 wakaba 1.24 <h2>Errors</h2>
1236    
1237     <dl>
1238     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1239     <dd class=unsupported><strong><a href="../error-description#level-u">Not
1240     supported</a></strong>:
1241     Media type
1242     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1243     is not supported.</dd>
1244     </dl>
1245 wakaba 1.18 </div>
1246     ];
1247 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1248     unless $input->{nested};
1249 wakaba 1.30 add_error (char => {level => 'u'} => $result);
1250     add_error (syntax => {level => 'u'} => $result);
1251     add_error (structure => {level => 'u'} => $result);
1252 wakaba 1.18 } # print_result_unknown_type_section
1253    
1254     sub print_result_input_error_section ($) {
1255     my $input = shift;
1256     print STDOUT qq[<div class="section" id="result-summary">
1257     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1258     </div>];
1259     push @nav, ['#result-summary' => 'Result'];
1260 wakaba 1.32 } # print_result_input_error_section
1261 wakaba 1.18
1262 wakaba 1.32 sub get_error_label ($$) {
1263     my ($input, $err) = @_;
1264 wakaba 1.22
1265     my $r = '';
1266    
1267 wakaba 1.42 my $line;
1268     my $column;
1269    
1270     if (defined $err->{node}) {
1271     $line = $err->{node}->get_user_data ('manakai_source_line');
1272     if (defined $line) {
1273     $column = $err->{node}->get_user_data ('manakai_source_column');
1274 wakaba 1.40 } else {
1275 wakaba 1.42 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1276     my $owner = $err->{node}->owner_element;
1277     $line = $owner->get_user_data ('manakai_source_line');
1278     $column = $owner->get_user_data ('manakai_source_column');
1279 wakaba 1.43 } else {
1280     my $parent = $err->{node}->parent_node;
1281 wakaba 1.44 if ($parent) {
1282     $line = $parent->get_user_data ('manakai_source_line');
1283     $column = $parent->get_user_data ('manakai_source_column');
1284     }
1285 wakaba 1.42 }
1286     }
1287     }
1288     unless (defined $line) {
1289     if (defined $err->{token} and defined $err->{token}->{line}) {
1290     $line = $err->{token}->{line};
1291     $column = $err->{token}->{column};
1292     } elsif (defined $err->{line}) {
1293     $line = $err->{line};
1294     $column = $err->{column};
1295 wakaba 1.40 }
1296 wakaba 1.42 }
1297    
1298     if (defined $line) {
1299     if (defined $column and $column > 0) {
1300     $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1301 wakaba 1.22 } else {
1302 wakaba 1.42 $line = $line - 1 || 1;
1303     $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1304 wakaba 1.22 }
1305     }
1306    
1307     if (defined $err->{node}) {
1308     $r .= ' ' if length $r;
1309 wakaba 1.42 $r .= get_node_link ($input, $err->{node});
1310 wakaba 1.22 }
1311    
1312     if (defined $err->{index}) {
1313 wakaba 1.37 if (length $r) {
1314     $r .= ', Index ' . (0+$err->{index});
1315     } else {
1316     $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1317     . (0+$err->{index}) . '</a>';
1318     }
1319 wakaba 1.22 }
1320    
1321     if (defined $err->{value}) {
1322     $r .= ' ' if length $r;
1323     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1324     }
1325    
1326     return $r;
1327     } # get_error_label
1328    
1329 wakaba 1.23 sub get_error_level_label ($) {
1330     my $err = shift;
1331    
1332     my $r = '';
1333    
1334     if (not defined $err->{level} or $err->{level} eq 'm') {
1335     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1336     error</a></strong>: ];
1337     } elsif ($err->{level} eq 's') {
1338     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1339     error</a></strong>: ];
1340     } elsif ($err->{level} eq 'w') {
1341     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1342     ];
1343 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1344 wakaba 1.23 $r = qq[<strong><a href="../error-description#level-u">Not
1345     supported</a></strong>: ];
1346 wakaba 1.37 } elsif ($err->{level} eq 'i') {
1347     $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1348 wakaba 1.23 } else {
1349     my $elevel = htescape ($err->{level});
1350     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1351     ];
1352     }
1353    
1354     return $r;
1355     } # get_error_level_label
1356    
1357 wakaba 1.1 sub get_node_path ($) {
1358     my $node = shift;
1359     my @r;
1360     while (defined $node) {
1361     my $rs;
1362     if ($node->node_type == 1) {
1363 wakaba 1.47 $rs = $node->node_name;
1364 wakaba 1.1 $node = $node->parent_node;
1365     } elsif ($node->node_type == 2) {
1366 wakaba 1.47 $rs = '@' . $node->node_name;
1367 wakaba 1.1 $node = $node->owner_element;
1368     } elsif ($node->node_type == 3) {
1369     $rs = '"' . $node->data . '"';
1370     $node = $node->parent_node;
1371     } elsif ($node->node_type == 9) {
1372 wakaba 1.9 @r = ('') unless @r;
1373 wakaba 1.1 $rs = '';
1374     $node = $node->parent_node;
1375     } else {
1376     $rs = '#' . $node->node_type;
1377     $node = $node->parent_node;
1378     }
1379     unshift @r, $rs;
1380     }
1381     return join '/', @r;
1382     } # get_node_path
1383    
1384 wakaba 1.32 sub get_node_link ($$) {
1385     return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1386     htescape (get_node_path ($_[1])) . qq[</a>];
1387 wakaba 1.6 } # get_node_link
1388    
1389 wakaba 1.7 {
1390     my $Msg = {};
1391    
1392     sub load_text_catalog ($) {
1393     my $lang = shift; # MUST be a canonical lang name
1394 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
1395     or die "$0: cc-msg.$lang.txt: $!";
1396 wakaba 1.7 while (<$file>) {
1397     if (s/^([^;]+);([^;]*);//) {
1398     my ($type, $cls, $msg) = ($1, $2, $_);
1399     $msg =~ tr/\x0D\x0A//d;
1400     $Msg->{$type} = [$cls, $msg];
1401     }
1402     }
1403     } # load_text_catalog
1404    
1405     sub get_text ($) {
1406 wakaba 1.15 my ($type, $level, $node) = @_;
1407 wakaba 1.7 $type = $level . ':' . $type if defined $level;
1408 wakaba 1.29 $level = 'm' unless defined $level;
1409 wakaba 1.7 my @arg;
1410     {
1411     if (defined $Msg->{$type}) {
1412     my $msg = $Msg->{$type}->[1];
1413 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
1414     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1415     }ge;
1416 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1417     UNIVERSAL::can ($node, 'get_attribute_ns')
1418     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1419     }ge;
1420     $msg =~ s{<var>{\@}</var>}{
1421     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1422     }ge;
1423 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
1424     UNIVERSAL::can ($node, 'manakai_local_name')
1425     ? htescape ($node->manakai_local_name) : ''
1426     }ge;
1427     $msg =~ s{<var>{element-local-name}</var>}{
1428     (UNIVERSAL::can ($node, 'owner_element') and
1429     $node->owner_element)
1430     ? htescape ($node->owner_element->manakai_local_name)
1431     : ''
1432     }ge;
1433 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1434 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
1435     unshift @arg, $1;
1436     redo;
1437     }
1438     }
1439 wakaba 1.29 return ($type, 'level-'.$level, htescape ($_[0]));
1440 wakaba 1.7 } # get_text
1441    
1442     }
1443    
1444 wakaba 1.48 sub encode_uri_component ($) {
1445     require Encode;
1446     my $s = Encode::encode ('utf8', shift);
1447     $s =~ s/([^0-9A-Za-z_.~-])/sprintf '%%%02X', ord $1/ge;
1448     return $s;
1449     } # encode_uri_component
1450    
1451     sub get_cc_uri ($) {
1452     return './?uri=' . encode_uri_component ($_[0]);
1453     } # get_cc_uri
1454    
1455 wakaba 1.9 sub get_input_document ($$) {
1456     my ($http, $dom) = @_;
1457    
1458 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
1459 wakaba 1.9 my $r = {};
1460     if (defined $request_uri and length $request_uri) {
1461     my $uri = $dom->create_uri_reference ($request_uri);
1462     unless ({
1463     http => 1,
1464     }->{lc $uri->uri_scheme}) {
1465     return {uri => $request_uri, request_uri => $request_uri,
1466     error_status_text => 'URI scheme not allowed'};
1467     }
1468    
1469     require Message::Util::HostPermit;
1470     my $host_permit = new Message::Util::HostPermit;
1471     $host_permit->add_rule (<<EOH);
1472     Allow host=suika port=80
1473     Deny host=suika
1474     Allow host=suika.fam.cx port=80
1475     Deny host=suika.fam.cx
1476     Deny host=localhost
1477     Deny host=*.localdomain
1478     Deny ipv4=0.0.0.0/8
1479     Deny ipv4=10.0.0.0/8
1480     Deny ipv4=127.0.0.0/8
1481     Deny ipv4=169.254.0.0/16
1482     Deny ipv4=172.0.0.0/11
1483     Deny ipv4=192.0.2.0/24
1484     Deny ipv4=192.88.99.0/24
1485     Deny ipv4=192.168.0.0/16
1486     Deny ipv4=198.18.0.0/15
1487     Deny ipv4=224.0.0.0/4
1488     Deny ipv4=255.255.255.255/32
1489     Deny ipv6=0::0/0
1490     Allow host=*
1491     EOH
1492     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1493     return {uri => $request_uri, request_uri => $request_uri,
1494     error_status_text => 'Connection to the host is forbidden'};
1495     }
1496    
1497     require LWP::UserAgent;
1498     my $ua = WDCC::LWPUA->new;
1499     $ua->{wdcc_dom} = $dom;
1500     $ua->{wdcc_host_permit} = $host_permit;
1501     $ua->agent ('Mozilla'); ## TODO: for now.
1502     $ua->parse_head (0);
1503     $ua->protocols_allowed ([qw/http/]);
1504     $ua->max_size (1000_000);
1505     my $req = HTTP::Request->new (GET => $request_uri);
1506 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1507 wakaba 1.9 my $res = $ua->request ($req);
1508 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1509     if ($res->is_success or $http->get_parameter ('error-page')) {
1510 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1511     $r->{uri} = $res->request->uri;
1512     $r->{request_uri} = $request_uri;
1513    
1514     ## TODO: More strict parsing...
1515     my $ct = $res->header ('Content-Type');
1516 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1517 wakaba 1.9 $r->{charset} = lc $1;
1518     $r->{charset} =~ tr/\\//d;
1519 wakaba 1.26 $r->{official_charset} = $r->{charset};
1520 wakaba 1.9 }
1521    
1522 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1523 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1524     $r->{charset_overridden}
1525     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1526     $r->{charset} = $input_charset;
1527 wakaba 1.25 }
1528    
1529     ## TODO: Support for HTTP Content-Encoding
1530 wakaba 1.9
1531     $r->{s} = ''.$res->content;
1532 wakaba 1.25
1533     require Whatpm::ContentType;
1534     ($r->{official_type}, $r->{media_type})
1535     = Whatpm::ContentType->get_sniffed_type
1536     (get_file_head => sub {
1537     return substr $r->{s}, 0, shift;
1538     },
1539     http_content_type_byte => $ct,
1540     has_http_content_encoding =>
1541     defined $res->header ('Content-Encoding'),
1542     supported_image_types => {});
1543 wakaba 1.9 } else {
1544     $r->{uri} = $res->request->uri;
1545     $r->{request_uri} = $request_uri;
1546     $r->{error_status_text} = $res->status_line;
1547     }
1548    
1549     $r->{header_field} = [];
1550     $res->scan (sub {
1551     push @{$r->{header_field}}, [$_[0], $_[1]];
1552     });
1553     $r->{header_status_code} = $res->code;
1554     $r->{header_status_text} = $res->message;
1555     } else {
1556 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1557 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1558     $r->{request_uri} = q<thismessage:/>;
1559     $r->{base_uri} = q<thismessage:/>;
1560 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1561 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1562     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1563 wakaba 1.26 $r->{official_charset} = $r->{charset};
1564 wakaba 1.9 $r->{header_field} = [];
1565 wakaba 1.25
1566     require Whatpm::ContentType;
1567     ($r->{official_type}, $r->{media_type})
1568     = Whatpm::ContentType->get_sniffed_type
1569     (get_file_head => sub {
1570     return substr $r->{s}, 0, shift;
1571     },
1572     http_content_type_byte => undef,
1573     has_http_content_encoding => 0,
1574     supported_image_types => {});
1575 wakaba 1.9 }
1576    
1577 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1578 wakaba 1.9 if (defined $input_format and length $input_format) {
1579     $r->{media_type_overridden}
1580     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1581     $r->{media_type} = $input_format;
1582     }
1583     if (defined $r->{s} and not defined $r->{media_type}) {
1584     $r->{media_type} = 'text/html';
1585     $r->{media_type_overridden} = 1;
1586     }
1587    
1588     if ($r->{media_type} eq 'text/xml') {
1589     unless (defined $r->{charset}) {
1590     $r->{charset} = 'us-ascii';
1591 wakaba 1.26 $r->{official_charset} = $r->{charset};
1592 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1593     $r->{charset_overridden} = 0;
1594     }
1595     }
1596    
1597     if (length $r->{s} > 1000_000) {
1598     $r->{error_status_text} = 'Entity-body too large';
1599     delete $r->{s};
1600     return $r;
1601     }
1602    
1603 wakaba 1.35 $r->{inner_html_element} = $http->get_parameter ('e');
1604    
1605 wakaba 1.9 return $r;
1606     } # get_input_document
1607    
1608     package WDCC::LWPUA;
1609     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1610    
1611     sub redirect_ok {
1612     my $ua = shift;
1613     unless ($ua->SUPER::redirect_ok (@_)) {
1614     return 0;
1615     }
1616    
1617     my $uris = $_[1]->header ('Location');
1618     return 0 unless $uris;
1619     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1620     unless ({
1621     http => 1,
1622     }->{lc $uri->uri_scheme}) {
1623     return 0;
1624     }
1625     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1626     return 0;
1627     }
1628     return 1;
1629     } # redirect_ok
1630    
1631 wakaba 1.1 =head1 AUTHOR
1632    
1633     Wakaba <w@suika.fam.cx>.
1634    
1635     =head1 LICENSE
1636    
1637 wakaba 1.35 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1638 wakaba 1.1
1639     This library is free software; you can redistribute it
1640     and/or modify it under the same terms as Perl itself.
1641    
1642     =cut
1643    
1644 wakaba 1.51 ## $Date: 2008/05/06 08:47:09 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24