/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.50 - (hide annotations) (download)
Tue May 6 08:47:09 2008 UTC (16 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.49: +5 -7 lines
++ ChangeLog	6 May 2008 08:47:05 -0000
	* cc.cgi: Use table object returned by the checker; don't
	form a table by itself.

	* table-script.js: Use different coloring for empty data cells.

	* cc.cgi, table.cgi: Remove table reference for JSON convertion.

2008-05-06  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.35 my @nav;
24     my %time;
25     require Message::DOM::DOMImplementation;
26     my $dom = Message::DOM::DOMImplementation->new;
27     {
28 wakaba 1.16 use Message::CGI::HTTP;
29     my $http = Message::CGI::HTTP->new;
30 wakaba 1.1
31 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33     exit;
34     }
35    
36 wakaba 1.12 binmode STDOUT, ':utf8';
37 wakaba 1.14 $| = 1;
38 wakaba 1.12
39 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
40    
41 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
42    
43     <!DOCTYPE html>
44     <html lang="en">
45     <head>
46     <title>Web Document Conformance Checker (BETA)</title>
47 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 wakaba 1.2 </head>
49     <body>
50 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51     (<em>beta</em>)</h1>
52 wakaba 1.14 ];
53 wakaba 1.2
54 wakaba 1.14 $| = 0;
55     my $input = get_input_document ($http, $dom);
56 wakaba 1.16 my $char_length = 0;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89 wakaba 1.39
90     <script src="../cc-script.js"></script>
91 wakaba 1.9 ];
92    
93 wakaba 1.35 $input->{id_prefix} = '';
94     #$input->{nested} = 0;
95 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
96 wakaba 1.31 check_and_print ($input => $result);
97 wakaba 1.19 print_result_section ($result);
98 wakaba 1.9 } else {
99 wakaba 1.18 print STDOUT qq[</dl></div>];
100     print_result_input_error_section ($input);
101 wakaba 1.9 }
102 wakaba 1.3
103 wakaba 1.2 print STDOUT qq[
104 wakaba 1.3 <ul class="navigation" id="nav-items">
105     ];
106     for (@nav) {
107     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108     }
109     print STDOUT qq[
110     </ul>
111 wakaba 1.2 </body>
112     </html>
113     ];
114 wakaba 1.1
115 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
116     check check_manifest/) {
117 wakaba 1.16 next unless defined $time{$_};
118     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119     print $file $char_length, "\t", $time{$_}, "\n";
120     }
121    
122 wakaba 1.1 exit;
123 wakaba 1.35 }
124 wakaba 1.1
125 wakaba 1.19 sub add_error ($$$) {
126     my ($layer, $err, $result) = @_;
127     if (defined $err->{level}) {
128     if ($err->{level} eq 's') {
129     $result->{$layer}->{should}++;
130     $result->{$layer}->{score_min} -= 2;
131     $result->{conforming_min} = 0;
132     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133     $result->{$layer}->{warning}++;
134 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 wakaba 1.19 $result->{$layer}->{unsupported}++;
136     $result->{unsupported} = 1;
137 wakaba 1.37 } elsif ($err->{level} eq 'i') {
138     #
139 wakaba 1.19 } else {
140     $result->{$layer}->{must}++;
141     $result->{$layer}->{score_max} -= 2;
142     $result->{$layer}->{score_min} -= 2;
143     $result->{conforming_min} = 0;
144     $result->{conforming_max} = 0;
145     }
146     } else {
147     $result->{$layer}->{must}++;
148     $result->{$layer}->{score_max} -= 2;
149     $result->{$layer}->{score_min} -= 2;
150     $result->{conforming_min} = 0;
151     $result->{conforming_max} = 0;
152     }
153     } # add_error
154    
155 wakaba 1.31 sub check_and_print ($$) {
156     my ($input, $result) = @_;
157    
158     print_http_header_section ($input, $result);
159    
160     my $doc;
161     my $el;
162 wakaba 1.35 my $cssom;
163 wakaba 1.31 my $manifest;
164 wakaba 1.34 my @subdoc;
165 wakaba 1.31
166     if ($input->{media_type} eq 'text/html') {
167     ($doc, $el) = print_syntax_error_html_section ($input, $result);
168     print_source_string_section
169 wakaba 1.35 ($input,
170     \($input->{s}),
171     $input->{charset} || $doc->input_encoding);
172 wakaba 1.31 } elsif ({
173     'text/xml' => 1,
174     'application/atom+xml' => 1,
175     'application/rss+xml' => 1,
176 wakaba 1.45 'image/svg+xml' => 1,
177 wakaba 1.31 'application/xhtml+xml' => 1,
178     'application/xml' => 1,
179 wakaba 1.45 ## TODO: Should we make all XML MIME Types fall
180     ## into this category?
181    
182     'application/rdf+xml' => 1, ## NOTE: This type has different model.
183 wakaba 1.31 }->{$input->{media_type}}) {
184     ($doc, $el) = print_syntax_error_xml_section ($input, $result);
185 wakaba 1.35 print_source_string_section ($input,
186     \($input->{s}),
187     $doc->input_encoding);
188     } elsif ($input->{media_type} eq 'text/css') {
189     $cssom = print_syntax_error_css_section ($input, $result);
190     print_source_string_section
191     ($input, \($input->{s}),
192     $cssom->manakai_input_encoding);
193 wakaba 1.31 } elsif ($input->{media_type} eq 'text/cache-manifest') {
194     ## TODO: MUST be text/cache-manifest
195     $manifest = print_syntax_error_manifest_section ($input, $result);
196 wakaba 1.35 print_source_string_section ($input, \($input->{s}),
197     'utf-8');
198 wakaba 1.31 } else {
199     ## TODO: Change HTTP status code??
200     print_result_unknown_type_section ($input, $result);
201     }
202    
203     if (defined $doc or defined $el) {
204 wakaba 1.34 $doc->document_uri ($input->{uri});
205     $doc->manakai_entity_base_uri ($input->{base_uri});
206 wakaba 1.32 print_structure_dump_dom_section ($input, $doc, $el);
207     my $elements = print_structure_error_dom_section
208 wakaba 1.34 ($input, $doc, $el, $result, sub {
209     push @subdoc, shift;
210     });
211 wakaba 1.32 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
212 wakaba 1.33 print_listing_section ({
213     id => 'identifiers', label => 'IDs', heading => 'Identifiers',
214     }, $input, $elements->{id}) if keys %{$elements->{id}};
215     print_listing_section ({
216     id => 'terms', label => 'Terms', heading => 'Terms',
217     }, $input, $elements->{term}) if keys %{$elements->{term}};
218     print_listing_section ({
219     id => 'classes', label => 'Classes', heading => 'Classes',
220     }, $input, $elements->{class}) if keys %{$elements->{class}};
221 wakaba 1.48 print_uri_section ($input, $elements->{uri}) if keys %{$elements->{uri}};
222 wakaba 1.45 print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
223 wakaba 1.35 } elsif (defined $cssom) {
224     print_structure_dump_cssom_section ($input, $cssom);
225     ## TODO: CSSOM validation
226 wakaba 1.36 add_error ('structure', {level => 'u'} => $result);
227 wakaba 1.31 } elsif (defined $manifest) {
228 wakaba 1.32 print_structure_dump_manifest_section ($input, $manifest);
229     print_structure_error_manifest_section ($input, $manifest, $result);
230 wakaba 1.31 }
231 wakaba 1.34
232     my $id_prefix = 0;
233     for my $subinput (@subdoc) {
234     $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
235     $subinput->{nested} = 1;
236     $subinput->{base_uri} = $subinput->{container_node}->base_uri
237     unless defined $subinput->{base_uri};
238     my $ebaseuri = htescape ($subinput->{base_uri});
239     push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
240     print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
241     <h2>Subdocument #$id_prefix</h2>
242    
243     <dl>
244     <dt>Internet Media Type</dt>
245     <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
246     <dt>Container Node</dt>
247     <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
248     <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
249     <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
250     </dl>];
251    
252 wakaba 1.35 $subinput->{id_prefix} .= '-';
253 wakaba 1.34 check_and_print ($subinput => $result);
254    
255     print STDOUT qq[</div>];
256     }
257 wakaba 1.31 } # check_and_print
258    
259 wakaba 1.19 sub print_http_header_section ($$) {
260     my ($input, $result) = @_;
261 wakaba 1.9 return unless defined $input->{header_status_code} or
262     defined $input->{header_status_text} or
263 wakaba 1.34 @{$input->{header_field} or []};
264 wakaba 1.9
265 wakaba 1.32 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
266     print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
267 wakaba 1.9 <h2>HTTP Header</h2>
268    
269     <p><strong>Note</strong>: Due to the limitation of the
270     network library in use, the content of this section might
271     not be the real header.</p>
272    
273     <table><tbody>
274     ];
275    
276     if (defined $input->{header_status_code}) {
277     print STDOUT qq[<tr><th scope="row">Status code</th>];
278     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
279     }
280     if (defined $input->{header_status_text}) {
281     print STDOUT qq[<tr><th scope="row">Status text</th>];
282     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
283     }
284    
285     for (@{$input->{header_field}}) {
286     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
287     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
288     }
289    
290     print STDOUT qq[</tbody></table></div>];
291     } # print_http_header_section
292    
293 wakaba 1.19 sub print_syntax_error_html_section ($$) {
294     my ($input, $result) = @_;
295 wakaba 1.18
296     require Encode;
297     require Whatpm::HTML;
298    
299     print STDOUT qq[
300 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
301 wakaba 1.18 <h2>Parse Errors</h2>
302    
303 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
304 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
305 wakaba 1.18
306     my $onerror = sub {
307     my (%opt) = @_;
308     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
309 wakaba 1.38 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
310     qq[</dt>];
311 wakaba 1.18 $type =~ tr/ /-/;
312     $type =~ s/\|/%7C/g;
313     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
314 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
315     print STDOUT qq[$msg</dd>\n];
316 wakaba 1.19
317     add_error ('syntax', \%opt => $result);
318 wakaba 1.18 };
319    
320     my $doc = $dom->create_document;
321     my $el;
322 wakaba 1.35 my $inner_html_element = $input->{inner_html_element};
323 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
324 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
325 wakaba 1.24 my $time1 = time;
326 wakaba 1.48 my $t = \($input->{s});
327     unless ($input->{is_char_string}) {
328     $t = \(Encode::decode ($input->{charset}, $$t));
329     }
330 wakaba 1.24 $time{decode} = time - $time1;
331    
332 wakaba 1.18 $el = $doc->create_element_ns
333     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
334 wakaba 1.24 $time1 = time;
335 wakaba 1.48 Whatpm::HTML->set_inner_html ($el, $$t, $onerror);
336 wakaba 1.24 $time{parse} = time - $time1;
337 wakaba 1.18 } else {
338 wakaba 1.24 my $time1 = time;
339 wakaba 1.48 if ($input->{is_char_string}) {
340     Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror);
341     } else {
342     Whatpm::HTML->parse_byte_string
343     ($input->{charset}, $input->{s} => $doc, $onerror);
344     }
345 wakaba 1.24 $time{parse_html} = time - $time1;
346 wakaba 1.18 }
347 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
348     if defined $input->{official_charset};
349 wakaba 1.24
350 wakaba 1.18 print STDOUT qq[</dl></div>];
351    
352     return ($doc, $el);
353     } # print_syntax_error_html_section
354    
355 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
356     my ($input, $result) = @_;
357 wakaba 1.18
358     require Message::DOM::XMLParserTemp;
359    
360     print STDOUT qq[
361 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
362 wakaba 1.18 <h2>Parse Errors</h2>
363    
364 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
365 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
366 wakaba 1.18
367     my $onerror = sub {
368     my $err = shift;
369     my $line = $err->location->line_number;
370 wakaba 1.35 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
371 wakaba 1.18 print STDOUT $err->location->column_number, "</dt><dd>";
372     print STDOUT htescape $err->text, "</dd>\n";
373 wakaba 1.19
374     add_error ('syntax', {type => $err->text,
375     level => [
376     $err->SEVERITY_FATAL_ERROR => 'm',
377     $err->SEVERITY_ERROR => 'm',
378     $err->SEVERITY_WARNING => 's',
379     ]->[$err->severity]} => $result);
380    
381 wakaba 1.18 return 1;
382     };
383    
384 wakaba 1.48 my $t = \($input->{s});
385     if ($input->{is_char_string}) {
386     require Encode;
387     $t = \(Encode::encode ('utf8', $$t));
388     $input->{charset} = 'utf-8';
389     }
390    
391 wakaba 1.18 my $time1 = time;
392 wakaba 1.48 open my $fh, '<', $t;
393 wakaba 1.18 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
394     ($fh => $dom, $onerror, charset => $input->{charset});
395     $time{parse_xml} = time - $time1;
396 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
397     if defined $input->{official_charset};
398 wakaba 1.18
399     print STDOUT qq[</dl></div>];
400    
401     return ($doc, undef);
402     } # print_syntax_error_xml_section
403    
404 wakaba 1.35 sub get_css_parser () {
405     our $CSSParser;
406     return $CSSParser if $CSSParser;
407    
408     require Whatpm::CSS::Parser;
409     my $p = Whatpm::CSS::Parser->new;
410    
411     $p->{prop}->{$_} = 1 for qw/
412 wakaba 1.37 alignment-baseline
413 wakaba 1.35 background background-attachment background-color background-image
414     background-position background-position-x background-position-y
415     background-repeat border border-bottom border-bottom-color
416     border-bottom-style border-bottom-width border-collapse border-color
417     border-left border-left-color
418     border-left-style border-left-width border-right border-right-color
419     border-right-style border-right-width
420     border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
421     border-style border-top border-top-color border-top-style border-top-width
422     border-width bottom
423     caption-side clear clip color content counter-increment counter-reset
424 wakaba 1.37 cursor direction display dominant-baseline empty-cells float font
425 wakaba 1.35 font-family font-size font-size-adjust font-stretch
426     font-style font-variant font-weight height left
427     letter-spacing line-height
428     list-style list-style-image list-style-position list-style-type
429     margin margin-bottom margin-left margin-right margin-top marker-offset
430     marks max-height max-width min-height min-width opacity -moz-opacity
431     orphans outline outline-color outline-style outline-width overflow
432     overflow-x overflow-y
433     padding padding-bottom padding-left padding-right padding-top
434     page page-break-after page-break-before page-break-inside
435     position quotes right size table-layout
436 wakaba 1.37 text-align text-anchor text-decoration text-indent text-transform
437 wakaba 1.35 top unicode-bidi vertical-align visibility white-space width widows
438 wakaba 1.37 word-spacing writing-mode z-index
439 wakaba 1.35 /;
440     $p->{prop_value}->{display}->{$_} = 1 for qw/
441     block clip inline inline-block inline-table list-item none
442     table table-caption table-cell table-column table-column-group
443     table-header-group table-footer-group table-row table-row-group
444     compact marker
445     /;
446     $p->{prop_value}->{position}->{$_} = 1 for qw/
447     absolute fixed relative static
448     /;
449     $p->{prop_value}->{float}->{$_} = 1 for qw/
450     left right none
451     /;
452     $p->{prop_value}->{clear}->{$_} = 1 for qw/
453     left right none both
454     /;
455     $p->{prop_value}->{direction}->{ltr} = 1;
456     $p->{prop_value}->{direction}->{rtl} = 1;
457     $p->{prop_value}->{marks}->{crop} = 1;
458     $p->{prop_value}->{marks}->{cross} = 1;
459     $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
460     normal bidi-override embed
461     /;
462     for my $prop_name (qw/overflow overflow-x overflow-y/) {
463     $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
464     visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
465     /;
466     }
467     $p->{prop_value}->{visibility}->{$_} = 1 for qw/
468     visible hidden collapse
469     /;
470     $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
471     disc circle square decimal decimal-leading-zero
472     lower-roman upper-roman lower-greek lower-latin
473     upper-latin armenian georgian lower-alpha upper-alpha none
474     hebrew cjk-ideographic hiragana katakana hiragana-iroha
475     katakana-iroha
476     /;
477     $p->{prop_value}->{'list-style-position'}->{outside} = 1;
478     $p->{prop_value}->{'list-style-position'}->{inside} = 1;
479     $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
480     auto always avoid left right
481     /;
482     $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
483     auto always avoid left right
484     /;
485     $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
486     $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
487     $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
488     repeat repeat-x repeat-y no-repeat
489     /;
490     $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
491     $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
492     $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
493     xx-small x-small small medium large x-large xx-large
494     -manakai-xxx-large -webkit-xxx-large
495     larger smaller
496     /;
497     $p->{prop_value}->{'font-style'}->{normal} = 1;
498     $p->{prop_value}->{'font-style'}->{italic} = 1;
499     $p->{prop_value}->{'font-style'}->{oblique} = 1;
500     $p->{prop_value}->{'font-variant'}->{normal} = 1;
501     $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
502     $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
503     qw/normal wider narrower ultra-condensed extra-condensed
504     condensed semi-condensed semi-expanded expanded
505     extra-expanded ultra-expanded/;
506     $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
507     left right center justify begin end
508     /;
509     $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
510     capitalize uppercase lowercase none
511     /;
512     $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
513 wakaba 1.36 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
514 wakaba 1.35 /;
515 wakaba 1.37 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
516     lr rl tb lr-tb rl-tb tb-rl
517     /;
518     $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
519     start middle end
520     /;
521     $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
522     auto use-script no-change reset-size ideographic alphabetic
523     hanging mathematical central middle text-after-edge text-before-edge
524     /;
525     $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
526     auto baseline before-edge text-before-edge middle central
527     after-edge text-after-edge ideographic alphabetic hanging
528     mathematical
529     /;
530 wakaba 1.35 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
531     none blink underline overline line-through
532     /;
533     $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
534     top bottom left right
535     /;
536     $p->{prop_value}->{'table-layout'}->{auto} = 1;
537     $p->{prop_value}->{'table-layout'}->{fixed} = 1;
538 wakaba 1.36 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
539 wakaba 1.35 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
540     $p->{prop_value}->{'empty-cells'}->{show} = 1;
541     $p->{prop_value}->{'empty-cells'}->{hide} = 1;
542     $p->{prop_value}->{cursor}->{$_} = 1 for qw/
543     auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
544     se-resize sw-resize s-resize w-resize text wait help progress
545     /;
546     for my $prop (qw/border-top-style border-left-style
547     border-bottom-style border-right-style outline-style/) {
548     $p->{prop_value}->{$prop}->{$_} = 1 for qw/
549     none hidden dotted dashed solid double groove ridge inset outset
550     /;
551     }
552     for my $prop (qw/color background-color
553     border-bottom-color border-left-color border-right-color
554     border-top-color border-color/) {
555     $p->{prop_value}->{$prop}->{transparent} = 1;
556     $p->{prop_value}->{$prop}->{flavor} = 1;
557     $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
558     }
559     $p->{prop_value}->{'outline-color'}->{invert} = 1;
560     $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
561     $p->{pseudo_class}->{$_} = 1 for qw/
562     active checked disabled empty enabled first-child first-of-type
563     focus hover indeterminate last-child last-of-type link only-child
564     only-of-type root target visited
565     lang nth-child nth-last-child nth-of-type nth-last-of-type not
566     -manakai-contains -manakai-current
567     /;
568     $p->{pseudo_element}->{$_} = 1 for qw/
569     after before first-letter first-line
570     /;
571    
572     return $CSSParser = $p;
573     } # get_css_parser
574    
575     sub print_syntax_error_css_section ($$) {
576     my ($input, $result) = @_;
577    
578     print STDOUT qq[
579     <div id="$input->{id_prefix}parse-errors" class="section">
580     <h2>Parse Errors</h2>
581    
582 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
583 wakaba 1.35 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
584    
585     my $p = get_css_parser ();
586 wakaba 1.37 $p->init;
587 wakaba 1.35 $p->{onerror} = sub {
588     my (%opt) = @_;
589     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
590     if ($opt{token}) {
591     print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
592     } else {
593     print STDOUT qq[<dt class="$cls">Unknown location];
594     }
595     if (defined $opt{value}) {
596     print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
597     } elsif (defined $opt{token}) {
598     print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
599     }
600     $type =~ tr/ /-/;
601     $type =~ s/\|/%7C/g;
602     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
603     print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
604     print STDOUT qq[$msg</dd>\n];
605    
606     add_error ('syntax', \%opt => $result);
607     };
608     $p->{href} = $input->{uri};
609     $p->{base_uri} = $input->{base_uri};
610    
611 wakaba 1.37 # if ($parse_mode eq 'q') {
612     # $p->{unitless_px} = 1;
613     # $p->{hashless_color} = 1;
614     # }
615    
616     ## TODO: Make $input->{s} a ref.
617    
618 wakaba 1.35 my $s = \$input->{s};
619     my $charset;
620     unless ($input->{is_char_string}) {
621     require Encode;
622     if (defined $input->{charset}) {## TODO: IANA->Perl
623     $charset = $input->{charset};
624     $s = \(Encode::decode ($input->{charset}, $$s));
625     } else {
626     ## TODO: charset detection
627     $s = \(Encode::decode ($charset = 'utf-8', $$s));
628     }
629     }
630    
631     my $cssom = $p->parse_char_string ($$s);
632     $cssom->manakai_input_encoding ($charset) if defined $charset;
633    
634     print STDOUT qq[</dl></div>];
635    
636     return $cssom;
637     } # print_syntax_error_css_section
638    
639 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
640     my ($input, $result) = @_;
641    
642     require Whatpm::CacheManifest;
643    
644     print STDOUT qq[
645 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
646 wakaba 1.22 <h2>Parse Errors</h2>
647    
648 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
649 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
650 wakaba 1.22
651     my $onerror = sub {
652     my (%opt) = @_;
653     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
654 wakaba 1.32 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
655     qq[</dt>];
656 wakaba 1.22 $type =~ tr/ /-/;
657     $type =~ s/\|/%7C/g;
658     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
659 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
660     print STDOUT qq[$msg</dd>\n];
661 wakaba 1.22
662     add_error ('syntax', \%opt => $result);
663     };
664    
665 wakaba 1.48 my $m = $input->{is_char_string} ? 'parse_char_string' : 'parse_byte_string';
666 wakaba 1.22 my $time1 = time;
667 wakaba 1.48 my $manifest = Whatpm::CacheManifest->$m
668 wakaba 1.22 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
669     $time{parse_manifest} = time - $time1;
670    
671     print STDOUT qq[</dl></div>];
672    
673     return $manifest;
674     } # print_syntax_error_manifest_section
675    
676 wakaba 1.35 sub print_source_string_section ($$$) {
677     my $input = shift;
678     my $s;
679     unless ($input->{is_char_string}) {
680     require Encode;
681     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
682     return unless $enc;
683    
684     $s = \($enc->decode (${$_[0]}));
685     } else {
686     $s = $_[0];
687     }
688 wakaba 1.9
689     my $i = 1;
690 wakaba 1.32 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
691     print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
692 wakaba 1.9 <h2>Document Source</h2>
693     <ol lang="">\n];
694 wakaba 1.7 if (length $$s) {
695 wakaba 1.41 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
696 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
697     "</li>\n";
698 wakaba 1.7 $i++;
699     }
700 wakaba 1.41 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
701 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
702     "</li>\n";
703 wakaba 1.7 }
704     } else {
705 wakaba 1.32 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
706 wakaba 1.2 }
707 wakaba 1.39 print STDOUT "</ol></div>
708 wakaba 1.42 <script>
709     addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
710     </script>";
711 wakaba 1.9 } # print_input_string_section
712 wakaba 1.2
713 wakaba 1.35 sub print_document_tree ($$) {
714     my ($input, $node) = @_;
715    
716 wakaba 1.2 my $r = '<ol class="xoxo">';
717 wakaba 1.1
718 wakaba 1.2 my @node = ($node);
719 wakaba 1.1 while (@node) {
720     my $child = shift @node;
721 wakaba 1.2 unless (ref $child) {
722     $r .= $child;
723     next;
724     }
725    
726 wakaba 1.32 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
727 wakaba 1.2 my $nt = $child->node_type;
728     if ($nt == $child->ELEMENT_NODE) {
729 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
730     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
731 wakaba 1.2 '</code>'; ## ISSUE: case
732    
733     if ($child->has_attributes) {
734     $r .= '<ul class="attributes">';
735 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
736 wakaba 1.2 @{$child->attributes}) {
737 wakaba 1.32 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
738 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
739     }
740     $r .= '</ul>';
741     }
742    
743 wakaba 1.7 if ($child->has_child_nodes) {
744 wakaba 1.2 $r .= '<ol class="children">';
745 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
746     } else {
747     $r .= '</li>';
748 wakaba 1.2 }
749     } elsif ($nt == $child->TEXT_NODE) {
750 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
751 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
752 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
753 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
754 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
755 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
756 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
757 wakaba 1.7 $r .= qq[<ul class="attributes">];
758 wakaba 1.27 my $cp = $child->manakai_charset;
759     if (defined $cp) {
760     $r .= qq[<li><code>charset</code> parameter = <code>];
761     $r .= htescape ($cp) . qq[</code></li>];
762     }
763     $r .= qq[<li><code>inputEncoding</code> = ];
764     my $ie = $child->input_encoding;
765     if (defined $ie) {
766     $r .= qq[<code>@{[htescape ($ie)]}</code>];
767     if ($child->manakai_has_bom) {
768     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
769     }
770     } else {
771     $r .= qq[(<code>null</code>)];
772     }
773 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
774     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
775 wakaba 1.9 unless ($child->manakai_is_html) {
776     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
777     if (defined $child->xml_encoding) {
778     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
779     } else {
780     $r .= qq[<li>XML encoding = (null)</li>];
781     }
782     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
783     }
784 wakaba 1.7 $r .= qq[</ul>];
785 wakaba 1.2 if ($child->has_child_nodes) {
786 wakaba 1.7 $r .= '<ol class="children">';
787 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
788 wakaba 1.1 }
789 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
790 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
791     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
792     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
793     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
794 wakaba 1.2 $r .= '</ul></li>';
795     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
796 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
797 wakaba 1.1 } else {
798 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
799 wakaba 1.1 }
800     }
801 wakaba 1.2
802     $r .= '</ol>';
803     print STDOUT $r;
804     } # print_document_tree
805 wakaba 1.1
806 wakaba 1.32 sub print_structure_dump_dom_section ($$$) {
807     my ($input, $doc, $el) = @_;
808 wakaba 1.18
809     print STDOUT qq[
810 wakaba 1.32 <div id="$input->{id_prefix}document-tree" class="section">
811 wakaba 1.18 <h2>Document Tree</h2>
812     ];
813 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
814     unless $input->{nested};
815 wakaba 1.18
816 wakaba 1.35 print_document_tree ($input, $el || $doc);
817 wakaba 1.18
818     print STDOUT qq[</div>];
819 wakaba 1.22 } # print_structure_dump_dom_section
820    
821 wakaba 1.35 sub print_structure_dump_cssom_section ($$) {
822     my ($input, $cssom) = @_;
823    
824     print STDOUT qq[
825     <div id="$input->{id_prefix}document-tree" class="section">
826     <h2>Document Tree</h2>
827     ];
828     push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
829     unless $input->{nested};
830    
831     ## TODO:
832     print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
833    
834     print STDOUT qq[</div>];
835     } # print_structure_dump_cssom_section
836    
837 wakaba 1.32 sub print_structure_dump_manifest_section ($$) {
838     my ($input, $manifest) = @_;
839 wakaba 1.22
840     print STDOUT qq[
841 wakaba 1.32 <div id="$input->{id_prefix}dump-manifest" class="section">
842 wakaba 1.22 <h2>Cache Manifest</h2>
843     ];
844 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
845     unless $input->{nested};
846 wakaba 1.22
847     print STDOUT qq[<dl><dt>Explicit entries</dt>];
848 wakaba 1.37 my $i = 0;
849 wakaba 1.22 for my $uri (@{$manifest->[0]}) {
850     my $euri = htescape ($uri);
851 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
852 wakaba 1.22 }
853    
854     print STDOUT qq[<dt>Fallback entries</dt><dd>
855     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
856     <th scope=row>Fallback Entry</tr><tbody>];
857     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
858     my $euri = htescape ($uri);
859     my $euri2 = htescape ($manifest->[1]->{$uri});
860 wakaba 1.37 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
861     <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
862 wakaba 1.22 }
863    
864     print STDOUT qq[</table><dt>Online whitelist</dt>];
865     for my $uri (@{$manifest->[2]}) {
866     my $euri = htescape ($uri);
867 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
868 wakaba 1.22 }
869 wakaba 1.18
870 wakaba 1.22 print STDOUT qq[</dl></div>];
871     } # print_structure_dump_manifest_section
872    
873 wakaba 1.34 sub print_structure_error_dom_section ($$$$$) {
874     my ($input, $doc, $el, $result, $onsubdoc) = @_;
875 wakaba 1.18
876 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
877 wakaba 1.18 <h2>Document Errors</h2>
878    
879 wakaba 1.42 <dl id=document-errors-list>];
880 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
881     unless $input->{nested};
882 wakaba 1.18
883     require Whatpm::ContentChecker;
884     my $onerror = sub {
885     my %opt = @_;
886     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
887     $type =~ tr/ /-/;
888     $type =~ s/\|/%7C/g;
889     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
890 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
891 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
892     print STDOUT $msg, "</dd>\n";
893 wakaba 1.19 add_error ('structure', \%opt => $result);
894 wakaba 1.18 };
895    
896     my $elements;
897     my $time1 = time;
898     if ($el) {
899 wakaba 1.34 $elements = Whatpm::ContentChecker->check_element
900     ($el, $onerror, $onsubdoc);
901 wakaba 1.18 } else {
902 wakaba 1.34 $elements = Whatpm::ContentChecker->check_document
903     ($doc, $onerror, $onsubdoc);
904 wakaba 1.18 }
905     $time{check} = time - $time1;
906    
907 wakaba 1.42 print STDOUT qq[</dl>
908     <script>
909     addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
910     </script></div>];
911 wakaba 1.18
912     return $elements;
913 wakaba 1.22 } # print_structure_error_dom_section
914    
915     sub print_structure_error_manifest_section ($$$) {
916 wakaba 1.32 my ($input, $manifest, $result) = @_;
917 wakaba 1.22
918 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
919 wakaba 1.22 <h2>Document Errors</h2>
920    
921     <dl>];
922 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
923     unless $input->{nested};
924 wakaba 1.22
925     require Whatpm::CacheManifest;
926     Whatpm::CacheManifest->check_manifest ($manifest, sub {
927     my %opt = @_;
928     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
929     $type =~ tr/ /-/;
930     $type =~ s/\|/%7C/g;
931     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
932 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
933 wakaba 1.22 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
934     add_error ('structure', \%opt => $result);
935     });
936    
937     print STDOUT qq[</div>];
938     } # print_structure_error_manifest_section
939 wakaba 1.18
940 wakaba 1.32 sub print_table_section ($$) {
941     my ($input, $tables) = @_;
942 wakaba 1.18
943 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
944     unless $input->{nested};
945 wakaba 1.18 print STDOUT qq[
946 wakaba 1.32 <div id="$input->{id_prefix}tables" class="section">
947 wakaba 1.18 <h2>Tables</h2>
948    
949     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
950     <script src="../table-script.js" type="text/javascript"></script>
951     <noscript>
952     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
953     </noscript>
954     ];
955    
956     require JSON;
957    
958     my $i = 0;
959 wakaba 1.50 for my $table (@$tables) {
960 wakaba 1.18 $i++;
961 wakaba 1.32 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
962 wakaba 1.50 get_node_link ($input, $table->{element}) . q[</h3>];
963    
964     delete $table->{element};
965 wakaba 1.18
966 wakaba 1.49 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption},
967     @{$table->{row}}) {
968 wakaba 1.18 next unless $_;
969     delete $_->{element};
970     }
971    
972     for (@{$table->{row_group}}) {
973     next unless $_;
974     next unless $_->{element};
975     $_->{type} = $_->{element}->manakai_local_name;
976     delete $_->{element};
977     }
978    
979     for (@{$table->{cell}}) {
980     next unless $_;
981     for (@{$_}) {
982     next unless $_;
983     for (@$_) {
984     $_->{id} = refaddr $_->{element} if defined $_->{element};
985     delete $_->{element};
986     $_->{is_header} = $_->{is_header} ? 1 : 0;
987     }
988     }
989     }
990    
991     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
992     print STDOUT JSON::objToJson ($table);
993 wakaba 1.32 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
994     print STDOUT qq[, '$input->{id_prefix}');</script>];
995 wakaba 1.18 }
996    
997     print STDOUT qq[</div>];
998     } # print_table_section
999    
1000 wakaba 1.33 sub print_listing_section ($$$) {
1001     my ($opt, $input, $ids) = @_;
1002 wakaba 1.18
1003 wakaba 1.35 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
1004     unless $input->{nested};
1005 wakaba 1.18 print STDOUT qq[
1006 wakaba 1.33 <div id="$input->{id_prefix}$opt->{id}" class="section">
1007     <h2>$opt->{heading}</h2>
1008 wakaba 1.18
1009     <dl>
1010     ];
1011     for my $id (sort {$a cmp $b} keys %$ids) {
1012     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
1013     for (@{$ids->{$id}}) {
1014 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
1015 wakaba 1.18 }
1016     }
1017     print STDOUT qq[</dl></div>];
1018 wakaba 1.33 } # print_listing_section
1019 wakaba 1.18
1020 wakaba 1.48 sub print_uri_section ($$$) {
1021     my ($input, $uris) = @_;
1022    
1023     ## NOTE: URIs contained in the DOM (i.e. in HTML or XML documents),
1024     ## except for those in RDF triples.
1025     ## TODO: URIs in CSS
1026    
1027     push @nav, ['#' . $input->{id_prefix} . 'uris' => 'URIs']
1028     unless $input->{nested};
1029     print STDOUT qq[
1030     <div id="$input->{id_prefix}uris" class="section">
1031     <h2>URIs</h2>
1032    
1033     <dl>];
1034     for my $uri (sort {$a cmp $b} keys %$uris) {
1035     my $euri = htescape ($uri);
1036     print STDOUT qq[<dt><code class=uri>&lt;<a href="$euri">$euri</a>></code>];
1037     my $eccuri = htescape (get_cc_uri ($uri));
1038     print STDOUT qq[<dd><a href="$eccuri">Check conformance of this document</a>];
1039     print STDOUT qq[<dd>Found at: <ul>];
1040     for my $entry (@{$uris->{$uri}}) {
1041     print STDOUT qq[<li>], get_node_link ($input, $entry->{node});
1042     if (keys %{$entry->{type} or {}}) {
1043     print STDOUT ' (';
1044     print STDOUT join ', ', map {
1045     {
1046     hyperlink => 'Hyperlink',
1047     resource => 'Link to an external resource',
1048     namespace => 'Namespace URI',
1049     cite => 'Citation or link to a long description',
1050     embedded => 'Link to an embedded content',
1051     base => 'Base URI',
1052     action => 'Submission URI',
1053     }->{$_}
1054     or
1055     htescape ($_)
1056     } keys %{$entry->{type}};
1057     print STDOUT ')';
1058     }
1059     }
1060     print STDOUT qq[</ul>];
1061     }
1062     print STDOUT qq[</dl></div>];
1063     } # print_uri_section
1064    
1065 wakaba 1.45 sub print_rdf_section ($$$) {
1066     my ($input, $rdfs) = @_;
1067    
1068     push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
1069     unless $input->{nested};
1070     print STDOUT qq[
1071     <div id="$input->{id_prefix}rdf" class="section">
1072     <h2>RDF Triples</h2>
1073    
1074     <dl>];
1075     my $i = 0;
1076     for my $rdf (@$rdfs) {
1077     print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
1078     print STDOUT get_node_link ($input, $rdf->[0]);
1079     print STDOUT qq[<dd><dl>];
1080     for my $triple (@{$rdf->[1]}) {
1081     print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
1082     print STDOUT get_rdf_resource_html ($triple->[1]);
1083     print STDOUT ' ';
1084     print STDOUT get_rdf_resource_html ($triple->[2]);
1085     print STDOUT ' ';
1086     print STDOUT get_rdf_resource_html ($triple->[3]);
1087     }
1088     print STDOUT qq[</dl>];
1089     }
1090     print STDOUT qq[</dl></div>];
1091     } # print_rdf_section
1092    
1093     sub get_rdf_resource_html ($) {
1094     my $resource = shift;
1095 wakaba 1.46 if (defined $resource->{uri}) {
1096 wakaba 1.45 my $euri = htescape ($resource->{uri});
1097     return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1098     '</a>></code>';
1099 wakaba 1.46 } elsif (defined $resource->{bnodeid}) {
1100 wakaba 1.45 return htescape ('_:' . $resource->{bnodeid});
1101     } elsif ($resource->{nodes}) {
1102     return '(rdf:XMLLiteral)';
1103     } elsif (defined $resource->{value}) {
1104     my $elang = htescape (defined $resource->{language}
1105     ? $resource->{language} : '');
1106     my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
1107     if (defined $resource->{datatype}) {
1108     my $euri = htescape ($resource->{datatype});
1109     $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1110     '</a>></code>';
1111     } elsif (length $resource->{language}) {
1112     $r .= '@' . htescape ($resource->{language});
1113     }
1114     return $r;
1115     } else {
1116     return '??';
1117     }
1118     } # get_rdf_resource_html
1119    
1120 wakaba 1.19 sub print_result_section ($) {
1121     my $result = shift;
1122    
1123     print STDOUT qq[
1124     <div id="result-summary" class="section">
1125     <h2>Result</h2>];
1126    
1127 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
1128 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1129     checker cannot decide whether the document is conforming or
1130     not, since the document contains one or more unsupported
1131 wakaba 1.21 features. The document might or might not be conforming.</p>];
1132 wakaba 1.19 } elsif ($result->{conforming_min}) {
1133     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1134     found in this document.</p>];
1135     } elsif ($result->{conforming_max}) {
1136     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1137     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1138     it might be conforming.</p>];
1139     } else {
1140     print STDOUT qq[<p class=FAIL id=result-para>This document is
1141     <strong><em>non</em>-conforming</strong>.</p>];
1142     }
1143    
1144     print STDOUT qq[<table>
1145     <colgroup><col><colgroup><col><col><col><colgroup><col>
1146     <thead>
1147 wakaba 1.23 <tr><th scope=col></th>
1148     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1149     Errors</a></th>
1150     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1151     Errors</a></th>
1152     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1153     <th scope=col>Score</th></tr></thead><tbody>];
1154 wakaba 1.19
1155     my $must_error = 0;
1156     my $should_error = 0;
1157     my $warning = 0;
1158     my $score_min = 0;
1159     my $score_max = 0;
1160     my $score_base = 20;
1161 wakaba 1.21 my $score_unit = $score_base / 100;
1162 wakaba 1.19 for (
1163     [Transfer => 'transfer', ''],
1164     [Character => 'char', ''],
1165     [Syntax => 'syntax', '#parse-errors'],
1166     [Structure => 'structure', '#document-errors'],
1167     ) {
1168     $must_error += ($result->{$_->[1]}->{must} += 0);
1169     $should_error += ($result->{$_->[1]}->{should} += 0);
1170     $warning += ($result->{$_->[1]}->{warning} += 0);
1171 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1172     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1173 wakaba 1.19
1174     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1175     my $label = $_->[0];
1176     if ($result->{$_->[1]}->{must} or
1177     $result->{$_->[1]}->{should} or
1178     $result->{$_->[1]}->{warning} or
1179     $result->{$_->[1]}->{unsupported}) {
1180     $label = qq[<a href="$_->[2]">$label</a>];
1181     }
1182    
1183     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1184     if ($uncertain) {
1185 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1186 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1187 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1188 wakaba 1.19 } else {
1189 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1190 wakaba 1.19 }
1191     }
1192    
1193     $score_max += $score_base;
1194    
1195     print STDOUT qq[
1196     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1197     </tbody>
1198 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
1199     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1200     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1201     <td>$warning?</td>
1202     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1203 wakaba 1.19 </table>
1204    
1205     <p><strong>Important</strong>: This conformance checking service
1206     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1207     </div>];
1208     push @nav, ['#result-summary' => 'Result'];
1209     } # print_result_section
1210    
1211 wakaba 1.24 sub print_result_unknown_type_section ($$) {
1212     my ($input, $result) = @_;
1213 wakaba 1.18
1214 wakaba 1.24 my $euri = htescape ($input->{uri});
1215 wakaba 1.18 print STDOUT qq[
1216 wakaba 1.35 <div id="$input->{id_prefix}parse-errors" class="section">
1217 wakaba 1.24 <h2>Errors</h2>
1218    
1219     <dl>
1220     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1221     <dd class=unsupported><strong><a href="../error-description#level-u">Not
1222     supported</a></strong>:
1223     Media type
1224     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1225     is not supported.</dd>
1226     </dl>
1227 wakaba 1.18 </div>
1228     ];
1229 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1230     unless $input->{nested};
1231 wakaba 1.30 add_error (char => {level => 'u'} => $result);
1232     add_error (syntax => {level => 'u'} => $result);
1233     add_error (structure => {level => 'u'} => $result);
1234 wakaba 1.18 } # print_result_unknown_type_section
1235    
1236     sub print_result_input_error_section ($) {
1237     my $input = shift;
1238     print STDOUT qq[<div class="section" id="result-summary">
1239     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1240     </div>];
1241     push @nav, ['#result-summary' => 'Result'];
1242 wakaba 1.32 } # print_result_input_error_section
1243 wakaba 1.18
1244 wakaba 1.32 sub get_error_label ($$) {
1245     my ($input, $err) = @_;
1246 wakaba 1.22
1247     my $r = '';
1248    
1249 wakaba 1.42 my $line;
1250     my $column;
1251    
1252     if (defined $err->{node}) {
1253     $line = $err->{node}->get_user_data ('manakai_source_line');
1254     if (defined $line) {
1255     $column = $err->{node}->get_user_data ('manakai_source_column');
1256 wakaba 1.40 } else {
1257 wakaba 1.42 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1258     my $owner = $err->{node}->owner_element;
1259     $line = $owner->get_user_data ('manakai_source_line');
1260     $column = $owner->get_user_data ('manakai_source_column');
1261 wakaba 1.43 } else {
1262     my $parent = $err->{node}->parent_node;
1263 wakaba 1.44 if ($parent) {
1264     $line = $parent->get_user_data ('manakai_source_line');
1265     $column = $parent->get_user_data ('manakai_source_column');
1266     }
1267 wakaba 1.42 }
1268     }
1269     }
1270     unless (defined $line) {
1271     if (defined $err->{token} and defined $err->{token}->{line}) {
1272     $line = $err->{token}->{line};
1273     $column = $err->{token}->{column};
1274     } elsif (defined $err->{line}) {
1275     $line = $err->{line};
1276     $column = $err->{column};
1277 wakaba 1.40 }
1278 wakaba 1.42 }
1279    
1280     if (defined $line) {
1281     if (defined $column and $column > 0) {
1282     $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1283 wakaba 1.22 } else {
1284 wakaba 1.42 $line = $line - 1 || 1;
1285     $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1286 wakaba 1.22 }
1287     }
1288    
1289     if (defined $err->{node}) {
1290     $r .= ' ' if length $r;
1291 wakaba 1.42 $r .= get_node_link ($input, $err->{node});
1292 wakaba 1.22 }
1293    
1294     if (defined $err->{index}) {
1295 wakaba 1.37 if (length $r) {
1296     $r .= ', Index ' . (0+$err->{index});
1297     } else {
1298     $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1299     . (0+$err->{index}) . '</a>';
1300     }
1301 wakaba 1.22 }
1302    
1303     if (defined $err->{value}) {
1304     $r .= ' ' if length $r;
1305     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1306     }
1307    
1308     return $r;
1309     } # get_error_label
1310    
1311 wakaba 1.23 sub get_error_level_label ($) {
1312     my $err = shift;
1313    
1314     my $r = '';
1315    
1316     if (not defined $err->{level} or $err->{level} eq 'm') {
1317     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1318     error</a></strong>: ];
1319     } elsif ($err->{level} eq 's') {
1320     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1321     error</a></strong>: ];
1322     } elsif ($err->{level} eq 'w') {
1323     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1324     ];
1325 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1326 wakaba 1.23 $r = qq[<strong><a href="../error-description#level-u">Not
1327     supported</a></strong>: ];
1328 wakaba 1.37 } elsif ($err->{level} eq 'i') {
1329     $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1330 wakaba 1.23 } else {
1331     my $elevel = htescape ($err->{level});
1332     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1333     ];
1334     }
1335    
1336     return $r;
1337     } # get_error_level_label
1338    
1339 wakaba 1.1 sub get_node_path ($) {
1340     my $node = shift;
1341     my @r;
1342     while (defined $node) {
1343     my $rs;
1344     if ($node->node_type == 1) {
1345 wakaba 1.47 $rs = $node->node_name;
1346 wakaba 1.1 $node = $node->parent_node;
1347     } elsif ($node->node_type == 2) {
1348 wakaba 1.47 $rs = '@' . $node->node_name;
1349 wakaba 1.1 $node = $node->owner_element;
1350     } elsif ($node->node_type == 3) {
1351     $rs = '"' . $node->data . '"';
1352     $node = $node->parent_node;
1353     } elsif ($node->node_type == 9) {
1354 wakaba 1.9 @r = ('') unless @r;
1355 wakaba 1.1 $rs = '';
1356     $node = $node->parent_node;
1357     } else {
1358     $rs = '#' . $node->node_type;
1359     $node = $node->parent_node;
1360     }
1361     unshift @r, $rs;
1362     }
1363     return join '/', @r;
1364     } # get_node_path
1365    
1366 wakaba 1.32 sub get_node_link ($$) {
1367     return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1368     htescape (get_node_path ($_[1])) . qq[</a>];
1369 wakaba 1.6 } # get_node_link
1370    
1371 wakaba 1.7 {
1372     my $Msg = {};
1373    
1374     sub load_text_catalog ($) {
1375     my $lang = shift; # MUST be a canonical lang name
1376 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
1377     or die "$0: cc-msg.$lang.txt: $!";
1378 wakaba 1.7 while (<$file>) {
1379     if (s/^([^;]+);([^;]*);//) {
1380     my ($type, $cls, $msg) = ($1, $2, $_);
1381     $msg =~ tr/\x0D\x0A//d;
1382     $Msg->{$type} = [$cls, $msg];
1383     }
1384     }
1385     } # load_text_catalog
1386    
1387     sub get_text ($) {
1388 wakaba 1.15 my ($type, $level, $node) = @_;
1389 wakaba 1.7 $type = $level . ':' . $type if defined $level;
1390 wakaba 1.29 $level = 'm' unless defined $level;
1391 wakaba 1.7 my @arg;
1392     {
1393     if (defined $Msg->{$type}) {
1394     my $msg = $Msg->{$type}->[1];
1395 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
1396     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1397     }ge;
1398 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1399     UNIVERSAL::can ($node, 'get_attribute_ns')
1400     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1401     }ge;
1402     $msg =~ s{<var>{\@}</var>}{
1403     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1404     }ge;
1405 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
1406     UNIVERSAL::can ($node, 'manakai_local_name')
1407     ? htescape ($node->manakai_local_name) : ''
1408     }ge;
1409     $msg =~ s{<var>{element-local-name}</var>}{
1410     (UNIVERSAL::can ($node, 'owner_element') and
1411     $node->owner_element)
1412     ? htescape ($node->owner_element->manakai_local_name)
1413     : ''
1414     }ge;
1415 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1416 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
1417     unshift @arg, $1;
1418     redo;
1419     }
1420     }
1421 wakaba 1.29 return ($type, 'level-'.$level, htescape ($_[0]));
1422 wakaba 1.7 } # get_text
1423    
1424     }
1425    
1426 wakaba 1.48 sub encode_uri_component ($) {
1427     require Encode;
1428     my $s = Encode::encode ('utf8', shift);
1429     $s =~ s/([^0-9A-Za-z_.~-])/sprintf '%%%02X', ord $1/ge;
1430     return $s;
1431     } # encode_uri_component
1432    
1433     sub get_cc_uri ($) {
1434     return './?uri=' . encode_uri_component ($_[0]);
1435     } # get_cc_uri
1436    
1437 wakaba 1.9 sub get_input_document ($$) {
1438     my ($http, $dom) = @_;
1439    
1440 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
1441 wakaba 1.9 my $r = {};
1442     if (defined $request_uri and length $request_uri) {
1443     my $uri = $dom->create_uri_reference ($request_uri);
1444     unless ({
1445     http => 1,
1446     }->{lc $uri->uri_scheme}) {
1447     return {uri => $request_uri, request_uri => $request_uri,
1448     error_status_text => 'URI scheme not allowed'};
1449     }
1450    
1451     require Message::Util::HostPermit;
1452     my $host_permit = new Message::Util::HostPermit;
1453     $host_permit->add_rule (<<EOH);
1454     Allow host=suika port=80
1455     Deny host=suika
1456     Allow host=suika.fam.cx port=80
1457     Deny host=suika.fam.cx
1458     Deny host=localhost
1459     Deny host=*.localdomain
1460     Deny ipv4=0.0.0.0/8
1461     Deny ipv4=10.0.0.0/8
1462     Deny ipv4=127.0.0.0/8
1463     Deny ipv4=169.254.0.0/16
1464     Deny ipv4=172.0.0.0/11
1465     Deny ipv4=192.0.2.0/24
1466     Deny ipv4=192.88.99.0/24
1467     Deny ipv4=192.168.0.0/16
1468     Deny ipv4=198.18.0.0/15
1469     Deny ipv4=224.0.0.0/4
1470     Deny ipv4=255.255.255.255/32
1471     Deny ipv6=0::0/0
1472     Allow host=*
1473     EOH
1474     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1475     return {uri => $request_uri, request_uri => $request_uri,
1476     error_status_text => 'Connection to the host is forbidden'};
1477     }
1478    
1479     require LWP::UserAgent;
1480     my $ua = WDCC::LWPUA->new;
1481     $ua->{wdcc_dom} = $dom;
1482     $ua->{wdcc_host_permit} = $host_permit;
1483     $ua->agent ('Mozilla'); ## TODO: for now.
1484     $ua->parse_head (0);
1485     $ua->protocols_allowed ([qw/http/]);
1486     $ua->max_size (1000_000);
1487     my $req = HTTP::Request->new (GET => $request_uri);
1488 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1489 wakaba 1.9 my $res = $ua->request ($req);
1490 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1491     if ($res->is_success or $http->get_parameter ('error-page')) {
1492 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1493     $r->{uri} = $res->request->uri;
1494     $r->{request_uri} = $request_uri;
1495    
1496     ## TODO: More strict parsing...
1497     my $ct = $res->header ('Content-Type');
1498 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1499 wakaba 1.9 $r->{charset} = lc $1;
1500     $r->{charset} =~ tr/\\//d;
1501 wakaba 1.26 $r->{official_charset} = $r->{charset};
1502 wakaba 1.9 }
1503    
1504 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1505 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1506     $r->{charset_overridden}
1507     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1508     $r->{charset} = $input_charset;
1509 wakaba 1.25 }
1510    
1511     ## TODO: Support for HTTP Content-Encoding
1512 wakaba 1.9
1513     $r->{s} = ''.$res->content;
1514 wakaba 1.25
1515     require Whatpm::ContentType;
1516     ($r->{official_type}, $r->{media_type})
1517     = Whatpm::ContentType->get_sniffed_type
1518     (get_file_head => sub {
1519     return substr $r->{s}, 0, shift;
1520     },
1521     http_content_type_byte => $ct,
1522     has_http_content_encoding =>
1523     defined $res->header ('Content-Encoding'),
1524     supported_image_types => {});
1525 wakaba 1.9 } else {
1526     $r->{uri} = $res->request->uri;
1527     $r->{request_uri} = $request_uri;
1528     $r->{error_status_text} = $res->status_line;
1529     }
1530    
1531     $r->{header_field} = [];
1532     $res->scan (sub {
1533     push @{$r->{header_field}}, [$_[0], $_[1]];
1534     });
1535     $r->{header_status_code} = $res->code;
1536     $r->{header_status_text} = $res->message;
1537     } else {
1538 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1539 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1540     $r->{request_uri} = q<thismessage:/>;
1541     $r->{base_uri} = q<thismessage:/>;
1542 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1543 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1544     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1545 wakaba 1.26 $r->{official_charset} = $r->{charset};
1546 wakaba 1.9 $r->{header_field} = [];
1547 wakaba 1.25
1548     require Whatpm::ContentType;
1549     ($r->{official_type}, $r->{media_type})
1550     = Whatpm::ContentType->get_sniffed_type
1551     (get_file_head => sub {
1552     return substr $r->{s}, 0, shift;
1553     },
1554     http_content_type_byte => undef,
1555     has_http_content_encoding => 0,
1556     supported_image_types => {});
1557 wakaba 1.9 }
1558    
1559 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1560 wakaba 1.9 if (defined $input_format and length $input_format) {
1561     $r->{media_type_overridden}
1562     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1563     $r->{media_type} = $input_format;
1564     }
1565     if (defined $r->{s} and not defined $r->{media_type}) {
1566     $r->{media_type} = 'text/html';
1567     $r->{media_type_overridden} = 1;
1568     }
1569    
1570     if ($r->{media_type} eq 'text/xml') {
1571     unless (defined $r->{charset}) {
1572     $r->{charset} = 'us-ascii';
1573 wakaba 1.26 $r->{official_charset} = $r->{charset};
1574 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1575     $r->{charset_overridden} = 0;
1576     }
1577     }
1578    
1579     if (length $r->{s} > 1000_000) {
1580     $r->{error_status_text} = 'Entity-body too large';
1581     delete $r->{s};
1582     return $r;
1583     }
1584    
1585 wakaba 1.35 $r->{inner_html_element} = $http->get_parameter ('e');
1586    
1587 wakaba 1.9 return $r;
1588     } # get_input_document
1589    
1590     package WDCC::LWPUA;
1591     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1592    
1593     sub redirect_ok {
1594     my $ua = shift;
1595     unless ($ua->SUPER::redirect_ok (@_)) {
1596     return 0;
1597     }
1598    
1599     my $uris = $_[1]->header ('Location');
1600     return 0 unless $uris;
1601     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1602     unless ({
1603     http => 1,
1604     }->{lc $uri->uri_scheme}) {
1605     return 0;
1606     }
1607     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1608     return 0;
1609     }
1610     return 1;
1611     } # redirect_ok
1612    
1613 wakaba 1.1 =head1 AUTHOR
1614    
1615     Wakaba <w@suika.fam.cx>.
1616    
1617     =head1 LICENSE
1618    
1619 wakaba 1.35 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1620 wakaba 1.1
1621     This library is free software; you can redistribute it
1622     and/or modify it under the same terms as Perl itself.
1623    
1624     =cut
1625    
1626 wakaba 1.50 ## $Date: 2008/05/06 07:50:28 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24