/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.49 - (hide annotations) (download)
Tue May 6 07:50:28 2008 UTC (16 years ago) by wakaba
Branch: MAIN
Changes since 1.48: +3 -2 lines
++ ChangeLog	6 May 2008 07:50:23 -0000
2008-05-06  Wakaba  <wakaba@suika.fam.cx>

	* table-script.js: Support for header cell highlighting.

	* table.cgi: Set |id| to cells; it enables the cell highlighting
	feature.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.35 my @nav;
24     my %time;
25     require Message::DOM::DOMImplementation;
26     my $dom = Message::DOM::DOMImplementation->new;
27     {
28 wakaba 1.16 use Message::CGI::HTTP;
29     my $http = Message::CGI::HTTP->new;
30 wakaba 1.1
31 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33     exit;
34     }
35    
36 wakaba 1.12 binmode STDOUT, ':utf8';
37 wakaba 1.14 $| = 1;
38 wakaba 1.12
39 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
40    
41 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
42    
43     <!DOCTYPE html>
44     <html lang="en">
45     <head>
46     <title>Web Document Conformance Checker (BETA)</title>
47 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 wakaba 1.2 </head>
49     <body>
50 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51     (<em>beta</em>)</h1>
52 wakaba 1.14 ];
53 wakaba 1.2
54 wakaba 1.14 $| = 0;
55     my $input = get_input_document ($http, $dom);
56 wakaba 1.16 my $char_length = 0;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89 wakaba 1.39
90     <script src="../cc-script.js"></script>
91 wakaba 1.9 ];
92    
93 wakaba 1.35 $input->{id_prefix} = '';
94     #$input->{nested} = 0;
95 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
96 wakaba 1.31 check_and_print ($input => $result);
97 wakaba 1.19 print_result_section ($result);
98 wakaba 1.9 } else {
99 wakaba 1.18 print STDOUT qq[</dl></div>];
100     print_result_input_error_section ($input);
101 wakaba 1.9 }
102 wakaba 1.3
103 wakaba 1.2 print STDOUT qq[
104 wakaba 1.3 <ul class="navigation" id="nav-items">
105     ];
106     for (@nav) {
107     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108     }
109     print STDOUT qq[
110     </ul>
111 wakaba 1.2 </body>
112     </html>
113     ];
114 wakaba 1.1
115 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
116     check check_manifest/) {
117 wakaba 1.16 next unless defined $time{$_};
118     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119     print $file $char_length, "\t", $time{$_}, "\n";
120     }
121    
122 wakaba 1.1 exit;
123 wakaba 1.35 }
124 wakaba 1.1
125 wakaba 1.19 sub add_error ($$$) {
126     my ($layer, $err, $result) = @_;
127     if (defined $err->{level}) {
128     if ($err->{level} eq 's') {
129     $result->{$layer}->{should}++;
130     $result->{$layer}->{score_min} -= 2;
131     $result->{conforming_min} = 0;
132     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133     $result->{$layer}->{warning}++;
134 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 wakaba 1.19 $result->{$layer}->{unsupported}++;
136     $result->{unsupported} = 1;
137 wakaba 1.37 } elsif ($err->{level} eq 'i') {
138     #
139 wakaba 1.19 } else {
140     $result->{$layer}->{must}++;
141     $result->{$layer}->{score_max} -= 2;
142     $result->{$layer}->{score_min} -= 2;
143     $result->{conforming_min} = 0;
144     $result->{conforming_max} = 0;
145     }
146     } else {
147     $result->{$layer}->{must}++;
148     $result->{$layer}->{score_max} -= 2;
149     $result->{$layer}->{score_min} -= 2;
150     $result->{conforming_min} = 0;
151     $result->{conforming_max} = 0;
152     }
153     } # add_error
154    
155 wakaba 1.31 sub check_and_print ($$) {
156     my ($input, $result) = @_;
157    
158     print_http_header_section ($input, $result);
159    
160     my $doc;
161     my $el;
162 wakaba 1.35 my $cssom;
163 wakaba 1.31 my $manifest;
164 wakaba 1.34 my @subdoc;
165 wakaba 1.31
166     if ($input->{media_type} eq 'text/html') {
167     ($doc, $el) = print_syntax_error_html_section ($input, $result);
168     print_source_string_section
169 wakaba 1.35 ($input,
170     \($input->{s}),
171     $input->{charset} || $doc->input_encoding);
172 wakaba 1.31 } elsif ({
173     'text/xml' => 1,
174     'application/atom+xml' => 1,
175     'application/rss+xml' => 1,
176 wakaba 1.45 'image/svg+xml' => 1,
177 wakaba 1.31 'application/xhtml+xml' => 1,
178     'application/xml' => 1,
179 wakaba 1.45 ## TODO: Should we make all XML MIME Types fall
180     ## into this category?
181    
182     'application/rdf+xml' => 1, ## NOTE: This type has different model.
183 wakaba 1.31 }->{$input->{media_type}}) {
184     ($doc, $el) = print_syntax_error_xml_section ($input, $result);
185 wakaba 1.35 print_source_string_section ($input,
186     \($input->{s}),
187     $doc->input_encoding);
188     } elsif ($input->{media_type} eq 'text/css') {
189     $cssom = print_syntax_error_css_section ($input, $result);
190     print_source_string_section
191     ($input, \($input->{s}),
192     $cssom->manakai_input_encoding);
193 wakaba 1.31 } elsif ($input->{media_type} eq 'text/cache-manifest') {
194     ## TODO: MUST be text/cache-manifest
195     $manifest = print_syntax_error_manifest_section ($input, $result);
196 wakaba 1.35 print_source_string_section ($input, \($input->{s}),
197     'utf-8');
198 wakaba 1.31 } else {
199     ## TODO: Change HTTP status code??
200     print_result_unknown_type_section ($input, $result);
201     }
202    
203     if (defined $doc or defined $el) {
204 wakaba 1.34 $doc->document_uri ($input->{uri});
205     $doc->manakai_entity_base_uri ($input->{base_uri});
206 wakaba 1.32 print_structure_dump_dom_section ($input, $doc, $el);
207     my $elements = print_structure_error_dom_section
208 wakaba 1.34 ($input, $doc, $el, $result, sub {
209     push @subdoc, shift;
210     });
211 wakaba 1.32 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
212 wakaba 1.33 print_listing_section ({
213     id => 'identifiers', label => 'IDs', heading => 'Identifiers',
214     }, $input, $elements->{id}) if keys %{$elements->{id}};
215     print_listing_section ({
216     id => 'terms', label => 'Terms', heading => 'Terms',
217     }, $input, $elements->{term}) if keys %{$elements->{term}};
218     print_listing_section ({
219     id => 'classes', label => 'Classes', heading => 'Classes',
220     }, $input, $elements->{class}) if keys %{$elements->{class}};
221 wakaba 1.48 print_uri_section ($input, $elements->{uri}) if keys %{$elements->{uri}};
222 wakaba 1.45 print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
223 wakaba 1.35 } elsif (defined $cssom) {
224     print_structure_dump_cssom_section ($input, $cssom);
225     ## TODO: CSSOM validation
226 wakaba 1.36 add_error ('structure', {level => 'u'} => $result);
227 wakaba 1.31 } elsif (defined $manifest) {
228 wakaba 1.32 print_structure_dump_manifest_section ($input, $manifest);
229     print_structure_error_manifest_section ($input, $manifest, $result);
230 wakaba 1.31 }
231 wakaba 1.34
232     my $id_prefix = 0;
233     for my $subinput (@subdoc) {
234     $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
235     $subinput->{nested} = 1;
236     $subinput->{base_uri} = $subinput->{container_node}->base_uri
237     unless defined $subinput->{base_uri};
238     my $ebaseuri = htescape ($subinput->{base_uri});
239     push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
240     print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
241     <h2>Subdocument #$id_prefix</h2>
242    
243     <dl>
244     <dt>Internet Media Type</dt>
245     <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
246     <dt>Container Node</dt>
247     <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
248     <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
249     <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
250     </dl>];
251    
252 wakaba 1.35 $subinput->{id_prefix} .= '-';
253 wakaba 1.34 check_and_print ($subinput => $result);
254    
255     print STDOUT qq[</div>];
256     }
257 wakaba 1.31 } # check_and_print
258    
259 wakaba 1.19 sub print_http_header_section ($$) {
260     my ($input, $result) = @_;
261 wakaba 1.9 return unless defined $input->{header_status_code} or
262     defined $input->{header_status_text} or
263 wakaba 1.34 @{$input->{header_field} or []};
264 wakaba 1.9
265 wakaba 1.32 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
266     print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
267 wakaba 1.9 <h2>HTTP Header</h2>
268    
269     <p><strong>Note</strong>: Due to the limitation of the
270     network library in use, the content of this section might
271     not be the real header.</p>
272    
273     <table><tbody>
274     ];
275    
276     if (defined $input->{header_status_code}) {
277     print STDOUT qq[<tr><th scope="row">Status code</th>];
278     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
279     }
280     if (defined $input->{header_status_text}) {
281     print STDOUT qq[<tr><th scope="row">Status text</th>];
282     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
283     }
284    
285     for (@{$input->{header_field}}) {
286     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
287     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
288     }
289    
290     print STDOUT qq[</tbody></table></div>];
291     } # print_http_header_section
292    
293 wakaba 1.19 sub print_syntax_error_html_section ($$) {
294     my ($input, $result) = @_;
295 wakaba 1.18
296     require Encode;
297     require Whatpm::HTML;
298    
299     print STDOUT qq[
300 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
301 wakaba 1.18 <h2>Parse Errors</h2>
302    
303 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
304 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
305 wakaba 1.18
306     my $onerror = sub {
307     my (%opt) = @_;
308     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
309 wakaba 1.38 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
310     qq[</dt>];
311 wakaba 1.18 $type =~ tr/ /-/;
312     $type =~ s/\|/%7C/g;
313     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
314 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
315     print STDOUT qq[$msg</dd>\n];
316 wakaba 1.19
317     add_error ('syntax', \%opt => $result);
318 wakaba 1.18 };
319    
320     my $doc = $dom->create_document;
321     my $el;
322 wakaba 1.35 my $inner_html_element = $input->{inner_html_element};
323 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
324 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
325 wakaba 1.24 my $time1 = time;
326 wakaba 1.48 my $t = \($input->{s});
327     unless ($input->{is_char_string}) {
328     $t = \(Encode::decode ($input->{charset}, $$t));
329     }
330 wakaba 1.24 $time{decode} = time - $time1;
331    
332 wakaba 1.18 $el = $doc->create_element_ns
333     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
334 wakaba 1.24 $time1 = time;
335 wakaba 1.48 Whatpm::HTML->set_inner_html ($el, $$t, $onerror);
336 wakaba 1.24 $time{parse} = time - $time1;
337 wakaba 1.18 } else {
338 wakaba 1.24 my $time1 = time;
339 wakaba 1.48 if ($input->{is_char_string}) {
340     Whatpm::HTML->parse_char_string ($input->{s} => $doc, $onerror);
341     } else {
342     Whatpm::HTML->parse_byte_string
343     ($input->{charset}, $input->{s} => $doc, $onerror);
344     }
345 wakaba 1.24 $time{parse_html} = time - $time1;
346 wakaba 1.18 }
347 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
348     if defined $input->{official_charset};
349 wakaba 1.24
350 wakaba 1.18 print STDOUT qq[</dl></div>];
351    
352     return ($doc, $el);
353     } # print_syntax_error_html_section
354    
355 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
356     my ($input, $result) = @_;
357 wakaba 1.18
358     require Message::DOM::XMLParserTemp;
359    
360     print STDOUT qq[
361 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
362 wakaba 1.18 <h2>Parse Errors</h2>
363    
364 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
365 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
366 wakaba 1.18
367     my $onerror = sub {
368     my $err = shift;
369     my $line = $err->location->line_number;
370 wakaba 1.35 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
371 wakaba 1.18 print STDOUT $err->location->column_number, "</dt><dd>";
372     print STDOUT htescape $err->text, "</dd>\n";
373 wakaba 1.19
374     add_error ('syntax', {type => $err->text,
375     level => [
376     $err->SEVERITY_FATAL_ERROR => 'm',
377     $err->SEVERITY_ERROR => 'm',
378     $err->SEVERITY_WARNING => 's',
379     ]->[$err->severity]} => $result);
380    
381 wakaba 1.18 return 1;
382     };
383    
384 wakaba 1.48 my $t = \($input->{s});
385     if ($input->{is_char_string}) {
386     require Encode;
387     $t = \(Encode::encode ('utf8', $$t));
388     $input->{charset} = 'utf-8';
389     }
390    
391 wakaba 1.18 my $time1 = time;
392 wakaba 1.48 open my $fh, '<', $t;
393 wakaba 1.18 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
394     ($fh => $dom, $onerror, charset => $input->{charset});
395     $time{parse_xml} = time - $time1;
396 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
397     if defined $input->{official_charset};
398 wakaba 1.18
399     print STDOUT qq[</dl></div>];
400    
401     return ($doc, undef);
402     } # print_syntax_error_xml_section
403    
404 wakaba 1.35 sub get_css_parser () {
405     our $CSSParser;
406     return $CSSParser if $CSSParser;
407    
408     require Whatpm::CSS::Parser;
409     my $p = Whatpm::CSS::Parser->new;
410    
411     $p->{prop}->{$_} = 1 for qw/
412 wakaba 1.37 alignment-baseline
413 wakaba 1.35 background background-attachment background-color background-image
414     background-position background-position-x background-position-y
415     background-repeat border border-bottom border-bottom-color
416     border-bottom-style border-bottom-width border-collapse border-color
417     border-left border-left-color
418     border-left-style border-left-width border-right border-right-color
419     border-right-style border-right-width
420     border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
421     border-style border-top border-top-color border-top-style border-top-width
422     border-width bottom
423     caption-side clear clip color content counter-increment counter-reset
424 wakaba 1.37 cursor direction display dominant-baseline empty-cells float font
425 wakaba 1.35 font-family font-size font-size-adjust font-stretch
426     font-style font-variant font-weight height left
427     letter-spacing line-height
428     list-style list-style-image list-style-position list-style-type
429     margin margin-bottom margin-left margin-right margin-top marker-offset
430     marks max-height max-width min-height min-width opacity -moz-opacity
431     orphans outline outline-color outline-style outline-width overflow
432     overflow-x overflow-y
433     padding padding-bottom padding-left padding-right padding-top
434     page page-break-after page-break-before page-break-inside
435     position quotes right size table-layout
436 wakaba 1.37 text-align text-anchor text-decoration text-indent text-transform
437 wakaba 1.35 top unicode-bidi vertical-align visibility white-space width widows
438 wakaba 1.37 word-spacing writing-mode z-index
439 wakaba 1.35 /;
440     $p->{prop_value}->{display}->{$_} = 1 for qw/
441     block clip inline inline-block inline-table list-item none
442     table table-caption table-cell table-column table-column-group
443     table-header-group table-footer-group table-row table-row-group
444     compact marker
445     /;
446     $p->{prop_value}->{position}->{$_} = 1 for qw/
447     absolute fixed relative static
448     /;
449     $p->{prop_value}->{float}->{$_} = 1 for qw/
450     left right none
451     /;
452     $p->{prop_value}->{clear}->{$_} = 1 for qw/
453     left right none both
454     /;
455     $p->{prop_value}->{direction}->{ltr} = 1;
456     $p->{prop_value}->{direction}->{rtl} = 1;
457     $p->{prop_value}->{marks}->{crop} = 1;
458     $p->{prop_value}->{marks}->{cross} = 1;
459     $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
460     normal bidi-override embed
461     /;
462     for my $prop_name (qw/overflow overflow-x overflow-y/) {
463     $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
464     visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
465     /;
466     }
467     $p->{prop_value}->{visibility}->{$_} = 1 for qw/
468     visible hidden collapse
469     /;
470     $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
471     disc circle square decimal decimal-leading-zero
472     lower-roman upper-roman lower-greek lower-latin
473     upper-latin armenian georgian lower-alpha upper-alpha none
474     hebrew cjk-ideographic hiragana katakana hiragana-iroha
475     katakana-iroha
476     /;
477     $p->{prop_value}->{'list-style-position'}->{outside} = 1;
478     $p->{prop_value}->{'list-style-position'}->{inside} = 1;
479     $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
480     auto always avoid left right
481     /;
482     $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
483     auto always avoid left right
484     /;
485     $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
486     $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
487     $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
488     repeat repeat-x repeat-y no-repeat
489     /;
490     $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
491     $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
492     $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
493     xx-small x-small small medium large x-large xx-large
494     -manakai-xxx-large -webkit-xxx-large
495     larger smaller
496     /;
497     $p->{prop_value}->{'font-style'}->{normal} = 1;
498     $p->{prop_value}->{'font-style'}->{italic} = 1;
499     $p->{prop_value}->{'font-style'}->{oblique} = 1;
500     $p->{prop_value}->{'font-variant'}->{normal} = 1;
501     $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
502     $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
503     qw/normal wider narrower ultra-condensed extra-condensed
504     condensed semi-condensed semi-expanded expanded
505     extra-expanded ultra-expanded/;
506     $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
507     left right center justify begin end
508     /;
509     $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
510     capitalize uppercase lowercase none
511     /;
512     $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
513 wakaba 1.36 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
514 wakaba 1.35 /;
515 wakaba 1.37 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
516     lr rl tb lr-tb rl-tb tb-rl
517     /;
518     $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
519     start middle end
520     /;
521     $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
522     auto use-script no-change reset-size ideographic alphabetic
523     hanging mathematical central middle text-after-edge text-before-edge
524     /;
525     $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
526     auto baseline before-edge text-before-edge middle central
527     after-edge text-after-edge ideographic alphabetic hanging
528     mathematical
529     /;
530 wakaba 1.35 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
531     none blink underline overline line-through
532     /;
533     $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
534     top bottom left right
535     /;
536     $p->{prop_value}->{'table-layout'}->{auto} = 1;
537     $p->{prop_value}->{'table-layout'}->{fixed} = 1;
538 wakaba 1.36 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
539 wakaba 1.35 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
540     $p->{prop_value}->{'empty-cells'}->{show} = 1;
541     $p->{prop_value}->{'empty-cells'}->{hide} = 1;
542     $p->{prop_value}->{cursor}->{$_} = 1 for qw/
543     auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
544     se-resize sw-resize s-resize w-resize text wait help progress
545     /;
546     for my $prop (qw/border-top-style border-left-style
547     border-bottom-style border-right-style outline-style/) {
548     $p->{prop_value}->{$prop}->{$_} = 1 for qw/
549     none hidden dotted dashed solid double groove ridge inset outset
550     /;
551     }
552     for my $prop (qw/color background-color
553     border-bottom-color border-left-color border-right-color
554     border-top-color border-color/) {
555     $p->{prop_value}->{$prop}->{transparent} = 1;
556     $p->{prop_value}->{$prop}->{flavor} = 1;
557     $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
558     }
559     $p->{prop_value}->{'outline-color'}->{invert} = 1;
560     $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
561     $p->{pseudo_class}->{$_} = 1 for qw/
562     active checked disabled empty enabled first-child first-of-type
563     focus hover indeterminate last-child last-of-type link only-child
564     only-of-type root target visited
565     lang nth-child nth-last-child nth-of-type nth-last-of-type not
566     -manakai-contains -manakai-current
567     /;
568     $p->{pseudo_element}->{$_} = 1 for qw/
569     after before first-letter first-line
570     /;
571    
572     return $CSSParser = $p;
573     } # get_css_parser
574    
575     sub print_syntax_error_css_section ($$) {
576     my ($input, $result) = @_;
577    
578     print STDOUT qq[
579     <div id="$input->{id_prefix}parse-errors" class="section">
580     <h2>Parse Errors</h2>
581    
582 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
583 wakaba 1.35 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
584    
585     my $p = get_css_parser ();
586 wakaba 1.37 $p->init;
587 wakaba 1.35 $p->{onerror} = sub {
588     my (%opt) = @_;
589     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
590     if ($opt{token}) {
591     print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
592     } else {
593     print STDOUT qq[<dt class="$cls">Unknown location];
594     }
595     if (defined $opt{value}) {
596     print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
597     } elsif (defined $opt{token}) {
598     print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
599     }
600     $type =~ tr/ /-/;
601     $type =~ s/\|/%7C/g;
602     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
603     print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
604     print STDOUT qq[$msg</dd>\n];
605    
606     add_error ('syntax', \%opt => $result);
607     };
608     $p->{href} = $input->{uri};
609     $p->{base_uri} = $input->{base_uri};
610    
611 wakaba 1.37 # if ($parse_mode eq 'q') {
612     # $p->{unitless_px} = 1;
613     # $p->{hashless_color} = 1;
614     # }
615    
616     ## TODO: Make $input->{s} a ref.
617    
618 wakaba 1.35 my $s = \$input->{s};
619     my $charset;
620     unless ($input->{is_char_string}) {
621     require Encode;
622     if (defined $input->{charset}) {## TODO: IANA->Perl
623     $charset = $input->{charset};
624     $s = \(Encode::decode ($input->{charset}, $$s));
625     } else {
626     ## TODO: charset detection
627     $s = \(Encode::decode ($charset = 'utf-8', $$s));
628     }
629     }
630    
631     my $cssom = $p->parse_char_string ($$s);
632     $cssom->manakai_input_encoding ($charset) if defined $charset;
633    
634     print STDOUT qq[</dl></div>];
635    
636     return $cssom;
637     } # print_syntax_error_css_section
638    
639 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
640     my ($input, $result) = @_;
641    
642     require Whatpm::CacheManifest;
643    
644     print STDOUT qq[
645 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
646 wakaba 1.22 <h2>Parse Errors</h2>
647    
648 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
649 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
650 wakaba 1.22
651     my $onerror = sub {
652     my (%opt) = @_;
653     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
654 wakaba 1.32 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
655     qq[</dt>];
656 wakaba 1.22 $type =~ tr/ /-/;
657     $type =~ s/\|/%7C/g;
658     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
659 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
660     print STDOUT qq[$msg</dd>\n];
661 wakaba 1.22
662     add_error ('syntax', \%opt => $result);
663     };
664    
665 wakaba 1.48 my $m = $input->{is_char_string} ? 'parse_char_string' : 'parse_byte_string';
666 wakaba 1.22 my $time1 = time;
667 wakaba 1.48 my $manifest = Whatpm::CacheManifest->$m
668 wakaba 1.22 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
669     $time{parse_manifest} = time - $time1;
670    
671     print STDOUT qq[</dl></div>];
672    
673     return $manifest;
674     } # print_syntax_error_manifest_section
675    
676 wakaba 1.35 sub print_source_string_section ($$$) {
677     my $input = shift;
678     my $s;
679     unless ($input->{is_char_string}) {
680     require Encode;
681     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
682     return unless $enc;
683    
684     $s = \($enc->decode (${$_[0]}));
685     } else {
686     $s = $_[0];
687     }
688 wakaba 1.9
689     my $i = 1;
690 wakaba 1.32 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
691     print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
692 wakaba 1.9 <h2>Document Source</h2>
693     <ol lang="">\n];
694 wakaba 1.7 if (length $$s) {
695 wakaba 1.41 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
696 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
697     "</li>\n";
698 wakaba 1.7 $i++;
699     }
700 wakaba 1.41 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
701 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
702     "</li>\n";
703 wakaba 1.7 }
704     } else {
705 wakaba 1.32 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
706 wakaba 1.2 }
707 wakaba 1.39 print STDOUT "</ol></div>
708 wakaba 1.42 <script>
709     addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
710     </script>";
711 wakaba 1.9 } # print_input_string_section
712 wakaba 1.2
713 wakaba 1.35 sub print_document_tree ($$) {
714     my ($input, $node) = @_;
715    
716 wakaba 1.2 my $r = '<ol class="xoxo">';
717 wakaba 1.1
718 wakaba 1.2 my @node = ($node);
719 wakaba 1.1 while (@node) {
720     my $child = shift @node;
721 wakaba 1.2 unless (ref $child) {
722     $r .= $child;
723     next;
724     }
725    
726 wakaba 1.32 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
727 wakaba 1.2 my $nt = $child->node_type;
728     if ($nt == $child->ELEMENT_NODE) {
729 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
730     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
731 wakaba 1.2 '</code>'; ## ISSUE: case
732    
733     if ($child->has_attributes) {
734     $r .= '<ul class="attributes">';
735 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
736 wakaba 1.2 @{$child->attributes}) {
737 wakaba 1.32 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
738 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
739     }
740     $r .= '</ul>';
741     }
742    
743 wakaba 1.7 if ($child->has_child_nodes) {
744 wakaba 1.2 $r .= '<ol class="children">';
745 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
746     } else {
747     $r .= '</li>';
748 wakaba 1.2 }
749     } elsif ($nt == $child->TEXT_NODE) {
750 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
751 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
752 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
753 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
754 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
755 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
756 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
757 wakaba 1.7 $r .= qq[<ul class="attributes">];
758 wakaba 1.27 my $cp = $child->manakai_charset;
759     if (defined $cp) {
760     $r .= qq[<li><code>charset</code> parameter = <code>];
761     $r .= htescape ($cp) . qq[</code></li>];
762     }
763     $r .= qq[<li><code>inputEncoding</code> = ];
764     my $ie = $child->input_encoding;
765     if (defined $ie) {
766     $r .= qq[<code>@{[htescape ($ie)]}</code>];
767     if ($child->manakai_has_bom) {
768     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
769     }
770     } else {
771     $r .= qq[(<code>null</code>)];
772     }
773 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
774     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
775 wakaba 1.9 unless ($child->manakai_is_html) {
776     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
777     if (defined $child->xml_encoding) {
778     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
779     } else {
780     $r .= qq[<li>XML encoding = (null)</li>];
781     }
782     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
783     }
784 wakaba 1.7 $r .= qq[</ul>];
785 wakaba 1.2 if ($child->has_child_nodes) {
786 wakaba 1.7 $r .= '<ol class="children">';
787 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
788 wakaba 1.1 }
789 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
790 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
791     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
792     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
793     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
794 wakaba 1.2 $r .= '</ul></li>';
795     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
796 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
797 wakaba 1.1 } else {
798 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
799 wakaba 1.1 }
800     }
801 wakaba 1.2
802     $r .= '</ol>';
803     print STDOUT $r;
804     } # print_document_tree
805 wakaba 1.1
806 wakaba 1.32 sub print_structure_dump_dom_section ($$$) {
807     my ($input, $doc, $el) = @_;
808 wakaba 1.18
809     print STDOUT qq[
810 wakaba 1.32 <div id="$input->{id_prefix}document-tree" class="section">
811 wakaba 1.18 <h2>Document Tree</h2>
812     ];
813 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
814     unless $input->{nested};
815 wakaba 1.18
816 wakaba 1.35 print_document_tree ($input, $el || $doc);
817 wakaba 1.18
818     print STDOUT qq[</div>];
819 wakaba 1.22 } # print_structure_dump_dom_section
820    
821 wakaba 1.35 sub print_structure_dump_cssom_section ($$) {
822     my ($input, $cssom) = @_;
823    
824     print STDOUT qq[
825     <div id="$input->{id_prefix}document-tree" class="section">
826     <h2>Document Tree</h2>
827     ];
828     push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
829     unless $input->{nested};
830    
831     ## TODO:
832     print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
833    
834     print STDOUT qq[</div>];
835     } # print_structure_dump_cssom_section
836    
837 wakaba 1.32 sub print_structure_dump_manifest_section ($$) {
838     my ($input, $manifest) = @_;
839 wakaba 1.22
840     print STDOUT qq[
841 wakaba 1.32 <div id="$input->{id_prefix}dump-manifest" class="section">
842 wakaba 1.22 <h2>Cache Manifest</h2>
843     ];
844 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
845     unless $input->{nested};
846 wakaba 1.22
847     print STDOUT qq[<dl><dt>Explicit entries</dt>];
848 wakaba 1.37 my $i = 0;
849 wakaba 1.22 for my $uri (@{$manifest->[0]}) {
850     my $euri = htescape ($uri);
851 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
852 wakaba 1.22 }
853    
854     print STDOUT qq[<dt>Fallback entries</dt><dd>
855     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
856     <th scope=row>Fallback Entry</tr><tbody>];
857     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
858     my $euri = htescape ($uri);
859     my $euri2 = htescape ($manifest->[1]->{$uri});
860 wakaba 1.37 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
861     <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
862 wakaba 1.22 }
863    
864     print STDOUT qq[</table><dt>Online whitelist</dt>];
865     for my $uri (@{$manifest->[2]}) {
866     my $euri = htescape ($uri);
867 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
868 wakaba 1.22 }
869 wakaba 1.18
870 wakaba 1.22 print STDOUT qq[</dl></div>];
871     } # print_structure_dump_manifest_section
872    
873 wakaba 1.34 sub print_structure_error_dom_section ($$$$$) {
874     my ($input, $doc, $el, $result, $onsubdoc) = @_;
875 wakaba 1.18
876 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
877 wakaba 1.18 <h2>Document Errors</h2>
878    
879 wakaba 1.42 <dl id=document-errors-list>];
880 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
881     unless $input->{nested};
882 wakaba 1.18
883     require Whatpm::ContentChecker;
884     my $onerror = sub {
885     my %opt = @_;
886     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
887     $type =~ tr/ /-/;
888     $type =~ s/\|/%7C/g;
889     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
890 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
891 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
892     print STDOUT $msg, "</dd>\n";
893 wakaba 1.19 add_error ('structure', \%opt => $result);
894 wakaba 1.18 };
895    
896     my $elements;
897     my $time1 = time;
898     if ($el) {
899 wakaba 1.34 $elements = Whatpm::ContentChecker->check_element
900     ($el, $onerror, $onsubdoc);
901 wakaba 1.18 } else {
902 wakaba 1.34 $elements = Whatpm::ContentChecker->check_document
903     ($doc, $onerror, $onsubdoc);
904 wakaba 1.18 }
905     $time{check} = time - $time1;
906    
907 wakaba 1.42 print STDOUT qq[</dl>
908     <script>
909     addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
910     </script></div>];
911 wakaba 1.18
912     return $elements;
913 wakaba 1.22 } # print_structure_error_dom_section
914    
915     sub print_structure_error_manifest_section ($$$) {
916 wakaba 1.32 my ($input, $manifest, $result) = @_;
917 wakaba 1.22
918 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
919 wakaba 1.22 <h2>Document Errors</h2>
920    
921     <dl>];
922 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
923     unless $input->{nested};
924 wakaba 1.22
925     require Whatpm::CacheManifest;
926     Whatpm::CacheManifest->check_manifest ($manifest, sub {
927     my %opt = @_;
928     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
929     $type =~ tr/ /-/;
930     $type =~ s/\|/%7C/g;
931     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
932 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
933 wakaba 1.22 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
934     add_error ('structure', \%opt => $result);
935     });
936    
937     print STDOUT qq[</div>];
938     } # print_structure_error_manifest_section
939 wakaba 1.18
940 wakaba 1.32 sub print_table_section ($$) {
941     my ($input, $tables) = @_;
942 wakaba 1.18
943 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
944     unless $input->{nested};
945 wakaba 1.18 print STDOUT qq[
946 wakaba 1.32 <div id="$input->{id_prefix}tables" class="section">
947 wakaba 1.18 <h2>Tables</h2>
948    
949     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
950     <script src="../table-script.js" type="text/javascript"></script>
951     <noscript>
952     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
953     </noscript>
954     ];
955    
956     require JSON;
957    
958     my $i = 0;
959     for my $table_el (@$tables) {
960     $i++;
961 wakaba 1.32 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
962     get_node_link ($input, $table_el) . q[</h3>];
963 wakaba 1.18
964     ## TODO: Make |ContentChecker| return |form_table| result
965     ## so that this script don't have to run the algorithm twice.
966     my $table = Whatpm::HTMLTable->form_table ($table_el);
967    
968 wakaba 1.49 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption},
969     @{$table->{row}}) {
970 wakaba 1.18 next unless $_;
971     delete $_->{element};
972     }
973    
974     for (@{$table->{row_group}}) {
975     next unless $_;
976     next unless $_->{element};
977     $_->{type} = $_->{element}->manakai_local_name;
978     delete $_->{element};
979     }
980    
981     for (@{$table->{cell}}) {
982     next unless $_;
983     for (@{$_}) {
984     next unless $_;
985     for (@$_) {
986     $_->{id} = refaddr $_->{element} if defined $_->{element};
987     delete $_->{element};
988     $_->{is_header} = $_->{is_header} ? 1 : 0;
989     }
990     }
991     }
992    
993     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
994     print STDOUT JSON::objToJson ($table);
995 wakaba 1.32 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
996     print STDOUT qq[, '$input->{id_prefix}');</script>];
997 wakaba 1.18 }
998    
999     print STDOUT qq[</div>];
1000     } # print_table_section
1001    
1002 wakaba 1.33 sub print_listing_section ($$$) {
1003     my ($opt, $input, $ids) = @_;
1004 wakaba 1.18
1005 wakaba 1.35 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
1006     unless $input->{nested};
1007 wakaba 1.18 print STDOUT qq[
1008 wakaba 1.33 <div id="$input->{id_prefix}$opt->{id}" class="section">
1009     <h2>$opt->{heading}</h2>
1010 wakaba 1.18
1011     <dl>
1012     ];
1013     for my $id (sort {$a cmp $b} keys %$ids) {
1014     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
1015     for (@{$ids->{$id}}) {
1016 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
1017 wakaba 1.18 }
1018     }
1019     print STDOUT qq[</dl></div>];
1020 wakaba 1.33 } # print_listing_section
1021 wakaba 1.18
1022 wakaba 1.48 sub print_uri_section ($$$) {
1023     my ($input, $uris) = @_;
1024    
1025     ## NOTE: URIs contained in the DOM (i.e. in HTML or XML documents),
1026     ## except for those in RDF triples.
1027     ## TODO: URIs in CSS
1028    
1029     push @nav, ['#' . $input->{id_prefix} . 'uris' => 'URIs']
1030     unless $input->{nested};
1031     print STDOUT qq[
1032     <div id="$input->{id_prefix}uris" class="section">
1033     <h2>URIs</h2>
1034    
1035     <dl>];
1036     for my $uri (sort {$a cmp $b} keys %$uris) {
1037     my $euri = htescape ($uri);
1038     print STDOUT qq[<dt><code class=uri>&lt;<a href="$euri">$euri</a>></code>];
1039     my $eccuri = htescape (get_cc_uri ($uri));
1040     print STDOUT qq[<dd><a href="$eccuri">Check conformance of this document</a>];
1041     print STDOUT qq[<dd>Found at: <ul>];
1042     for my $entry (@{$uris->{$uri}}) {
1043     print STDOUT qq[<li>], get_node_link ($input, $entry->{node});
1044     if (keys %{$entry->{type} or {}}) {
1045     print STDOUT ' (';
1046     print STDOUT join ', ', map {
1047     {
1048     hyperlink => 'Hyperlink',
1049     resource => 'Link to an external resource',
1050     namespace => 'Namespace URI',
1051     cite => 'Citation or link to a long description',
1052     embedded => 'Link to an embedded content',
1053     base => 'Base URI',
1054     action => 'Submission URI',
1055     }->{$_}
1056     or
1057     htescape ($_)
1058     } keys %{$entry->{type}};
1059     print STDOUT ')';
1060     }
1061     }
1062     print STDOUT qq[</ul>];
1063     }
1064     print STDOUT qq[</dl></div>];
1065     } # print_uri_section
1066    
1067 wakaba 1.45 sub print_rdf_section ($$$) {
1068     my ($input, $rdfs) = @_;
1069    
1070     push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
1071     unless $input->{nested};
1072     print STDOUT qq[
1073     <div id="$input->{id_prefix}rdf" class="section">
1074     <h2>RDF Triples</h2>
1075    
1076     <dl>];
1077     my $i = 0;
1078     for my $rdf (@$rdfs) {
1079     print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
1080     print STDOUT get_node_link ($input, $rdf->[0]);
1081     print STDOUT qq[<dd><dl>];
1082     for my $triple (@{$rdf->[1]}) {
1083     print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
1084     print STDOUT get_rdf_resource_html ($triple->[1]);
1085     print STDOUT ' ';
1086     print STDOUT get_rdf_resource_html ($triple->[2]);
1087     print STDOUT ' ';
1088     print STDOUT get_rdf_resource_html ($triple->[3]);
1089     }
1090     print STDOUT qq[</dl>];
1091     }
1092     print STDOUT qq[</dl></div>];
1093     } # print_rdf_section
1094    
1095     sub get_rdf_resource_html ($) {
1096     my $resource = shift;
1097 wakaba 1.46 if (defined $resource->{uri}) {
1098 wakaba 1.45 my $euri = htescape ($resource->{uri});
1099     return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1100     '</a>></code>';
1101 wakaba 1.46 } elsif (defined $resource->{bnodeid}) {
1102 wakaba 1.45 return htescape ('_:' . $resource->{bnodeid});
1103     } elsif ($resource->{nodes}) {
1104     return '(rdf:XMLLiteral)';
1105     } elsif (defined $resource->{value}) {
1106     my $elang = htescape (defined $resource->{language}
1107     ? $resource->{language} : '');
1108     my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
1109     if (defined $resource->{datatype}) {
1110     my $euri = htescape ($resource->{datatype});
1111     $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
1112     '</a>></code>';
1113     } elsif (length $resource->{language}) {
1114     $r .= '@' . htescape ($resource->{language});
1115     }
1116     return $r;
1117     } else {
1118     return '??';
1119     }
1120     } # get_rdf_resource_html
1121    
1122 wakaba 1.19 sub print_result_section ($) {
1123     my $result = shift;
1124    
1125     print STDOUT qq[
1126     <div id="result-summary" class="section">
1127     <h2>Result</h2>];
1128    
1129 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
1130 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1131     checker cannot decide whether the document is conforming or
1132     not, since the document contains one or more unsupported
1133 wakaba 1.21 features. The document might or might not be conforming.</p>];
1134 wakaba 1.19 } elsif ($result->{conforming_min}) {
1135     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1136     found in this document.</p>];
1137     } elsif ($result->{conforming_max}) {
1138     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1139     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1140     it might be conforming.</p>];
1141     } else {
1142     print STDOUT qq[<p class=FAIL id=result-para>This document is
1143     <strong><em>non</em>-conforming</strong>.</p>];
1144     }
1145    
1146     print STDOUT qq[<table>
1147     <colgroup><col><colgroup><col><col><col><colgroup><col>
1148     <thead>
1149 wakaba 1.23 <tr><th scope=col></th>
1150     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1151     Errors</a></th>
1152     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1153     Errors</a></th>
1154     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1155     <th scope=col>Score</th></tr></thead><tbody>];
1156 wakaba 1.19
1157     my $must_error = 0;
1158     my $should_error = 0;
1159     my $warning = 0;
1160     my $score_min = 0;
1161     my $score_max = 0;
1162     my $score_base = 20;
1163 wakaba 1.21 my $score_unit = $score_base / 100;
1164 wakaba 1.19 for (
1165     [Transfer => 'transfer', ''],
1166     [Character => 'char', ''],
1167     [Syntax => 'syntax', '#parse-errors'],
1168     [Structure => 'structure', '#document-errors'],
1169     ) {
1170     $must_error += ($result->{$_->[1]}->{must} += 0);
1171     $should_error += ($result->{$_->[1]}->{should} += 0);
1172     $warning += ($result->{$_->[1]}->{warning} += 0);
1173 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1174     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1175 wakaba 1.19
1176     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1177     my $label = $_->[0];
1178     if ($result->{$_->[1]}->{must} or
1179     $result->{$_->[1]}->{should} or
1180     $result->{$_->[1]}->{warning} or
1181     $result->{$_->[1]}->{unsupported}) {
1182     $label = qq[<a href="$_->[2]">$label</a>];
1183     }
1184    
1185     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1186     if ($uncertain) {
1187 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1188 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1189 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1190 wakaba 1.19 } else {
1191 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1192 wakaba 1.19 }
1193     }
1194    
1195     $score_max += $score_base;
1196    
1197     print STDOUT qq[
1198     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1199     </tbody>
1200 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
1201     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1202     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1203     <td>$warning?</td>
1204     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1205 wakaba 1.19 </table>
1206    
1207     <p><strong>Important</strong>: This conformance checking service
1208     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1209     </div>];
1210     push @nav, ['#result-summary' => 'Result'];
1211     } # print_result_section
1212    
1213 wakaba 1.24 sub print_result_unknown_type_section ($$) {
1214     my ($input, $result) = @_;
1215 wakaba 1.18
1216 wakaba 1.24 my $euri = htescape ($input->{uri});
1217 wakaba 1.18 print STDOUT qq[
1218 wakaba 1.35 <div id="$input->{id_prefix}parse-errors" class="section">
1219 wakaba 1.24 <h2>Errors</h2>
1220    
1221     <dl>
1222     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1223     <dd class=unsupported><strong><a href="../error-description#level-u">Not
1224     supported</a></strong>:
1225     Media type
1226     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1227     is not supported.</dd>
1228     </dl>
1229 wakaba 1.18 </div>
1230     ];
1231 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1232     unless $input->{nested};
1233 wakaba 1.30 add_error (char => {level => 'u'} => $result);
1234     add_error (syntax => {level => 'u'} => $result);
1235     add_error (structure => {level => 'u'} => $result);
1236 wakaba 1.18 } # print_result_unknown_type_section
1237    
1238     sub print_result_input_error_section ($) {
1239     my $input = shift;
1240     print STDOUT qq[<div class="section" id="result-summary">
1241     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1242     </div>];
1243     push @nav, ['#result-summary' => 'Result'];
1244 wakaba 1.32 } # print_result_input_error_section
1245 wakaba 1.18
1246 wakaba 1.32 sub get_error_label ($$) {
1247     my ($input, $err) = @_;
1248 wakaba 1.22
1249     my $r = '';
1250    
1251 wakaba 1.42 my $line;
1252     my $column;
1253    
1254     if (defined $err->{node}) {
1255     $line = $err->{node}->get_user_data ('manakai_source_line');
1256     if (defined $line) {
1257     $column = $err->{node}->get_user_data ('manakai_source_column');
1258 wakaba 1.40 } else {
1259 wakaba 1.42 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1260     my $owner = $err->{node}->owner_element;
1261     $line = $owner->get_user_data ('manakai_source_line');
1262     $column = $owner->get_user_data ('manakai_source_column');
1263 wakaba 1.43 } else {
1264     my $parent = $err->{node}->parent_node;
1265 wakaba 1.44 if ($parent) {
1266     $line = $parent->get_user_data ('manakai_source_line');
1267     $column = $parent->get_user_data ('manakai_source_column');
1268     }
1269 wakaba 1.42 }
1270     }
1271     }
1272     unless (defined $line) {
1273     if (defined $err->{token} and defined $err->{token}->{line}) {
1274     $line = $err->{token}->{line};
1275     $column = $err->{token}->{column};
1276     } elsif (defined $err->{line}) {
1277     $line = $err->{line};
1278     $column = $err->{column};
1279 wakaba 1.40 }
1280 wakaba 1.42 }
1281    
1282     if (defined $line) {
1283     if (defined $column and $column > 0) {
1284     $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1285 wakaba 1.22 } else {
1286 wakaba 1.42 $line = $line - 1 || 1;
1287     $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1288 wakaba 1.22 }
1289     }
1290    
1291     if (defined $err->{node}) {
1292     $r .= ' ' if length $r;
1293 wakaba 1.42 $r .= get_node_link ($input, $err->{node});
1294 wakaba 1.22 }
1295    
1296     if (defined $err->{index}) {
1297 wakaba 1.37 if (length $r) {
1298     $r .= ', Index ' . (0+$err->{index});
1299     } else {
1300     $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1301     . (0+$err->{index}) . '</a>';
1302     }
1303 wakaba 1.22 }
1304    
1305     if (defined $err->{value}) {
1306     $r .= ' ' if length $r;
1307     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1308     }
1309    
1310     return $r;
1311     } # get_error_label
1312    
1313 wakaba 1.23 sub get_error_level_label ($) {
1314     my $err = shift;
1315    
1316     my $r = '';
1317    
1318     if (not defined $err->{level} or $err->{level} eq 'm') {
1319     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1320     error</a></strong>: ];
1321     } elsif ($err->{level} eq 's') {
1322     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1323     error</a></strong>: ];
1324     } elsif ($err->{level} eq 'w') {
1325     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1326     ];
1327 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1328 wakaba 1.23 $r = qq[<strong><a href="../error-description#level-u">Not
1329     supported</a></strong>: ];
1330 wakaba 1.37 } elsif ($err->{level} eq 'i') {
1331     $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1332 wakaba 1.23 } else {
1333     my $elevel = htescape ($err->{level});
1334     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1335     ];
1336     }
1337    
1338     return $r;
1339     } # get_error_level_label
1340    
1341 wakaba 1.1 sub get_node_path ($) {
1342     my $node = shift;
1343     my @r;
1344     while (defined $node) {
1345     my $rs;
1346     if ($node->node_type == 1) {
1347 wakaba 1.47 $rs = $node->node_name;
1348 wakaba 1.1 $node = $node->parent_node;
1349     } elsif ($node->node_type == 2) {
1350 wakaba 1.47 $rs = '@' . $node->node_name;
1351 wakaba 1.1 $node = $node->owner_element;
1352     } elsif ($node->node_type == 3) {
1353     $rs = '"' . $node->data . '"';
1354     $node = $node->parent_node;
1355     } elsif ($node->node_type == 9) {
1356 wakaba 1.9 @r = ('') unless @r;
1357 wakaba 1.1 $rs = '';
1358     $node = $node->parent_node;
1359     } else {
1360     $rs = '#' . $node->node_type;
1361     $node = $node->parent_node;
1362     }
1363     unshift @r, $rs;
1364     }
1365     return join '/', @r;
1366     } # get_node_path
1367    
1368 wakaba 1.32 sub get_node_link ($$) {
1369     return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1370     htescape (get_node_path ($_[1])) . qq[</a>];
1371 wakaba 1.6 } # get_node_link
1372    
1373 wakaba 1.7 {
1374     my $Msg = {};
1375    
1376     sub load_text_catalog ($) {
1377     my $lang = shift; # MUST be a canonical lang name
1378 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
1379     or die "$0: cc-msg.$lang.txt: $!";
1380 wakaba 1.7 while (<$file>) {
1381     if (s/^([^;]+);([^;]*);//) {
1382     my ($type, $cls, $msg) = ($1, $2, $_);
1383     $msg =~ tr/\x0D\x0A//d;
1384     $Msg->{$type} = [$cls, $msg];
1385     }
1386     }
1387     } # load_text_catalog
1388    
1389     sub get_text ($) {
1390 wakaba 1.15 my ($type, $level, $node) = @_;
1391 wakaba 1.7 $type = $level . ':' . $type if defined $level;
1392 wakaba 1.29 $level = 'm' unless defined $level;
1393 wakaba 1.7 my @arg;
1394     {
1395     if (defined $Msg->{$type}) {
1396     my $msg = $Msg->{$type}->[1];
1397 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
1398     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1399     }ge;
1400 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1401     UNIVERSAL::can ($node, 'get_attribute_ns')
1402     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1403     }ge;
1404     $msg =~ s{<var>{\@}</var>}{
1405     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1406     }ge;
1407 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
1408     UNIVERSAL::can ($node, 'manakai_local_name')
1409     ? htescape ($node->manakai_local_name) : ''
1410     }ge;
1411     $msg =~ s{<var>{element-local-name}</var>}{
1412     (UNIVERSAL::can ($node, 'owner_element') and
1413     $node->owner_element)
1414     ? htescape ($node->owner_element->manakai_local_name)
1415     : ''
1416     }ge;
1417 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1418 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
1419     unshift @arg, $1;
1420     redo;
1421     }
1422     }
1423 wakaba 1.29 return ($type, 'level-'.$level, htescape ($_[0]));
1424 wakaba 1.7 } # get_text
1425    
1426     }
1427    
1428 wakaba 1.48 sub encode_uri_component ($) {
1429     require Encode;
1430     my $s = Encode::encode ('utf8', shift);
1431     $s =~ s/([^0-9A-Za-z_.~-])/sprintf '%%%02X', ord $1/ge;
1432     return $s;
1433     } # encode_uri_component
1434    
1435     sub get_cc_uri ($) {
1436     return './?uri=' . encode_uri_component ($_[0]);
1437     } # get_cc_uri
1438    
1439 wakaba 1.9 sub get_input_document ($$) {
1440     my ($http, $dom) = @_;
1441    
1442 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
1443 wakaba 1.9 my $r = {};
1444     if (defined $request_uri and length $request_uri) {
1445     my $uri = $dom->create_uri_reference ($request_uri);
1446     unless ({
1447     http => 1,
1448     }->{lc $uri->uri_scheme}) {
1449     return {uri => $request_uri, request_uri => $request_uri,
1450     error_status_text => 'URI scheme not allowed'};
1451     }
1452    
1453     require Message::Util::HostPermit;
1454     my $host_permit = new Message::Util::HostPermit;
1455     $host_permit->add_rule (<<EOH);
1456     Allow host=suika port=80
1457     Deny host=suika
1458     Allow host=suika.fam.cx port=80
1459     Deny host=suika.fam.cx
1460     Deny host=localhost
1461     Deny host=*.localdomain
1462     Deny ipv4=0.0.0.0/8
1463     Deny ipv4=10.0.0.0/8
1464     Deny ipv4=127.0.0.0/8
1465     Deny ipv4=169.254.0.0/16
1466     Deny ipv4=172.0.0.0/11
1467     Deny ipv4=192.0.2.0/24
1468     Deny ipv4=192.88.99.0/24
1469     Deny ipv4=192.168.0.0/16
1470     Deny ipv4=198.18.0.0/15
1471     Deny ipv4=224.0.0.0/4
1472     Deny ipv4=255.255.255.255/32
1473     Deny ipv6=0::0/0
1474     Allow host=*
1475     EOH
1476     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1477     return {uri => $request_uri, request_uri => $request_uri,
1478     error_status_text => 'Connection to the host is forbidden'};
1479     }
1480    
1481     require LWP::UserAgent;
1482     my $ua = WDCC::LWPUA->new;
1483     $ua->{wdcc_dom} = $dom;
1484     $ua->{wdcc_host_permit} = $host_permit;
1485     $ua->agent ('Mozilla'); ## TODO: for now.
1486     $ua->parse_head (0);
1487     $ua->protocols_allowed ([qw/http/]);
1488     $ua->max_size (1000_000);
1489     my $req = HTTP::Request->new (GET => $request_uri);
1490 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1491 wakaba 1.9 my $res = $ua->request ($req);
1492 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1493     if ($res->is_success or $http->get_parameter ('error-page')) {
1494 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1495     $r->{uri} = $res->request->uri;
1496     $r->{request_uri} = $request_uri;
1497    
1498     ## TODO: More strict parsing...
1499     my $ct = $res->header ('Content-Type');
1500 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1501 wakaba 1.9 $r->{charset} = lc $1;
1502     $r->{charset} =~ tr/\\//d;
1503 wakaba 1.26 $r->{official_charset} = $r->{charset};
1504 wakaba 1.9 }
1505    
1506 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1507 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1508     $r->{charset_overridden}
1509     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1510     $r->{charset} = $input_charset;
1511 wakaba 1.25 }
1512    
1513     ## TODO: Support for HTTP Content-Encoding
1514 wakaba 1.9
1515     $r->{s} = ''.$res->content;
1516 wakaba 1.25
1517     require Whatpm::ContentType;
1518     ($r->{official_type}, $r->{media_type})
1519     = Whatpm::ContentType->get_sniffed_type
1520     (get_file_head => sub {
1521     return substr $r->{s}, 0, shift;
1522     },
1523     http_content_type_byte => $ct,
1524     has_http_content_encoding =>
1525     defined $res->header ('Content-Encoding'),
1526     supported_image_types => {});
1527 wakaba 1.9 } else {
1528     $r->{uri} = $res->request->uri;
1529     $r->{request_uri} = $request_uri;
1530     $r->{error_status_text} = $res->status_line;
1531     }
1532    
1533     $r->{header_field} = [];
1534     $res->scan (sub {
1535     push @{$r->{header_field}}, [$_[0], $_[1]];
1536     });
1537     $r->{header_status_code} = $res->code;
1538     $r->{header_status_text} = $res->message;
1539     } else {
1540 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1541 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1542     $r->{request_uri} = q<thismessage:/>;
1543     $r->{base_uri} = q<thismessage:/>;
1544 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1545 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1546     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1547 wakaba 1.26 $r->{official_charset} = $r->{charset};
1548 wakaba 1.9 $r->{header_field} = [];
1549 wakaba 1.25
1550     require Whatpm::ContentType;
1551     ($r->{official_type}, $r->{media_type})
1552     = Whatpm::ContentType->get_sniffed_type
1553     (get_file_head => sub {
1554     return substr $r->{s}, 0, shift;
1555     },
1556     http_content_type_byte => undef,
1557     has_http_content_encoding => 0,
1558     supported_image_types => {});
1559 wakaba 1.9 }
1560    
1561 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1562 wakaba 1.9 if (defined $input_format and length $input_format) {
1563     $r->{media_type_overridden}
1564     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1565     $r->{media_type} = $input_format;
1566     }
1567     if (defined $r->{s} and not defined $r->{media_type}) {
1568     $r->{media_type} = 'text/html';
1569     $r->{media_type_overridden} = 1;
1570     }
1571    
1572     if ($r->{media_type} eq 'text/xml') {
1573     unless (defined $r->{charset}) {
1574     $r->{charset} = 'us-ascii';
1575 wakaba 1.26 $r->{official_charset} = $r->{charset};
1576 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1577     $r->{charset_overridden} = 0;
1578     }
1579     }
1580    
1581     if (length $r->{s} > 1000_000) {
1582     $r->{error_status_text} = 'Entity-body too large';
1583     delete $r->{s};
1584     return $r;
1585     }
1586    
1587 wakaba 1.35 $r->{inner_html_element} = $http->get_parameter ('e');
1588    
1589 wakaba 1.9 return $r;
1590     } # get_input_document
1591    
1592     package WDCC::LWPUA;
1593     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1594    
1595     sub redirect_ok {
1596     my $ua = shift;
1597     unless ($ua->SUPER::redirect_ok (@_)) {
1598     return 0;
1599     }
1600    
1601     my $uris = $_[1]->header ('Location');
1602     return 0 unless $uris;
1603     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1604     unless ({
1605     http => 1,
1606     }->{lc $uri->uri_scheme}) {
1607     return 0;
1608     }
1609     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1610     return 0;
1611     }
1612     return 1;
1613     } # redirect_ok
1614    
1615 wakaba 1.1 =head1 AUTHOR
1616    
1617     Wakaba <w@suika.fam.cx>.
1618    
1619     =head1 LICENSE
1620    
1621 wakaba 1.35 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1622 wakaba 1.1
1623     This library is free software; you can redistribute it
1624     and/or modify it under the same terms as Perl itself.
1625    
1626     =cut
1627    
1628 wakaba 1.49 ## $Date: 2008/04/12 15:57:56 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24