/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.41 - (hide annotations) (download)
Sun Mar 16 11:38:47 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.40: +3 -3 lines
++ ChangeLog	16 Mar 2008 11:38:43 -0000
	* cc.cgi: Line break treatment was different from
	that of HTML and CSS parsers.

2008-03-16  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.35 my @nav;
24     my %time;
25     require Message::DOM::DOMImplementation;
26     my $dom = Message::DOM::DOMImplementation->new;
27     {
28 wakaba 1.16 use Message::CGI::HTTP;
29     my $http = Message::CGI::HTTP->new;
30 wakaba 1.1
31 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33     exit;
34     }
35    
36 wakaba 1.12 binmode STDOUT, ':utf8';
37 wakaba 1.14 $| = 1;
38 wakaba 1.12
39 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
40    
41 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
42    
43     <!DOCTYPE html>
44     <html lang="en">
45     <head>
46     <title>Web Document Conformance Checker (BETA)</title>
47 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 wakaba 1.2 </head>
49     <body>
50 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51     (<em>beta</em>)</h1>
52 wakaba 1.14 ];
53 wakaba 1.2
54 wakaba 1.14 $| = 0;
55     my $input = get_input_document ($http, $dom);
56 wakaba 1.16 my $char_length = 0;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89 wakaba 1.39
90     <script src="../cc-script.js"></script>
91 wakaba 1.9 ];
92    
93 wakaba 1.35 $input->{id_prefix} = '';
94     #$input->{nested} = 0;
95 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
96 wakaba 1.31 check_and_print ($input => $result);
97 wakaba 1.19 print_result_section ($result);
98 wakaba 1.9 } else {
99 wakaba 1.18 print STDOUT qq[</dl></div>];
100     print_result_input_error_section ($input);
101 wakaba 1.9 }
102 wakaba 1.3
103 wakaba 1.2 print STDOUT qq[
104 wakaba 1.3 <ul class="navigation" id="nav-items">
105     ];
106     for (@nav) {
107     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108     }
109     print STDOUT qq[
110     </ul>
111 wakaba 1.2 </body>
112     </html>
113     ];
114 wakaba 1.1
115 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
116     check check_manifest/) {
117 wakaba 1.16 next unless defined $time{$_};
118     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119     print $file $char_length, "\t", $time{$_}, "\n";
120     }
121    
122 wakaba 1.1 exit;
123 wakaba 1.35 }
124 wakaba 1.1
125 wakaba 1.19 sub add_error ($$$) {
126     my ($layer, $err, $result) = @_;
127     if (defined $err->{level}) {
128     if ($err->{level} eq 's') {
129     $result->{$layer}->{should}++;
130     $result->{$layer}->{score_min} -= 2;
131     $result->{conforming_min} = 0;
132     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133     $result->{$layer}->{warning}++;
134 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 wakaba 1.19 $result->{$layer}->{unsupported}++;
136     $result->{unsupported} = 1;
137 wakaba 1.37 } elsif ($err->{level} eq 'i') {
138     #
139 wakaba 1.19 } else {
140     $result->{$layer}->{must}++;
141     $result->{$layer}->{score_max} -= 2;
142     $result->{$layer}->{score_min} -= 2;
143     $result->{conforming_min} = 0;
144     $result->{conforming_max} = 0;
145     }
146     } else {
147     $result->{$layer}->{must}++;
148     $result->{$layer}->{score_max} -= 2;
149     $result->{$layer}->{score_min} -= 2;
150     $result->{conforming_min} = 0;
151     $result->{conforming_max} = 0;
152     }
153     } # add_error
154    
155 wakaba 1.31 sub check_and_print ($$) {
156     my ($input, $result) = @_;
157    
158     print_http_header_section ($input, $result);
159    
160     my $doc;
161     my $el;
162 wakaba 1.35 my $cssom;
163 wakaba 1.31 my $manifest;
164 wakaba 1.34 my @subdoc;
165 wakaba 1.31
166     if ($input->{media_type} eq 'text/html') {
167     ($doc, $el) = print_syntax_error_html_section ($input, $result);
168     print_source_string_section
169 wakaba 1.35 ($input,
170     \($input->{s}),
171     $input->{charset} || $doc->input_encoding);
172 wakaba 1.31 } elsif ({
173     'text/xml' => 1,
174     'application/atom+xml' => 1,
175     'application/rss+xml' => 1,
176     'application/svg+xml' => 1,
177     'application/xhtml+xml' => 1,
178     'application/xml' => 1,
179     }->{$input->{media_type}}) {
180     ($doc, $el) = print_syntax_error_xml_section ($input, $result);
181 wakaba 1.35 print_source_string_section ($input,
182     \($input->{s}),
183     $doc->input_encoding);
184     } elsif ($input->{media_type} eq 'text/css') {
185     $cssom = print_syntax_error_css_section ($input, $result);
186     print_source_string_section
187     ($input, \($input->{s}),
188     $cssom->manakai_input_encoding);
189 wakaba 1.31 } elsif ($input->{media_type} eq 'text/cache-manifest') {
190     ## TODO: MUST be text/cache-manifest
191     $manifest = print_syntax_error_manifest_section ($input, $result);
192 wakaba 1.35 print_source_string_section ($input, \($input->{s}),
193     'utf-8');
194 wakaba 1.31 } else {
195     ## TODO: Change HTTP status code??
196     print_result_unknown_type_section ($input, $result);
197     }
198    
199     if (defined $doc or defined $el) {
200 wakaba 1.34 $doc->document_uri ($input->{uri});
201     $doc->manakai_entity_base_uri ($input->{base_uri});
202 wakaba 1.32 print_structure_dump_dom_section ($input, $doc, $el);
203     my $elements = print_structure_error_dom_section
204 wakaba 1.34 ($input, $doc, $el, $result, sub {
205     push @subdoc, shift;
206     });
207 wakaba 1.32 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
208 wakaba 1.33 print_listing_section ({
209     id => 'identifiers', label => 'IDs', heading => 'Identifiers',
210     }, $input, $elements->{id}) if keys %{$elements->{id}};
211     print_listing_section ({
212     id => 'terms', label => 'Terms', heading => 'Terms',
213     }, $input, $elements->{term}) if keys %{$elements->{term}};
214     print_listing_section ({
215     id => 'classes', label => 'Classes', heading => 'Classes',
216     }, $input, $elements->{class}) if keys %{$elements->{class}};
217 wakaba 1.35 } elsif (defined $cssom) {
218     print_structure_dump_cssom_section ($input, $cssom);
219     ## TODO: CSSOM validation
220 wakaba 1.36 add_error ('structure', {level => 'u'} => $result);
221 wakaba 1.31 } elsif (defined $manifest) {
222 wakaba 1.32 print_structure_dump_manifest_section ($input, $manifest);
223     print_structure_error_manifest_section ($input, $manifest, $result);
224 wakaba 1.31 }
225 wakaba 1.34
226     my $id_prefix = 0;
227     for my $subinput (@subdoc) {
228     $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
229     $subinput->{nested} = 1;
230     $subinput->{base_uri} = $subinput->{container_node}->base_uri
231     unless defined $subinput->{base_uri};
232     my $ebaseuri = htescape ($subinput->{base_uri});
233     push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
234     print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
235     <h2>Subdocument #$id_prefix</h2>
236    
237     <dl>
238     <dt>Internet Media Type</dt>
239     <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
240     <dt>Container Node</dt>
241     <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
242     <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
243     <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
244     </dl>];
245    
246 wakaba 1.35 $subinput->{id_prefix} .= '-';
247 wakaba 1.34 check_and_print ($subinput => $result);
248    
249     print STDOUT qq[</div>];
250     }
251 wakaba 1.31 } # check_and_print
252    
253 wakaba 1.19 sub print_http_header_section ($$) {
254     my ($input, $result) = @_;
255 wakaba 1.9 return unless defined $input->{header_status_code} or
256     defined $input->{header_status_text} or
257 wakaba 1.34 @{$input->{header_field} or []};
258 wakaba 1.9
259 wakaba 1.32 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
260     print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
261 wakaba 1.9 <h2>HTTP Header</h2>
262    
263     <p><strong>Note</strong>: Due to the limitation of the
264     network library in use, the content of this section might
265     not be the real header.</p>
266    
267     <table><tbody>
268     ];
269    
270     if (defined $input->{header_status_code}) {
271     print STDOUT qq[<tr><th scope="row">Status code</th>];
272     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
273     }
274     if (defined $input->{header_status_text}) {
275     print STDOUT qq[<tr><th scope="row">Status text</th>];
276     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
277     }
278    
279     for (@{$input->{header_field}}) {
280     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
281     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
282     }
283    
284     print STDOUT qq[</tbody></table></div>];
285     } # print_http_header_section
286    
287 wakaba 1.19 sub print_syntax_error_html_section ($$) {
288     my ($input, $result) = @_;
289 wakaba 1.18
290     require Encode;
291     require Whatpm::HTML;
292    
293     print STDOUT qq[
294 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
295 wakaba 1.18 <h2>Parse Errors</h2>
296    
297 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
298 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
299 wakaba 1.18
300     my $onerror = sub {
301     my (%opt) = @_;
302     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
303 wakaba 1.38 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
304     qq[</dt>];
305 wakaba 1.18 $type =~ tr/ /-/;
306     $type =~ s/\|/%7C/g;
307     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
308 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
309     print STDOUT qq[$msg</dd>\n];
310 wakaba 1.19
311     add_error ('syntax', \%opt => $result);
312 wakaba 1.18 };
313    
314     my $doc = $dom->create_document;
315     my $el;
316 wakaba 1.35 my $inner_html_element = $input->{inner_html_element};
317 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
318 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
319 wakaba 1.24 my $time1 = time;
320     my $t = Encode::decode ($input->{charset}, $input->{s});
321     $time{decode} = time - $time1;
322    
323 wakaba 1.18 $el = $doc->create_element_ns
324     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
325 wakaba 1.24 $time1 = time;
326 wakaba 1.18 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
327 wakaba 1.24 $time{parse} = time - $time1;
328 wakaba 1.18 } else {
329 wakaba 1.24 my $time1 = time;
330     Whatpm::HTML->parse_byte_string
331     ($input->{charset}, $input->{s} => $doc, $onerror);
332     $time{parse_html} = time - $time1;
333 wakaba 1.18 }
334 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
335     if defined $input->{official_charset};
336 wakaba 1.24
337 wakaba 1.18 print STDOUT qq[</dl></div>];
338    
339     return ($doc, $el);
340     } # print_syntax_error_html_section
341    
342 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
343     my ($input, $result) = @_;
344 wakaba 1.18
345     require Message::DOM::XMLParserTemp;
346    
347     print STDOUT qq[
348 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
349 wakaba 1.18 <h2>Parse Errors</h2>
350    
351 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
352 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
353 wakaba 1.18
354     my $onerror = sub {
355     my $err = shift;
356     my $line = $err->location->line_number;
357 wakaba 1.35 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
358 wakaba 1.18 print STDOUT $err->location->column_number, "</dt><dd>";
359     print STDOUT htescape $err->text, "</dd>\n";
360 wakaba 1.19
361     add_error ('syntax', {type => $err->text,
362     level => [
363     $err->SEVERITY_FATAL_ERROR => 'm',
364     $err->SEVERITY_ERROR => 'm',
365     $err->SEVERITY_WARNING => 's',
366     ]->[$err->severity]} => $result);
367    
368 wakaba 1.18 return 1;
369     };
370    
371     my $time1 = time;
372     open my $fh, '<', \($input->{s});
373     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
374     ($fh => $dom, $onerror, charset => $input->{charset});
375     $time{parse_xml} = time - $time1;
376 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
377     if defined $input->{official_charset};
378 wakaba 1.18
379     print STDOUT qq[</dl></div>];
380    
381     return ($doc, undef);
382     } # print_syntax_error_xml_section
383    
384 wakaba 1.35 sub get_css_parser () {
385     our $CSSParser;
386     return $CSSParser if $CSSParser;
387    
388     require Whatpm::CSS::Parser;
389     my $p = Whatpm::CSS::Parser->new;
390    
391     $p->{prop}->{$_} = 1 for qw/
392 wakaba 1.37 alignment-baseline
393 wakaba 1.35 background background-attachment background-color background-image
394     background-position background-position-x background-position-y
395     background-repeat border border-bottom border-bottom-color
396     border-bottom-style border-bottom-width border-collapse border-color
397     border-left border-left-color
398     border-left-style border-left-width border-right border-right-color
399     border-right-style border-right-width
400     border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
401     border-style border-top border-top-color border-top-style border-top-width
402     border-width bottom
403     caption-side clear clip color content counter-increment counter-reset
404 wakaba 1.37 cursor direction display dominant-baseline empty-cells float font
405 wakaba 1.35 font-family font-size font-size-adjust font-stretch
406     font-style font-variant font-weight height left
407     letter-spacing line-height
408     list-style list-style-image list-style-position list-style-type
409     margin margin-bottom margin-left margin-right margin-top marker-offset
410     marks max-height max-width min-height min-width opacity -moz-opacity
411     orphans outline outline-color outline-style outline-width overflow
412     overflow-x overflow-y
413     padding padding-bottom padding-left padding-right padding-top
414     page page-break-after page-break-before page-break-inside
415     position quotes right size table-layout
416 wakaba 1.37 text-align text-anchor text-decoration text-indent text-transform
417 wakaba 1.35 top unicode-bidi vertical-align visibility white-space width widows
418 wakaba 1.37 word-spacing writing-mode z-index
419 wakaba 1.35 /;
420     $p->{prop_value}->{display}->{$_} = 1 for qw/
421     block clip inline inline-block inline-table list-item none
422     table table-caption table-cell table-column table-column-group
423     table-header-group table-footer-group table-row table-row-group
424     compact marker
425     /;
426     $p->{prop_value}->{position}->{$_} = 1 for qw/
427     absolute fixed relative static
428     /;
429     $p->{prop_value}->{float}->{$_} = 1 for qw/
430     left right none
431     /;
432     $p->{prop_value}->{clear}->{$_} = 1 for qw/
433     left right none both
434     /;
435     $p->{prop_value}->{direction}->{ltr} = 1;
436     $p->{prop_value}->{direction}->{rtl} = 1;
437     $p->{prop_value}->{marks}->{crop} = 1;
438     $p->{prop_value}->{marks}->{cross} = 1;
439     $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
440     normal bidi-override embed
441     /;
442     for my $prop_name (qw/overflow overflow-x overflow-y/) {
443     $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
444     visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
445     /;
446     }
447     $p->{prop_value}->{visibility}->{$_} = 1 for qw/
448     visible hidden collapse
449     /;
450     $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
451     disc circle square decimal decimal-leading-zero
452     lower-roman upper-roman lower-greek lower-latin
453     upper-latin armenian georgian lower-alpha upper-alpha none
454     hebrew cjk-ideographic hiragana katakana hiragana-iroha
455     katakana-iroha
456     /;
457     $p->{prop_value}->{'list-style-position'}->{outside} = 1;
458     $p->{prop_value}->{'list-style-position'}->{inside} = 1;
459     $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
460     auto always avoid left right
461     /;
462     $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
463     auto always avoid left right
464     /;
465     $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
466     $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
467     $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
468     repeat repeat-x repeat-y no-repeat
469     /;
470     $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
471     $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
472     $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
473     xx-small x-small small medium large x-large xx-large
474     -manakai-xxx-large -webkit-xxx-large
475     larger smaller
476     /;
477     $p->{prop_value}->{'font-style'}->{normal} = 1;
478     $p->{prop_value}->{'font-style'}->{italic} = 1;
479     $p->{prop_value}->{'font-style'}->{oblique} = 1;
480     $p->{prop_value}->{'font-variant'}->{normal} = 1;
481     $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
482     $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
483     qw/normal wider narrower ultra-condensed extra-condensed
484     condensed semi-condensed semi-expanded expanded
485     extra-expanded ultra-expanded/;
486     $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
487     left right center justify begin end
488     /;
489     $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
490     capitalize uppercase lowercase none
491     /;
492     $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
493 wakaba 1.36 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
494 wakaba 1.35 /;
495 wakaba 1.37 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
496     lr rl tb lr-tb rl-tb tb-rl
497     /;
498     $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
499     start middle end
500     /;
501     $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
502     auto use-script no-change reset-size ideographic alphabetic
503     hanging mathematical central middle text-after-edge text-before-edge
504     /;
505     $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
506     auto baseline before-edge text-before-edge middle central
507     after-edge text-after-edge ideographic alphabetic hanging
508     mathematical
509     /;
510 wakaba 1.35 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
511     none blink underline overline line-through
512     /;
513     $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
514     top bottom left right
515     /;
516     $p->{prop_value}->{'table-layout'}->{auto} = 1;
517     $p->{prop_value}->{'table-layout'}->{fixed} = 1;
518 wakaba 1.36 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
519 wakaba 1.35 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
520     $p->{prop_value}->{'empty-cells'}->{show} = 1;
521     $p->{prop_value}->{'empty-cells'}->{hide} = 1;
522     $p->{prop_value}->{cursor}->{$_} = 1 for qw/
523     auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
524     se-resize sw-resize s-resize w-resize text wait help progress
525     /;
526     for my $prop (qw/border-top-style border-left-style
527     border-bottom-style border-right-style outline-style/) {
528     $p->{prop_value}->{$prop}->{$_} = 1 for qw/
529     none hidden dotted dashed solid double groove ridge inset outset
530     /;
531     }
532     for my $prop (qw/color background-color
533     border-bottom-color border-left-color border-right-color
534     border-top-color border-color/) {
535     $p->{prop_value}->{$prop}->{transparent} = 1;
536     $p->{prop_value}->{$prop}->{flavor} = 1;
537     $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
538     }
539     $p->{prop_value}->{'outline-color'}->{invert} = 1;
540     $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
541     $p->{pseudo_class}->{$_} = 1 for qw/
542     active checked disabled empty enabled first-child first-of-type
543     focus hover indeterminate last-child last-of-type link only-child
544     only-of-type root target visited
545     lang nth-child nth-last-child nth-of-type nth-last-of-type not
546     -manakai-contains -manakai-current
547     /;
548     $p->{pseudo_element}->{$_} = 1 for qw/
549     after before first-letter first-line
550     /;
551    
552     return $CSSParser = $p;
553     } # get_css_parser
554    
555     sub print_syntax_error_css_section ($$) {
556     my ($input, $result) = @_;
557    
558     print STDOUT qq[
559     <div id="$input->{id_prefix}parse-errors" class="section">
560     <h2>Parse Errors</h2>
561    
562 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
563 wakaba 1.35 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
564    
565     my $p = get_css_parser ();
566 wakaba 1.37 $p->init;
567 wakaba 1.35 $p->{onerror} = sub {
568     my (%opt) = @_;
569     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
570     if ($opt{token}) {
571     print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
572     } else {
573     print STDOUT qq[<dt class="$cls">Unknown location];
574     }
575     if (defined $opt{value}) {
576     print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
577     } elsif (defined $opt{token}) {
578     print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
579     }
580     $type =~ tr/ /-/;
581     $type =~ s/\|/%7C/g;
582     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
583     print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
584     print STDOUT qq[$msg</dd>\n];
585    
586     add_error ('syntax', \%opt => $result);
587     };
588     $p->{href} = $input->{uri};
589     $p->{base_uri} = $input->{base_uri};
590    
591 wakaba 1.37 # if ($parse_mode eq 'q') {
592     # $p->{unitless_px} = 1;
593     # $p->{hashless_color} = 1;
594     # }
595    
596     ## TODO: Make $input->{s} a ref.
597    
598 wakaba 1.35 my $s = \$input->{s};
599     my $charset;
600     unless ($input->{is_char_string}) {
601     require Encode;
602     if (defined $input->{charset}) {## TODO: IANA->Perl
603     $charset = $input->{charset};
604     $s = \(Encode::decode ($input->{charset}, $$s));
605     } else {
606     ## TODO: charset detection
607     $s = \(Encode::decode ($charset = 'utf-8', $$s));
608     }
609     }
610    
611     my $cssom = $p->parse_char_string ($$s);
612     $cssom->manakai_input_encoding ($charset) if defined $charset;
613    
614     print STDOUT qq[</dl></div>];
615    
616     return $cssom;
617     } # print_syntax_error_css_section
618    
619 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
620     my ($input, $result) = @_;
621    
622     require Whatpm::CacheManifest;
623    
624     print STDOUT qq[
625 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
626 wakaba 1.22 <h2>Parse Errors</h2>
627    
628 wakaba 1.39 <dl id="$input->{id_prefix}parse-errors-list">];
629 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
630 wakaba 1.22
631     my $onerror = sub {
632     my (%opt) = @_;
633     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
634 wakaba 1.32 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
635     qq[</dt>];
636 wakaba 1.22 $type =~ tr/ /-/;
637     $type =~ s/\|/%7C/g;
638     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
639 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
640     print STDOUT qq[$msg</dd>\n];
641 wakaba 1.22
642     add_error ('syntax', \%opt => $result);
643     };
644    
645     my $time1 = time;
646     my $manifest = Whatpm::CacheManifest->parse_byte_string
647     ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
648     $time{parse_manifest} = time - $time1;
649    
650     print STDOUT qq[</dl></div>];
651    
652     return $manifest;
653     } # print_syntax_error_manifest_section
654    
655 wakaba 1.35 sub print_source_string_section ($$$) {
656     my $input = shift;
657     my $s;
658     unless ($input->{is_char_string}) {
659     require Encode;
660     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
661     return unless $enc;
662    
663     $s = \($enc->decode (${$_[0]}));
664     } else {
665     $s = $_[0];
666     }
667 wakaba 1.9
668     my $i = 1;
669 wakaba 1.32 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
670     print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
671 wakaba 1.9 <h2>Document Source</h2>
672     <ol lang="">\n];
673 wakaba 1.7 if (length $$s) {
674 wakaba 1.41 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
675 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
676     "</li>\n";
677 wakaba 1.7 $i++;
678     }
679 wakaba 1.41 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
680 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
681     "</li>\n";
682 wakaba 1.7 }
683     } else {
684 wakaba 1.32 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
685 wakaba 1.2 }
686 wakaba 1.39 print STDOUT "</ol></div>
687     <script> addSourceToParseErrorList ('$input->{id_prefix}'); </script>";
688 wakaba 1.9 } # print_input_string_section
689 wakaba 1.2
690 wakaba 1.35 sub print_document_tree ($$) {
691     my ($input, $node) = @_;
692    
693 wakaba 1.2 my $r = '<ol class="xoxo">';
694 wakaba 1.1
695 wakaba 1.2 my @node = ($node);
696 wakaba 1.1 while (@node) {
697     my $child = shift @node;
698 wakaba 1.2 unless (ref $child) {
699     $r .= $child;
700     next;
701     }
702    
703 wakaba 1.32 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
704 wakaba 1.2 my $nt = $child->node_type;
705     if ($nt == $child->ELEMENT_NODE) {
706 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
707     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
708 wakaba 1.2 '</code>'; ## ISSUE: case
709    
710     if ($child->has_attributes) {
711     $r .= '<ul class="attributes">';
712 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
713 wakaba 1.2 @{$child->attributes}) {
714 wakaba 1.32 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
715 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
716     }
717     $r .= '</ul>';
718     }
719    
720 wakaba 1.7 if ($child->has_child_nodes) {
721 wakaba 1.2 $r .= '<ol class="children">';
722 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
723     } else {
724     $r .= '</li>';
725 wakaba 1.2 }
726     } elsif ($nt == $child->TEXT_NODE) {
727 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
728 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
729 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
730 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
731 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
732 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
733 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
734 wakaba 1.7 $r .= qq[<ul class="attributes">];
735 wakaba 1.27 my $cp = $child->manakai_charset;
736     if (defined $cp) {
737     $r .= qq[<li><code>charset</code> parameter = <code>];
738     $r .= htescape ($cp) . qq[</code></li>];
739     }
740     $r .= qq[<li><code>inputEncoding</code> = ];
741     my $ie = $child->input_encoding;
742     if (defined $ie) {
743     $r .= qq[<code>@{[htescape ($ie)]}</code>];
744     if ($child->manakai_has_bom) {
745     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
746     }
747     } else {
748     $r .= qq[(<code>null</code>)];
749     }
750 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
751     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
752 wakaba 1.9 unless ($child->manakai_is_html) {
753     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
754     if (defined $child->xml_encoding) {
755     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
756     } else {
757     $r .= qq[<li>XML encoding = (null)</li>];
758     }
759     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
760     }
761 wakaba 1.7 $r .= qq[</ul>];
762 wakaba 1.2 if ($child->has_child_nodes) {
763 wakaba 1.7 $r .= '<ol class="children">';
764 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
765 wakaba 1.1 }
766 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
767 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
768     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
769     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
770     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
771 wakaba 1.2 $r .= '</ul></li>';
772     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
773 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
774 wakaba 1.1 } else {
775 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
776 wakaba 1.1 }
777     }
778 wakaba 1.2
779     $r .= '</ol>';
780     print STDOUT $r;
781     } # print_document_tree
782 wakaba 1.1
783 wakaba 1.32 sub print_structure_dump_dom_section ($$$) {
784     my ($input, $doc, $el) = @_;
785 wakaba 1.18
786     print STDOUT qq[
787 wakaba 1.32 <div id="$input->{id_prefix}document-tree" class="section">
788 wakaba 1.18 <h2>Document Tree</h2>
789     ];
790 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
791     unless $input->{nested};
792 wakaba 1.18
793 wakaba 1.35 print_document_tree ($input, $el || $doc);
794 wakaba 1.18
795     print STDOUT qq[</div>];
796 wakaba 1.22 } # print_structure_dump_dom_section
797    
798 wakaba 1.35 sub print_structure_dump_cssom_section ($$) {
799     my ($input, $cssom) = @_;
800    
801     print STDOUT qq[
802     <div id="$input->{id_prefix}document-tree" class="section">
803     <h2>Document Tree</h2>
804     ];
805     push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
806     unless $input->{nested};
807    
808     ## TODO:
809     print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
810    
811     print STDOUT qq[</div>];
812     } # print_structure_dump_cssom_section
813    
814 wakaba 1.32 sub print_structure_dump_manifest_section ($$) {
815     my ($input, $manifest) = @_;
816 wakaba 1.22
817     print STDOUT qq[
818 wakaba 1.32 <div id="$input->{id_prefix}dump-manifest" class="section">
819 wakaba 1.22 <h2>Cache Manifest</h2>
820     ];
821 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
822     unless $input->{nested};
823 wakaba 1.22
824     print STDOUT qq[<dl><dt>Explicit entries</dt>];
825 wakaba 1.37 my $i = 0;
826 wakaba 1.22 for my $uri (@{$manifest->[0]}) {
827     my $euri = htescape ($uri);
828 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
829 wakaba 1.22 }
830    
831     print STDOUT qq[<dt>Fallback entries</dt><dd>
832     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
833     <th scope=row>Fallback Entry</tr><tbody>];
834     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
835     my $euri = htescape ($uri);
836     my $euri2 = htescape ($manifest->[1]->{$uri});
837 wakaba 1.37 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
838     <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
839 wakaba 1.22 }
840    
841     print STDOUT qq[</table><dt>Online whitelist</dt>];
842     for my $uri (@{$manifest->[2]}) {
843     my $euri = htescape ($uri);
844 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
845 wakaba 1.22 }
846 wakaba 1.18
847 wakaba 1.22 print STDOUT qq[</dl></div>];
848     } # print_structure_dump_manifest_section
849    
850 wakaba 1.34 sub print_structure_error_dom_section ($$$$$) {
851     my ($input, $doc, $el, $result, $onsubdoc) = @_;
852 wakaba 1.18
853 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
854 wakaba 1.18 <h2>Document Errors</h2>
855    
856     <dl>];
857 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
858     unless $input->{nested};
859 wakaba 1.18
860     require Whatpm::ContentChecker;
861     my $onerror = sub {
862     my %opt = @_;
863     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
864     $type =~ tr/ /-/;
865     $type =~ s/\|/%7C/g;
866     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
867 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
868 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
869     print STDOUT $msg, "</dd>\n";
870 wakaba 1.19 add_error ('structure', \%opt => $result);
871 wakaba 1.18 };
872    
873     my $elements;
874     my $time1 = time;
875     if ($el) {
876 wakaba 1.34 $elements = Whatpm::ContentChecker->check_element
877     ($el, $onerror, $onsubdoc);
878 wakaba 1.18 } else {
879 wakaba 1.34 $elements = Whatpm::ContentChecker->check_document
880     ($doc, $onerror, $onsubdoc);
881 wakaba 1.18 }
882     $time{check} = time - $time1;
883    
884     print STDOUT qq[</dl></div>];
885    
886     return $elements;
887 wakaba 1.22 } # print_structure_error_dom_section
888    
889     sub print_structure_error_manifest_section ($$$) {
890 wakaba 1.32 my ($input, $manifest, $result) = @_;
891 wakaba 1.22
892 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
893 wakaba 1.22 <h2>Document Errors</h2>
894    
895     <dl>];
896 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
897     unless $input->{nested};
898 wakaba 1.22
899     require Whatpm::CacheManifest;
900     Whatpm::CacheManifest->check_manifest ($manifest, sub {
901     my %opt = @_;
902     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
903     $type =~ tr/ /-/;
904     $type =~ s/\|/%7C/g;
905     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
906 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
907 wakaba 1.22 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
908     add_error ('structure', \%opt => $result);
909     });
910    
911     print STDOUT qq[</div>];
912     } # print_structure_error_manifest_section
913 wakaba 1.18
914 wakaba 1.32 sub print_table_section ($$) {
915     my ($input, $tables) = @_;
916 wakaba 1.18
917 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
918     unless $input->{nested};
919 wakaba 1.18 print STDOUT qq[
920 wakaba 1.32 <div id="$input->{id_prefix}tables" class="section">
921 wakaba 1.18 <h2>Tables</h2>
922    
923     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
924     <script src="../table-script.js" type="text/javascript"></script>
925     <noscript>
926     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
927     </noscript>
928     ];
929    
930     require JSON;
931    
932     my $i = 0;
933     for my $table_el (@$tables) {
934     $i++;
935 wakaba 1.32 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
936     get_node_link ($input, $table_el) . q[</h3>];
937 wakaba 1.18
938     ## TODO: Make |ContentChecker| return |form_table| result
939     ## so that this script don't have to run the algorithm twice.
940     my $table = Whatpm::HTMLTable->form_table ($table_el);
941    
942     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
943     next unless $_;
944     delete $_->{element};
945     }
946    
947     for (@{$table->{row_group}}) {
948     next unless $_;
949     next unless $_->{element};
950     $_->{type} = $_->{element}->manakai_local_name;
951     delete $_->{element};
952     }
953    
954     for (@{$table->{cell}}) {
955     next unless $_;
956     for (@{$_}) {
957     next unless $_;
958     for (@$_) {
959     $_->{id} = refaddr $_->{element} if defined $_->{element};
960     delete $_->{element};
961     $_->{is_header} = $_->{is_header} ? 1 : 0;
962     }
963     }
964     }
965    
966     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
967     print STDOUT JSON::objToJson ($table);
968 wakaba 1.32 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
969     print STDOUT qq[, '$input->{id_prefix}');</script>];
970 wakaba 1.18 }
971    
972     print STDOUT qq[</div>];
973     } # print_table_section
974    
975 wakaba 1.33 sub print_listing_section ($$$) {
976     my ($opt, $input, $ids) = @_;
977 wakaba 1.18
978 wakaba 1.35 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
979     unless $input->{nested};
980 wakaba 1.18 print STDOUT qq[
981 wakaba 1.33 <div id="$input->{id_prefix}$opt->{id}" class="section">
982     <h2>$opt->{heading}</h2>
983 wakaba 1.18
984     <dl>
985     ];
986     for my $id (sort {$a cmp $b} keys %$ids) {
987     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
988     for (@{$ids->{$id}}) {
989 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
990 wakaba 1.18 }
991     }
992     print STDOUT qq[</dl></div>];
993 wakaba 1.33 } # print_listing_section
994 wakaba 1.18
995 wakaba 1.19 sub print_result_section ($) {
996     my $result = shift;
997    
998     print STDOUT qq[
999     <div id="result-summary" class="section">
1000     <h2>Result</h2>];
1001    
1002 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
1003 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1004     checker cannot decide whether the document is conforming or
1005     not, since the document contains one or more unsupported
1006 wakaba 1.21 features. The document might or might not be conforming.</p>];
1007 wakaba 1.19 } elsif ($result->{conforming_min}) {
1008     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1009     found in this document.</p>];
1010     } elsif ($result->{conforming_max}) {
1011     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1012     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1013     it might be conforming.</p>];
1014     } else {
1015     print STDOUT qq[<p class=FAIL id=result-para>This document is
1016     <strong><em>non</em>-conforming</strong>.</p>];
1017     }
1018    
1019     print STDOUT qq[<table>
1020     <colgroup><col><colgroup><col><col><col><colgroup><col>
1021     <thead>
1022 wakaba 1.23 <tr><th scope=col></th>
1023     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1024     Errors</a></th>
1025     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1026     Errors</a></th>
1027     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1028     <th scope=col>Score</th></tr></thead><tbody>];
1029 wakaba 1.19
1030     my $must_error = 0;
1031     my $should_error = 0;
1032     my $warning = 0;
1033     my $score_min = 0;
1034     my $score_max = 0;
1035     my $score_base = 20;
1036 wakaba 1.21 my $score_unit = $score_base / 100;
1037 wakaba 1.19 for (
1038     [Transfer => 'transfer', ''],
1039     [Character => 'char', ''],
1040     [Syntax => 'syntax', '#parse-errors'],
1041     [Structure => 'structure', '#document-errors'],
1042     ) {
1043     $must_error += ($result->{$_->[1]}->{must} += 0);
1044     $should_error += ($result->{$_->[1]}->{should} += 0);
1045     $warning += ($result->{$_->[1]}->{warning} += 0);
1046 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1047     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1048 wakaba 1.19
1049     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1050     my $label = $_->[0];
1051     if ($result->{$_->[1]}->{must} or
1052     $result->{$_->[1]}->{should} or
1053     $result->{$_->[1]}->{warning} or
1054     $result->{$_->[1]}->{unsupported}) {
1055     $label = qq[<a href="$_->[2]">$label</a>];
1056     }
1057    
1058     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1059     if ($uncertain) {
1060 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1061 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1062 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1063 wakaba 1.19 } else {
1064 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1065 wakaba 1.19 }
1066     }
1067    
1068     $score_max += $score_base;
1069    
1070     print STDOUT qq[
1071     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1072     </tbody>
1073 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
1074     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1075     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1076     <td>$warning?</td>
1077     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1078 wakaba 1.19 </table>
1079    
1080     <p><strong>Important</strong>: This conformance checking service
1081     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1082     </div>];
1083     push @nav, ['#result-summary' => 'Result'];
1084     } # print_result_section
1085    
1086 wakaba 1.24 sub print_result_unknown_type_section ($$) {
1087     my ($input, $result) = @_;
1088 wakaba 1.18
1089 wakaba 1.24 my $euri = htescape ($input->{uri});
1090 wakaba 1.18 print STDOUT qq[
1091 wakaba 1.35 <div id="$input->{id_prefix}parse-errors" class="section">
1092 wakaba 1.24 <h2>Errors</h2>
1093    
1094     <dl>
1095     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1096     <dd class=unsupported><strong><a href="../error-description#level-u">Not
1097     supported</a></strong>:
1098     Media type
1099     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1100     is not supported.</dd>
1101     </dl>
1102 wakaba 1.18 </div>
1103     ];
1104 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1105     unless $input->{nested};
1106 wakaba 1.30 add_error (char => {level => 'u'} => $result);
1107     add_error (syntax => {level => 'u'} => $result);
1108     add_error (structure => {level => 'u'} => $result);
1109 wakaba 1.18 } # print_result_unknown_type_section
1110    
1111     sub print_result_input_error_section ($) {
1112     my $input = shift;
1113     print STDOUT qq[<div class="section" id="result-summary">
1114     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1115     </div>];
1116     push @nav, ['#result-summary' => 'Result'];
1117 wakaba 1.32 } # print_result_input_error_section
1118 wakaba 1.18
1119 wakaba 1.32 sub get_error_label ($$) {
1120     my ($input, $err) = @_;
1121 wakaba 1.22
1122     my $r = '';
1123    
1124 wakaba 1.40 if (defined $err->{token} and defined $err->{token}->{line}) {
1125     if ($err->{token}->{column} > 0) {
1126     $r = qq[<a href="#$input->{id_prefix}line-$err->{token}->{line}">Line $err->{token}->{line}</a> column $err->{token}->{column}];
1127     } else {
1128     $err->{token}->{line} = $err->{token}->{line} - 1 || 1;
1129     $r = qq[<a href="#$input->{id_prefix}line-$err->{token}->{line}">Line $err->{token}->{line}</a>];
1130     }
1131     } elsif (defined $err->{line}) {
1132 wakaba 1.22 if ($err->{column} > 0) {
1133 wakaba 1.35 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a> column $err->{column}];
1134 wakaba 1.22 } else {
1135     $err->{line} = $err->{line} - 1 || 1;
1136 wakaba 1.35 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a>];
1137 wakaba 1.22 }
1138     }
1139    
1140     if (defined $err->{node}) {
1141     $r .= ' ' if length $r;
1142 wakaba 1.32 $r = get_node_link ($input, $err->{node});
1143 wakaba 1.22 }
1144    
1145     if (defined $err->{index}) {
1146 wakaba 1.37 if (length $r) {
1147     $r .= ', Index ' . (0+$err->{index});
1148     } else {
1149     $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1150     . (0+$err->{index}) . '</a>';
1151     }
1152 wakaba 1.22 }
1153    
1154     if (defined $err->{value}) {
1155     $r .= ' ' if length $r;
1156     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1157     }
1158    
1159     return $r;
1160     } # get_error_label
1161    
1162 wakaba 1.23 sub get_error_level_label ($) {
1163     my $err = shift;
1164    
1165     my $r = '';
1166    
1167     if (not defined $err->{level} or $err->{level} eq 'm') {
1168     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1169     error</a></strong>: ];
1170     } elsif ($err->{level} eq 's') {
1171     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1172     error</a></strong>: ];
1173     } elsif ($err->{level} eq 'w') {
1174     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1175     ];
1176 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1177 wakaba 1.23 $r = qq[<strong><a href="../error-description#level-u">Not
1178     supported</a></strong>: ];
1179 wakaba 1.37 } elsif ($err->{level} eq 'i') {
1180     $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1181 wakaba 1.23 } else {
1182     my $elevel = htescape ($err->{level});
1183     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1184     ];
1185     }
1186    
1187     return $r;
1188     } # get_error_level_label
1189    
1190 wakaba 1.1 sub get_node_path ($) {
1191     my $node = shift;
1192     my @r;
1193     while (defined $node) {
1194     my $rs;
1195     if ($node->node_type == 1) {
1196     $rs = $node->manakai_local_name;
1197     $node = $node->parent_node;
1198     } elsif ($node->node_type == 2) {
1199     $rs = '@' . $node->manakai_local_name;
1200     $node = $node->owner_element;
1201     } elsif ($node->node_type == 3) {
1202     $rs = '"' . $node->data . '"';
1203     $node = $node->parent_node;
1204     } elsif ($node->node_type == 9) {
1205 wakaba 1.9 @r = ('') unless @r;
1206 wakaba 1.1 $rs = '';
1207     $node = $node->parent_node;
1208     } else {
1209     $rs = '#' . $node->node_type;
1210     $node = $node->parent_node;
1211     }
1212     unshift @r, $rs;
1213     }
1214     return join '/', @r;
1215     } # get_node_path
1216    
1217 wakaba 1.32 sub get_node_link ($$) {
1218     return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1219     htescape (get_node_path ($_[1])) . qq[</a>];
1220 wakaba 1.6 } # get_node_link
1221    
1222 wakaba 1.7 {
1223     my $Msg = {};
1224    
1225     sub load_text_catalog ($) {
1226     my $lang = shift; # MUST be a canonical lang name
1227 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
1228     or die "$0: cc-msg.$lang.txt: $!";
1229 wakaba 1.7 while (<$file>) {
1230     if (s/^([^;]+);([^;]*);//) {
1231     my ($type, $cls, $msg) = ($1, $2, $_);
1232     $msg =~ tr/\x0D\x0A//d;
1233     $Msg->{$type} = [$cls, $msg];
1234     }
1235     }
1236     } # load_text_catalog
1237    
1238     sub get_text ($) {
1239 wakaba 1.15 my ($type, $level, $node) = @_;
1240 wakaba 1.7 $type = $level . ':' . $type if defined $level;
1241 wakaba 1.29 $level = 'm' unless defined $level;
1242 wakaba 1.7 my @arg;
1243     {
1244     if (defined $Msg->{$type}) {
1245     my $msg = $Msg->{$type}->[1];
1246 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
1247     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1248     }ge;
1249 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1250     UNIVERSAL::can ($node, 'get_attribute_ns')
1251     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1252     }ge;
1253     $msg =~ s{<var>{\@}</var>}{
1254     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1255     }ge;
1256 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
1257     UNIVERSAL::can ($node, 'manakai_local_name')
1258     ? htescape ($node->manakai_local_name) : ''
1259     }ge;
1260     $msg =~ s{<var>{element-local-name}</var>}{
1261     (UNIVERSAL::can ($node, 'owner_element') and
1262     $node->owner_element)
1263     ? htescape ($node->owner_element->manakai_local_name)
1264     : ''
1265     }ge;
1266 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1267 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
1268     unshift @arg, $1;
1269     redo;
1270     }
1271     }
1272 wakaba 1.29 return ($type, 'level-'.$level, htescape ($_[0]));
1273 wakaba 1.7 } # get_text
1274    
1275     }
1276    
1277 wakaba 1.9 sub get_input_document ($$) {
1278     my ($http, $dom) = @_;
1279    
1280 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
1281 wakaba 1.9 my $r = {};
1282     if (defined $request_uri and length $request_uri) {
1283     my $uri = $dom->create_uri_reference ($request_uri);
1284     unless ({
1285     http => 1,
1286     }->{lc $uri->uri_scheme}) {
1287     return {uri => $request_uri, request_uri => $request_uri,
1288     error_status_text => 'URI scheme not allowed'};
1289     }
1290    
1291     require Message::Util::HostPermit;
1292     my $host_permit = new Message::Util::HostPermit;
1293     $host_permit->add_rule (<<EOH);
1294     Allow host=suika port=80
1295     Deny host=suika
1296     Allow host=suika.fam.cx port=80
1297     Deny host=suika.fam.cx
1298     Deny host=localhost
1299     Deny host=*.localdomain
1300     Deny ipv4=0.0.0.0/8
1301     Deny ipv4=10.0.0.0/8
1302     Deny ipv4=127.0.0.0/8
1303     Deny ipv4=169.254.0.0/16
1304     Deny ipv4=172.0.0.0/11
1305     Deny ipv4=192.0.2.0/24
1306     Deny ipv4=192.88.99.0/24
1307     Deny ipv4=192.168.0.0/16
1308     Deny ipv4=198.18.0.0/15
1309     Deny ipv4=224.0.0.0/4
1310     Deny ipv4=255.255.255.255/32
1311     Deny ipv6=0::0/0
1312     Allow host=*
1313     EOH
1314     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1315     return {uri => $request_uri, request_uri => $request_uri,
1316     error_status_text => 'Connection to the host is forbidden'};
1317     }
1318    
1319     require LWP::UserAgent;
1320     my $ua = WDCC::LWPUA->new;
1321     $ua->{wdcc_dom} = $dom;
1322     $ua->{wdcc_host_permit} = $host_permit;
1323     $ua->agent ('Mozilla'); ## TODO: for now.
1324     $ua->parse_head (0);
1325     $ua->protocols_allowed ([qw/http/]);
1326     $ua->max_size (1000_000);
1327     my $req = HTTP::Request->new (GET => $request_uri);
1328 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1329 wakaba 1.9 my $res = $ua->request ($req);
1330 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1331     if ($res->is_success or $http->get_parameter ('error-page')) {
1332 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1333     $r->{uri} = $res->request->uri;
1334     $r->{request_uri} = $request_uri;
1335    
1336     ## TODO: More strict parsing...
1337     my $ct = $res->header ('Content-Type');
1338 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1339 wakaba 1.9 $r->{charset} = lc $1;
1340     $r->{charset} =~ tr/\\//d;
1341 wakaba 1.26 $r->{official_charset} = $r->{charset};
1342 wakaba 1.9 }
1343    
1344 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1345 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1346     $r->{charset_overridden}
1347     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1348     $r->{charset} = $input_charset;
1349 wakaba 1.25 }
1350    
1351     ## TODO: Support for HTTP Content-Encoding
1352 wakaba 1.9
1353     $r->{s} = ''.$res->content;
1354 wakaba 1.25
1355     require Whatpm::ContentType;
1356     ($r->{official_type}, $r->{media_type})
1357     = Whatpm::ContentType->get_sniffed_type
1358     (get_file_head => sub {
1359     return substr $r->{s}, 0, shift;
1360     },
1361     http_content_type_byte => $ct,
1362     has_http_content_encoding =>
1363     defined $res->header ('Content-Encoding'),
1364     supported_image_types => {});
1365 wakaba 1.9 } else {
1366     $r->{uri} = $res->request->uri;
1367     $r->{request_uri} = $request_uri;
1368     $r->{error_status_text} = $res->status_line;
1369     }
1370    
1371     $r->{header_field} = [];
1372     $res->scan (sub {
1373     push @{$r->{header_field}}, [$_[0], $_[1]];
1374     });
1375     $r->{header_status_code} = $res->code;
1376     $r->{header_status_text} = $res->message;
1377     } else {
1378 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1379 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1380     $r->{request_uri} = q<thismessage:/>;
1381     $r->{base_uri} = q<thismessage:/>;
1382 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1383 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1384     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1385 wakaba 1.26 $r->{official_charset} = $r->{charset};
1386 wakaba 1.9 $r->{header_field} = [];
1387 wakaba 1.25
1388     require Whatpm::ContentType;
1389     ($r->{official_type}, $r->{media_type})
1390     = Whatpm::ContentType->get_sniffed_type
1391     (get_file_head => sub {
1392     return substr $r->{s}, 0, shift;
1393     },
1394     http_content_type_byte => undef,
1395     has_http_content_encoding => 0,
1396     supported_image_types => {});
1397 wakaba 1.9 }
1398    
1399 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1400 wakaba 1.9 if (defined $input_format and length $input_format) {
1401     $r->{media_type_overridden}
1402     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1403     $r->{media_type} = $input_format;
1404     }
1405     if (defined $r->{s} and not defined $r->{media_type}) {
1406     $r->{media_type} = 'text/html';
1407     $r->{media_type_overridden} = 1;
1408     }
1409    
1410     if ($r->{media_type} eq 'text/xml') {
1411     unless (defined $r->{charset}) {
1412     $r->{charset} = 'us-ascii';
1413 wakaba 1.26 $r->{official_charset} = $r->{charset};
1414 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1415     $r->{charset_overridden} = 0;
1416     }
1417     }
1418    
1419     if (length $r->{s} > 1000_000) {
1420     $r->{error_status_text} = 'Entity-body too large';
1421     delete $r->{s};
1422     return $r;
1423     }
1424    
1425 wakaba 1.35 $r->{inner_html_element} = $http->get_parameter ('e');
1426    
1427 wakaba 1.9 return $r;
1428     } # get_input_document
1429    
1430     package WDCC::LWPUA;
1431     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1432    
1433     sub redirect_ok {
1434     my $ua = shift;
1435     unless ($ua->SUPER::redirect_ok (@_)) {
1436     return 0;
1437     }
1438    
1439     my $uris = $_[1]->header ('Location');
1440     return 0 unless $uris;
1441     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1442     unless ({
1443     http => 1,
1444     }->{lc $uri->uri_scheme}) {
1445     return 0;
1446     }
1447     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1448     return 0;
1449     }
1450     return 1;
1451     } # redirect_ok
1452    
1453 wakaba 1.1 =head1 AUTHOR
1454    
1455     Wakaba <w@suika.fam.cx>.
1456    
1457     =head1 LICENSE
1458    
1459 wakaba 1.35 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1460 wakaba 1.1
1461     This library is free software; you can redistribute it
1462     and/or modify it under the same terms as Perl itself.
1463    
1464     =cut
1465    
1466 wakaba 1.41 ## $Date: 2008/03/16 07:08:34 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24