/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.38 - (hide annotations) (download)
Tue Mar 11 14:10:11 2008 UTC (16 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.37: +3 -7 lines
++ ChangeLog	11 Mar 2008 14:10:08 -0000
2008-03-11  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi: |value| should be output in |dt| in Parse Errors
	section.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.16 use Time::HiRes qw/time/;
10 wakaba 1.1
11 wakaba 1.2 sub htescape ($) {
12     my $s = $_[0];
13     $s =~ s/&/&amp;/g;
14     $s =~ s/</&lt;/g;
15     $s =~ s/>/&gt;/g;
16     $s =~ s/"/&quot;/g;
17 wakaba 1.12 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18     sprintf '<var>U+%04X</var>', ord $1;
19     }ge;
20 wakaba 1.2 return $s;
21     } # htescape
22    
23 wakaba 1.35 my @nav;
24     my %time;
25     require Message::DOM::DOMImplementation;
26     my $dom = Message::DOM::DOMImplementation->new;
27     {
28 wakaba 1.16 use Message::CGI::HTTP;
29     my $http = Message::CGI::HTTP->new;
30 wakaba 1.1
31 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33     exit;
34     }
35    
36 wakaba 1.12 binmode STDOUT, ':utf8';
37 wakaba 1.14 $| = 1;
38 wakaba 1.12
39 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
40    
41 wakaba 1.2 print STDOUT qq[Content-Type: text/html; charset=utf-8
42    
43     <!DOCTYPE html>
44     <html lang="en">
45     <head>
46     <title>Web Document Conformance Checker (BETA)</title>
47 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 wakaba 1.2 </head>
49     <body>
50 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51     (<em>beta</em>)</h1>
52 wakaba 1.14 ];
53 wakaba 1.2
54 wakaba 1.14 $| = 0;
55     my $input = get_input_document ($http, $dom);
56 wakaba 1.16 my $char_length = 0;
57 wakaba 1.14
58     print qq[
59 wakaba 1.4 <div id="document-info" class="section">
60 wakaba 1.2 <dl>
61 wakaba 1.9 <dt>Request URI</dt>
62     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 wakaba 1.2 <dt>Document URI</dt>
64 wakaba 1.25 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65     <script>
66     document.title = '<'
67     + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68     + document.title;
69     </script></dd>
70 wakaba 1.2 ]; # no </dl> yet
71 wakaba 1.3 push @nav, ['#document-info' => 'Information'];
72 wakaba 1.1
73 wakaba 1.9 if (defined $input->{s}) {
74 wakaba 1.16 $char_length = length $input->{s};
75 wakaba 1.9
76     print STDOUT qq[
77     <dt>Base URI</dt>
78     <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79     <dt>Internet Media Type</dt>
80     <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 wakaba 1.25 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 wakaba 1.9 <dt>Character Encoding</dt>
83     <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84     @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 wakaba 1.16 <dt>Length</dt>
86     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 wakaba 1.9 </dl>
88     </div>
89     ];
90    
91 wakaba 1.35 $input->{id_prefix} = '';
92     #$input->{nested} = 0;
93 wakaba 1.20 my $result = {conforming_min => 1, conforming_max => 1};
94 wakaba 1.31 check_and_print ($input => $result);
95 wakaba 1.19 print_result_section ($result);
96 wakaba 1.9 } else {
97 wakaba 1.18 print STDOUT qq[</dl></div>];
98     print_result_input_error_section ($input);
99 wakaba 1.9 }
100 wakaba 1.3
101 wakaba 1.2 print STDOUT qq[
102 wakaba 1.3 <ul class="navigation" id="nav-items">
103     ];
104     for (@nav) {
105     print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
106     }
107     print STDOUT qq[
108     </ul>
109 wakaba 1.2 </body>
110     </html>
111     ];
112 wakaba 1.1
113 wakaba 1.24 for (qw/decode parse parse_html parse_xml parse_manifest
114     check check_manifest/) {
115 wakaba 1.16 next unless defined $time{$_};
116     open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
117     print $file $char_length, "\t", $time{$_}, "\n";
118     }
119    
120 wakaba 1.1 exit;
121 wakaba 1.35 }
122 wakaba 1.1
123 wakaba 1.19 sub add_error ($$$) {
124     my ($layer, $err, $result) = @_;
125     if (defined $err->{level}) {
126     if ($err->{level} eq 's') {
127     $result->{$layer}->{should}++;
128     $result->{$layer}->{score_min} -= 2;
129     $result->{conforming_min} = 0;
130     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
131     $result->{$layer}->{warning}++;
132 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
133 wakaba 1.19 $result->{$layer}->{unsupported}++;
134     $result->{unsupported} = 1;
135 wakaba 1.37 } elsif ($err->{level} eq 'i') {
136     #
137 wakaba 1.19 } else {
138     $result->{$layer}->{must}++;
139     $result->{$layer}->{score_max} -= 2;
140     $result->{$layer}->{score_min} -= 2;
141     $result->{conforming_min} = 0;
142     $result->{conforming_max} = 0;
143     }
144     } else {
145     $result->{$layer}->{must}++;
146     $result->{$layer}->{score_max} -= 2;
147     $result->{$layer}->{score_min} -= 2;
148     $result->{conforming_min} = 0;
149     $result->{conforming_max} = 0;
150     }
151     } # add_error
152    
153 wakaba 1.31 sub check_and_print ($$) {
154     my ($input, $result) = @_;
155    
156     print_http_header_section ($input, $result);
157    
158     my $doc;
159     my $el;
160 wakaba 1.35 my $cssom;
161 wakaba 1.31 my $manifest;
162 wakaba 1.34 my @subdoc;
163 wakaba 1.31
164     if ($input->{media_type} eq 'text/html') {
165     ($doc, $el) = print_syntax_error_html_section ($input, $result);
166     print_source_string_section
167 wakaba 1.35 ($input,
168     \($input->{s}),
169     $input->{charset} || $doc->input_encoding);
170 wakaba 1.31 } elsif ({
171     'text/xml' => 1,
172     'application/atom+xml' => 1,
173     'application/rss+xml' => 1,
174     'application/svg+xml' => 1,
175     'application/xhtml+xml' => 1,
176     'application/xml' => 1,
177     }->{$input->{media_type}}) {
178     ($doc, $el) = print_syntax_error_xml_section ($input, $result);
179 wakaba 1.35 print_source_string_section ($input,
180     \($input->{s}),
181     $doc->input_encoding);
182     } elsif ($input->{media_type} eq 'text/css') {
183     $cssom = print_syntax_error_css_section ($input, $result);
184     print_source_string_section
185     ($input, \($input->{s}),
186     $cssom->manakai_input_encoding);
187 wakaba 1.31 } elsif ($input->{media_type} eq 'text/cache-manifest') {
188     ## TODO: MUST be text/cache-manifest
189     $manifest = print_syntax_error_manifest_section ($input, $result);
190 wakaba 1.35 print_source_string_section ($input, \($input->{s}),
191     'utf-8');
192 wakaba 1.31 } else {
193     ## TODO: Change HTTP status code??
194     print_result_unknown_type_section ($input, $result);
195     }
196    
197     if (defined $doc or defined $el) {
198 wakaba 1.34 $doc->document_uri ($input->{uri});
199     $doc->manakai_entity_base_uri ($input->{base_uri});
200 wakaba 1.32 print_structure_dump_dom_section ($input, $doc, $el);
201     my $elements = print_structure_error_dom_section
202 wakaba 1.34 ($input, $doc, $el, $result, sub {
203     push @subdoc, shift;
204     });
205 wakaba 1.32 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
206 wakaba 1.33 print_listing_section ({
207     id => 'identifiers', label => 'IDs', heading => 'Identifiers',
208     }, $input, $elements->{id}) if keys %{$elements->{id}};
209     print_listing_section ({
210     id => 'terms', label => 'Terms', heading => 'Terms',
211     }, $input, $elements->{term}) if keys %{$elements->{term}};
212     print_listing_section ({
213     id => 'classes', label => 'Classes', heading => 'Classes',
214     }, $input, $elements->{class}) if keys %{$elements->{class}};
215 wakaba 1.35 } elsif (defined $cssom) {
216     print_structure_dump_cssom_section ($input, $cssom);
217     ## TODO: CSSOM validation
218 wakaba 1.36 add_error ('structure', {level => 'u'} => $result);
219 wakaba 1.31 } elsif (defined $manifest) {
220 wakaba 1.32 print_structure_dump_manifest_section ($input, $manifest);
221     print_structure_error_manifest_section ($input, $manifest, $result);
222 wakaba 1.31 }
223 wakaba 1.34
224     my $id_prefix = 0;
225     for my $subinput (@subdoc) {
226     $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
227     $subinput->{nested} = 1;
228     $subinput->{base_uri} = $subinput->{container_node}->base_uri
229     unless defined $subinput->{base_uri};
230     my $ebaseuri = htescape ($subinput->{base_uri});
231     push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
232     print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
233     <h2>Subdocument #$id_prefix</h2>
234    
235     <dl>
236     <dt>Internet Media Type</dt>
237     <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
238     <dt>Container Node</dt>
239     <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
240     <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
241     <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
242     </dl>];
243    
244 wakaba 1.35 $subinput->{id_prefix} .= '-';
245 wakaba 1.34 check_and_print ($subinput => $result);
246    
247     print STDOUT qq[</div>];
248     }
249 wakaba 1.31 } # check_and_print
250    
251 wakaba 1.19 sub print_http_header_section ($$) {
252     my ($input, $result) = @_;
253 wakaba 1.9 return unless defined $input->{header_status_code} or
254     defined $input->{header_status_text} or
255 wakaba 1.34 @{$input->{header_field} or []};
256 wakaba 1.9
257 wakaba 1.32 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
258     print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
259 wakaba 1.9 <h2>HTTP Header</h2>
260    
261     <p><strong>Note</strong>: Due to the limitation of the
262     network library in use, the content of this section might
263     not be the real header.</p>
264    
265     <table><tbody>
266     ];
267    
268     if (defined $input->{header_status_code}) {
269     print STDOUT qq[<tr><th scope="row">Status code</th>];
270     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
271     }
272     if (defined $input->{header_status_text}) {
273     print STDOUT qq[<tr><th scope="row">Status text</th>];
274     print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
275     }
276    
277     for (@{$input->{header_field}}) {
278     print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
279     print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
280     }
281    
282     print STDOUT qq[</tbody></table></div>];
283     } # print_http_header_section
284    
285 wakaba 1.19 sub print_syntax_error_html_section ($$) {
286     my ($input, $result) = @_;
287 wakaba 1.18
288     require Encode;
289     require Whatpm::HTML;
290    
291     print STDOUT qq[
292 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
293 wakaba 1.18 <h2>Parse Errors</h2>
294    
295     <dl>];
296 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
297 wakaba 1.18
298     my $onerror = sub {
299     my (%opt) = @_;
300     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
301 wakaba 1.38 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
302     qq[</dt>];
303 wakaba 1.18 $type =~ tr/ /-/;
304     $type =~ s/\|/%7C/g;
305     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
306 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
307     print STDOUT qq[$msg</dd>\n];
308 wakaba 1.19
309     add_error ('syntax', \%opt => $result);
310 wakaba 1.18 };
311    
312     my $doc = $dom->create_document;
313     my $el;
314 wakaba 1.35 my $inner_html_element = $input->{inner_html_element};
315 wakaba 1.18 if (defined $inner_html_element and length $inner_html_element) {
316 wakaba 1.26 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
317 wakaba 1.24 my $time1 = time;
318     my $t = Encode::decode ($input->{charset}, $input->{s});
319     $time{decode} = time - $time1;
320    
321 wakaba 1.18 $el = $doc->create_element_ns
322     ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
323 wakaba 1.24 $time1 = time;
324 wakaba 1.18 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
325 wakaba 1.24 $time{parse} = time - $time1;
326 wakaba 1.18 } else {
327 wakaba 1.24 my $time1 = time;
328     Whatpm::HTML->parse_byte_string
329     ($input->{charset}, $input->{s} => $doc, $onerror);
330     $time{parse_html} = time - $time1;
331 wakaba 1.18 }
332 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
333     if defined $input->{official_charset};
334 wakaba 1.24
335 wakaba 1.18 print STDOUT qq[</dl></div>];
336    
337     return ($doc, $el);
338     } # print_syntax_error_html_section
339    
340 wakaba 1.19 sub print_syntax_error_xml_section ($$) {
341     my ($input, $result) = @_;
342 wakaba 1.18
343     require Message::DOM::XMLParserTemp;
344    
345     print STDOUT qq[
346 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
347 wakaba 1.18 <h2>Parse Errors</h2>
348    
349     <dl>];
350 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
351 wakaba 1.18
352     my $onerror = sub {
353     my $err = shift;
354     my $line = $err->location->line_number;
355 wakaba 1.35 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
356 wakaba 1.18 print STDOUT $err->location->column_number, "</dt><dd>";
357     print STDOUT htescape $err->text, "</dd>\n";
358 wakaba 1.19
359     add_error ('syntax', {type => $err->text,
360     level => [
361     $err->SEVERITY_FATAL_ERROR => 'm',
362     $err->SEVERITY_ERROR => 'm',
363     $err->SEVERITY_WARNING => 's',
364     ]->[$err->severity]} => $result);
365    
366 wakaba 1.18 return 1;
367     };
368    
369     my $time1 = time;
370     open my $fh, '<', \($input->{s});
371     my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
372     ($fh => $dom, $onerror, charset => $input->{charset});
373     $time{parse_xml} = time - $time1;
374 wakaba 1.26 $doc->manakai_charset ($input->{official_charset})
375     if defined $input->{official_charset};
376 wakaba 1.18
377     print STDOUT qq[</dl></div>];
378    
379     return ($doc, undef);
380     } # print_syntax_error_xml_section
381    
382 wakaba 1.35 sub get_css_parser () {
383     our $CSSParser;
384     return $CSSParser if $CSSParser;
385    
386     require Whatpm::CSS::Parser;
387     my $p = Whatpm::CSS::Parser->new;
388    
389     $p->{prop}->{$_} = 1 for qw/
390 wakaba 1.37 alignment-baseline
391 wakaba 1.35 background background-attachment background-color background-image
392     background-position background-position-x background-position-y
393     background-repeat border border-bottom border-bottom-color
394     border-bottom-style border-bottom-width border-collapse border-color
395     border-left border-left-color
396     border-left-style border-left-width border-right border-right-color
397     border-right-style border-right-width
398     border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
399     border-style border-top border-top-color border-top-style border-top-width
400     border-width bottom
401     caption-side clear clip color content counter-increment counter-reset
402 wakaba 1.37 cursor direction display dominant-baseline empty-cells float font
403 wakaba 1.35 font-family font-size font-size-adjust font-stretch
404     font-style font-variant font-weight height left
405     letter-spacing line-height
406     list-style list-style-image list-style-position list-style-type
407     margin margin-bottom margin-left margin-right margin-top marker-offset
408     marks max-height max-width min-height min-width opacity -moz-opacity
409     orphans outline outline-color outline-style outline-width overflow
410     overflow-x overflow-y
411     padding padding-bottom padding-left padding-right padding-top
412     page page-break-after page-break-before page-break-inside
413     position quotes right size table-layout
414 wakaba 1.37 text-align text-anchor text-decoration text-indent text-transform
415 wakaba 1.35 top unicode-bidi vertical-align visibility white-space width widows
416 wakaba 1.37 word-spacing writing-mode z-index
417 wakaba 1.35 /;
418     $p->{prop_value}->{display}->{$_} = 1 for qw/
419     block clip inline inline-block inline-table list-item none
420     table table-caption table-cell table-column table-column-group
421     table-header-group table-footer-group table-row table-row-group
422     compact marker
423     /;
424     $p->{prop_value}->{position}->{$_} = 1 for qw/
425     absolute fixed relative static
426     /;
427     $p->{prop_value}->{float}->{$_} = 1 for qw/
428     left right none
429     /;
430     $p->{prop_value}->{clear}->{$_} = 1 for qw/
431     left right none both
432     /;
433     $p->{prop_value}->{direction}->{ltr} = 1;
434     $p->{prop_value}->{direction}->{rtl} = 1;
435     $p->{prop_value}->{marks}->{crop} = 1;
436     $p->{prop_value}->{marks}->{cross} = 1;
437     $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
438     normal bidi-override embed
439     /;
440     for my $prop_name (qw/overflow overflow-x overflow-y/) {
441     $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
442     visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
443     /;
444     }
445     $p->{prop_value}->{visibility}->{$_} = 1 for qw/
446     visible hidden collapse
447     /;
448     $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
449     disc circle square decimal decimal-leading-zero
450     lower-roman upper-roman lower-greek lower-latin
451     upper-latin armenian georgian lower-alpha upper-alpha none
452     hebrew cjk-ideographic hiragana katakana hiragana-iroha
453     katakana-iroha
454     /;
455     $p->{prop_value}->{'list-style-position'}->{outside} = 1;
456     $p->{prop_value}->{'list-style-position'}->{inside} = 1;
457     $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
458     auto always avoid left right
459     /;
460     $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
461     auto always avoid left right
462     /;
463     $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
464     $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
465     $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
466     repeat repeat-x repeat-y no-repeat
467     /;
468     $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
469     $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
470     $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
471     xx-small x-small small medium large x-large xx-large
472     -manakai-xxx-large -webkit-xxx-large
473     larger smaller
474     /;
475     $p->{prop_value}->{'font-style'}->{normal} = 1;
476     $p->{prop_value}->{'font-style'}->{italic} = 1;
477     $p->{prop_value}->{'font-style'}->{oblique} = 1;
478     $p->{prop_value}->{'font-variant'}->{normal} = 1;
479     $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
480     $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
481     qw/normal wider narrower ultra-condensed extra-condensed
482     condensed semi-condensed semi-expanded expanded
483     extra-expanded ultra-expanded/;
484     $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
485     left right center justify begin end
486     /;
487     $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
488     capitalize uppercase lowercase none
489     /;
490     $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
491 wakaba 1.36 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
492 wakaba 1.35 /;
493 wakaba 1.37 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
494     lr rl tb lr-tb rl-tb tb-rl
495     /;
496     $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
497     start middle end
498     /;
499     $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
500     auto use-script no-change reset-size ideographic alphabetic
501     hanging mathematical central middle text-after-edge text-before-edge
502     /;
503     $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
504     auto baseline before-edge text-before-edge middle central
505     after-edge text-after-edge ideographic alphabetic hanging
506     mathematical
507     /;
508 wakaba 1.35 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
509     none blink underline overline line-through
510     /;
511     $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
512     top bottom left right
513     /;
514     $p->{prop_value}->{'table-layout'}->{auto} = 1;
515     $p->{prop_value}->{'table-layout'}->{fixed} = 1;
516 wakaba 1.36 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
517 wakaba 1.35 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
518     $p->{prop_value}->{'empty-cells'}->{show} = 1;
519     $p->{prop_value}->{'empty-cells'}->{hide} = 1;
520     $p->{prop_value}->{cursor}->{$_} = 1 for qw/
521     auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
522     se-resize sw-resize s-resize w-resize text wait help progress
523     /;
524     for my $prop (qw/border-top-style border-left-style
525     border-bottom-style border-right-style outline-style/) {
526     $p->{prop_value}->{$prop}->{$_} = 1 for qw/
527     none hidden dotted dashed solid double groove ridge inset outset
528     /;
529     }
530     for my $prop (qw/color background-color
531     border-bottom-color border-left-color border-right-color
532     border-top-color border-color/) {
533     $p->{prop_value}->{$prop}->{transparent} = 1;
534     $p->{prop_value}->{$prop}->{flavor} = 1;
535     $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
536     }
537     $p->{prop_value}->{'outline-color'}->{invert} = 1;
538     $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
539     $p->{pseudo_class}->{$_} = 1 for qw/
540     active checked disabled empty enabled first-child first-of-type
541     focus hover indeterminate last-child last-of-type link only-child
542     only-of-type root target visited
543     lang nth-child nth-last-child nth-of-type nth-last-of-type not
544     -manakai-contains -manakai-current
545     /;
546     $p->{pseudo_element}->{$_} = 1 for qw/
547     after before first-letter first-line
548     /;
549    
550     return $CSSParser = $p;
551     } # get_css_parser
552    
553     sub print_syntax_error_css_section ($$) {
554     my ($input, $result) = @_;
555    
556     print STDOUT qq[
557     <div id="$input->{id_prefix}parse-errors" class="section">
558     <h2>Parse Errors</h2>
559    
560     <dl>];
561     push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
562    
563     my $p = get_css_parser ();
564 wakaba 1.37 $p->init;
565 wakaba 1.35 $p->{onerror} = sub {
566     my (%opt) = @_;
567     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
568     if ($opt{token}) {
569     print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
570     } else {
571     print STDOUT qq[<dt class="$cls">Unknown location];
572     }
573     if (defined $opt{value}) {
574     print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
575     } elsif (defined $opt{token}) {
576     print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
577     }
578     $type =~ tr/ /-/;
579     $type =~ s/\|/%7C/g;
580     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
581     print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
582     print STDOUT qq[$msg</dd>\n];
583    
584     add_error ('syntax', \%opt => $result);
585     };
586     $p->{href} = $input->{uri};
587     $p->{base_uri} = $input->{base_uri};
588    
589 wakaba 1.37 # if ($parse_mode eq 'q') {
590     # $p->{unitless_px} = 1;
591     # $p->{hashless_color} = 1;
592     # }
593    
594     ## TODO: Make $input->{s} a ref.
595    
596 wakaba 1.35 my $s = \$input->{s};
597     my $charset;
598     unless ($input->{is_char_string}) {
599     require Encode;
600     if (defined $input->{charset}) {## TODO: IANA->Perl
601     $charset = $input->{charset};
602     $s = \(Encode::decode ($input->{charset}, $$s));
603     } else {
604     ## TODO: charset detection
605     $s = \(Encode::decode ($charset = 'utf-8', $$s));
606     }
607     }
608    
609     my $cssom = $p->parse_char_string ($$s);
610     $cssom->manakai_input_encoding ($charset) if defined $charset;
611    
612     print STDOUT qq[</dl></div>];
613    
614     return $cssom;
615     } # print_syntax_error_css_section
616    
617 wakaba 1.22 sub print_syntax_error_manifest_section ($$) {
618     my ($input, $result) = @_;
619    
620     require Whatpm::CacheManifest;
621    
622     print STDOUT qq[
623 wakaba 1.32 <div id="$input->{id_prefix}parse-errors" class="section">
624 wakaba 1.22 <h2>Parse Errors</h2>
625    
626     <dl>];
627 wakaba 1.32 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
628 wakaba 1.22
629     my $onerror = sub {
630     my (%opt) = @_;
631     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
632 wakaba 1.32 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
633     qq[</dt>];
634 wakaba 1.22 $type =~ tr/ /-/;
635     $type =~ s/\|/%7C/g;
636     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
637 wakaba 1.23 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
638     print STDOUT qq[$msg</dd>\n];
639 wakaba 1.22
640     add_error ('syntax', \%opt => $result);
641     };
642    
643     my $time1 = time;
644     my $manifest = Whatpm::CacheManifest->parse_byte_string
645     ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
646     $time{parse_manifest} = time - $time1;
647    
648     print STDOUT qq[</dl></div>];
649    
650     return $manifest;
651     } # print_syntax_error_manifest_section
652    
653 wakaba 1.35 sub print_source_string_section ($$$) {
654     my $input = shift;
655     my $s;
656     unless ($input->{is_char_string}) {
657     require Encode;
658     my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
659     return unless $enc;
660    
661     $s = \($enc->decode (${$_[0]}));
662     } else {
663     $s = $_[0];
664     }
665 wakaba 1.9
666     my $i = 1;
667 wakaba 1.32 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
668     print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
669 wakaba 1.9 <h2>Document Source</h2>
670     <ol lang="">\n];
671 wakaba 1.7 if (length $$s) {
672     while ($$s =~ /\G([^\x0A]*?)\x0D?\x0A/gc) {
673 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
674     "</li>\n";
675 wakaba 1.7 $i++;
676     }
677     if ($$s =~ /\G([^\x0A]+)/gc) {
678 wakaba 1.32 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
679     "</li>\n";
680 wakaba 1.7 }
681     } else {
682 wakaba 1.32 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
683 wakaba 1.2 }
684 wakaba 1.9 print STDOUT "</ol></div>";
685     } # print_input_string_section
686 wakaba 1.2
687 wakaba 1.35 sub print_document_tree ($$) {
688     my ($input, $node) = @_;
689    
690 wakaba 1.2 my $r = '<ol class="xoxo">';
691 wakaba 1.1
692 wakaba 1.2 my @node = ($node);
693 wakaba 1.1 while (@node) {
694     my $child = shift @node;
695 wakaba 1.2 unless (ref $child) {
696     $r .= $child;
697     next;
698     }
699    
700 wakaba 1.32 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
701 wakaba 1.2 my $nt = $child->node_type;
702     if ($nt == $child->ELEMENT_NODE) {
703 wakaba 1.4 my $child_nsuri = $child->namespace_uri;
704     $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
705 wakaba 1.2 '</code>'; ## ISSUE: case
706    
707     if ($child->has_attributes) {
708     $r .= '<ul class="attributes">';
709 wakaba 1.4 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
710 wakaba 1.2 @{$child->attributes}) {
711 wakaba 1.32 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
712 wakaba 1.2 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
713     }
714     $r .= '</ul>';
715     }
716    
717 wakaba 1.7 if ($child->has_child_nodes) {
718 wakaba 1.2 $r .= '<ol class="children">';
719 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
720     } else {
721     $r .= '</li>';
722 wakaba 1.2 }
723     } elsif ($nt == $child->TEXT_NODE) {
724 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
725 wakaba 1.2 } elsif ($nt == $child->CDATA_SECTION_NODE) {
726 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
727 wakaba 1.2 } elsif ($nt == $child->COMMENT_NODE) {
728 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
729 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_NODE) {
730 wakaba 1.6 $r .= qq'<li id="$node_id" class="tree-document">Document';
731 wakaba 1.7 $r .= qq[<ul class="attributes">];
732 wakaba 1.27 my $cp = $child->manakai_charset;
733     if (defined $cp) {
734     $r .= qq[<li><code>charset</code> parameter = <code>];
735     $r .= htescape ($cp) . qq[</code></li>];
736     }
737     $r .= qq[<li><code>inputEncoding</code> = ];
738     my $ie = $child->input_encoding;
739     if (defined $ie) {
740     $r .= qq[<code>@{[htescape ($ie)]}</code>];
741     if ($child->manakai_has_bom) {
742     $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
743     }
744     } else {
745     $r .= qq[(<code>null</code>)];
746     }
747 wakaba 1.7 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
748     $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
749 wakaba 1.9 unless ($child->manakai_is_html) {
750     $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
751     if (defined $child->xml_encoding) {
752     $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
753     } else {
754     $r .= qq[<li>XML encoding = (null)</li>];
755     }
756     $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
757     }
758 wakaba 1.7 $r .= qq[</ul>];
759 wakaba 1.2 if ($child->has_child_nodes) {
760 wakaba 1.7 $r .= '<ol class="children">';
761 wakaba 1.6 unshift @node, @{$child->child_nodes}, '</ol></li>';
762 wakaba 1.1 }
763 wakaba 1.2 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
764 wakaba 1.5 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
765     $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
766     $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
767     $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
768 wakaba 1.2 $r .= '</ul></li>';
769     } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
770 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
771 wakaba 1.1 } else {
772 wakaba 1.4 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
773 wakaba 1.1 }
774     }
775 wakaba 1.2
776     $r .= '</ol>';
777     print STDOUT $r;
778     } # print_document_tree
779 wakaba 1.1
780 wakaba 1.32 sub print_structure_dump_dom_section ($$$) {
781     my ($input, $doc, $el) = @_;
782 wakaba 1.18
783     print STDOUT qq[
784 wakaba 1.32 <div id="$input->{id_prefix}document-tree" class="section">
785 wakaba 1.18 <h2>Document Tree</h2>
786     ];
787 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
788     unless $input->{nested};
789 wakaba 1.18
790 wakaba 1.35 print_document_tree ($input, $el || $doc);
791 wakaba 1.18
792     print STDOUT qq[</div>];
793 wakaba 1.22 } # print_structure_dump_dom_section
794    
795 wakaba 1.35 sub print_structure_dump_cssom_section ($$) {
796     my ($input, $cssom) = @_;
797    
798     print STDOUT qq[
799     <div id="$input->{id_prefix}document-tree" class="section">
800     <h2>Document Tree</h2>
801     ];
802     push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
803     unless $input->{nested};
804    
805     ## TODO:
806     print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
807    
808     print STDOUT qq[</div>];
809     } # print_structure_dump_cssom_section
810    
811 wakaba 1.32 sub print_structure_dump_manifest_section ($$) {
812     my ($input, $manifest) = @_;
813 wakaba 1.22
814     print STDOUT qq[
815 wakaba 1.32 <div id="$input->{id_prefix}dump-manifest" class="section">
816 wakaba 1.22 <h2>Cache Manifest</h2>
817     ];
818 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
819     unless $input->{nested};
820 wakaba 1.22
821     print STDOUT qq[<dl><dt>Explicit entries</dt>];
822 wakaba 1.37 my $i = 0;
823 wakaba 1.22 for my $uri (@{$manifest->[0]}) {
824     my $euri = htescape ($uri);
825 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
826 wakaba 1.22 }
827    
828     print STDOUT qq[<dt>Fallback entries</dt><dd>
829     <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
830     <th scope=row>Fallback Entry</tr><tbody>];
831     for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
832     my $euri = htescape ($uri);
833     my $euri2 = htescape ($manifest->[1]->{$uri});
834 wakaba 1.37 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
835     <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
836 wakaba 1.22 }
837    
838     print STDOUT qq[</table><dt>Online whitelist</dt>];
839     for my $uri (@{$manifest->[2]}) {
840     my $euri = htescape ($uri);
841 wakaba 1.37 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
842 wakaba 1.22 }
843 wakaba 1.18
844 wakaba 1.22 print STDOUT qq[</dl></div>];
845     } # print_structure_dump_manifest_section
846    
847 wakaba 1.34 sub print_structure_error_dom_section ($$$$$) {
848     my ($input, $doc, $el, $result, $onsubdoc) = @_;
849 wakaba 1.18
850 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
851 wakaba 1.18 <h2>Document Errors</h2>
852    
853     <dl>];
854 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
855     unless $input->{nested};
856 wakaba 1.18
857     require Whatpm::ContentChecker;
858     my $onerror = sub {
859     my %opt = @_;
860     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
861     $type =~ tr/ /-/;
862     $type =~ s/\|/%7C/g;
863     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
864 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
865 wakaba 1.23 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
866     print STDOUT $msg, "</dd>\n";
867 wakaba 1.19 add_error ('structure', \%opt => $result);
868 wakaba 1.18 };
869    
870     my $elements;
871     my $time1 = time;
872     if ($el) {
873 wakaba 1.34 $elements = Whatpm::ContentChecker->check_element
874     ($el, $onerror, $onsubdoc);
875 wakaba 1.18 } else {
876 wakaba 1.34 $elements = Whatpm::ContentChecker->check_document
877     ($doc, $onerror, $onsubdoc);
878 wakaba 1.18 }
879     $time{check} = time - $time1;
880    
881     print STDOUT qq[</dl></div>];
882    
883     return $elements;
884 wakaba 1.22 } # print_structure_error_dom_section
885    
886     sub print_structure_error_manifest_section ($$$) {
887 wakaba 1.32 my ($input, $manifest, $result) = @_;
888 wakaba 1.22
889 wakaba 1.32 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
890 wakaba 1.22 <h2>Document Errors</h2>
891    
892     <dl>];
893 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
894     unless $input->{nested};
895 wakaba 1.22
896     require Whatpm::CacheManifest;
897     Whatpm::CacheManifest->check_manifest ($manifest, sub {
898     my %opt = @_;
899     my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
900     $type =~ tr/ /-/;
901     $type =~ s/\|/%7C/g;
902     $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
903 wakaba 1.32 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
904 wakaba 1.22 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
905     add_error ('structure', \%opt => $result);
906     });
907    
908     print STDOUT qq[</div>];
909     } # print_structure_error_manifest_section
910 wakaba 1.18
911 wakaba 1.32 sub print_table_section ($$) {
912     my ($input, $tables) = @_;
913 wakaba 1.18
914 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
915     unless $input->{nested};
916 wakaba 1.18 print STDOUT qq[
917 wakaba 1.32 <div id="$input->{id_prefix}tables" class="section">
918 wakaba 1.18 <h2>Tables</h2>
919    
920     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
921     <script src="../table-script.js" type="text/javascript"></script>
922     <noscript>
923     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
924     </noscript>
925     ];
926    
927     require JSON;
928    
929     my $i = 0;
930     for my $table_el (@$tables) {
931     $i++;
932 wakaba 1.32 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
933     get_node_link ($input, $table_el) . q[</h3>];
934 wakaba 1.18
935     ## TODO: Make |ContentChecker| return |form_table| result
936     ## so that this script don't have to run the algorithm twice.
937     my $table = Whatpm::HTMLTable->form_table ($table_el);
938    
939     for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
940     next unless $_;
941     delete $_->{element};
942     }
943    
944     for (@{$table->{row_group}}) {
945     next unless $_;
946     next unless $_->{element};
947     $_->{type} = $_->{element}->manakai_local_name;
948     delete $_->{element};
949     }
950    
951     for (@{$table->{cell}}) {
952     next unless $_;
953     for (@{$_}) {
954     next unless $_;
955     for (@$_) {
956     $_->{id} = refaddr $_->{element} if defined $_->{element};
957     delete $_->{element};
958     $_->{is_header} = $_->{is_header} ? 1 : 0;
959     }
960     }
961     }
962    
963     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
964     print STDOUT JSON::objToJson ($table);
965 wakaba 1.32 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
966     print STDOUT qq[, '$input->{id_prefix}');</script>];
967 wakaba 1.18 }
968    
969     print STDOUT qq[</div>];
970     } # print_table_section
971    
972 wakaba 1.33 sub print_listing_section ($$$) {
973     my ($opt, $input, $ids) = @_;
974 wakaba 1.18
975 wakaba 1.35 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
976     unless $input->{nested};
977 wakaba 1.18 print STDOUT qq[
978 wakaba 1.33 <div id="$input->{id_prefix}$opt->{id}" class="section">
979     <h2>$opt->{heading}</h2>
980 wakaba 1.18
981     <dl>
982     ];
983     for my $id (sort {$a cmp $b} keys %$ids) {
984     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
985     for (@{$ids->{$id}}) {
986 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
987 wakaba 1.18 }
988     }
989     print STDOUT qq[</dl></div>];
990 wakaba 1.33 } # print_listing_section
991 wakaba 1.18
992 wakaba 1.19 sub print_result_section ($) {
993     my $result = shift;
994    
995     print STDOUT qq[
996     <div id="result-summary" class="section">
997     <h2>Result</h2>];
998    
999 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
1000 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1001     checker cannot decide whether the document is conforming or
1002     not, since the document contains one or more unsupported
1003 wakaba 1.21 features. The document might or might not be conforming.</p>];
1004 wakaba 1.19 } elsif ($result->{conforming_min}) {
1005     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1006     found in this document.</p>];
1007     } elsif ($result->{conforming_max}) {
1008     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1009     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1010     it might be conforming.</p>];
1011     } else {
1012     print STDOUT qq[<p class=FAIL id=result-para>This document is
1013     <strong><em>non</em>-conforming</strong>.</p>];
1014     }
1015    
1016     print STDOUT qq[<table>
1017     <colgroup><col><colgroup><col><col><col><colgroup><col>
1018     <thead>
1019 wakaba 1.23 <tr><th scope=col></th>
1020     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1021     Errors</a></th>
1022     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1023     Errors</a></th>
1024     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1025     <th scope=col>Score</th></tr></thead><tbody>];
1026 wakaba 1.19
1027     my $must_error = 0;
1028     my $should_error = 0;
1029     my $warning = 0;
1030     my $score_min = 0;
1031     my $score_max = 0;
1032     my $score_base = 20;
1033 wakaba 1.21 my $score_unit = $score_base / 100;
1034 wakaba 1.19 for (
1035     [Transfer => 'transfer', ''],
1036     [Character => 'char', ''],
1037     [Syntax => 'syntax', '#parse-errors'],
1038     [Structure => 'structure', '#document-errors'],
1039     ) {
1040     $must_error += ($result->{$_->[1]}->{must} += 0);
1041     $should_error += ($result->{$_->[1]}->{should} += 0);
1042     $warning += ($result->{$_->[1]}->{warning} += 0);
1043 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1044     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1045 wakaba 1.19
1046     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1047     my $label = $_->[0];
1048     if ($result->{$_->[1]}->{must} or
1049     $result->{$_->[1]}->{should} or
1050     $result->{$_->[1]}->{warning} or
1051     $result->{$_->[1]}->{unsupported}) {
1052     $label = qq[<a href="$_->[2]">$label</a>];
1053     }
1054    
1055     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1056     if ($uncertain) {
1057 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1058 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1059 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1060 wakaba 1.19 } else {
1061 wakaba 1.21 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1062 wakaba 1.19 }
1063     }
1064    
1065     $score_max += $score_base;
1066    
1067     print STDOUT qq[
1068     <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1069     </tbody>
1070 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
1071     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1072     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1073     <td>$warning?</td>
1074     <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1075 wakaba 1.19 </table>
1076    
1077     <p><strong>Important</strong>: This conformance checking service
1078     is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1079     </div>];
1080     push @nav, ['#result-summary' => 'Result'];
1081     } # print_result_section
1082    
1083 wakaba 1.24 sub print_result_unknown_type_section ($$) {
1084     my ($input, $result) = @_;
1085 wakaba 1.18
1086 wakaba 1.24 my $euri = htescape ($input->{uri});
1087 wakaba 1.18 print STDOUT qq[
1088 wakaba 1.35 <div id="$input->{id_prefix}parse-errors" class="section">
1089 wakaba 1.24 <h2>Errors</h2>
1090    
1091     <dl>
1092     <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1093     <dd class=unsupported><strong><a href="../error-description#level-u">Not
1094     supported</a></strong>:
1095     Media type
1096     <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1097     is not supported.</dd>
1098     </dl>
1099 wakaba 1.18 </div>
1100     ];
1101 wakaba 1.35 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1102     unless $input->{nested};
1103 wakaba 1.30 add_error (char => {level => 'u'} => $result);
1104     add_error (syntax => {level => 'u'} => $result);
1105     add_error (structure => {level => 'u'} => $result);
1106 wakaba 1.18 } # print_result_unknown_type_section
1107    
1108     sub print_result_input_error_section ($) {
1109     my $input = shift;
1110     print STDOUT qq[<div class="section" id="result-summary">
1111     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1112     </div>];
1113     push @nav, ['#result-summary' => 'Result'];
1114 wakaba 1.32 } # print_result_input_error_section
1115 wakaba 1.18
1116 wakaba 1.32 sub get_error_label ($$) {
1117     my ($input, $err) = @_;
1118 wakaba 1.22
1119     my $r = '';
1120    
1121     if (defined $err->{line}) {
1122     if ($err->{column} > 0) {
1123 wakaba 1.35 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a> column $err->{column}];
1124 wakaba 1.22 } else {
1125     $err->{line} = $err->{line} - 1 || 1;
1126 wakaba 1.35 $r = qq[<a href="#$input->{id_prefix}line-$err->{line}">Line $err->{line}</a>];
1127 wakaba 1.22 }
1128     }
1129    
1130     if (defined $err->{node}) {
1131     $r .= ' ' if length $r;
1132 wakaba 1.32 $r = get_node_link ($input, $err->{node});
1133 wakaba 1.22 }
1134    
1135     if (defined $err->{index}) {
1136 wakaba 1.37 if (length $r) {
1137     $r .= ', Index ' . (0+$err->{index});
1138     } else {
1139     $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1140     . (0+$err->{index}) . '</a>';
1141     }
1142 wakaba 1.22 }
1143    
1144     if (defined $err->{value}) {
1145     $r .= ' ' if length $r;
1146     $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1147     }
1148    
1149     return $r;
1150     } # get_error_label
1151    
1152 wakaba 1.23 sub get_error_level_label ($) {
1153     my $err = shift;
1154    
1155     my $r = '';
1156    
1157     if (not defined $err->{level} or $err->{level} eq 'm') {
1158     $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1159     error</a></strong>: ];
1160     } elsif ($err->{level} eq 's') {
1161     $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1162     error</a></strong>: ];
1163     } elsif ($err->{level} eq 'w') {
1164     $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1165     ];
1166 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1167 wakaba 1.23 $r = qq[<strong><a href="../error-description#level-u">Not
1168     supported</a></strong>: ];
1169 wakaba 1.37 } elsif ($err->{level} eq 'i') {
1170     $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1171 wakaba 1.23 } else {
1172     my $elevel = htescape ($err->{level});
1173     $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1174     ];
1175     }
1176    
1177     return $r;
1178     } # get_error_level_label
1179    
1180 wakaba 1.1 sub get_node_path ($) {
1181     my $node = shift;
1182     my @r;
1183     while (defined $node) {
1184     my $rs;
1185     if ($node->node_type == 1) {
1186     $rs = $node->manakai_local_name;
1187     $node = $node->parent_node;
1188     } elsif ($node->node_type == 2) {
1189     $rs = '@' . $node->manakai_local_name;
1190     $node = $node->owner_element;
1191     } elsif ($node->node_type == 3) {
1192     $rs = '"' . $node->data . '"';
1193     $node = $node->parent_node;
1194     } elsif ($node->node_type == 9) {
1195 wakaba 1.9 @r = ('') unless @r;
1196 wakaba 1.1 $rs = '';
1197     $node = $node->parent_node;
1198     } else {
1199     $rs = '#' . $node->node_type;
1200     $node = $node->parent_node;
1201     }
1202     unshift @r, $rs;
1203     }
1204     return join '/', @r;
1205     } # get_node_path
1206    
1207 wakaba 1.32 sub get_node_link ($$) {
1208     return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1209     htescape (get_node_path ($_[1])) . qq[</a>];
1210 wakaba 1.6 } # get_node_link
1211    
1212 wakaba 1.7 {
1213     my $Msg = {};
1214    
1215     sub load_text_catalog ($) {
1216     my $lang = shift; # MUST be a canonical lang name
1217 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
1218     or die "$0: cc-msg.$lang.txt: $!";
1219 wakaba 1.7 while (<$file>) {
1220     if (s/^([^;]+);([^;]*);//) {
1221     my ($type, $cls, $msg) = ($1, $2, $_);
1222     $msg =~ tr/\x0D\x0A//d;
1223     $Msg->{$type} = [$cls, $msg];
1224     }
1225     }
1226     } # load_text_catalog
1227    
1228     sub get_text ($) {
1229 wakaba 1.15 my ($type, $level, $node) = @_;
1230 wakaba 1.7 $type = $level . ':' . $type if defined $level;
1231 wakaba 1.29 $level = 'm' unless defined $level;
1232 wakaba 1.7 my @arg;
1233     {
1234     if (defined $Msg->{$type}) {
1235     my $msg = $Msg->{$type}->[1];
1236 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
1237     defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1238     }ge;
1239 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1240     UNIVERSAL::can ($node, 'get_attribute_ns')
1241     ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1242     }ge;
1243     $msg =~ s{<var>{\@}</var>}{
1244     UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1245     }ge;
1246 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
1247     UNIVERSAL::can ($node, 'manakai_local_name')
1248     ? htescape ($node->manakai_local_name) : ''
1249     }ge;
1250     $msg =~ s{<var>{element-local-name}</var>}{
1251     (UNIVERSAL::can ($node, 'owner_element') and
1252     $node->owner_element)
1253     ? htescape ($node->owner_element->manakai_local_name)
1254     : ''
1255     }ge;
1256 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1257 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
1258     unshift @arg, $1;
1259     redo;
1260     }
1261     }
1262 wakaba 1.29 return ($type, 'level-'.$level, htescape ($_[0]));
1263 wakaba 1.7 } # get_text
1264    
1265     }
1266    
1267 wakaba 1.9 sub get_input_document ($$) {
1268     my ($http, $dom) = @_;
1269    
1270 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
1271 wakaba 1.9 my $r = {};
1272     if (defined $request_uri and length $request_uri) {
1273     my $uri = $dom->create_uri_reference ($request_uri);
1274     unless ({
1275     http => 1,
1276     }->{lc $uri->uri_scheme}) {
1277     return {uri => $request_uri, request_uri => $request_uri,
1278     error_status_text => 'URI scheme not allowed'};
1279     }
1280    
1281     require Message::Util::HostPermit;
1282     my $host_permit = new Message::Util::HostPermit;
1283     $host_permit->add_rule (<<EOH);
1284     Allow host=suika port=80
1285     Deny host=suika
1286     Allow host=suika.fam.cx port=80
1287     Deny host=suika.fam.cx
1288     Deny host=localhost
1289     Deny host=*.localdomain
1290     Deny ipv4=0.0.0.0/8
1291     Deny ipv4=10.0.0.0/8
1292     Deny ipv4=127.0.0.0/8
1293     Deny ipv4=169.254.0.0/16
1294     Deny ipv4=172.0.0.0/11
1295     Deny ipv4=192.0.2.0/24
1296     Deny ipv4=192.88.99.0/24
1297     Deny ipv4=192.168.0.0/16
1298     Deny ipv4=198.18.0.0/15
1299     Deny ipv4=224.0.0.0/4
1300     Deny ipv4=255.255.255.255/32
1301     Deny ipv6=0::0/0
1302     Allow host=*
1303     EOH
1304     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1305     return {uri => $request_uri, request_uri => $request_uri,
1306     error_status_text => 'Connection to the host is forbidden'};
1307     }
1308    
1309     require LWP::UserAgent;
1310     my $ua = WDCC::LWPUA->new;
1311     $ua->{wdcc_dom} = $dom;
1312     $ua->{wdcc_host_permit} = $host_permit;
1313     $ua->agent ('Mozilla'); ## TODO: for now.
1314     $ua->parse_head (0);
1315     $ua->protocols_allowed ([qw/http/]);
1316     $ua->max_size (1000_000);
1317     my $req = HTTP::Request->new (GET => $request_uri);
1318 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1319 wakaba 1.9 my $res = $ua->request ($req);
1320 wakaba 1.16 ## TODO: 401 sets |is_success| true.
1321     if ($res->is_success or $http->get_parameter ('error-page')) {
1322 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1323     $r->{uri} = $res->request->uri;
1324     $r->{request_uri} = $request_uri;
1325    
1326     ## TODO: More strict parsing...
1327     my $ct = $res->header ('Content-Type');
1328 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1329 wakaba 1.9 $r->{charset} = lc $1;
1330     $r->{charset} =~ tr/\\//d;
1331 wakaba 1.26 $r->{official_charset} = $r->{charset};
1332 wakaba 1.9 }
1333    
1334 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
1335 wakaba 1.9 if (defined $input_charset and length $input_charset) {
1336     $r->{charset_overridden}
1337     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1338     $r->{charset} = $input_charset;
1339 wakaba 1.25 }
1340    
1341     ## TODO: Support for HTTP Content-Encoding
1342 wakaba 1.9
1343     $r->{s} = ''.$res->content;
1344 wakaba 1.25
1345     require Whatpm::ContentType;
1346     ($r->{official_type}, $r->{media_type})
1347     = Whatpm::ContentType->get_sniffed_type
1348     (get_file_head => sub {
1349     return substr $r->{s}, 0, shift;
1350     },
1351     http_content_type_byte => $ct,
1352     has_http_content_encoding =>
1353     defined $res->header ('Content-Encoding'),
1354     supported_image_types => {});
1355 wakaba 1.9 } else {
1356     $r->{uri} = $res->request->uri;
1357     $r->{request_uri} = $request_uri;
1358     $r->{error_status_text} = $res->status_line;
1359     }
1360    
1361     $r->{header_field} = [];
1362     $res->scan (sub {
1363     push @{$r->{header_field}}, [$_[0], $_[1]];
1364     });
1365     $r->{header_status_code} = $res->code;
1366     $r->{header_status_text} = $res->message;
1367     } else {
1368 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
1369 wakaba 1.9 $r->{uri} = q<thismessage:/>;
1370     $r->{request_uri} = q<thismessage:/>;
1371     $r->{base_uri} = q<thismessage:/>;
1372 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
1373 wakaba 1.9 $r->{charset} =~ s/\s+//g;
1374     $r->{charset} = 'utf-8' if $r->{charset} eq '';
1375 wakaba 1.26 $r->{official_charset} = $r->{charset};
1376 wakaba 1.9 $r->{header_field} = [];
1377 wakaba 1.25
1378     require Whatpm::ContentType;
1379     ($r->{official_type}, $r->{media_type})
1380     = Whatpm::ContentType->get_sniffed_type
1381     (get_file_head => sub {
1382     return substr $r->{s}, 0, shift;
1383     },
1384     http_content_type_byte => undef,
1385     has_http_content_encoding => 0,
1386     supported_image_types => {});
1387 wakaba 1.9 }
1388    
1389 wakaba 1.16 my $input_format = $http->get_parameter ('i');
1390 wakaba 1.9 if (defined $input_format and length $input_format) {
1391     $r->{media_type_overridden}
1392     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1393     $r->{media_type} = $input_format;
1394     }
1395     if (defined $r->{s} and not defined $r->{media_type}) {
1396     $r->{media_type} = 'text/html';
1397     $r->{media_type_overridden} = 1;
1398     }
1399    
1400     if ($r->{media_type} eq 'text/xml') {
1401     unless (defined $r->{charset}) {
1402     $r->{charset} = 'us-ascii';
1403 wakaba 1.26 $r->{official_charset} = $r->{charset};
1404 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1405     $r->{charset_overridden} = 0;
1406     }
1407     }
1408    
1409     if (length $r->{s} > 1000_000) {
1410     $r->{error_status_text} = 'Entity-body too large';
1411     delete $r->{s};
1412     return $r;
1413     }
1414    
1415 wakaba 1.35 $r->{inner_html_element} = $http->get_parameter ('e');
1416    
1417 wakaba 1.9 return $r;
1418     } # get_input_document
1419    
1420     package WDCC::LWPUA;
1421     BEGIN { push our @ISA, 'LWP::UserAgent'; }
1422    
1423     sub redirect_ok {
1424     my $ua = shift;
1425     unless ($ua->SUPER::redirect_ok (@_)) {
1426     return 0;
1427     }
1428    
1429     my $uris = $_[1]->header ('Location');
1430     return 0 unless $uris;
1431     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1432     unless ({
1433     http => 1,
1434     }->{lc $uri->uri_scheme}) {
1435     return 0;
1436     }
1437     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1438     return 0;
1439     }
1440     return 1;
1441     } # redirect_ok
1442    
1443 wakaba 1.1 =head1 AUTHOR
1444    
1445     Wakaba <w@suika.fam.cx>.
1446    
1447     =head1 LICENSE
1448    
1449 wakaba 1.35 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1450 wakaba 1.1
1451     This library is free software; you can redistribute it
1452     and/or modify it under the same terms as Perl itself.
1453    
1454     =cut
1455    
1456 wakaba 1.38 ## $Date: 2008/02/24 02:17:51 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24