/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.42 - (show annotations) (download)
Mon Mar 17 13:25:19 2008 UTC (16 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.41: +38 -15 lines
++ ChangeLog	17 Mar 2008 13:25:16 -0000
2008-03-17  Wakaba  <wakaba@suika.fam.cx>

	* cc-script.js: The ID of the list is now given as an argument.

	* cc.cgi: List of document errors now also expanded by source
	code fragment generated by scripting.
	(get_error_label): Use line/column information from the error
	context node, if any.

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9 use Time::HiRes qw/time/;
10
11 sub htescape ($) {
12 my $s = $_[0];
13 $s =~ s/&/&amp;/g;
14 $s =~ s/</&lt;/g;
15 $s =~ s/>/&gt;/g;
16 $s =~ s/"/&quot;/g;
17 $s =~ s{([\x00-\x09\x0B-\x1F\x7F-\xA0\x{FEFF}\x{FFFC}-\x{FFFF}])}{
18 sprintf '<var>U+%04X</var>', ord $1;
19 }ge;
20 return $s;
21 } # htescape
22
23 my @nav;
24 my %time;
25 require Message::DOM::DOMImplementation;
26 my $dom = Message::DOM::DOMImplementation->new;
27 {
28 use Message::CGI::HTTP;
29 my $http = Message::CGI::HTTP->new;
30
31 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
32 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
33 exit;
34 }
35
36 binmode STDOUT, ':utf8';
37 $| = 1;
38
39 load_text_catalog ('en'); ## TODO: conneg
40
41 print STDOUT qq[Content-Type: text/html; charset=utf-8
42
43 <!DOCTYPE html>
44 <html lang="en">
45 <head>
46 <title>Web Document Conformance Checker (BETA)</title>
47 <link rel="stylesheet" href="../cc-style.css" type="text/css">
48 </head>
49 <body>
50 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
51 (<em>beta</em>)</h1>
52 ];
53
54 $| = 0;
55 my $input = get_input_document ($http, $dom);
56 my $char_length = 0;
57
58 print qq[
59 <div id="document-info" class="section">
60 <dl>
61 <dt>Request URI</dt>
62 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{request_uri}]}">@{[htescape $input->{request_uri}]}</a>&gt;</code></dd>
63 <dt>Document URI</dt>
64 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{uri}]}" id=anchor-document-uri>@{[htescape $input->{uri}]}</a>&gt;</code>
65 <script>
66 document.title = '<'
67 + document.getElementById ('anchor-document-uri').href + '> \\u2014 '
68 + document.title;
69 </script></dd>
70 ]; # no </dl> yet
71 push @nav, ['#document-info' => 'Information'];
72
73 if (defined $input->{s}) {
74 $char_length = length $input->{s};
75
76 print STDOUT qq[
77 <dt>Base URI</dt>
78 <dd><code class="URI" lang="">&lt;<a href="@{[htescape $input->{base_uri}]}">@{[htescape $input->{base_uri}]}</a>&gt;</code></dd>
79 <dt>Internet Media Type</dt>
80 <dd><code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
81 @{[$input->{media_type_overridden} ? '<em>(overridden)</em>' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '<em>(sniffed; official type is: <code class=MIME lang=en>'.htescape ($input->{official_type}).'</code>)' : '<em>(sniffed)</em>']}</dd>
82 <dt>Character Encoding</dt>
83 <dd>@{[defined $input->{charset} ? '<code class="charset" lang="en">'.htescape ($input->{charset}).'</code>' : '(none)']}
84 @{[$input->{charset_overridden} ? '<em>(overridden)</em>' : '']}</dd>
85 <dt>Length</dt>
86 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
87 </dl>
88 </div>
89
90 <script src="../cc-script.js"></script>
91 ];
92
93 $input->{id_prefix} = '';
94 #$input->{nested} = 0;
95 my $result = {conforming_min => 1, conforming_max => 1};
96 check_and_print ($input => $result);
97 print_result_section ($result);
98 } else {
99 print STDOUT qq[</dl></div>];
100 print_result_input_error_section ($input);
101 }
102
103 print STDOUT qq[
104 <ul class="navigation" id="nav-items">
105 ];
106 for (@nav) {
107 print STDOUT qq[<li><a href="$_->[0]">$_->[1]</a></li>];
108 }
109 print STDOUT qq[
110 </ul>
111 </body>
112 </html>
113 ];
114
115 for (qw/decode parse parse_html parse_xml parse_manifest
116 check check_manifest/) {
117 next unless defined $time{$_};
118 open my $file, '>>', ".cc-$_.txt" or die ".cc-$_.txt: $!";
119 print $file $char_length, "\t", $time{$_}, "\n";
120 }
121
122 exit;
123 }
124
125 sub add_error ($$$) {
126 my ($layer, $err, $result) = @_;
127 if (defined $err->{level}) {
128 if ($err->{level} eq 's') {
129 $result->{$layer}->{should}++;
130 $result->{$layer}->{score_min} -= 2;
131 $result->{conforming_min} = 0;
132 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
133 $result->{$layer}->{warning}++;
134 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
135 $result->{$layer}->{unsupported}++;
136 $result->{unsupported} = 1;
137 } elsif ($err->{level} eq 'i') {
138 #
139 } else {
140 $result->{$layer}->{must}++;
141 $result->{$layer}->{score_max} -= 2;
142 $result->{$layer}->{score_min} -= 2;
143 $result->{conforming_min} = 0;
144 $result->{conforming_max} = 0;
145 }
146 } else {
147 $result->{$layer}->{must}++;
148 $result->{$layer}->{score_max} -= 2;
149 $result->{$layer}->{score_min} -= 2;
150 $result->{conforming_min} = 0;
151 $result->{conforming_max} = 0;
152 }
153 } # add_error
154
155 sub check_and_print ($$) {
156 my ($input, $result) = @_;
157
158 print_http_header_section ($input, $result);
159
160 my $doc;
161 my $el;
162 my $cssom;
163 my $manifest;
164 my @subdoc;
165
166 if ($input->{media_type} eq 'text/html') {
167 ($doc, $el) = print_syntax_error_html_section ($input, $result);
168 print_source_string_section
169 ($input,
170 \($input->{s}),
171 $input->{charset} || $doc->input_encoding);
172 } elsif ({
173 'text/xml' => 1,
174 'application/atom+xml' => 1,
175 'application/rss+xml' => 1,
176 'application/svg+xml' => 1,
177 'application/xhtml+xml' => 1,
178 'application/xml' => 1,
179 }->{$input->{media_type}}) {
180 ($doc, $el) = print_syntax_error_xml_section ($input, $result);
181 print_source_string_section ($input,
182 \($input->{s}),
183 $doc->input_encoding);
184 } elsif ($input->{media_type} eq 'text/css') {
185 $cssom = print_syntax_error_css_section ($input, $result);
186 print_source_string_section
187 ($input, \($input->{s}),
188 $cssom->manakai_input_encoding);
189 } elsif ($input->{media_type} eq 'text/cache-manifest') {
190 ## TODO: MUST be text/cache-manifest
191 $manifest = print_syntax_error_manifest_section ($input, $result);
192 print_source_string_section ($input, \($input->{s}),
193 'utf-8');
194 } else {
195 ## TODO: Change HTTP status code??
196 print_result_unknown_type_section ($input, $result);
197 }
198
199 if (defined $doc or defined $el) {
200 $doc->document_uri ($input->{uri});
201 $doc->manakai_entity_base_uri ($input->{base_uri});
202 print_structure_dump_dom_section ($input, $doc, $el);
203 my $elements = print_structure_error_dom_section
204 ($input, $doc, $el, $result, sub {
205 push @subdoc, shift;
206 });
207 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
208 print_listing_section ({
209 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
210 }, $input, $elements->{id}) if keys %{$elements->{id}};
211 print_listing_section ({
212 id => 'terms', label => 'Terms', heading => 'Terms',
213 }, $input, $elements->{term}) if keys %{$elements->{term}};
214 print_listing_section ({
215 id => 'classes', label => 'Classes', heading => 'Classes',
216 }, $input, $elements->{class}) if keys %{$elements->{class}};
217 } elsif (defined $cssom) {
218 print_structure_dump_cssom_section ($input, $cssom);
219 ## TODO: CSSOM validation
220 add_error ('structure', {level => 'u'} => $result);
221 } elsif (defined $manifest) {
222 print_structure_dump_manifest_section ($input, $manifest);
223 print_structure_error_manifest_section ($input, $manifest, $result);
224 }
225
226 my $id_prefix = 0;
227 for my $subinput (@subdoc) {
228 $subinput->{id_prefix} = 'subdoc-' . ++$id_prefix;
229 $subinput->{nested} = 1;
230 $subinput->{base_uri} = $subinput->{container_node}->base_uri
231 unless defined $subinput->{base_uri};
232 my $ebaseuri = htescape ($subinput->{base_uri});
233 push @nav, ['#' . $subinput->{id_prefix} => 'Sub #' . $id_prefix];
234 print STDOUT qq[<div id="$subinput->{id_prefix}" class=section>
235 <h2>Subdocument #$id_prefix</h2>
236
237 <dl>
238 <dt>Internet Media Type</dt>
239 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
240 <dt>Container Node</dt>
241 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
242 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
243 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
244 </dl>];
245
246 $subinput->{id_prefix} .= '-';
247 check_and_print ($subinput => $result);
248
249 print STDOUT qq[</div>];
250 }
251 } # check_and_print
252
253 sub print_http_header_section ($$) {
254 my ($input, $result) = @_;
255 return unless defined $input->{header_status_code} or
256 defined $input->{header_status_text} or
257 @{$input->{header_field} or []};
258
259 push @nav, ['#source-header' => 'HTTP Header'] unless $input->{nested};
260 print STDOUT qq[<div id="$input->{id_prefix}source-header" class="section">
261 <h2>HTTP Header</h2>
262
263 <p><strong>Note</strong>: Due to the limitation of the
264 network library in use, the content of this section might
265 not be the real header.</p>
266
267 <table><tbody>
268 ];
269
270 if (defined $input->{header_status_code}) {
271 print STDOUT qq[<tr><th scope="row">Status code</th>];
272 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_code})]}</code></td></tr>];
273 }
274 if (defined $input->{header_status_text}) {
275 print STDOUT qq[<tr><th scope="row">Status text</th>];
276 print STDOUT qq[<td><code>@{[htescape ($input->{header_status_text})]}</code></td></tr>];
277 }
278
279 for (@{$input->{header_field}}) {
280 print STDOUT qq[<tr><th scope="row"><code>@{[htescape ($_->[0])]}</code></th>];
281 print STDOUT qq[<td><code>@{[htescape ($_->[1])]}</code></td></tr>];
282 }
283
284 print STDOUT qq[</tbody></table></div>];
285 } # print_http_header_section
286
287 sub print_syntax_error_html_section ($$) {
288 my ($input, $result) = @_;
289
290 require Encode;
291 require Whatpm::HTML;
292
293 print STDOUT qq[
294 <div id="$input->{id_prefix}parse-errors" class="section">
295 <h2>Parse Errors</h2>
296
297 <dl id="$input->{id_prefix}parse-errors-list">];
298 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
299
300 my $onerror = sub {
301 my (%opt) = @_;
302 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
303 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
304 qq[</dt>];
305 $type =~ tr/ /-/;
306 $type =~ s/\|/%7C/g;
307 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
308 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
309 print STDOUT qq[$msg</dd>\n];
310
311 add_error ('syntax', \%opt => $result);
312 };
313
314 my $doc = $dom->create_document;
315 my $el;
316 my $inner_html_element = $input->{inner_html_element};
317 if (defined $inner_html_element and length $inner_html_element) {
318 $input->{charset} ||= 'windows-1252'; ## TODO: for now.
319 my $time1 = time;
320 my $t = Encode::decode ($input->{charset}, $input->{s});
321 $time{decode} = time - $time1;
322
323 $el = $doc->create_element_ns
324 ('http://www.w3.org/1999/xhtml', [undef, $inner_html_element]);
325 $time1 = time;
326 Whatpm::HTML->set_inner_html ($el, $t, $onerror);
327 $time{parse} = time - $time1;
328 } else {
329 my $time1 = time;
330 Whatpm::HTML->parse_byte_string
331 ($input->{charset}, $input->{s} => $doc, $onerror);
332 $time{parse_html} = time - $time1;
333 }
334 $doc->manakai_charset ($input->{official_charset})
335 if defined $input->{official_charset};
336
337 print STDOUT qq[</dl></div>];
338
339 return ($doc, $el);
340 } # print_syntax_error_html_section
341
342 sub print_syntax_error_xml_section ($$) {
343 my ($input, $result) = @_;
344
345 require Message::DOM::XMLParserTemp;
346
347 print STDOUT qq[
348 <div id="$input->{id_prefix}parse-errors" class="section">
349 <h2>Parse Errors</h2>
350
351 <dl id="$input->{id_prefix}parse-errors-list">];
352 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{prefix};
353
354 my $onerror = sub {
355 my $err = shift;
356 my $line = $err->location->line_number;
357 print STDOUT qq[<dt><a href="#$input->{id_prefix}line-$line">Line $line</a> column ];
358 print STDOUT $err->location->column_number, "</dt><dd>";
359 print STDOUT htescape $err->text, "</dd>\n";
360
361 add_error ('syntax', {type => $err->text,
362 level => [
363 $err->SEVERITY_FATAL_ERROR => 'm',
364 $err->SEVERITY_ERROR => 'm',
365 $err->SEVERITY_WARNING => 's',
366 ]->[$err->severity]} => $result);
367
368 return 1;
369 };
370
371 my $time1 = time;
372 open my $fh, '<', \($input->{s});
373 my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
374 ($fh => $dom, $onerror, charset => $input->{charset});
375 $time{parse_xml} = time - $time1;
376 $doc->manakai_charset ($input->{official_charset})
377 if defined $input->{official_charset};
378
379 print STDOUT qq[</dl></div>];
380
381 return ($doc, undef);
382 } # print_syntax_error_xml_section
383
384 sub get_css_parser () {
385 our $CSSParser;
386 return $CSSParser if $CSSParser;
387
388 require Whatpm::CSS::Parser;
389 my $p = Whatpm::CSS::Parser->new;
390
391 $p->{prop}->{$_} = 1 for qw/
392 alignment-baseline
393 background background-attachment background-color background-image
394 background-position background-position-x background-position-y
395 background-repeat border border-bottom border-bottom-color
396 border-bottom-style border-bottom-width border-collapse border-color
397 border-left border-left-color
398 border-left-style border-left-width border-right border-right-color
399 border-right-style border-right-width
400 border-spacing -manakai-border-spacing-x -manakai-border-spacing-y
401 border-style border-top border-top-color border-top-style border-top-width
402 border-width bottom
403 caption-side clear clip color content counter-increment counter-reset
404 cursor direction display dominant-baseline empty-cells float font
405 font-family font-size font-size-adjust font-stretch
406 font-style font-variant font-weight height left
407 letter-spacing line-height
408 list-style list-style-image list-style-position list-style-type
409 margin margin-bottom margin-left margin-right margin-top marker-offset
410 marks max-height max-width min-height min-width opacity -moz-opacity
411 orphans outline outline-color outline-style outline-width overflow
412 overflow-x overflow-y
413 padding padding-bottom padding-left padding-right padding-top
414 page page-break-after page-break-before page-break-inside
415 position quotes right size table-layout
416 text-align text-anchor text-decoration text-indent text-transform
417 top unicode-bidi vertical-align visibility white-space width widows
418 word-spacing writing-mode z-index
419 /;
420 $p->{prop_value}->{display}->{$_} = 1 for qw/
421 block clip inline inline-block inline-table list-item none
422 table table-caption table-cell table-column table-column-group
423 table-header-group table-footer-group table-row table-row-group
424 compact marker
425 /;
426 $p->{prop_value}->{position}->{$_} = 1 for qw/
427 absolute fixed relative static
428 /;
429 $p->{prop_value}->{float}->{$_} = 1 for qw/
430 left right none
431 /;
432 $p->{prop_value}->{clear}->{$_} = 1 for qw/
433 left right none both
434 /;
435 $p->{prop_value}->{direction}->{ltr} = 1;
436 $p->{prop_value}->{direction}->{rtl} = 1;
437 $p->{prop_value}->{marks}->{crop} = 1;
438 $p->{prop_value}->{marks}->{cross} = 1;
439 $p->{prop_value}->{'unicode-bidi'}->{$_} = 1 for qw/
440 normal bidi-override embed
441 /;
442 for my $prop_name (qw/overflow overflow-x overflow-y/) {
443 $p->{prop_value}->{$prop_name}->{$_} = 1 for qw/
444 visible hidden scroll auto -webkit-marquee -moz-hidden-unscrollable
445 /;
446 }
447 $p->{prop_value}->{visibility}->{$_} = 1 for qw/
448 visible hidden collapse
449 /;
450 $p->{prop_value}->{'list-style-type'}->{$_} = 1 for qw/
451 disc circle square decimal decimal-leading-zero
452 lower-roman upper-roman lower-greek lower-latin
453 upper-latin armenian georgian lower-alpha upper-alpha none
454 hebrew cjk-ideographic hiragana katakana hiragana-iroha
455 katakana-iroha
456 /;
457 $p->{prop_value}->{'list-style-position'}->{outside} = 1;
458 $p->{prop_value}->{'list-style-position'}->{inside} = 1;
459 $p->{prop_value}->{'page-break-before'}->{$_} = 1 for qw/
460 auto always avoid left right
461 /;
462 $p->{prop_value}->{'page-break-after'}->{$_} = 1 for qw/
463 auto always avoid left right
464 /;
465 $p->{prop_value}->{'page-break-inside'}->{auto} = 1;
466 $p->{prop_value}->{'page-break-inside'}->{avoid} = 1;
467 $p->{prop_value}->{'background-repeat'}->{$_} = 1 for qw/
468 repeat repeat-x repeat-y no-repeat
469 /;
470 $p->{prop_value}->{'background-attachment'}->{scroll} = 1;
471 $p->{prop_value}->{'background-attachment'}->{fixed} = 1;
472 $p->{prop_value}->{'font-size'}->{$_} = 1 for qw/
473 xx-small x-small small medium large x-large xx-large
474 -manakai-xxx-large -webkit-xxx-large
475 larger smaller
476 /;
477 $p->{prop_value}->{'font-style'}->{normal} = 1;
478 $p->{prop_value}->{'font-style'}->{italic} = 1;
479 $p->{prop_value}->{'font-style'}->{oblique} = 1;
480 $p->{prop_value}->{'font-variant'}->{normal} = 1;
481 $p->{prop_value}->{'font-variant'}->{'small-caps'} = 1;
482 $p->{prop_value}->{'font-stretch'}->{$_} = 1 for
483 qw/normal wider narrower ultra-condensed extra-condensed
484 condensed semi-condensed semi-expanded expanded
485 extra-expanded ultra-expanded/;
486 $p->{prop_value}->{'text-align'}->{$_} = 1 for qw/
487 left right center justify begin end
488 /;
489 $p->{prop_value}->{'text-transform'}->{$_} = 1 for qw/
490 capitalize uppercase lowercase none
491 /;
492 $p->{prop_value}->{'white-space'}->{$_} = 1 for qw/
493 normal pre nowrap pre-line pre-wrap -moz-pre-wrap
494 /;
495 $p->{prop_value}->{'writing-mode'}->{$_} = 1 for qw/
496 lr rl tb lr-tb rl-tb tb-rl
497 /;
498 $p->{prop_value}->{'text-anchor'}->{$_} = 1 for qw/
499 start middle end
500 /;
501 $p->{prop_value}->{'dominant-baseline'}->{$_} = 1 for qw/
502 auto use-script no-change reset-size ideographic alphabetic
503 hanging mathematical central middle text-after-edge text-before-edge
504 /;
505 $p->{prop_value}->{'alignment-baseline'}->{$_} = 1 for qw/
506 auto baseline before-edge text-before-edge middle central
507 after-edge text-after-edge ideographic alphabetic hanging
508 mathematical
509 /;
510 $p->{prop_value}->{'text-decoration'}->{$_} = 1 for qw/
511 none blink underline overline line-through
512 /;
513 $p->{prop_value}->{'caption-side'}->{$_} = 1 for qw/
514 top bottom left right
515 /;
516 $p->{prop_value}->{'table-layout'}->{auto} = 1;
517 $p->{prop_value}->{'table-layout'}->{fixed} = 1;
518 $p->{prop_value}->{'border-collapse'}->{collapse} = 1;
519 $p->{prop_value}->{'border-collapse'}->{separate} = 1;
520 $p->{prop_value}->{'empty-cells'}->{show} = 1;
521 $p->{prop_value}->{'empty-cells'}->{hide} = 1;
522 $p->{prop_value}->{cursor}->{$_} = 1 for qw/
523 auto crosshair default pointer move e-resize ne-resize nw-resize n-resize
524 se-resize sw-resize s-resize w-resize text wait help progress
525 /;
526 for my $prop (qw/border-top-style border-left-style
527 border-bottom-style border-right-style outline-style/) {
528 $p->{prop_value}->{$prop}->{$_} = 1 for qw/
529 none hidden dotted dashed solid double groove ridge inset outset
530 /;
531 }
532 for my $prop (qw/color background-color
533 border-bottom-color border-left-color border-right-color
534 border-top-color border-color/) {
535 $p->{prop_value}->{$prop}->{transparent} = 1;
536 $p->{prop_value}->{$prop}->{flavor} = 1;
537 $p->{prop_value}->{$prop}->{'-manakai-default'} = 1;
538 }
539 $p->{prop_value}->{'outline-color'}->{invert} = 1;
540 $p->{prop_value}->{'outline-color'}->{'-manakai-invert-or-currentcolor'} = 1;
541 $p->{pseudo_class}->{$_} = 1 for qw/
542 active checked disabled empty enabled first-child first-of-type
543 focus hover indeterminate last-child last-of-type link only-child
544 only-of-type root target visited
545 lang nth-child nth-last-child nth-of-type nth-last-of-type not
546 -manakai-contains -manakai-current
547 /;
548 $p->{pseudo_element}->{$_} = 1 for qw/
549 after before first-letter first-line
550 /;
551
552 return $CSSParser = $p;
553 } # get_css_parser
554
555 sub print_syntax_error_css_section ($$) {
556 my ($input, $result) = @_;
557
558 print STDOUT qq[
559 <div id="$input->{id_prefix}parse-errors" class="section">
560 <h2>Parse Errors</h2>
561
562 <dl id="$input->{id_prefix}parse-errors-list">];
563 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
564
565 my $p = get_css_parser ();
566 $p->init;
567 $p->{onerror} = sub {
568 my (%opt) = @_;
569 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
570 if ($opt{token}) {
571 print STDOUT qq[<dt class="$cls"><a href="#$input->{id_prefix}line-$opt{token}->{line}">Line $opt{token}->{line}</a> column $opt{token}->{column}];
572 } else {
573 print STDOUT qq[<dt class="$cls">Unknown location];
574 }
575 if (defined $opt{value}) {
576 print STDOUT qq[ (<code>@{[htescape ($opt{value})]}</code>)];
577 } elsif (defined $opt{token}) {
578 print STDOUT qq[ (<code>@{[htescape (Whatpm::CSS::Tokenizer->serialize_token ($opt{token}))]}</code>)];
579 }
580 $type =~ tr/ /-/;
581 $type =~ s/\|/%7C/g;
582 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
583 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
584 print STDOUT qq[$msg</dd>\n];
585
586 add_error ('syntax', \%opt => $result);
587 };
588 $p->{href} = $input->{uri};
589 $p->{base_uri} = $input->{base_uri};
590
591 # if ($parse_mode eq 'q') {
592 # $p->{unitless_px} = 1;
593 # $p->{hashless_color} = 1;
594 # }
595
596 ## TODO: Make $input->{s} a ref.
597
598 my $s = \$input->{s};
599 my $charset;
600 unless ($input->{is_char_string}) {
601 require Encode;
602 if (defined $input->{charset}) {## TODO: IANA->Perl
603 $charset = $input->{charset};
604 $s = \(Encode::decode ($input->{charset}, $$s));
605 } else {
606 ## TODO: charset detection
607 $s = \(Encode::decode ($charset = 'utf-8', $$s));
608 }
609 }
610
611 my $cssom = $p->parse_char_string ($$s);
612 $cssom->manakai_input_encoding ($charset) if defined $charset;
613
614 print STDOUT qq[</dl></div>];
615
616 return $cssom;
617 } # print_syntax_error_css_section
618
619 sub print_syntax_error_manifest_section ($$) {
620 my ($input, $result) = @_;
621
622 require Whatpm::CacheManifest;
623
624 print STDOUT qq[
625 <div id="$input->{id_prefix}parse-errors" class="section">
626 <h2>Parse Errors</h2>
627
628 <dl id="$input->{id_prefix}parse-errors-list">];
629 push @nav, ['#parse-errors' => 'Parse Error'] unless $input->{nested};
630
631 my $onerror = sub {
632 my (%opt) = @_;
633 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level});
634 print STDOUT qq[<dt class="$cls">], get_error_label ($input, \%opt),
635 qq[</dt>];
636 $type =~ tr/ /-/;
637 $type =~ s/\|/%7C/g;
638 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
639 print STDOUT qq[<dd class="$cls">], get_error_level_label (\%opt);
640 print STDOUT qq[$msg</dd>\n];
641
642 add_error ('syntax', \%opt => $result);
643 };
644
645 my $time1 = time;
646 my $manifest = Whatpm::CacheManifest->parse_byte_string
647 ($input->{s}, $input->{uri}, $input->{base_uri}, $onerror);
648 $time{parse_manifest} = time - $time1;
649
650 print STDOUT qq[</dl></div>];
651
652 return $manifest;
653 } # print_syntax_error_manifest_section
654
655 sub print_source_string_section ($$$) {
656 my $input = shift;
657 my $s;
658 unless ($input->{is_char_string}) {
659 require Encode;
660 my $enc = Encode::find_encoding ($_[1]); ## TODO: charset name -> Perl name
661 return unless $enc;
662
663 $s = \($enc->decode (${$_[0]}));
664 } else {
665 $s = $_[0];
666 }
667
668 my $i = 1;
669 push @nav, ['#source-string' => 'Source'] unless $input->{nested};
670 print STDOUT qq[<div id="$input->{id_prefix}source-string" class="section">
671 <h2>Document Source</h2>
672 <ol lang="">\n];
673 if (length $$s) {
674 while ($$s =~ /\G([^\x0D\x0A]*?)(?>\x0D\x0A?|\x0A)/gc) {
675 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
676 "</li>\n";
677 $i++;
678 }
679 if ($$s =~ /\G([^\x0D\x0A]+)/gc) {
680 print STDOUT qq[<li id="$input->{id_prefix}line-$i">], htescape $1,
681 "</li>\n";
682 }
683 } else {
684 print STDOUT q[<li id="$input->{id_prefix}line-1"></li>];
685 }
686 print STDOUT "</ol></div>
687 <script>
688 addSourceToParseErrorList ('$input->{id_prefix}', 'parse-errors-list');
689 </script>";
690 } # print_input_string_section
691
692 sub print_document_tree ($$) {
693 my ($input, $node) = @_;
694
695 my $r = '<ol class="xoxo">';
696
697 my @node = ($node);
698 while (@node) {
699 my $child = shift @node;
700 unless (ref $child) {
701 $r .= $child;
702 next;
703 }
704
705 my $node_id = $input->{id_prefix} . 'node-'.refaddr $child;
706 my $nt = $child->node_type;
707 if ($nt == $child->ELEMENT_NODE) {
708 my $child_nsuri = $child->namespace_uri;
709 $r .= qq[<li id="$node_id" class="tree-element"><code title="@{[defined $child_nsuri ? $child_nsuri : '']}">] . htescape ($child->tag_name) .
710 '</code>'; ## ISSUE: case
711
712 if ($child->has_attributes) {
713 $r .= '<ul class="attributes">';
714 for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
715 @{$child->attributes}) {
716 $r .= qq[<li id="$input->{id_prefix}$attr->[3]" class="tree-attribute"><code title="@{[defined $attr->[2] ? htescape ($attr->[2]) : '']}">] . htescape ($attr->[0]) . '</code> = '; ## ISSUE: case?
717 $r .= '<q>' . htescape ($attr->[1]) . '</q></li>'; ## TODO: children
718 }
719 $r .= '</ul>';
720 }
721
722 if ($child->has_child_nodes) {
723 $r .= '<ol class="children">';
724 unshift @node, @{$child->child_nodes}, '</ol></li>';
725 } else {
726 $r .= '</li>';
727 }
728 } elsif ($nt == $child->TEXT_NODE) {
729 $r .= qq'<li id="$node_id" class="tree-text"><q lang="">' . htescape ($child->data) . '</q></li>';
730 } elsif ($nt == $child->CDATA_SECTION_NODE) {
731 $r .= qq'<li id="$node_id" class="tree-cdata"><code>&lt;[CDATA[</code><q lang="">' . htescape ($child->data) . '</q><code>]]&gt;</code></li>';
732 } elsif ($nt == $child->COMMENT_NODE) {
733 $r .= qq'<li id="$node_id" class="tree-comment"><code>&lt;!--</code><q lang="">' . htescape ($child->data) . '</q><code>--&gt;</code></li>';
734 } elsif ($nt == $child->DOCUMENT_NODE) {
735 $r .= qq'<li id="$node_id" class="tree-document">Document';
736 $r .= qq[<ul class="attributes">];
737 my $cp = $child->manakai_charset;
738 if (defined $cp) {
739 $r .= qq[<li><code>charset</code> parameter = <code>];
740 $r .= htescape ($cp) . qq[</code></li>];
741 }
742 $r .= qq[<li><code>inputEncoding</code> = ];
743 my $ie = $child->input_encoding;
744 if (defined $ie) {
745 $r .= qq[<code>@{[htescape ($ie)]}</code>];
746 if ($child->manakai_has_bom) {
747 $r .= qq[ (with <code class=charname><abbr>BOM</abbr></code>)];
748 }
749 } else {
750 $r .= qq[(<code>null</code>)];
751 }
752 $r .= qq[<li>@{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}</li>];
753 $r .= qq[<li>@{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}</li>];
754 unless ($child->manakai_is_html) {
755 $r .= qq[<li>XML version = <code>@{[htescape ($child->xml_version)]}</code></li>];
756 if (defined $child->xml_encoding) {
757 $r .= qq[<li>XML encoding = <code>@{[htescape ($child->xml_encoding)]}</code></li>];
758 } else {
759 $r .= qq[<li>XML encoding = (null)</li>];
760 }
761 $r .= qq[<li>XML standalone = @{[$child->xml_standalone ? 'true' : 'false']}</li>];
762 }
763 $r .= qq[</ul>];
764 if ($child->has_child_nodes) {
765 $r .= '<ol class="children">';
766 unshift @node, @{$child->child_nodes}, '</ol></li>';
767 }
768 } elsif ($nt == $child->DOCUMENT_TYPE_NODE) {
769 $r .= qq'<li id="$node_id" class="tree-doctype"><code>&lt;!DOCTYPE&gt;</code><ul class="attributes">';
770 $r .= qq[<li class="tree-doctype-name">Name = <q>@{[htescape ($child->name)]}</q></li>];
771 $r .= qq[<li class="tree-doctype-publicid">Public identifier = <q>@{[htescape ($child->public_id)]}</q></li>];
772 $r .= qq[<li class="tree-doctype-systemid">System identifier = <q>@{[htescape ($child->system_id)]}</q></li>];
773 $r .= '</ul></li>';
774 } elsif ($nt == $child->PROCESSING_INSTRUCTION_NODE) {
775 $r .= qq'<li id="$node_id" class="tree-id"><code>&lt;?@{[htescape ($child->target)]}</code> <q>@{[htescape ($child->data)]}</q><code>?&gt;</code></li>';
776 } else {
777 $r .= qq'<li id="$node_id" class="tree-unknown">@{[$child->node_type]} @{[htescape ($child->node_name)]}</li>'; # error
778 }
779 }
780
781 $r .= '</ol>';
782 print STDOUT $r;
783 } # print_document_tree
784
785 sub print_structure_dump_dom_section ($$$) {
786 my ($input, $doc, $el) = @_;
787
788 print STDOUT qq[
789 <div id="$input->{id_prefix}document-tree" class="section">
790 <h2>Document Tree</h2>
791 ];
792 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
793 unless $input->{nested};
794
795 print_document_tree ($input, $el || $doc);
796
797 print STDOUT qq[</div>];
798 } # print_structure_dump_dom_section
799
800 sub print_structure_dump_cssom_section ($$) {
801 my ($input, $cssom) = @_;
802
803 print STDOUT qq[
804 <div id="$input->{id_prefix}document-tree" class="section">
805 <h2>Document Tree</h2>
806 ];
807 push @nav, [qq[#$input->{id_prefix}document-tree] => 'Tree']
808 unless $input->{nested};
809
810 ## TODO:
811 print STDOUT "<pre>".htescape ($cssom->css_text)."</pre>";
812
813 print STDOUT qq[</div>];
814 } # print_structure_dump_cssom_section
815
816 sub print_structure_dump_manifest_section ($$) {
817 my ($input, $manifest) = @_;
818
819 print STDOUT qq[
820 <div id="$input->{id_prefix}dump-manifest" class="section">
821 <h2>Cache Manifest</h2>
822 ];
823 push @nav, [qq[#$input->{id_prefix}dump-manifest] => 'Cache Manifest']
824 unless $input->{nested};
825
826 print STDOUT qq[<dl><dt>Explicit entries</dt>];
827 my $i = 0;
828 for my $uri (@{$manifest->[0]}) {
829 my $euri = htescape ($uri);
830 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
831 }
832
833 print STDOUT qq[<dt>Fallback entries</dt><dd>
834 <table><thead><tr><th scope=row>Oppotunistic Caching Namespace</th>
835 <th scope=row>Fallback Entry</tr><tbody>];
836 for my $uri (sort {$a cmp $b} keys %{$manifest->[1]}) {
837 my $euri = htescape ($uri);
838 my $euri2 = htescape ($manifest->[1]->{$uri});
839 print STDOUT qq[<tr><td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></td>
840 <td id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri2">$euri2</a>></code></td>];
841 }
842
843 print STDOUT qq[</table><dt>Online whitelist</dt>];
844 for my $uri (@{$manifest->[2]}) {
845 my $euri = htescape ($uri);
846 print STDOUT qq[<dd id="$input->{id_prefix}index-@{[$i++]}"><code class=uri>&lt;<a href="$euri">$euri</a>></code></dd>];
847 }
848
849 print STDOUT qq[</dl></div>];
850 } # print_structure_dump_manifest_section
851
852 sub print_structure_error_dom_section ($$$$$) {
853 my ($input, $doc, $el, $result, $onsubdoc) = @_;
854
855 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
856 <h2>Document Errors</h2>
857
858 <dl id=document-errors-list>];
859 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
860 unless $input->{nested};
861
862 require Whatpm::ContentChecker;
863 my $onerror = sub {
864 my %opt = @_;
865 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
866 $type =~ tr/ /-/;
867 $type =~ s/\|/%7C/g;
868 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
869 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
870 qq[</dt>\n<dd class="$cls">], get_error_level_label (\%opt);
871 print STDOUT $msg, "</dd>\n";
872 add_error ('structure', \%opt => $result);
873 };
874
875 my $elements;
876 my $time1 = time;
877 if ($el) {
878 $elements = Whatpm::ContentChecker->check_element
879 ($el, $onerror, $onsubdoc);
880 } else {
881 $elements = Whatpm::ContentChecker->check_document
882 ($doc, $onerror, $onsubdoc);
883 }
884 $time{check} = time - $time1;
885
886 print STDOUT qq[</dl>
887 <script>
888 addSourceToParseErrorList ('$input->{id_prefix}', 'document-errors-list');
889 </script></div>];
890
891 return $elements;
892 } # print_structure_error_dom_section
893
894 sub print_structure_error_manifest_section ($$$) {
895 my ($input, $manifest, $result) = @_;
896
897 print STDOUT qq[<div id="$input->{id_prefix}document-errors" class="section">
898 <h2>Document Errors</h2>
899
900 <dl>];
901 push @nav, [qq[#$input->{id_prefix}document-errors] => 'Document Error']
902 unless $input->{nested};
903
904 require Whatpm::CacheManifest;
905 Whatpm::CacheManifest->check_manifest ($manifest, sub {
906 my %opt = @_;
907 my ($type, $cls, $msg) = get_text ($opt{type}, $opt{level}, $opt{node});
908 $type =~ tr/ /-/;
909 $type =~ s/\|/%7C/g;
910 $msg .= qq[ [<a href="../error-description#@{[htescape ($type)]}">Description</a>]];
911 print STDOUT qq[<dt class="$cls">] . get_error_label ($input, \%opt) .
912 qq[</dt>\n<dd class="$cls">], $msg, "</dd>\n";
913 add_error ('structure', \%opt => $result);
914 });
915
916 print STDOUT qq[</div>];
917 } # print_structure_error_manifest_section
918
919 sub print_table_section ($$) {
920 my ($input, $tables) = @_;
921
922 push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
923 unless $input->{nested};
924 print STDOUT qq[
925 <div id="$input->{id_prefix}tables" class="section">
926 <h2>Tables</h2>
927
928 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
929 <script src="../table-script.js" type="text/javascript"></script>
930 <noscript>
931 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
932 </noscript>
933 ];
934
935 require JSON;
936
937 my $i = 0;
938 for my $table_el (@$tables) {
939 $i++;
940 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
941 get_node_link ($input, $table_el) . q[</h3>];
942
943 ## TODO: Make |ContentChecker| return |form_table| result
944 ## so that this script don't have to run the algorithm twice.
945 my $table = Whatpm::HTMLTable->form_table ($table_el);
946
947 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption}) {
948 next unless $_;
949 delete $_->{element};
950 }
951
952 for (@{$table->{row_group}}) {
953 next unless $_;
954 next unless $_->{element};
955 $_->{type} = $_->{element}->manakai_local_name;
956 delete $_->{element};
957 }
958
959 for (@{$table->{cell}}) {
960 next unless $_;
961 for (@{$_}) {
962 next unless $_;
963 for (@$_) {
964 $_->{id} = refaddr $_->{element} if defined $_->{element};
965 delete $_->{element};
966 $_->{is_header} = $_->{is_header} ? 1 : 0;
967 }
968 }
969 }
970
971 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
972 print STDOUT JSON::objToJson ($table);
973 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
974 print STDOUT qq[, '$input->{id_prefix}');</script>];
975 }
976
977 print STDOUT qq[</div>];
978 } # print_table_section
979
980 sub print_listing_section ($$$) {
981 my ($opt, $input, $ids) = @_;
982
983 push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
984 unless $input->{nested};
985 print STDOUT qq[
986 <div id="$input->{id_prefix}$opt->{id}" class="section">
987 <h2>$opt->{heading}</h2>
988
989 <dl>
990 ];
991 for my $id (sort {$a cmp $b} keys %$ids) {
992 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
993 for (@{$ids->{$id}}) {
994 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
995 }
996 }
997 print STDOUT qq[</dl></div>];
998 } # print_listing_section
999
1000 sub print_result_section ($) {
1001 my $result = shift;
1002
1003 print STDOUT qq[
1004 <div id="result-summary" class="section">
1005 <h2>Result</h2>];
1006
1007 if ($result->{unsupported} and $result->{conforming_max}) {
1008 print STDOUT qq[<p class=uncertain id=result-para>The conformance
1009 checker cannot decide whether the document is conforming or
1010 not, since the document contains one or more unsupported
1011 features. The document might or might not be conforming.</p>];
1012 } elsif ($result->{conforming_min}) {
1013 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
1014 found in this document.</p>];
1015 } elsif ($result->{conforming_max}) {
1016 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
1017 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
1018 it might be conforming.</p>];
1019 } else {
1020 print STDOUT qq[<p class=FAIL id=result-para>This document is
1021 <strong><em>non</em>-conforming</strong>.</p>];
1022 }
1023
1024 print STDOUT qq[<table>
1025 <colgroup><col><colgroup><col><col><col><colgroup><col>
1026 <thead>
1027 <tr><th scope=col></th>
1028 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1029 Errors</a></th>
1030 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1031 Errors</a></th>
1032 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
1033 <th scope=col>Score</th></tr></thead><tbody>];
1034
1035 my $must_error = 0;
1036 my $should_error = 0;
1037 my $warning = 0;
1038 my $score_min = 0;
1039 my $score_max = 0;
1040 my $score_base = 20;
1041 my $score_unit = $score_base / 100;
1042 for (
1043 [Transfer => 'transfer', ''],
1044 [Character => 'char', ''],
1045 [Syntax => 'syntax', '#parse-errors'],
1046 [Structure => 'structure', '#document-errors'],
1047 ) {
1048 $must_error += ($result->{$_->[1]}->{must} += 0);
1049 $should_error += ($result->{$_->[1]}->{should} += 0);
1050 $warning += ($result->{$_->[1]}->{warning} += 0);
1051 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
1052 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
1053
1054 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
1055 my $label = $_->[0];
1056 if ($result->{$_->[1]}->{must} or
1057 $result->{$_->[1]}->{should} or
1058 $result->{$_->[1]}->{warning} or
1059 $result->{$_->[1]}->{unsupported}) {
1060 $label = qq[<a href="$_->[2]">$label</a>];
1061 }
1062
1063 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
1064 if ($uncertain) {
1065 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}</td>];
1066 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
1067 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}</td></tr>];
1068 } else {
1069 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}</td></tr>];
1070 }
1071 }
1072
1073 $score_max += $score_base;
1074
1075 print STDOUT qq[
1076 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base</td></tr>
1077 </tbody>
1078 <tfoot><tr class=uncertain><th scope=row>Total</th>
1079 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
1080 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
1081 <td>$warning?</td>
1082 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong></td></tr></tfoot>
1083 </table>
1084
1085 <p><strong>Important</strong>: This conformance checking service
1086 is <em>under development</em>. The result above might be <em>wrong</em>.</p>
1087 </div>];
1088 push @nav, ['#result-summary' => 'Result'];
1089 } # print_result_section
1090
1091 sub print_result_unknown_type_section ($$) {
1092 my ($input, $result) = @_;
1093
1094 my $euri = htescape ($input->{uri});
1095 print STDOUT qq[
1096 <div id="$input->{id_prefix}parse-errors" class="section">
1097 <h2>Errors</h2>
1098
1099 <dl>
1100 <dt class=unsupported><code>&lt;<a href="$euri">$euri</a>&gt;</code></dt>
1101 <dd class=unsupported><strong><a href="../error-description#level-u">Not
1102 supported</a></strong>:
1103 Media type
1104 <code class="MIME" lang="en">@{[htescape $input->{media_type}]}</code>
1105 is not supported.</dd>
1106 </dl>
1107 </div>
1108 ];
1109 push @nav, [qq[#$input->{id_prefix}parse-errors] => 'Errors']
1110 unless $input->{nested};
1111 add_error (char => {level => 'u'} => $result);
1112 add_error (syntax => {level => 'u'} => $result);
1113 add_error (structure => {level => 'u'} => $result);
1114 } # print_result_unknown_type_section
1115
1116 sub print_result_input_error_section ($) {
1117 my $input = shift;
1118 print STDOUT qq[<div class="section" id="result-summary">
1119 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>
1120 </div>];
1121 push @nav, ['#result-summary' => 'Result'];
1122 } # print_result_input_error_section
1123
1124 sub get_error_label ($$) {
1125 my ($input, $err) = @_;
1126
1127 my $r = '';
1128
1129 my $line;
1130 my $column;
1131
1132 if (defined $err->{node}) {
1133 $line = $err->{node}->get_user_data ('manakai_source_line');
1134 if (defined $line) {
1135 $column = $err->{node}->get_user_data ('manakai_source_column');
1136 } else {
1137 if ($err->{node}->node_type == $err->{node}->ATTRIBUTE_NODE) {
1138 my $owner = $err->{node}->owner_element;
1139 $line = $owner->get_user_data ('manakai_source_line');
1140 $column = $owner->get_user_data ('manakai_source_column');
1141 }
1142 }
1143 }
1144 unless (defined $line) {
1145 if (defined $err->{token} and defined $err->{token}->{line}) {
1146 $line = $err->{token}->{line};
1147 $column = $err->{token}->{column};
1148 } elsif (defined $err->{line}) {
1149 $line = $err->{line};
1150 $column = $err->{column};
1151 }
1152 }
1153
1154 if (defined $line) {
1155 if (defined $column and $column > 0) {
1156 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a> column $column];
1157 } else {
1158 $line = $line - 1 || 1;
1159 $r = qq[<a href="#$input->{id_prefix}line-$line">Line $line</a>];
1160 }
1161 }
1162
1163 if (defined $err->{node}) {
1164 $r .= ' ' if length $r;
1165 $r .= get_node_link ($input, $err->{node});
1166 }
1167
1168 if (defined $err->{index}) {
1169 if (length $r) {
1170 $r .= ', Index ' . (0+$err->{index});
1171 } else {
1172 $r .= "<a href='#$input->{id_prefix}index-@{[0+$err->{index}]}'>Index "
1173 . (0+$err->{index}) . '</a>';
1174 }
1175 }
1176
1177 if (defined $err->{value}) {
1178 $r .= ' ' if length $r;
1179 $r .= '<q><code>' . htescape ($err->{value}) . '</code></q>';
1180 }
1181
1182 return $r;
1183 } # get_error_label
1184
1185 sub get_error_level_label ($) {
1186 my $err = shift;
1187
1188 my $r = '';
1189
1190 if (not defined $err->{level} or $err->{level} eq 'm') {
1191 $r = qq[<strong><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
1192 error</a></strong>: ];
1193 } elsif ($err->{level} eq 's') {
1194 $r = qq[<strong><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
1195 error</a></strong>: ];
1196 } elsif ($err->{level} eq 'w') {
1197 $r = qq[<strong><a href="../error-description#level-w">Warning</a></strong>:
1198 ];
1199 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
1200 $r = qq[<strong><a href="../error-description#level-u">Not
1201 supported</a></strong>: ];
1202 } elsif ($err->{level} eq 'i') {
1203 $r = qq[<strong><a href="../error-description#level-i">Information</a></strong>: ];
1204 } else {
1205 my $elevel = htescape ($err->{level});
1206 $r = qq[<strong><a href="../error-description#level-$elevel">$elevel</a></strong>:
1207 ];
1208 }
1209
1210 return $r;
1211 } # get_error_level_label
1212
1213 sub get_node_path ($) {
1214 my $node = shift;
1215 my @r;
1216 while (defined $node) {
1217 my $rs;
1218 if ($node->node_type == 1) {
1219 $rs = $node->manakai_local_name;
1220 $node = $node->parent_node;
1221 } elsif ($node->node_type == 2) {
1222 $rs = '@' . $node->manakai_local_name;
1223 $node = $node->owner_element;
1224 } elsif ($node->node_type == 3) {
1225 $rs = '"' . $node->data . '"';
1226 $node = $node->parent_node;
1227 } elsif ($node->node_type == 9) {
1228 @r = ('') unless @r;
1229 $rs = '';
1230 $node = $node->parent_node;
1231 } else {
1232 $rs = '#' . $node->node_type;
1233 $node = $node->parent_node;
1234 }
1235 unshift @r, $rs;
1236 }
1237 return join '/', @r;
1238 } # get_node_path
1239
1240 sub get_node_link ($$) {
1241 return qq[<a href="#$_[0]->{id_prefix}node-@{[refaddr $_[1]]}">] .
1242 htescape (get_node_path ($_[1])) . qq[</a>];
1243 } # get_node_link
1244
1245 {
1246 my $Msg = {};
1247
1248 sub load_text_catalog ($) {
1249 my $lang = shift; # MUST be a canonical lang name
1250 open my $file, '<:utf8', "cc-msg.$lang.txt"
1251 or die "$0: cc-msg.$lang.txt: $!";
1252 while (<$file>) {
1253 if (s/^([^;]+);([^;]*);//) {
1254 my ($type, $cls, $msg) = ($1, $2, $_);
1255 $msg =~ tr/\x0D\x0A//d;
1256 $Msg->{$type} = [$cls, $msg];
1257 }
1258 }
1259 } # load_text_catalog
1260
1261 sub get_text ($) {
1262 my ($type, $level, $node) = @_;
1263 $type = $level . ':' . $type if defined $level;
1264 $level = 'm' unless defined $level;
1265 my @arg;
1266 {
1267 if (defined $Msg->{$type}) {
1268 my $msg = $Msg->{$type}->[1];
1269 $msg =~ s{<var>\$([0-9]+)</var>}{
1270 defined $arg[$1] ? htescape ($arg[$1]) : '(undef)';
1271 }ge;
1272 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
1273 UNIVERSAL::can ($node, 'get_attribute_ns')
1274 ? htescape ($node->get_attribute_ns (undef, $1)) : ''
1275 }ge;
1276 $msg =~ s{<var>{\@}</var>}{
1277 UNIVERSAL::can ($node, 'value') ? htescape ($node->value) : ''
1278 }ge;
1279 $msg =~ s{<var>{local-name}</var>}{
1280 UNIVERSAL::can ($node, 'manakai_local_name')
1281 ? htescape ($node->manakai_local_name) : ''
1282 }ge;
1283 $msg =~ s{<var>{element-local-name}</var>}{
1284 (UNIVERSAL::can ($node, 'owner_element') and
1285 $node->owner_element)
1286 ? htescape ($node->owner_element->manakai_local_name)
1287 : ''
1288 }ge;
1289 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
1290 } elsif ($type =~ s/:([^:]*)$//) {
1291 unshift @arg, $1;
1292 redo;
1293 }
1294 }
1295 return ($type, 'level-'.$level, htescape ($_[0]));
1296 } # get_text
1297
1298 }
1299
1300 sub get_input_document ($$) {
1301 my ($http, $dom) = @_;
1302
1303 my $request_uri = $http->get_parameter ('uri');
1304 my $r = {};
1305 if (defined $request_uri and length $request_uri) {
1306 my $uri = $dom->create_uri_reference ($request_uri);
1307 unless ({
1308 http => 1,
1309 }->{lc $uri->uri_scheme}) {
1310 return {uri => $request_uri, request_uri => $request_uri,
1311 error_status_text => 'URI scheme not allowed'};
1312 }
1313
1314 require Message::Util::HostPermit;
1315 my $host_permit = new Message::Util::HostPermit;
1316 $host_permit->add_rule (<<EOH);
1317 Allow host=suika port=80
1318 Deny host=suika
1319 Allow host=suika.fam.cx port=80
1320 Deny host=suika.fam.cx
1321 Deny host=localhost
1322 Deny host=*.localdomain
1323 Deny ipv4=0.0.0.0/8
1324 Deny ipv4=10.0.0.0/8
1325 Deny ipv4=127.0.0.0/8
1326 Deny ipv4=169.254.0.0/16
1327 Deny ipv4=172.0.0.0/11
1328 Deny ipv4=192.0.2.0/24
1329 Deny ipv4=192.88.99.0/24
1330 Deny ipv4=192.168.0.0/16
1331 Deny ipv4=198.18.0.0/15
1332 Deny ipv4=224.0.0.0/4
1333 Deny ipv4=255.255.255.255/32
1334 Deny ipv6=0::0/0
1335 Allow host=*
1336 EOH
1337 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
1338 return {uri => $request_uri, request_uri => $request_uri,
1339 error_status_text => 'Connection to the host is forbidden'};
1340 }
1341
1342 require LWP::UserAgent;
1343 my $ua = WDCC::LWPUA->new;
1344 $ua->{wdcc_dom} = $dom;
1345 $ua->{wdcc_host_permit} = $host_permit;
1346 $ua->agent ('Mozilla'); ## TODO: for now.
1347 $ua->parse_head (0);
1348 $ua->protocols_allowed ([qw/http/]);
1349 $ua->max_size (1000_000);
1350 my $req = HTTP::Request->new (GET => $request_uri);
1351 $req->header ('Accept-Encoding' => 'identity, *; q=0');
1352 my $res = $ua->request ($req);
1353 ## TODO: 401 sets |is_success| true.
1354 if ($res->is_success or $http->get_parameter ('error-page')) {
1355 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
1356 $r->{uri} = $res->request->uri;
1357 $r->{request_uri} = $request_uri;
1358
1359 ## TODO: More strict parsing...
1360 my $ct = $res->header ('Content-Type');
1361 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
1362 $r->{charset} = lc $1;
1363 $r->{charset} =~ tr/\\//d;
1364 $r->{official_charset} = $r->{charset};
1365 }
1366
1367 my $input_charset = $http->get_parameter ('charset');
1368 if (defined $input_charset and length $input_charset) {
1369 $r->{charset_overridden}
1370 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
1371 $r->{charset} = $input_charset;
1372 }
1373
1374 ## TODO: Support for HTTP Content-Encoding
1375
1376 $r->{s} = ''.$res->content;
1377
1378 require Whatpm::ContentType;
1379 ($r->{official_type}, $r->{media_type})
1380 = Whatpm::ContentType->get_sniffed_type
1381 (get_file_head => sub {
1382 return substr $r->{s}, 0, shift;
1383 },
1384 http_content_type_byte => $ct,
1385 has_http_content_encoding =>
1386 defined $res->header ('Content-Encoding'),
1387 supported_image_types => {});
1388 } else {
1389 $r->{uri} = $res->request->uri;
1390 $r->{request_uri} = $request_uri;
1391 $r->{error_status_text} = $res->status_line;
1392 }
1393
1394 $r->{header_field} = [];
1395 $res->scan (sub {
1396 push @{$r->{header_field}}, [$_[0], $_[1]];
1397 });
1398 $r->{header_status_code} = $res->code;
1399 $r->{header_status_text} = $res->message;
1400 } else {
1401 $r->{s} = ''.$http->get_parameter ('s');
1402 $r->{uri} = q<thismessage:/>;
1403 $r->{request_uri} = q<thismessage:/>;
1404 $r->{base_uri} = q<thismessage:/>;
1405 $r->{charset} = ''.$http->get_parameter ('_charset_');
1406 $r->{charset} =~ s/\s+//g;
1407 $r->{charset} = 'utf-8' if $r->{charset} eq '';
1408 $r->{official_charset} = $r->{charset};
1409 $r->{header_field} = [];
1410
1411 require Whatpm::ContentType;
1412 ($r->{official_type}, $r->{media_type})
1413 = Whatpm::ContentType->get_sniffed_type
1414 (get_file_head => sub {
1415 return substr $r->{s}, 0, shift;
1416 },
1417 http_content_type_byte => undef,
1418 has_http_content_encoding => 0,
1419 supported_image_types => {});
1420 }
1421
1422 my $input_format = $http->get_parameter ('i');
1423 if (defined $input_format and length $input_format) {
1424 $r->{media_type_overridden}
1425 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
1426 $r->{media_type} = $input_format;
1427 }
1428 if (defined $r->{s} and not defined $r->{media_type}) {
1429 $r->{media_type} = 'text/html';
1430 $r->{media_type_overridden} = 1;
1431 }
1432
1433 if ($r->{media_type} eq 'text/xml') {
1434 unless (defined $r->{charset}) {
1435 $r->{charset} = 'us-ascii';
1436 $r->{official_charset} = $r->{charset};
1437 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
1438 $r->{charset_overridden} = 0;
1439 }
1440 }
1441
1442 if (length $r->{s} > 1000_000) {
1443 $r->{error_status_text} = 'Entity-body too large';
1444 delete $r->{s};
1445 return $r;
1446 }
1447
1448 $r->{inner_html_element} = $http->get_parameter ('e');
1449
1450 return $r;
1451 } # get_input_document
1452
1453 package WDCC::LWPUA;
1454 BEGIN { push our @ISA, 'LWP::UserAgent'; }
1455
1456 sub redirect_ok {
1457 my $ua = shift;
1458 unless ($ua->SUPER::redirect_ok (@_)) {
1459 return 0;
1460 }
1461
1462 my $uris = $_[1]->header ('Location');
1463 return 0 unless $uris;
1464 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
1465 unless ({
1466 http => 1,
1467 }->{lc $uri->uri_scheme}) {
1468 return 0;
1469 }
1470 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
1471 return 0;
1472 }
1473 return 1;
1474 } # redirect_ok
1475
1476 =head1 AUTHOR
1477
1478 Wakaba <w@suika.fam.cx>.
1479
1480 =head1 LICENSE
1481
1482 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
1483
1484 This library is free software; you can redistribute it
1485 and/or modify it under the same terms as Perl itself.
1486
1487 =cut
1488
1489 ## $Date: 2008/03/16 11:38:47 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24