/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.53 - (hide annotations) (download)
Sun Jul 20 14:58:24 2008 UTC (16 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.52: +175 -1125 lines
++ ChangeLog	20 Jul 2008 14:58:20 -0000
2008-07-20  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi: Modularized.

	* WebHACC/: New directory.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3 wakaba 1.23 use utf8;
4 wakaba 1.1
5     use lib qw[/home/httpd/html/www/markup/html/whatpm
6 wakaba 1.16 /home/wakaba/work/manakai2/lib];
7 wakaba 1.1 use CGI::Carp qw[fatalsToBrowser];
8 wakaba 1.2 use Scalar::Util qw[refaddr];
9 wakaba 1.1
10 wakaba 1.53 require WebHACC::Input;
11     require WebHACC::Result;
12     require WebHACC::Output;
13    
14     my $out;
15 wakaba 1.2
16 wakaba 1.35 require Message::DOM::DOMImplementation;
17     my $dom = Message::DOM::DOMImplementation->new;
18     {
19 wakaba 1.16 use Message::CGI::HTTP;
20     my $http = Message::CGI::HTTP->new;
21 wakaba 1.1
22 wakaba 1.16 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
23 wakaba 1.8 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
24     exit;
25     }
26 wakaba 1.53
27 wakaba 1.7 load_text_catalog ('en'); ## TODO: conneg
28    
29 wakaba 1.53 $out = WebHACC::Output->new;
30     $out->handle (*STDOUT);
31     $out->set_utf8;
32     $out->set_flush;
33     $out->html (qq[Content-Type: text/html; charset=utf-8
34 wakaba 1.2
35     <!DOCTYPE html>
36     <html lang="en">
37     <head>
38     <title>Web Document Conformance Checker (BETA)</title>
39 wakaba 1.3 <link rel="stylesheet" href="../cc-style.css" type="text/css">
40 wakaba 1.2 </head>
41     <body>
42 wakaba 1.13 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
43     (<em>beta</em>)</h1>
44 wakaba 1.53 ]);
45 wakaba 1.2
46 wakaba 1.14 my $input = get_input_document ($http, $dom);
47 wakaba 1.53 $out->input ($input);
48     $out->unset_flush;
49    
50 wakaba 1.16 my $char_length = 0;
51 wakaba 1.14
52 wakaba 1.53 $out->start_section (id => 'document-info', title => 'Information');
53     $out->html (qq[<dl>
54     <dt>Request URL</dt>
55     <dd>]);
56     $out->url ($input->{request_uri});
57     $out->html (q[<dt>Document URL<!-- HTML5 document's address? -->
58     <dd>]);
59     $out->url ($input->{uri}, id => 'anchor-document-url');
60     $out->html (q[
61 wakaba 1.25 <script>
62     document.title = '<'
63 wakaba 1.53 + document.getElementById ('anchor-document-url').href + '> \\u2014 '
64 wakaba 1.25 + document.title;
65 wakaba 1.53 </script>]);
66     ## NOTE: no </dl> yet
67 wakaba 1.1
68 wakaba 1.53 if (defined $input->{s}) {
69     $char_length = length $input->{s};
70 wakaba 1.9
71 wakaba 1.53 $out->html (qq[<dt>Base URI<dd>]);
72     $out->url ($input->{base_uri});
73     $out->html (qq[<dt>Internet Media Type</dt>
74     <dd><code class="MIME" lang="en">]);
75     $out->text ($input->{media_type});
76     $out->html (qq[</code> ]);
77     if ($input->{media_type_overridden}) {
78     $out->html ('<em>(overridden)</em>');
79     } elsif (defined $input->{official_type}) {
80     if ($input->{media_type} eq $input->{official_type}) {
81     #
82     } else {
83     $out->html ('<em>(sniffed; official type is: <code class=MIME lang=en>');
84     $out->text ($input->{official_type});
85     $out->html ('</code>)');
86     }
87     } else {
88     $out->html ('<em>(sniffed)</em>');
89     }
90     $out->html (q[<dt>Character Encoding<dd>]);
91     if (defined $input->{charset}) {
92     $out->html ('<code class="charset" lang="en">');
93     $out->text ($input->{charset});
94     $out->html ('</code>');
95     } else {
96     $out->text ('(none)');
97     }
98     $out->html (' <em>overridden</em>') if $input->{charset_overridden};
99     $out->html (qq[
100 wakaba 1.16 <dt>Length</dt>
101     <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
102 wakaba 1.9 </dl>
103 wakaba 1.39
104     <script src="../cc-script.js"></script>
105 wakaba 1.53 ]);
106     $out->end_section;
107 wakaba 1.9
108 wakaba 1.53 my $result = WebHACC::Result->new;
109     $result->{conforming_min} = 1;
110     $result->{conforming_max} = 1;
111     check_and_print ($input => $result => $out);
112     print_result_section ($result);
113     } else {
114     $out->html ('</dl>');
115     $out->end_section;
116     print_result_input_error_section ($input);
117 wakaba 1.3 }
118 wakaba 1.1
119 wakaba 1.53 $out->nav_list;
120 wakaba 1.16
121 wakaba 1.53 exit;
122 wakaba 1.35 }
123 wakaba 1.1
124 wakaba 1.19 sub add_error ($$$) {
125     my ($layer, $err, $result) = @_;
126     if (defined $err->{level}) {
127     if ($err->{level} eq 's') {
128     $result->{$layer}->{should}++;
129     $result->{$layer}->{score_min} -= 2;
130     $result->{conforming_min} = 0;
131     } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
132     $result->{$layer}->{warning}++;
133 wakaba 1.30 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
134 wakaba 1.19 $result->{$layer}->{unsupported}++;
135     $result->{unsupported} = 1;
136 wakaba 1.37 } elsif ($err->{level} eq 'i') {
137     #
138 wakaba 1.19 } else {
139     $result->{$layer}->{must}++;
140     $result->{$layer}->{score_max} -= 2;
141     $result->{$layer}->{score_min} -= 2;
142     $result->{conforming_min} = 0;
143     $result->{conforming_max} = 0;
144     }
145     } else {
146     $result->{$layer}->{must}++;
147     $result->{$layer}->{score_max} -= 2;
148     $result->{$layer}->{score_min} -= 2;
149     $result->{conforming_min} = 0;
150     $result->{conforming_max} = 0;
151     }
152     } # add_error
153    
154 wakaba 1.53 sub check_and_print ($$$) {
155     my ($input, $result, $out) = @_;
156     my $original_input = $out->input;
157     $out->input ($input);
158 wakaba 1.31
159     print_http_header_section ($input, $result);
160    
161 wakaba 1.34 my @subdoc;
162 wakaba 1.31
163 wakaba 1.53 my $checker_class = {
164     'text/cache-manifest' => 'WebHACC::Language::CacheManifest',
165     'text/css' => 'WebHACC::Language::CSS',
166     'text/html' => 'WebHACC::Language::HTML',
167     'text/x-webidl' => 'WebHACC::Language::WebIDL',
168    
169     'text/xml' => 'WebHACC::Language::XML',
170     'application/atom+xml' => 'WebHACC::Language::XML',
171     'application/rss+xml' => 'WebHACC::Language::XML',
172     'image/svg+xml' => 'WebHACC::Language::XML',
173     'application/xhtml+xml' => 'WebHACC::Language::XML',
174     'application/xml' => 'WebHACC::Language::XML',
175     ## TODO: Should we make all XML MIME Types fall
176     ## into this category?
177    
178     ## NOTE: This type has different model from normal XML types.
179     'application/rdf+xml' => 'WebHACC::Language::XML',
180     }->{$input->{media_type}} || 'WebHACC::Language::Default';
181    
182     eval qq{ require $checker_class } or die "$0: Loading $checker_class: $@";
183     my $checker = $checker_class->new;
184     $checker->input ($input);
185     $checker->output ($out);
186     $checker->result ($result);
187    
188     ## TODO: A cache manifest MUST be text/cache-manifest
189     ## TODO: WebIDL media type "text/x-webidl"
190    
191     $checker->generate_syntax_error_section;
192     $checker->generate_source_string_section;
193    
194     $checker->onsubdoc (sub {
195     push @subdoc, shift;
196     });
197    
198     $checker->generate_structure_dump_section;
199     $checker->generate_structure_error_section;
200     $checker->generate_additional_sections;
201    
202     =pod
203 wakaba 1.31
204     if (defined $doc or defined $el) {
205 wakaba 1.53
206 wakaba 1.32 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
207 wakaba 1.33 print_listing_section ({
208     id => 'identifiers', label => 'IDs', heading => 'Identifiers',
209     }, $input, $elements->{id}) if keys %{$elements->{id}};
210     print_listing_section ({
211     id => 'terms', label => 'Terms', heading => 'Terms',
212     }, $input, $elements->{term}) if keys %{$elements->{term}};
213     print_listing_section ({
214     id => 'classes', label => 'Classes', heading => 'Classes',
215     }, $input, $elements->{class}) if keys %{$elements->{class}};
216 wakaba 1.53
217 wakaba 1.45 print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
218 wakaba 1.31 }
219 wakaba 1.34
220 wakaba 1.53 =cut
221    
222 wakaba 1.34 my $id_prefix = 0;
223 wakaba 1.53 for my $_subinput (@subdoc) {
224     my $subinput = WebHACC::Input->new;
225     $subinput->{$_} = $_subinput->{$_} for keys %$_subinput;
226     $subinput->id_prefix ('subdoc-' . ++$id_prefix);
227     $subinput->nested (1);
228 wakaba 1.34 $subinput->{base_uri} = $subinput->{container_node}->base_uri
229     unless defined $subinput->{base_uri};
230     my $ebaseuri = htescape ($subinput->{base_uri});
231 wakaba 1.53 $out->start_section (id => $subinput->id_prefix,
232     title => qq[Subdocument #$id_prefix]);
233     print STDOUT qq[
234 wakaba 1.34 <dl>
235     <dt>Internet Media Type</dt>
236     <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
237     <dt>Container Node</dt>
238     <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
239     <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
240     <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
241     </dl>];
242    
243 wakaba 1.35 $subinput->{id_prefix} .= '-';
244 wakaba 1.53 check_and_print ($subinput => $result => $out);
245 wakaba 1.34
246 wakaba 1.53 $out->end_section;
247 wakaba 1.34 }
248 wakaba 1.53
249     $out->input ($original_input);
250 wakaba 1.31 } # check_and_print
251    
252 wakaba 1.19 sub print_http_header_section ($$) {
253     my ($input, $result) = @_;
254 wakaba 1.9 return unless defined $input->{header_status_code} or
255     defined $input->{header_status_text} or
256 wakaba 1.34 @{$input->{header_field} or []};
257 wakaba 1.9
258 wakaba 1.53 $out->start_section (id => 'source-header', title => 'HTTP Header');
259     print STDOUT qq[<p><strong>Note</strong>: Due to the limitation of the
260 wakaba 1.9 network library in use, the content of this section might
261     not be the real header.</p>
262    
263     <table><tbody>
264     ];
265    
266     if (defined $input->{header_status_code}) {
267     print STDOUT qq[<tr><th scope="row">Status code</th>];
268 wakaba 1.53 print STDOUT qq[<td>];
269     $out->code ($input->{header_status_code});
270 wakaba 1.9 }
271     if (defined $input->{header_status_text}) {
272     print STDOUT qq[<tr><th scope="row">Status text</th>];
273 wakaba 1.53 print STDOUT qq[<td>];
274     $out->code ($input->{header_status_text});
275 wakaba 1.9 }
276    
277     for (@{$input->{header_field}}) {
278 wakaba 1.53 print STDOUT qq[<tr><th scope="row">];
279     $out->code ($_->[0]);
280     print STDOUT qq[<td>];
281     $out->code ($_->[1]);
282 wakaba 1.9 }
283    
284 wakaba 1.53 print STDOUT qq[</tbody></table>];
285    
286     $out->end_section;
287 wakaba 1.9 } # print_http_header_section
288    
289 wakaba 1.32 sub print_table_section ($$) {
290     my ($input, $tables) = @_;
291 wakaba 1.18
292 wakaba 1.53 # push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
293     # unless $input->{nested};
294 wakaba 1.18 print STDOUT qq[
295 wakaba 1.32 <div id="$input->{id_prefix}tables" class="section">
296 wakaba 1.18 <h2>Tables</h2>
297    
298     <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
299     <script src="../table-script.js" type="text/javascript"></script>
300     <noscript>
301     <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
302     </noscript>
303     ];
304    
305     require JSON;
306    
307     my $i = 0;
308 wakaba 1.50 for my $table (@$tables) {
309 wakaba 1.18 $i++;
310 wakaba 1.32 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
311 wakaba 1.50 get_node_link ($input, $table->{element}) . q[</h3>];
312    
313     delete $table->{element};
314 wakaba 1.18
315 wakaba 1.49 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption},
316     @{$table->{row}}) {
317 wakaba 1.18 next unless $_;
318     delete $_->{element};
319     }
320    
321     for (@{$table->{row_group}}) {
322     next unless $_;
323     next unless $_->{element};
324     $_->{type} = $_->{element}->manakai_local_name;
325     delete $_->{element};
326     }
327    
328     for (@{$table->{cell}}) {
329     next unless $_;
330     for (@{$_}) {
331     next unless $_;
332     for (@$_) {
333     $_->{id} = refaddr $_->{element} if defined $_->{element};
334     delete $_->{element};
335     $_->{is_header} = $_->{is_header} ? 1 : 0;
336     }
337     }
338     }
339    
340     print STDOUT '</div><script type="text/javascript">tableToCanvas (';
341     print STDOUT JSON::objToJson ($table);
342 wakaba 1.32 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
343     print STDOUT qq[, '$input->{id_prefix}');</script>];
344 wakaba 1.18 }
345    
346     print STDOUT qq[</div>];
347     } # print_table_section
348    
349 wakaba 1.33 sub print_listing_section ($$$) {
350     my ($opt, $input, $ids) = @_;
351 wakaba 1.18
352 wakaba 1.53 # push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
353     # unless $input->{nested};
354 wakaba 1.18 print STDOUT qq[
355 wakaba 1.33 <div id="$input->{id_prefix}$opt->{id}" class="section">
356     <h2>$opt->{heading}</h2>
357 wakaba 1.18
358     <dl>
359     ];
360     for my $id (sort {$a cmp $b} keys %$ids) {
361     print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
362     for (@{$ids->{$id}}) {
363 wakaba 1.32 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
364 wakaba 1.18 }
365     }
366     print STDOUT qq[</dl></div>];
367 wakaba 1.33 } # print_listing_section
368 wakaba 1.18
369 wakaba 1.48
370 wakaba 1.45 sub print_rdf_section ($$$) {
371     my ($input, $rdfs) = @_;
372    
373 wakaba 1.53 # push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
374     # unless $input->{nested};
375 wakaba 1.45 print STDOUT qq[
376     <div id="$input->{id_prefix}rdf" class="section">
377     <h2>RDF Triples</h2>
378    
379     <dl>];
380     my $i = 0;
381     for my $rdf (@$rdfs) {
382     print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
383     print STDOUT get_node_link ($input, $rdf->[0]);
384     print STDOUT qq[<dd><dl>];
385     for my $triple (@{$rdf->[1]}) {
386     print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
387     print STDOUT get_rdf_resource_html ($triple->[1]);
388     print STDOUT ' ';
389     print STDOUT get_rdf_resource_html ($triple->[2]);
390     print STDOUT ' ';
391     print STDOUT get_rdf_resource_html ($triple->[3]);
392     }
393     print STDOUT qq[</dl>];
394     }
395     print STDOUT qq[</dl></div>];
396     } # print_rdf_section
397    
398     sub get_rdf_resource_html ($) {
399     my $resource = shift;
400 wakaba 1.46 if (defined $resource->{uri}) {
401 wakaba 1.45 my $euri = htescape ($resource->{uri});
402     return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
403     '</a>></code>';
404 wakaba 1.46 } elsif (defined $resource->{bnodeid}) {
405 wakaba 1.45 return htescape ('_:' . $resource->{bnodeid});
406     } elsif ($resource->{nodes}) {
407     return '(rdf:XMLLiteral)';
408     } elsif (defined $resource->{value}) {
409     my $elang = htescape (defined $resource->{language}
410     ? $resource->{language} : '');
411     my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
412     if (defined $resource->{datatype}) {
413     my $euri = htescape ($resource->{datatype});
414     $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
415     '</a>></code>';
416     } elsif (length $resource->{language}) {
417     $r .= '@' . htescape ($resource->{language});
418     }
419     return $r;
420     } else {
421     return '??';
422     }
423     } # get_rdf_resource_html
424    
425 wakaba 1.19 sub print_result_section ($) {
426     my $result = shift;
427    
428 wakaba 1.53 $out->start_section (id => 'result-summary',
429     title => 'Result');
430 wakaba 1.19
431 wakaba 1.21 if ($result->{unsupported} and $result->{conforming_max}) {
432 wakaba 1.19 print STDOUT qq[<p class=uncertain id=result-para>The conformance
433     checker cannot decide whether the document is conforming or
434     not, since the document contains one or more unsupported
435 wakaba 1.21 features. The document might or might not be conforming.</p>];
436 wakaba 1.19 } elsif ($result->{conforming_min}) {
437     print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
438     found in this document.</p>];
439     } elsif ($result->{conforming_max}) {
440     print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
441     is <strong>likely <em>non</em>-conforming</strong>, but in rare case
442     it might be conforming.</p>];
443     } else {
444     print STDOUT qq[<p class=FAIL id=result-para>This document is
445     <strong><em>non</em>-conforming</strong>.</p>];
446     }
447    
448     print STDOUT qq[<table>
449     <colgroup><col><colgroup><col><col><col><colgroup><col>
450     <thead>
451 wakaba 1.23 <tr><th scope=col></th>
452     <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
453     Errors</a></th>
454     <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
455     Errors</a></th>
456     <th scope=col><a href="../error-description#level-w">Warnings</a></th>
457     <th scope=col>Score</th></tr></thead><tbody>];
458 wakaba 1.19
459     my $must_error = 0;
460     my $should_error = 0;
461     my $warning = 0;
462     my $score_min = 0;
463     my $score_max = 0;
464     my $score_base = 20;
465 wakaba 1.21 my $score_unit = $score_base / 100;
466 wakaba 1.19 for (
467     [Transfer => 'transfer', ''],
468     [Character => 'char', ''],
469     [Syntax => 'syntax', '#parse-errors'],
470     [Structure => 'structure', '#document-errors'],
471     ) {
472     $must_error += ($result->{$_->[1]}->{must} += 0);
473     $should_error += ($result->{$_->[1]}->{should} += 0);
474     $warning += ($result->{$_->[1]}->{warning} += 0);
475 wakaba 1.21 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
476     $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
477 wakaba 1.19
478     my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
479     my $label = $_->[0];
480     if ($result->{$_->[1]}->{must} or
481     $result->{$_->[1]}->{should} or
482     $result->{$_->[1]}->{warning} or
483     $result->{$_->[1]}->{unsupported}) {
484     $label = qq[<a href="$_->[2]">$label</a>];
485     }
486    
487     print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
488     if ($uncertain) {
489 wakaba 1.51 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}];
490 wakaba 1.19 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
491 wakaba 1.51 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}];
492 wakaba 1.19 } else {
493 wakaba 1.51 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}];
494 wakaba 1.19 }
495 wakaba 1.51 print qq[ / 20];
496 wakaba 1.19 }
497    
498     $score_max += $score_base;
499    
500     print STDOUT qq[
501 wakaba 1.51 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base / 20
502 wakaba 1.19 </tbody>
503 wakaba 1.21 <tfoot><tr class=uncertain><th scope=row>Total</th>
504     <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
505     <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
506     <td>$warning?</td>
507 wakaba 1.51 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong> / 100
508 wakaba 1.19 </table>
509    
510     <p><strong>Important</strong>: This conformance checking service
511 wakaba 1.53 is <em>under development</em>. The result above might be <em>wrong</em>.</p>];
512     $out->end_section;
513 wakaba 1.19 } # print_result_section
514    
515 wakaba 1.18 sub print_result_input_error_section ($) {
516     my $input = shift;
517 wakaba 1.53 $out->start_section (id => 'result-summary', title => 'Result');
518     print STDOUT qq[
519     <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>];
520     $out->end_section;
521 wakaba 1.32 } # print_result_input_error_section
522 wakaba 1.18
523 wakaba 1.7 {
524     my $Msg = {};
525    
526     sub load_text_catalog ($) {
527 wakaba 1.53 # my $self = shift;
528 wakaba 1.7 my $lang = shift; # MUST be a canonical lang name
529 wakaba 1.26 open my $file, '<:utf8', "cc-msg.$lang.txt"
530     or die "$0: cc-msg.$lang.txt: $!";
531 wakaba 1.7 while (<$file>) {
532     if (s/^([^;]+);([^;]*);//) {
533     my ($type, $cls, $msg) = ($1, $2, $_);
534     $msg =~ tr/\x0D\x0A//d;
535     $Msg->{$type} = [$cls, $msg];
536     }
537     }
538     } # load_text_catalog
539    
540 wakaba 1.53 sub get_text ($;$$) {
541     # my $self = shift;
542 wakaba 1.15 my ($type, $level, $node) = @_;
543 wakaba 1.7 $type = $level . ':' . $type if defined $level;
544 wakaba 1.29 $level = 'm' unless defined $level;
545 wakaba 1.7 my @arg;
546     {
547     if (defined $Msg->{$type}) {
548     my $msg = $Msg->{$type}->[1];
549 wakaba 1.10 $msg =~ s{<var>\$([0-9]+)</var>}{
550 wakaba 1.53 defined $arg[$1] ? ($arg[$1]) : '(undef)';
551     }ge; ##BUG: ^ must be escaped
552 wakaba 1.15 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
553     UNIVERSAL::can ($node, 'get_attribute_ns')
554 wakaba 1.53 ? ($node->get_attribute_ns (undef, $1)) : ''
555     }ge; ## BUG: ^ must be escaped
556     $msg =~ s{<var>{\@}</var>}{ ## BUG: v must be escaped
557     UNIVERSAL::can ($node, 'value') ? ($node->value) : ''
558 wakaba 1.15 }ge;
559 wakaba 1.17 $msg =~ s{<var>{local-name}</var>}{
560     UNIVERSAL::can ($node, 'manakai_local_name')
561 wakaba 1.53 ? ($node->manakai_local_name) : ''
562     }ge; ## BUG: ^ must be escaped
563 wakaba 1.17 $msg =~ s{<var>{element-local-name}</var>}{
564     (UNIVERSAL::can ($node, 'owner_element') and
565     $node->owner_element)
566 wakaba 1.53 ? ($node->owner_element->manakai_local_name)
567     : '' ## BUG: ^ must be escaped
568 wakaba 1.17 }ge;
569 wakaba 1.29 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
570 wakaba 1.7 } elsif ($type =~ s/:([^:]*)$//) {
571     unshift @arg, $1;
572     redo;
573     }
574     }
575 wakaba 1.53 return ($type, 'level-'.$level, ($_[0]));
576     ## BUG: ^ must be escaped
577 wakaba 1.7 } # get_text
578    
579     }
580    
581 wakaba 1.9 sub get_input_document ($$) {
582     my ($http, $dom) = @_;
583    
584 wakaba 1.16 my $request_uri = $http->get_parameter ('uri');
585 wakaba 1.53 my $r = WebHACC::Input->new;
586 wakaba 1.9 if (defined $request_uri and length $request_uri) {
587     my $uri = $dom->create_uri_reference ($request_uri);
588     unless ({
589     http => 1,
590     }->{lc $uri->uri_scheme}) {
591     return {uri => $request_uri, request_uri => $request_uri,
592     error_status_text => 'URI scheme not allowed'};
593     }
594    
595     require Message::Util::HostPermit;
596     my $host_permit = new Message::Util::HostPermit;
597     $host_permit->add_rule (<<EOH);
598     Allow host=suika port=80
599     Deny host=suika
600     Allow host=suika.fam.cx port=80
601     Deny host=suika.fam.cx
602     Deny host=localhost
603     Deny host=*.localdomain
604     Deny ipv4=0.0.0.0/8
605     Deny ipv4=10.0.0.0/8
606     Deny ipv4=127.0.0.0/8
607     Deny ipv4=169.254.0.0/16
608     Deny ipv4=172.0.0.0/11
609     Deny ipv4=192.0.2.0/24
610     Deny ipv4=192.88.99.0/24
611     Deny ipv4=192.168.0.0/16
612     Deny ipv4=198.18.0.0/15
613     Deny ipv4=224.0.0.0/4
614     Deny ipv4=255.255.255.255/32
615     Deny ipv6=0::0/0
616     Allow host=*
617     EOH
618     unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
619     return {uri => $request_uri, request_uri => $request_uri,
620     error_status_text => 'Connection to the host is forbidden'};
621     }
622    
623     require LWP::UserAgent;
624     my $ua = WDCC::LWPUA->new;
625     $ua->{wdcc_dom} = $dom;
626     $ua->{wdcc_host_permit} = $host_permit;
627     $ua->agent ('Mozilla'); ## TODO: for now.
628     $ua->parse_head (0);
629     $ua->protocols_allowed ([qw/http/]);
630     $ua->max_size (1000_000);
631     my $req = HTTP::Request->new (GET => $request_uri);
632 wakaba 1.28 $req->header ('Accept-Encoding' => 'identity, *; q=0');
633 wakaba 1.9 my $res = $ua->request ($req);
634 wakaba 1.16 ## TODO: 401 sets |is_success| true.
635     if ($res->is_success or $http->get_parameter ('error-page')) {
636 wakaba 1.9 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
637     $r->{uri} = $res->request->uri;
638     $r->{request_uri} = $request_uri;
639    
640     ## TODO: More strict parsing...
641     my $ct = $res->header ('Content-Type');
642 wakaba 1.22 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
643 wakaba 1.9 $r->{charset} = lc $1;
644     $r->{charset} =~ tr/\\//d;
645 wakaba 1.26 $r->{official_charset} = $r->{charset};
646 wakaba 1.9 }
647    
648 wakaba 1.16 my $input_charset = $http->get_parameter ('charset');
649 wakaba 1.9 if (defined $input_charset and length $input_charset) {
650     $r->{charset_overridden}
651     = (not defined $r->{charset} or $r->{charset} ne $input_charset);
652     $r->{charset} = $input_charset;
653 wakaba 1.25 }
654    
655     ## TODO: Support for HTTP Content-Encoding
656 wakaba 1.9
657     $r->{s} = ''.$res->content;
658 wakaba 1.25
659     require Whatpm::ContentType;
660     ($r->{official_type}, $r->{media_type})
661     = Whatpm::ContentType->get_sniffed_type
662     (get_file_head => sub {
663     return substr $r->{s}, 0, shift;
664     },
665     http_content_type_byte => $ct,
666     has_http_content_encoding =>
667     defined $res->header ('Content-Encoding'),
668     supported_image_types => {});
669 wakaba 1.9 } else {
670     $r->{uri} = $res->request->uri;
671     $r->{request_uri} = $request_uri;
672     $r->{error_status_text} = $res->status_line;
673     }
674    
675     $r->{header_field} = [];
676     $res->scan (sub {
677     push @{$r->{header_field}}, [$_[0], $_[1]];
678     });
679     $r->{header_status_code} = $res->code;
680     $r->{header_status_text} = $res->message;
681     } else {
682 wakaba 1.16 $r->{s} = ''.$http->get_parameter ('s');
683 wakaba 1.9 $r->{uri} = q<thismessage:/>;
684     $r->{request_uri} = q<thismessage:/>;
685     $r->{base_uri} = q<thismessage:/>;
686 wakaba 1.16 $r->{charset} = ''.$http->get_parameter ('_charset_');
687 wakaba 1.9 $r->{charset} =~ s/\s+//g;
688     $r->{charset} = 'utf-8' if $r->{charset} eq '';
689 wakaba 1.26 $r->{official_charset} = $r->{charset};
690 wakaba 1.9 $r->{header_field} = [];
691 wakaba 1.25
692     require Whatpm::ContentType;
693     ($r->{official_type}, $r->{media_type})
694     = Whatpm::ContentType->get_sniffed_type
695     (get_file_head => sub {
696     return substr $r->{s}, 0, shift;
697     },
698     http_content_type_byte => undef,
699     has_http_content_encoding => 0,
700     supported_image_types => {});
701 wakaba 1.9 }
702    
703 wakaba 1.16 my $input_format = $http->get_parameter ('i');
704 wakaba 1.9 if (defined $input_format and length $input_format) {
705     $r->{media_type_overridden}
706     = (not defined $r->{media_type} or $input_format ne $r->{media_type});
707     $r->{media_type} = $input_format;
708     }
709     if (defined $r->{s} and not defined $r->{media_type}) {
710     $r->{media_type} = 'text/html';
711     $r->{media_type_overridden} = 1;
712     }
713    
714     if ($r->{media_type} eq 'text/xml') {
715     unless (defined $r->{charset}) {
716     $r->{charset} = 'us-ascii';
717 wakaba 1.26 $r->{official_charset} = $r->{charset};
718 wakaba 1.9 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
719     $r->{charset_overridden} = 0;
720     }
721     }
722    
723     if (length $r->{s} > 1000_000) {
724     $r->{error_status_text} = 'Entity-body too large';
725     delete $r->{s};
726     return $r;
727     }
728    
729 wakaba 1.35 $r->{inner_html_element} = $http->get_parameter ('e');
730    
731 wakaba 1.9 return $r;
732     } # get_input_document
733    
734     package WDCC::LWPUA;
735     BEGIN { push our @ISA, 'LWP::UserAgent'; }
736    
737     sub redirect_ok {
738     my $ua = shift;
739     unless ($ua->SUPER::redirect_ok (@_)) {
740     return 0;
741     }
742    
743     my $uris = $_[1]->header ('Location');
744     return 0 unless $uris;
745     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
746     unless ({
747     http => 1,
748     }->{lc $uri->uri_scheme}) {
749     return 0;
750     }
751     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
752     return 0;
753     }
754     return 1;
755     } # redirect_ok
756    
757 wakaba 1.1 =head1 AUTHOR
758    
759     Wakaba <w@suika.fam.cx>.
760    
761     =head1 LICENSE
762    
763 wakaba 1.35 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
764 wakaba 1.1
765     This library is free software; you can redistribute it
766     and/or modify it under the same terms as Perl itself.
767    
768     =cut
769    
770 wakaba 1.53 ## $Date: 2008/07/18 14:44:16 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24