/[suikacvs]/test/html-webhacc/cc.cgi
Suika

Contents of /test/html-webhacc/cc.cgi

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.53 - (show annotations) (download)
Sun Jul 20 14:58:24 2008 UTC (15 years, 10 months ago) by wakaba
Branch: MAIN
Changes since 1.52: +175 -1125 lines
++ ChangeLog	20 Jul 2008 14:58:20 -0000
2008-07-20  Wakaba  <wakaba@suika.fam.cx>

	* cc.cgi: Modularized.

	* WebHACC/: New directory.

1 #!/usr/bin/perl
2 use strict;
3 use utf8;
4
5 use lib qw[/home/httpd/html/www/markup/html/whatpm
6 /home/wakaba/work/manakai2/lib];
7 use CGI::Carp qw[fatalsToBrowser];
8 use Scalar::Util qw[refaddr];
9
10 require WebHACC::Input;
11 require WebHACC::Result;
12 require WebHACC::Output;
13
14 my $out;
15
16 require Message::DOM::DOMImplementation;
17 my $dom = Message::DOM::DOMImplementation->new;
18 {
19 use Message::CGI::HTTP;
20 my $http = Message::CGI::HTTP->new;
21
22 if ($http->get_meta_variable ('PATH_INFO') ne '/') {
23 print STDOUT "Status: 404 Not Found\nContent-Type: text/plain; charset=us-ascii\n\n400";
24 exit;
25 }
26
27 load_text_catalog ('en'); ## TODO: conneg
28
29 $out = WebHACC::Output->new;
30 $out->handle (*STDOUT);
31 $out->set_utf8;
32 $out->set_flush;
33 $out->html (qq[Content-Type: text/html; charset=utf-8
34
35 <!DOCTYPE html>
36 <html lang="en">
37 <head>
38 <title>Web Document Conformance Checker (BETA)</title>
39 <link rel="stylesheet" href="../cc-style.css" type="text/css">
40 </head>
41 <body>
42 <h1><a href="../cc-interface">Web Document Conformance Checker</a>
43 (<em>beta</em>)</h1>
44 ]);
45
46 my $input = get_input_document ($http, $dom);
47 $out->input ($input);
48 $out->unset_flush;
49
50 my $char_length = 0;
51
52 $out->start_section (id => 'document-info', title => 'Information');
53 $out->html (qq[<dl>
54 <dt>Request URL</dt>
55 <dd>]);
56 $out->url ($input->{request_uri});
57 $out->html (q[<dt>Document URL<!-- HTML5 document's address? -->
58 <dd>]);
59 $out->url ($input->{uri}, id => 'anchor-document-url');
60 $out->html (q[
61 <script>
62 document.title = '<'
63 + document.getElementById ('anchor-document-url').href + '> \\u2014 '
64 + document.title;
65 </script>]);
66 ## NOTE: no </dl> yet
67
68 if (defined $input->{s}) {
69 $char_length = length $input->{s};
70
71 $out->html (qq[<dt>Base URI<dd>]);
72 $out->url ($input->{base_uri});
73 $out->html (qq[<dt>Internet Media Type</dt>
74 <dd><code class="MIME" lang="en">]);
75 $out->text ($input->{media_type});
76 $out->html (qq[</code> ]);
77 if ($input->{media_type_overridden}) {
78 $out->html ('<em>(overridden)</em>');
79 } elsif (defined $input->{official_type}) {
80 if ($input->{media_type} eq $input->{official_type}) {
81 #
82 } else {
83 $out->html ('<em>(sniffed; official type is: <code class=MIME lang=en>');
84 $out->text ($input->{official_type});
85 $out->html ('</code>)');
86 }
87 } else {
88 $out->html ('<em>(sniffed)</em>');
89 }
90 $out->html (q[<dt>Character Encoding<dd>]);
91 if (defined $input->{charset}) {
92 $out->html ('<code class="charset" lang="en">');
93 $out->text ($input->{charset});
94 $out->html ('</code>');
95 } else {
96 $out->text ('(none)');
97 }
98 $out->html (' <em>overridden</em>') if $input->{charset_overridden};
99 $out->html (qq[
100 <dt>Length</dt>
101 <dd>$char_length byte@{[$char_length == 1 ? '' : 's']}</dd>
102 </dl>
103
104 <script src="../cc-script.js"></script>
105 ]);
106 $out->end_section;
107
108 my $result = WebHACC::Result->new;
109 $result->{conforming_min} = 1;
110 $result->{conforming_max} = 1;
111 check_and_print ($input => $result => $out);
112 print_result_section ($result);
113 } else {
114 $out->html ('</dl>');
115 $out->end_section;
116 print_result_input_error_section ($input);
117 }
118
119 $out->nav_list;
120
121 exit;
122 }
123
124 sub add_error ($$$) {
125 my ($layer, $err, $result) = @_;
126 if (defined $err->{level}) {
127 if ($err->{level} eq 's') {
128 $result->{$layer}->{should}++;
129 $result->{$layer}->{score_min} -= 2;
130 $result->{conforming_min} = 0;
131 } elsif ($err->{level} eq 'w' or $err->{level} eq 'g') {
132 $result->{$layer}->{warning}++;
133 } elsif ($err->{level} eq 'u' or $err->{level} eq 'unsupported') {
134 $result->{$layer}->{unsupported}++;
135 $result->{unsupported} = 1;
136 } elsif ($err->{level} eq 'i') {
137 #
138 } else {
139 $result->{$layer}->{must}++;
140 $result->{$layer}->{score_max} -= 2;
141 $result->{$layer}->{score_min} -= 2;
142 $result->{conforming_min} = 0;
143 $result->{conforming_max} = 0;
144 }
145 } else {
146 $result->{$layer}->{must}++;
147 $result->{$layer}->{score_max} -= 2;
148 $result->{$layer}->{score_min} -= 2;
149 $result->{conforming_min} = 0;
150 $result->{conforming_max} = 0;
151 }
152 } # add_error
153
154 sub check_and_print ($$$) {
155 my ($input, $result, $out) = @_;
156 my $original_input = $out->input;
157 $out->input ($input);
158
159 print_http_header_section ($input, $result);
160
161 my @subdoc;
162
163 my $checker_class = {
164 'text/cache-manifest' => 'WebHACC::Language::CacheManifest',
165 'text/css' => 'WebHACC::Language::CSS',
166 'text/html' => 'WebHACC::Language::HTML',
167 'text/x-webidl' => 'WebHACC::Language::WebIDL',
168
169 'text/xml' => 'WebHACC::Language::XML',
170 'application/atom+xml' => 'WebHACC::Language::XML',
171 'application/rss+xml' => 'WebHACC::Language::XML',
172 'image/svg+xml' => 'WebHACC::Language::XML',
173 'application/xhtml+xml' => 'WebHACC::Language::XML',
174 'application/xml' => 'WebHACC::Language::XML',
175 ## TODO: Should we make all XML MIME Types fall
176 ## into this category?
177
178 ## NOTE: This type has different model from normal XML types.
179 'application/rdf+xml' => 'WebHACC::Language::XML',
180 }->{$input->{media_type}} || 'WebHACC::Language::Default';
181
182 eval qq{ require $checker_class } or die "$0: Loading $checker_class: $@";
183 my $checker = $checker_class->new;
184 $checker->input ($input);
185 $checker->output ($out);
186 $checker->result ($result);
187
188 ## TODO: A cache manifest MUST be text/cache-manifest
189 ## TODO: WebIDL media type "text/x-webidl"
190
191 $checker->generate_syntax_error_section;
192 $checker->generate_source_string_section;
193
194 $checker->onsubdoc (sub {
195 push @subdoc, shift;
196 });
197
198 $checker->generate_structure_dump_section;
199 $checker->generate_structure_error_section;
200 $checker->generate_additional_sections;
201
202 =pod
203
204 if (defined $doc or defined $el) {
205
206 print_table_section ($input, $elements->{table}) if @{$elements->{table}};
207 print_listing_section ({
208 id => 'identifiers', label => 'IDs', heading => 'Identifiers',
209 }, $input, $elements->{id}) if keys %{$elements->{id}};
210 print_listing_section ({
211 id => 'terms', label => 'Terms', heading => 'Terms',
212 }, $input, $elements->{term}) if keys %{$elements->{term}};
213 print_listing_section ({
214 id => 'classes', label => 'Classes', heading => 'Classes',
215 }, $input, $elements->{class}) if keys %{$elements->{class}};
216
217 print_rdf_section ($input, $elements->{rdf}) if @{$elements->{rdf}};
218 }
219
220 =cut
221
222 my $id_prefix = 0;
223 for my $_subinput (@subdoc) {
224 my $subinput = WebHACC::Input->new;
225 $subinput->{$_} = $_subinput->{$_} for keys %$_subinput;
226 $subinput->id_prefix ('subdoc-' . ++$id_prefix);
227 $subinput->nested (1);
228 $subinput->{base_uri} = $subinput->{container_node}->base_uri
229 unless defined $subinput->{base_uri};
230 my $ebaseuri = htescape ($subinput->{base_uri});
231 $out->start_section (id => $subinput->id_prefix,
232 title => qq[Subdocument #$id_prefix]);
233 print STDOUT qq[
234 <dl>
235 <dt>Internet Media Type</dt>
236 <dd><code class="MIME" lang="en">@{[htescape $subinput->{media_type}]}</code>
237 <dt>Container Node</dt>
238 <dd>@{[get_node_link ($input, $subinput->{container_node})]}</dd>
239 <dt>Base <abbr title="Uniform Resource Identifiers">URI</abbr></dt>
240 <dd><code class=URI>&lt;<a href="$ebaseuri">$ebaseuri</a>></code></dd>
241 </dl>];
242
243 $subinput->{id_prefix} .= '-';
244 check_and_print ($subinput => $result => $out);
245
246 $out->end_section;
247 }
248
249 $out->input ($original_input);
250 } # check_and_print
251
252 sub print_http_header_section ($$) {
253 my ($input, $result) = @_;
254 return unless defined $input->{header_status_code} or
255 defined $input->{header_status_text} or
256 @{$input->{header_field} or []};
257
258 $out->start_section (id => 'source-header', title => 'HTTP Header');
259 print STDOUT qq[<p><strong>Note</strong>: Due to the limitation of the
260 network library in use, the content of this section might
261 not be the real header.</p>
262
263 <table><tbody>
264 ];
265
266 if (defined $input->{header_status_code}) {
267 print STDOUT qq[<tr><th scope="row">Status code</th>];
268 print STDOUT qq[<td>];
269 $out->code ($input->{header_status_code});
270 }
271 if (defined $input->{header_status_text}) {
272 print STDOUT qq[<tr><th scope="row">Status text</th>];
273 print STDOUT qq[<td>];
274 $out->code ($input->{header_status_text});
275 }
276
277 for (@{$input->{header_field}}) {
278 print STDOUT qq[<tr><th scope="row">];
279 $out->code ($_->[0]);
280 print STDOUT qq[<td>];
281 $out->code ($_->[1]);
282 }
283
284 print STDOUT qq[</tbody></table>];
285
286 $out->end_section;
287 } # print_http_header_section
288
289 sub print_table_section ($$) {
290 my ($input, $tables) = @_;
291
292 # push @nav, [qq[#$input->{id_prefix}tables] => 'Tables']
293 # unless $input->{nested};
294 print STDOUT qq[
295 <div id="$input->{id_prefix}tables" class="section">
296 <h2>Tables</h2>
297
298 <!--[if IE]><script type="text/javascript" src="../excanvas.js"></script><![endif]-->
299 <script src="../table-script.js" type="text/javascript"></script>
300 <noscript>
301 <p><em>Structure of tables are visualized here if scripting is enabled.</em></p>
302 </noscript>
303 ];
304
305 require JSON;
306
307 my $i = 0;
308 for my $table (@$tables) {
309 $i++;
310 print STDOUT qq[<div class="section" id="$input->{id_prefix}table-$i"><h3>] .
311 get_node_link ($input, $table->{element}) . q[</h3>];
312
313 delete $table->{element};
314
315 for (@{$table->{column_group}}, @{$table->{column}}, $table->{caption},
316 @{$table->{row}}) {
317 next unless $_;
318 delete $_->{element};
319 }
320
321 for (@{$table->{row_group}}) {
322 next unless $_;
323 next unless $_->{element};
324 $_->{type} = $_->{element}->manakai_local_name;
325 delete $_->{element};
326 }
327
328 for (@{$table->{cell}}) {
329 next unless $_;
330 for (@{$_}) {
331 next unless $_;
332 for (@$_) {
333 $_->{id} = refaddr $_->{element} if defined $_->{element};
334 delete $_->{element};
335 $_->{is_header} = $_->{is_header} ? 1 : 0;
336 }
337 }
338 }
339
340 print STDOUT '</div><script type="text/javascript">tableToCanvas (';
341 print STDOUT JSON::objToJson ($table);
342 print STDOUT qq[, document.getElementById ('$input->{id_prefix}table-$i')];
343 print STDOUT qq[, '$input->{id_prefix}');</script>];
344 }
345
346 print STDOUT qq[</div>];
347 } # print_table_section
348
349 sub print_listing_section ($$$) {
350 my ($opt, $input, $ids) = @_;
351
352 # push @nav, ['#' . $input->{id_prefix} . $opt->{id} => $opt->{label}]
353 # unless $input->{nested};
354 print STDOUT qq[
355 <div id="$input->{id_prefix}$opt->{id}" class="section">
356 <h2>$opt->{heading}</h2>
357
358 <dl>
359 ];
360 for my $id (sort {$a cmp $b} keys %$ids) {
361 print STDOUT qq[<dt><code>@{[htescape $id]}</code></dt>];
362 for (@{$ids->{$id}}) {
363 print STDOUT qq[<dd>].get_node_link ($input, $_).qq[</dd>];
364 }
365 }
366 print STDOUT qq[</dl></div>];
367 } # print_listing_section
368
369
370 sub print_rdf_section ($$$) {
371 my ($input, $rdfs) = @_;
372
373 # push @nav, ['#' . $input->{id_prefix} . 'rdf' => 'RDF']
374 # unless $input->{nested};
375 print STDOUT qq[
376 <div id="$input->{id_prefix}rdf" class="section">
377 <h2>RDF Triples</h2>
378
379 <dl>];
380 my $i = 0;
381 for my $rdf (@$rdfs) {
382 print STDOUT qq[<dt id="$input->{id_prefix}rdf-@{[$i++]}">];
383 print STDOUT get_node_link ($input, $rdf->[0]);
384 print STDOUT qq[<dd><dl>];
385 for my $triple (@{$rdf->[1]}) {
386 print STDOUT '<dt>' . get_node_link ($input, $triple->[0]) . '<dd>';
387 print STDOUT get_rdf_resource_html ($triple->[1]);
388 print STDOUT ' ';
389 print STDOUT get_rdf_resource_html ($triple->[2]);
390 print STDOUT ' ';
391 print STDOUT get_rdf_resource_html ($triple->[3]);
392 }
393 print STDOUT qq[</dl>];
394 }
395 print STDOUT qq[</dl></div>];
396 } # print_rdf_section
397
398 sub get_rdf_resource_html ($) {
399 my $resource = shift;
400 if (defined $resource->{uri}) {
401 my $euri = htescape ($resource->{uri});
402 return '<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
403 '</a>></code>';
404 } elsif (defined $resource->{bnodeid}) {
405 return htescape ('_:' . $resource->{bnodeid});
406 } elsif ($resource->{nodes}) {
407 return '(rdf:XMLLiteral)';
408 } elsif (defined $resource->{value}) {
409 my $elang = htescape (defined $resource->{language}
410 ? $resource->{language} : '');
411 my $r = qq[<q lang="$elang">] . htescape ($resource->{value}) . '</q>';
412 if (defined $resource->{datatype}) {
413 my $euri = htescape ($resource->{datatype});
414 $r .= '^^<code class=uri>&lt;<a href="' . $euri . '">' . $euri .
415 '</a>></code>';
416 } elsif (length $resource->{language}) {
417 $r .= '@' . htescape ($resource->{language});
418 }
419 return $r;
420 } else {
421 return '??';
422 }
423 } # get_rdf_resource_html
424
425 sub print_result_section ($) {
426 my $result = shift;
427
428 $out->start_section (id => 'result-summary',
429 title => 'Result');
430
431 if ($result->{unsupported} and $result->{conforming_max}) {
432 print STDOUT qq[<p class=uncertain id=result-para>The conformance
433 checker cannot decide whether the document is conforming or
434 not, since the document contains one or more unsupported
435 features. The document might or might not be conforming.</p>];
436 } elsif ($result->{conforming_min}) {
437 print STDOUT qq[<p class=PASS id=result-para>No conformance-error is
438 found in this document.</p>];
439 } elsif ($result->{conforming_max}) {
440 print STDOUT qq[<p class=SEE-RESULT id=result-para>This document
441 is <strong>likely <em>non</em>-conforming</strong>, but in rare case
442 it might be conforming.</p>];
443 } else {
444 print STDOUT qq[<p class=FAIL id=result-para>This document is
445 <strong><em>non</em>-conforming</strong>.</p>];
446 }
447
448 print STDOUT qq[<table>
449 <colgroup><col><colgroup><col><col><col><colgroup><col>
450 <thead>
451 <tr><th scope=col></th>
452 <th scope=col><a href="../error-description#level-m"><em class=rfc2119>MUST</em>‐level
453 Errors</a></th>
454 <th scope=col><a href="../error-description#level-s"><em class=rfc2119>SHOULD</em>‐level
455 Errors</a></th>
456 <th scope=col><a href="../error-description#level-w">Warnings</a></th>
457 <th scope=col>Score</th></tr></thead><tbody>];
458
459 my $must_error = 0;
460 my $should_error = 0;
461 my $warning = 0;
462 my $score_min = 0;
463 my $score_max = 0;
464 my $score_base = 20;
465 my $score_unit = $score_base / 100;
466 for (
467 [Transfer => 'transfer', ''],
468 [Character => 'char', ''],
469 [Syntax => 'syntax', '#parse-errors'],
470 [Structure => 'structure', '#document-errors'],
471 ) {
472 $must_error += ($result->{$_->[1]}->{must} += 0);
473 $should_error += ($result->{$_->[1]}->{should} += 0);
474 $warning += ($result->{$_->[1]}->{warning} += 0);
475 $score_min += (($result->{$_->[1]}->{score_min} *= $score_unit) += $score_base);
476 $score_max += (($result->{$_->[1]}->{score_max} *= $score_unit) += $score_base);
477
478 my $uncertain = $result->{$_->[1]}->{unsupported} ? '?' : '';
479 my $label = $_->[0];
480 if ($result->{$_->[1]}->{must} or
481 $result->{$_->[1]}->{should} or
482 $result->{$_->[1]}->{warning} or
483 $result->{$_->[1]}->{unsupported}) {
484 $label = qq[<a href="$_->[2]">$label</a>];
485 }
486
487 print STDOUT qq[<tr class="@{[$uncertain ? 'uncertain' : '']}"><th scope=row>$label</th><td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{must}$uncertain</td><td class="@{[$result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">$result->{$_->[1]}->{should}$uncertain</td><td>$result->{$_->[1]}->{warning}$uncertain</td>];
488 if ($uncertain) {
489 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : $result->{$_->[1]}->{should} ? 'SEE-RESULT' : '']}">&#x2212;&#x221E;..$result->{$_->[1]}->{score_max}];
490 } elsif ($result->{$_->[1]}->{score_min} != $result->{$_->[1]}->{score_max}) {
491 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : 'SEE-RESULT']}">$result->{$_->[1]}->{score_min}..$result->{$_->[1]}->{score_max}];
492 } else {
493 print qq[<td class="@{[$result->{$_->[1]}->{must} ? 'FAIL' : '']}">$result->{$_->[1]}->{score_min}];
494 }
495 print qq[ / 20];
496 }
497
498 $score_max += $score_base;
499
500 print STDOUT qq[
501 <tr class=uncertain><th scope=row>Semantics</th><td>0?</td><td>0?</td><td>0?</td><td>&#x2212;&#x221E;..$score_base / 20
502 </tbody>
503 <tfoot><tr class=uncertain><th scope=row>Total</th>
504 <td class="@{[$must_error ? 'FAIL' : '']}">$must_error?</td>
505 <td class="@{[$should_error ? 'SEE-RESULT' : '']}">$should_error?</td>
506 <td>$warning?</td>
507 <td class="@{[$must_error ? 'FAIL' : $should_error ? 'SEE-RESULT' : '']}"><strong>&#x2212;&#x221E;..$score_max</strong> / 100
508 </table>
509
510 <p><strong>Important</strong>: This conformance checking service
511 is <em>under development</em>. The result above might be <em>wrong</em>.</p>];
512 $out->end_section;
513 } # print_result_section
514
515 sub print_result_input_error_section ($) {
516 my $input = shift;
517 $out->start_section (id => 'result-summary', title => 'Result');
518 print STDOUT qq[
519 <p><em><strong>Input Error</strong>: @{[htescape ($input->{error_status_text})]}</em></p>];
520 $out->end_section;
521 } # print_result_input_error_section
522
523 {
524 my $Msg = {};
525
526 sub load_text_catalog ($) {
527 # my $self = shift;
528 my $lang = shift; # MUST be a canonical lang name
529 open my $file, '<:utf8', "cc-msg.$lang.txt"
530 or die "$0: cc-msg.$lang.txt: $!";
531 while (<$file>) {
532 if (s/^([^;]+);([^;]*);//) {
533 my ($type, $cls, $msg) = ($1, $2, $_);
534 $msg =~ tr/\x0D\x0A//d;
535 $Msg->{$type} = [$cls, $msg];
536 }
537 }
538 } # load_text_catalog
539
540 sub get_text ($;$$) {
541 # my $self = shift;
542 my ($type, $level, $node) = @_;
543 $type = $level . ':' . $type if defined $level;
544 $level = 'm' unless defined $level;
545 my @arg;
546 {
547 if (defined $Msg->{$type}) {
548 my $msg = $Msg->{$type}->[1];
549 $msg =~ s{<var>\$([0-9]+)</var>}{
550 defined $arg[$1] ? ($arg[$1]) : '(undef)';
551 }ge; ##BUG: ^ must be escaped
552 $msg =~ s{<var>{\@([A-Za-z0-9:_.-]+)}</var>}{
553 UNIVERSAL::can ($node, 'get_attribute_ns')
554 ? ($node->get_attribute_ns (undef, $1)) : ''
555 }ge; ## BUG: ^ must be escaped
556 $msg =~ s{<var>{\@}</var>}{ ## BUG: v must be escaped
557 UNIVERSAL::can ($node, 'value') ? ($node->value) : ''
558 }ge;
559 $msg =~ s{<var>{local-name}</var>}{
560 UNIVERSAL::can ($node, 'manakai_local_name')
561 ? ($node->manakai_local_name) : ''
562 }ge; ## BUG: ^ must be escaped
563 $msg =~ s{<var>{element-local-name}</var>}{
564 (UNIVERSAL::can ($node, 'owner_element') and
565 $node->owner_element)
566 ? ($node->owner_element->manakai_local_name)
567 : '' ## BUG: ^ must be escaped
568 }ge;
569 return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
570 } elsif ($type =~ s/:([^:]*)$//) {
571 unshift @arg, $1;
572 redo;
573 }
574 }
575 return ($type, 'level-'.$level, ($_[0]));
576 ## BUG: ^ must be escaped
577 } # get_text
578
579 }
580
581 sub get_input_document ($$) {
582 my ($http, $dom) = @_;
583
584 my $request_uri = $http->get_parameter ('uri');
585 my $r = WebHACC::Input->new;
586 if (defined $request_uri and length $request_uri) {
587 my $uri = $dom->create_uri_reference ($request_uri);
588 unless ({
589 http => 1,
590 }->{lc $uri->uri_scheme}) {
591 return {uri => $request_uri, request_uri => $request_uri,
592 error_status_text => 'URI scheme not allowed'};
593 }
594
595 require Message::Util::HostPermit;
596 my $host_permit = new Message::Util::HostPermit;
597 $host_permit->add_rule (<<EOH);
598 Allow host=suika port=80
599 Deny host=suika
600 Allow host=suika.fam.cx port=80
601 Deny host=suika.fam.cx
602 Deny host=localhost
603 Deny host=*.localdomain
604 Deny ipv4=0.0.0.0/8
605 Deny ipv4=10.0.0.0/8
606 Deny ipv4=127.0.0.0/8
607 Deny ipv4=169.254.0.0/16
608 Deny ipv4=172.0.0.0/11
609 Deny ipv4=192.0.2.0/24
610 Deny ipv4=192.88.99.0/24
611 Deny ipv4=192.168.0.0/16
612 Deny ipv4=198.18.0.0/15
613 Deny ipv4=224.0.0.0/4
614 Deny ipv4=255.255.255.255/32
615 Deny ipv6=0::0/0
616 Allow host=*
617 EOH
618 unless ($host_permit->check ($uri->uri_host, $uri->uri_port || 80)) {
619 return {uri => $request_uri, request_uri => $request_uri,
620 error_status_text => 'Connection to the host is forbidden'};
621 }
622
623 require LWP::UserAgent;
624 my $ua = WDCC::LWPUA->new;
625 $ua->{wdcc_dom} = $dom;
626 $ua->{wdcc_host_permit} = $host_permit;
627 $ua->agent ('Mozilla'); ## TODO: for now.
628 $ua->parse_head (0);
629 $ua->protocols_allowed ([qw/http/]);
630 $ua->max_size (1000_000);
631 my $req = HTTP::Request->new (GET => $request_uri);
632 $req->header ('Accept-Encoding' => 'identity, *; q=0');
633 my $res = $ua->request ($req);
634 ## TODO: 401 sets |is_success| true.
635 if ($res->is_success or $http->get_parameter ('error-page')) {
636 $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
637 $r->{uri} = $res->request->uri;
638 $r->{request_uri} = $request_uri;
639
640 ## TODO: More strict parsing...
641 my $ct = $res->header ('Content-Type');
642 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
643 $r->{charset} = lc $1;
644 $r->{charset} =~ tr/\\//d;
645 $r->{official_charset} = $r->{charset};
646 }
647
648 my $input_charset = $http->get_parameter ('charset');
649 if (defined $input_charset and length $input_charset) {
650 $r->{charset_overridden}
651 = (not defined $r->{charset} or $r->{charset} ne $input_charset);
652 $r->{charset} = $input_charset;
653 }
654
655 ## TODO: Support for HTTP Content-Encoding
656
657 $r->{s} = ''.$res->content;
658
659 require Whatpm::ContentType;
660 ($r->{official_type}, $r->{media_type})
661 = Whatpm::ContentType->get_sniffed_type
662 (get_file_head => sub {
663 return substr $r->{s}, 0, shift;
664 },
665 http_content_type_byte => $ct,
666 has_http_content_encoding =>
667 defined $res->header ('Content-Encoding'),
668 supported_image_types => {});
669 } else {
670 $r->{uri} = $res->request->uri;
671 $r->{request_uri} = $request_uri;
672 $r->{error_status_text} = $res->status_line;
673 }
674
675 $r->{header_field} = [];
676 $res->scan (sub {
677 push @{$r->{header_field}}, [$_[0], $_[1]];
678 });
679 $r->{header_status_code} = $res->code;
680 $r->{header_status_text} = $res->message;
681 } else {
682 $r->{s} = ''.$http->get_parameter ('s');
683 $r->{uri} = q<thismessage:/>;
684 $r->{request_uri} = q<thismessage:/>;
685 $r->{base_uri} = q<thismessage:/>;
686 $r->{charset} = ''.$http->get_parameter ('_charset_');
687 $r->{charset} =~ s/\s+//g;
688 $r->{charset} = 'utf-8' if $r->{charset} eq '';
689 $r->{official_charset} = $r->{charset};
690 $r->{header_field} = [];
691
692 require Whatpm::ContentType;
693 ($r->{official_type}, $r->{media_type})
694 = Whatpm::ContentType->get_sniffed_type
695 (get_file_head => sub {
696 return substr $r->{s}, 0, shift;
697 },
698 http_content_type_byte => undef,
699 has_http_content_encoding => 0,
700 supported_image_types => {});
701 }
702
703 my $input_format = $http->get_parameter ('i');
704 if (defined $input_format and length $input_format) {
705 $r->{media_type_overridden}
706 = (not defined $r->{media_type} or $input_format ne $r->{media_type});
707 $r->{media_type} = $input_format;
708 }
709 if (defined $r->{s} and not defined $r->{media_type}) {
710 $r->{media_type} = 'text/html';
711 $r->{media_type_overridden} = 1;
712 }
713
714 if ($r->{media_type} eq 'text/xml') {
715 unless (defined $r->{charset}) {
716 $r->{charset} = 'us-ascii';
717 $r->{official_charset} = $r->{charset};
718 } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
719 $r->{charset_overridden} = 0;
720 }
721 }
722
723 if (length $r->{s} > 1000_000) {
724 $r->{error_status_text} = 'Entity-body too large';
725 delete $r->{s};
726 return $r;
727 }
728
729 $r->{inner_html_element} = $http->get_parameter ('e');
730
731 return $r;
732 } # get_input_document
733
734 package WDCC::LWPUA;
735 BEGIN { push our @ISA, 'LWP::UserAgent'; }
736
737 sub redirect_ok {
738 my $ua = shift;
739 unless ($ua->SUPER::redirect_ok (@_)) {
740 return 0;
741 }
742
743 my $uris = $_[1]->header ('Location');
744 return 0 unless $uris;
745 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
746 unless ({
747 http => 1,
748 }->{lc $uri->uri_scheme}) {
749 return 0;
750 }
751 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
752 return 0;
753 }
754 return 1;
755 } # redirect_ok
756
757 =head1 AUTHOR
758
759 Wakaba <w@suika.fam.cx>.
760
761 =head1 LICENSE
762
763 Copyright 2007-2008 Wakaba <w@suika.fam.cx>
764
765 This library is free software; you can redistribute it
766 and/or modify it under the same terms as Perl itself.
767
768 =cut
769
770 ## $Date: 2008/07/18 14:44:16 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24