/[suikacvs]/test/html-webhacc/WebHACC/Input.pm
Suika

Contents of /test/html-webhacc/WebHACC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download)
Fri Aug 15 05:53:23 2008 UTC (16 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.8: +5 -4 lines
++ ChangeLog	15 Aug 2008 05:43:43 -0000
2008-08-15  Wakaba  <wakaba@suika.fam.cx>

	* cc-style.css: Add icons to links and headings of error sections.
	Use standard "uncertain" color for level-u errors.  "No error found"
	messages are now handled by catalog, not by CSS presentation.
	The result paragraph saying that the conformance is unknown
	should be bordered as uncertain error messages are.

	* error-description-source.xml: New message entries
	for not-translated-yet messages.  Distinguish result table's
	layer names from other similar texts.

++ html/WebHACC/Language/ChangeLog	15 Aug 2008 05:53:19 -0000
2008-08-15  Wakaba  <wakaba@suika.fam.cx>

	* CSS.pm, CacheManifest.pm, HTML.pm, WebIDL.pm, XML.pm: Don't
	set "uncertain" flag to character encoding (encode) layer if the
	input is a character string.

++ html/WebHACC/ChangeLog	15 Aug 2008 05:51:54 -0000
2008-08-15  Wakaba  <wakaba@suika.fam.cx>

	* Input.pm (generate_transfer_sections): Use standard
	error list methods for transfer errors.  Typos fixed.

	* Output.pm (has_error): New attribute.
	(start_error_list, end_error_list): Support for role "transfer-errors".
	(end_error_list): Generate "no error found" paragraph if it should be.
	(generate_input_section): Decode parameters as UTF-8.

	* Result.pm (add_error): Set |has_error| flag.  Use catalog
	for "Unknown location" message.
	(generate_result_section): Use different text for
	result table rows than normal messages, to avoid collision
	with other messages (such as "Charset").

1 wakaba 1.1 package WebHACC::Input;
2     use strict;
3    
4     sub new ($) {
5 wakaba 1.6 return bless {urls => []}, shift;
6 wakaba 1.1 } # new
7    
8 wakaba 1.3 sub id_prefix ($) { '' }
9    
10     sub nested ($) { 0 }
11    
12     sub subdocument_index ($) { 0 }
13    
14 wakaba 1.4 sub full_subdocument_index ($) { 0 }
15    
16 wakaba 1.6 sub url ($) {
17     my $self = shift;
18     if (@{$self->{urls}}) {
19     return $self->{urls}->[-1];
20     } else {
21     return undef;
22     }
23     } # url
24    
25     sub add_url ($$) {
26     my ($self, $url) = @_;
27     push @{$self->{urls}}, ''.$url;
28     } # add_url
29    
30     sub urls ($) {
31     my $self = shift;
32     return [@{$self->{urls}}];
33     } # urls
34    
35     sub get_document ($$$$) {
36     my $self = shift->new;
37    
38     my ($cgi => $result => $out) = @_;
39    
40     $out->input ($self);
41    
42     require Encode;
43     my $url_s = Encode::decode ('utf-8', $cgi->get_parameter ('uri'));
44     my $url_o;
45     if (defined $url_s and length $url_s) {
46     require Message::DOM::DOMImplementation;
47     my $dom = Message::DOM::DOMImplementation->new;
48    
49     $url_o = $dom->create_uri_reference ($url_s);
50     $url_o->uri_fragment (undef);
51    
52     $self->add_url ($url_o->uri_reference);
53    
54     my $url_scheme = lc $url_o->uri_scheme; ## TODO: html5_url_scheme
55     my $class = {
56     http => 'WebHACC::Input::HTTP',
57     }->{$url_scheme} || 'WebHACC::Input::UnsupportedURLSchemeError';
58     bless $self, $class;
59     } else {
60     bless $self, 'WebHACC::Input::Text';
61     }
62    
63     $self->_get_document ($cgi => $result => $out, $url_o);
64    
65     return $self unless defined $self->{s};
66    
67     if (length $self->{s} > 1000_000) {
68     $self->{error_status_text} = 'Entity-body too large';
69     delete $self->{s};
70     bless $self, 'WebHACC::Input::Error';
71     return $self;
72     }
73    
74     require Whatpm::ContentType;
75     ($self->{official_type}, $self->{media_type})
76     = Whatpm::ContentType->get_sniffed_type
77     (get_file_head => sub {
78     return substr $self->{s}, 0, shift;
79     },
80     http_content_type_byte => $self->{http_content_type_bytes},
81     supported_image_types => {});
82    
83     my $input_format = $cgi->get_parameter ('i');
84     if (defined $input_format and length $input_format) {
85     $self->{media_type_overridden}
86     = (not defined $self->{media_type} or
87     $input_format ne $self->{media_type});
88     $self->{media_type} = $input_format;
89     }
90     if (defined $self->{s} and not defined $self->{media_type}) {
91     $self->{media_type} = 'text/html';
92     $self->{media_type_overridden} = 1;
93     }
94    
95     if ($self->{media_type} eq 'text/xml') {
96     unless (defined $self->{charset}) {
97     $self->{charset} = 'us-ascii';
98     $self->{official_charset} = $self->{charset};
99     } elsif ($self->{charset_overridden} and $self->{charset} eq 'us-ascii') {
100     $self->{charset_overridden} = 0;
101     }
102     }
103    
104     $self->{inner_html_element} = $cgi->get_parameter ('e');
105    
106     return $self;
107     } # get_document
108    
109     sub _get_document ($$$$) {
110     die "$0: _get_document of " . ref $_[0];
111     } # _get_document
112    
113 wakaba 1.3 sub generate_info_section ($$) {
114     my $self = shift;
115    
116     my $result = shift;
117     my $out = $result->output;
118    
119     $out->start_section (id => 'document-info', title => 'Information');
120     $out->start_tag ('dl');
121    
122 wakaba 1.6 my $urls = $self->urls;
123 wakaba 1.3
124 wakaba 1.6 $out->dt (@$urls == 1 ? 'URL' : 'URLs');
125     my $url = pop @$urls;
126     for (@$urls) {
127     $out->start_tag ('dd');
128     $out->url ($_);
129     }
130 wakaba 1.3 $out->start_tag ('dd');
131 wakaba 1.6 $out->url ($url, id => 'anchor-document-url');
132 wakaba 1.3 $out->script (q[
133     document.title = '<'
134     + document.getElementById ('anchor-document-url').href + '> \\u2014 '
135     + document.title;
136     ]);
137    
138     if (defined $self->{s}) {
139     $out->dt ('Base URL');
140     $out->start_tag ('dd');
141     $out->url ($self->{base_uri});
142    
143     $out->dt ('Internet Media Type');
144     $out->start_tag ('dd');
145     $out->code ($self->{media_type}, class => 'MIME', lang => 'en');
146     if ($self->{media_type_overridden}) {
147 wakaba 1.5 $out->nl_text ('... overridden');
148 wakaba 1.3 } elsif (defined $self->{official_type}) {
149     if ($self->{media_type} eq $self->{official_type}) {
150     #
151     } else {
152 wakaba 1.5 $out->nl_text ('... sniffed, official type is #',
153     text => $self->{official_type});
154 wakaba 1.3 }
155 wakaba 1.1 } else {
156 wakaba 1.5 $out->nl_text ( '... sniffed');
157 wakaba 1.1 }
158    
159 wakaba 1.3 $out->dt ('Character Encoding');
160     $out->start_tag ('dd');
161     if (defined $self->{charset}) {
162     $out->code ($self->{charset}, class => 'charset', lang => 'en');
163 wakaba 1.1 } else {
164 wakaba 1.5 $out->nl_text ('(unknown)');
165 wakaba 1.1 }
166 wakaba 1.5 $out->nl_text ('... overridden') if $self->{charset_overridden};
167 wakaba 1.3
168     $out->dt ($self->{is_char_string} ? 'Character Length' : 'Byte Length');
169     ## TODO: formatting
170     $out->start_tag ('dd');
171     my $length = length $self->{s};
172 wakaba 1.5 $out->text ($length . ' ');
173     $out->nl_text (($self->{is_char_string} ? 'character' : 'byte') .
174     ($length == 1 ? '' : 's'));
175 wakaba 1.1 }
176    
177 wakaba 1.3 $out->end_tag ('dl');
178     $out->end_section;
179     } # generate_info_section
180 wakaba 1.1
181 wakaba 1.8 sub generate_transfer_sections ($$) { }
182 wakaba 1.3
183 wakaba 1.6 package WebHACC::Input::HTTP;
184     push our @ISA, 'WebHACC::Input';
185    
186     {
187     my $HostPermit;
188     sub host_permit ($) {
189     return $HostPermit if $HostPermit;
190    
191     require Message::Util::HostPermit;
192     $HostPermit = new Message::Util::HostPermit;
193     $HostPermit->add_rule (<<'EOH');
194     Allow host=suika port=80
195     Deny host=suika
196     Allow host=suika.fam.cx port=80
197     Deny host=suika.fam.cx
198     Deny host=localhost
199     Deny host=*.localdomain
200     Deny ipv4=0.0.0.0/8
201     Deny ipv4=10.0.0.0/8
202     Deny ipv4=127.0.0.0/8
203     Deny ipv4=169.254.0.0/16
204     Deny ipv4=172.0.0.0/11
205     Deny ipv4=192.0.2.0/24
206     Deny ipv4=192.88.99.0/24
207     Deny ipv4=192.168.0.0/16
208     Deny ipv4=198.18.0.0/15
209     Deny ipv4=224.0.0.0/4
210     Deny ipv4=255.255.255.255/32
211     Deny ipv6=0::0/0
212     Allow host=*
213     EOH
214     return $HostPermit;
215     } # host_permit
216     }
217    
218     sub _get_document ($$$$$) {
219     my ($self, $cgi => $result => $out, $url_o) = @_;
220    
221     unless ($self->host_permit->check ($url_o->uri_host, $url_o->uri_port || 80)) {
222     $self->{error_status_text} = 'Connection to the host is forbidden';
223 wakaba 1.8 bless $self, 'WebHACC::Input::Error';
224 wakaba 1.6 return $self;
225     }
226    
227     my $ua = WDCC::LWPUA->new;
228     $ua->{wdcc_dom} = Message::DOM::DOMImplementation->new;
229     $ua->{wdcc_host_permit} = $self->host_permit;
230     $ua->agent ('Mozilla'); ## TODO: for now.
231     $ua->parse_head (0);
232     $ua->protocols_allowed ([qw/http/]);
233     $ua->max_size (1000_000);
234     my $req = HTTP::Request->new (GET => $url_o->uri_reference);
235     $req->header ('Accept-Encoding' => 'identity, *; q=0');
236     my $res = $ua->request ($req);
237     ## TODO: 401 sets |is_success| true.
238     ## TODO: Don't follow redirect if error-page=true
239     if ($res->is_success or $cgi->get_parameter ('error-page')) {
240     $self->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
241     my $new_url = $res->request->uri;
242     $self->add_url ($new_url) if $new_url ne $self->url;
243    
244     ## TODO: More strict parsing...
245     my $ct = $self->{http_content_type_bytes} = $res->header ('Content-Type');
246     if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
247     $self->{charset} = lc $1;
248     $self->{charset} =~ tr/\\//d;
249     $self->{official_charset} = $self->{charset};
250     }
251    
252     my $input_charset = $cgi->get_parameter ('charset');
253     if (defined $input_charset and length $input_charset) {
254     $self->{charset_overridden}
255     = (not defined $self->{charset} or $self->{charset} ne $input_charset);
256     $self->{charset} = $input_charset;
257     }
258    
259     ## TODO: Support for HTTP Content-Encoding
260    
261     $self->{s} = ''.$res->content;
262     } else {
263     $self->add_url ($res->request->uri);
264     $self->{error_status_text} = $res->status_line;
265     bless $self, 'WebHACC::Input::HTTPError';
266     }
267    
268     $self->{header_field} = [];
269     $res->scan (sub {
270     push @{$self->{header_field}}, [$_[0], $_[1]];
271     });
272     $self->{header_status_code} = $res->code;
273     $self->{header_status_text} = $res->message;
274    
275     return $self;
276     } # _get_document
277    
278 wakaba 1.8 sub generate_transfer_sections ($$) {
279     my $self = shift;
280     my $result = shift;
281    
282     $result->layer_uncertain ('transfer');
283    
284     $self->generate_http_header_section ($result);
285     } # generate_transfer_sections
286    
287     sub generate_http_header_section ($$) {
288     my ($self, $result) = @_;
289    
290     return unless defined $self->{header_status_code} or
291     defined $self->{header_status_text} or
292     @{$self->{header_field} or []};
293    
294     my $out = $result->output;
295    
296     $out->start_section (id => 'source-header', title => 'HTTP Header');
297     $out->html (qq[<p><strong>Note</strong>: Due to the limitation of the
298     network library in use, the content of this section might
299     not be the real header.</p>
300    
301     <table><tbody>
302     ]);
303    
304     if (defined $self->{header_status_code}) {
305     $out->html (qq[<tr><th scope="row">Status code</th>]);
306     $out->start_tag ('td');
307     $out->code ($self->{header_status_code});
308     }
309     if (defined $self->{header_status_text}) {
310     $out->html (qq[<tr><th scope="row">Status text</th>]);
311     $out->start_tag ('td');
312     $out->code ($self->{header_status_text});
313     }
314    
315     for (@{$self->{header_field}}) {
316     $out->start_tag ('tr');
317     $out->start_tag ('th', scope => 'row');
318     $out->code ($_->[0]);
319     $out->start_tag ('td');
320     $out->code ($_->[1]);
321     }
322    
323     $out->end_tag ('table');
324    
325     $out->end_section;
326     } # generate_http_header_section
327    
328 wakaba 1.6 package WebHACC::Input::Text;
329     push our @ISA, 'WebHACC::Input';
330    
331     sub _get_document ($$$$) {
332     my ($self, $cgi => $result => $out) = @_;
333    
334     $self->add_url (q<thismessage:/>);
335     $self->{base_uri} = q<thismessage:/>;
336    
337     $self->{s} = ''.$cgi->get_parameter ('s');
338     $self->{charset} = ''.$cgi->get_parameter ('_charset_');
339     $self->{charset} =~ s/\s+//g;
340     $self->{charset} = 'utf-8' if $self->{charset} eq '';
341     $self->{official_charset} = $self->{charset};
342     $self->{header_field} = [];
343    
344     return $self;
345     } # _get_document
346    
347 wakaba 1.3 package WebHACC::Input::Subdocument;
348     push our @ISA, 'WebHACC::Input';
349    
350     sub new ($$) {
351     my $self = bless {}, shift;
352     $self->{subdocument_index} = shift;
353     return $self;
354     } # new
355    
356     sub id_prefix ($) {
357 wakaba 1.7 my $self = shift;
358     return $self->{parent_input}->id_prefix .
359     'subdoc-' . $self->{subdocument_index} . '-';
360 wakaba 1.3 } # id_prefix
361    
362     sub nested ($) { 1 }
363    
364     sub subdocument_index ($) {
365     return shift->{subdocument_index};
366     } # subdocument_index
367    
368 wakaba 1.4 sub full_subdocument_index ($) {
369     my $self = shift;
370     my $parent = $self->{parent_input}->full_subdocument_index;
371     if ($parent) {
372     return $parent . '.' . $self->{subdocument_index};
373     } else {
374     return $self->{subdocument_index};
375     }
376     } # full_subdocument_index
377    
378 wakaba 1.3 sub start_section ($$) {
379     my $self = shift;
380    
381     my $result = shift;
382     my $out = $result->output;
383    
384 wakaba 1.7 my $index = $self->subdocument_index;
385     $out->start_section (id => my $id = 'subdoc-' . $index . '-',
386 wakaba 1.5 title => qq[Subdocument #],
387     short_title => 'Sub #',
388 wakaba 1.7 role => 'subdoc',
389     text => $self->full_subdocument_index);
390     $out->script (q[ insertNavSections ('] . $out->input->id_prefix . $id . q[') ]);
391 wakaba 1.3 } # start_section
392    
393     sub end_section ($$) {
394     $_[1]->output->end_section;
395     } # end_section
396    
397     sub generate_info_section ($$) {
398     my $self = shift;
399    
400     my $result = shift;
401     my $out = $result->output;
402    
403     $out->start_section (id => 'document-info', title => 'Information');
404     $out->start_tag ('dl');
405    
406     $out->dt ('Internet Media Type');
407     $out->start_tag ('dd');
408     $out->code ($self->{media_type}, code => 'MIME', lang => 'en');
409    
410     if (defined $self->{container_node}) {
411     $out->dt ('Container Node');
412     $out->start_tag ('dd');
413     my $original_input = $out->input;
414     $out->input ($self->{parent_input});
415     $out->node_link ($self->{container_node});
416     $out->input ($original_input);
417     }
418    
419     $out->dt ('Base URL');
420     $out->start_tag ('dd');
421     $out->url ($self->{base_uri});
422    
423     $out->end_tag ('dl');
424     $out->end_section;
425     } # generate_info_section
426 wakaba 1.2
427     package WebHACC::Input::Error;
428     push our @ISA, 'WebHACC::Input';
429    
430     sub generate_transfer_sections ($$) {
431     my $self = shift;
432    
433     my $result = shift;
434     my $out = $result->output;
435    
436 wakaba 1.8 $out->start_section (role => 'transfer-errors');
437 wakaba 1.9 $out->start_error_list (role => 'transfer-errors');
438 wakaba 1.2
439 wakaba 1.9 $result->layer_applicable ('transfer');
440 wakaba 1.2 $result->add_error (layer => 'transfer',
441     level => 'u',
442     type => 'resource retrieval error',
443     url => $self->{request_uri},
444     text => $self->{error_status_text});
445    
446 wakaba 1.9 $out->end_error_list (role => 'transfer-errors');
447 wakaba 1.2 $out->end_section;
448     } # generate_transfer_sections
449 wakaba 1.6
450     package WebHACC::Input::HTTPError;
451     push our @ISA, 'WebHACC::Input::Error', 'WebHACC::Input::HTTP';
452 wakaba 1.8
453     sub generate_transfer_sections ($$) {
454     my $self = shift;
455    
456     my $result = shift;
457    
458 wakaba 1.9 $self->WebHACC::Input::Error::generate_transfer_sections ($result);
459     $self->WebHACC::Input::HTTP::generate_transfer_sections ($result);
460 wakaba 1.8
461     } # generate_transfer_sections
462 wakaba 1.6
463     package WebHACC::Input::UnsupportedURLSchemeError;
464     push our @ISA, 'WebHACC::Input::Error';
465    
466     sub _get_document ($$$$) {
467     my ($self, $cgi => $result => $out) = @_;
468    
469     $self->{error_status_text} = 'URL scheme not allowed';
470    
471     return $self;
472     } # _get_document
473    
474     package WDCC::LWPUA;
475     require LWP::UserAgent;
476     push our @ISA, 'LWP::UserAgent';
477    
478     sub redirect_ok {
479     my $ua = shift;
480     unless ($ua->SUPER::redirect_ok (@_)) {
481     return 0;
482     }
483    
484     my $uris = $_[1]->header ('Location');
485     return 0 unless $uris;
486     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
487     unless ({
488     http => 1,
489     }->{lc $uri->uri_scheme}) { ## TODO: html5_url_scheme
490     return 0;
491     }
492     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
493     return 0;
494     }
495     return 1;
496     } # redirect_ok
497 wakaba 1.2
498 wakaba 1.1 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24