/[suikacvs]/test/html-webhacc/WebHACC/Input.pm
Suika

Contents of /test/html-webhacc/WebHACC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (hide annotations) (download)
Thu Aug 14 15:50:42 2008 UTC (16 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.7: +63 -50 lines
++ ChangeLog	14 Aug 2008 15:42:17 -0000
	* cc.cgi: Generate result summary sections for
	each subdocument.

	* error-description-source.xml: New entries to
	support localization of result sections.

2008-08-14  Wakaba  <wakaba@suika.fam.cx>

	* cc-style.css: Support for revised version of result summary
	section styling.

2008-08-14  Wakaba  <wakaba@suika.fam.cx>

++ html/WebHACC/Language/ChangeLog	14 Aug 2008 15:50:38 -0000
	* Base.pm, CSS.pm, CacheManifest.pm, DOM.pm, Default.pm,
	HTML.pm, WebIDL.pm, XML.pm: Set |layer_applicable|
	or |layer_uncertain| flag appropriately.

2008-08-14  Wakaba  <wakaba@suika.fam.cx>

++ html/WebHACC/ChangeLog	14 Aug 2008 15:48:38 -0000
	* Input.pm: Methods |generate_transfer_sections|
	and |generate_http_header_section| are moved to HTTP
	subclass, since they are irrelevant to non-HTTP inputs.
	(_get_document): Forbidden host error was not represented
	by WebHACC::Input::Error subclass.
	(WebHACC::Input::Error generate_transfer_sections): Use
	role name for the section.
	(WebHACC::Input::HTTPError generate_transfer_sections): New method
	added, since the main superclass, i.e. WebHACC::Input::Error,
	no longer dumps HTTP headers due to the change mentioned above.

	* Output.pm (start_section): New roles "transfer-errors" and "result".

	* Result.pm (parent_result): New attribute.
	(layer_applicable, layer_uncertain): New methods to set flags.
	(add_error): Natural language strings are now handled
	by the catalog mechanism.  Use new scoring mechanism.
	(generate_result_section): Use catalog for all natural
	language strings.  Table generation is now much more sophiscated
	that it was.  Support for subdoc result summary.  Support
	for the column of the number of informational message.  Support
	for "N/A" status.

2008-08-14  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package WebHACC::Input;
2     use strict;
3    
4     sub new ($) {
5 wakaba 1.6 return bless {urls => []}, shift;
6 wakaba 1.1 } # new
7    
8 wakaba 1.3 sub id_prefix ($) { '' }
9    
10     sub nested ($) { 0 }
11    
12     sub subdocument_index ($) { 0 }
13    
14 wakaba 1.4 sub full_subdocument_index ($) { 0 }
15    
16 wakaba 1.6 sub url ($) {
17     my $self = shift;
18     if (@{$self->{urls}}) {
19     return $self->{urls}->[-1];
20     } else {
21     return undef;
22     }
23     } # url
24    
25     sub add_url ($$) {
26     my ($self, $url) = @_;
27     push @{$self->{urls}}, ''.$url;
28     } # add_url
29    
30     sub urls ($) {
31     my $self = shift;
32     return [@{$self->{urls}}];
33     } # urls
34    
35     sub get_document ($$$$) {
36     my $self = shift->new;
37    
38     my ($cgi => $result => $out) = @_;
39    
40     $out->input ($self);
41    
42     require Encode;
43     my $url_s = Encode::decode ('utf-8', $cgi->get_parameter ('uri'));
44     my $url_o;
45     if (defined $url_s and length $url_s) {
46     require Message::DOM::DOMImplementation;
47     my $dom = Message::DOM::DOMImplementation->new;
48    
49     $url_o = $dom->create_uri_reference ($url_s);
50     $url_o->uri_fragment (undef);
51    
52     $self->add_url ($url_o->uri_reference);
53    
54     my $url_scheme = lc $url_o->uri_scheme; ## TODO: html5_url_scheme
55     my $class = {
56     http => 'WebHACC::Input::HTTP',
57     }->{$url_scheme} || 'WebHACC::Input::UnsupportedURLSchemeError';
58     bless $self, $class;
59     } else {
60     bless $self, 'WebHACC::Input::Text';
61     }
62    
63     $self->_get_document ($cgi => $result => $out, $url_o);
64    
65     return $self unless defined $self->{s};
66    
67     if (length $self->{s} > 1000_000) {
68     $self->{error_status_text} = 'Entity-body too large';
69     delete $self->{s};
70     bless $self, 'WebHACC::Input::Error';
71     return $self;
72     }
73    
74     require Whatpm::ContentType;
75     ($self->{official_type}, $self->{media_type})
76     = Whatpm::ContentType->get_sniffed_type
77     (get_file_head => sub {
78     return substr $self->{s}, 0, shift;
79     },
80     http_content_type_byte => $self->{http_content_type_bytes},
81     supported_image_types => {});
82    
83     my $input_format = $cgi->get_parameter ('i');
84     if (defined $input_format and length $input_format) {
85     $self->{media_type_overridden}
86     = (not defined $self->{media_type} or
87     $input_format ne $self->{media_type});
88     $self->{media_type} = $input_format;
89     }
90     if (defined $self->{s} and not defined $self->{media_type}) {
91     $self->{media_type} = 'text/html';
92     $self->{media_type_overridden} = 1;
93     }
94    
95     if ($self->{media_type} eq 'text/xml') {
96     unless (defined $self->{charset}) {
97     $self->{charset} = 'us-ascii';
98     $self->{official_charset} = $self->{charset};
99     } elsif ($self->{charset_overridden} and $self->{charset} eq 'us-ascii') {
100     $self->{charset_overridden} = 0;
101     }
102     }
103    
104     $self->{inner_html_element} = $cgi->get_parameter ('e');
105    
106     return $self;
107     } # get_document
108    
109     sub _get_document ($$$$) {
110     die "$0: _get_document of " . ref $_[0];
111     } # _get_document
112    
113 wakaba 1.3 sub generate_info_section ($$) {
114     my $self = shift;
115    
116     my $result = shift;
117     my $out = $result->output;
118    
119     $out->start_section (id => 'document-info', title => 'Information');
120     $out->start_tag ('dl');
121    
122 wakaba 1.6 my $urls = $self->urls;
123 wakaba 1.3
124 wakaba 1.6 $out->dt (@$urls == 1 ? 'URL' : 'URLs');
125     my $url = pop @$urls;
126     for (@$urls) {
127     $out->start_tag ('dd');
128     $out->url ($_);
129     }
130 wakaba 1.3 $out->start_tag ('dd');
131 wakaba 1.6 $out->url ($url, id => 'anchor-document-url');
132 wakaba 1.3 $out->script (q[
133     document.title = '<'
134     + document.getElementById ('anchor-document-url').href + '> \\u2014 '
135     + document.title;
136     ]);
137    
138     if (defined $self->{s}) {
139     $out->dt ('Base URL');
140     $out->start_tag ('dd');
141     $out->url ($self->{base_uri});
142    
143     $out->dt ('Internet Media Type');
144     $out->start_tag ('dd');
145     $out->code ($self->{media_type}, class => 'MIME', lang => 'en');
146     if ($self->{media_type_overridden}) {
147 wakaba 1.5 $out->nl_text ('... overridden');
148 wakaba 1.3 } elsif (defined $self->{official_type}) {
149     if ($self->{media_type} eq $self->{official_type}) {
150     #
151     } else {
152 wakaba 1.5 $out->nl_text ('... sniffed, official type is #',
153     text => $self->{official_type});
154 wakaba 1.3 }
155 wakaba 1.1 } else {
156 wakaba 1.5 $out->nl_text ( '... sniffed');
157 wakaba 1.1 }
158    
159 wakaba 1.3 $out->dt ('Character Encoding');
160     $out->start_tag ('dd');
161     if (defined $self->{charset}) {
162     $out->code ($self->{charset}, class => 'charset', lang => 'en');
163 wakaba 1.1 } else {
164 wakaba 1.5 $out->nl_text ('(unknown)');
165 wakaba 1.1 }
166 wakaba 1.5 $out->nl_text ('... overridden') if $self->{charset_overridden};
167 wakaba 1.3
168     $out->dt ($self->{is_char_string} ? 'Character Length' : 'Byte Length');
169     ## TODO: formatting
170     $out->start_tag ('dd');
171     my $length = length $self->{s};
172 wakaba 1.5 $out->text ($length . ' ');
173     $out->nl_text (($self->{is_char_string} ? 'character' : 'byte') .
174     ($length == 1 ? '' : 's'));
175 wakaba 1.1 }
176    
177 wakaba 1.3 $out->end_tag ('dl');
178     $out->end_section;
179     } # generate_info_section
180 wakaba 1.1
181 wakaba 1.8 sub generate_transfer_sections ($$) { }
182 wakaba 1.3
183 wakaba 1.6 package WebHACC::Input::HTTP;
184     push our @ISA, 'WebHACC::Input';
185    
186     {
187     my $HostPermit;
188     sub host_permit ($) {
189     return $HostPermit if $HostPermit;
190    
191     require Message::Util::HostPermit;
192     $HostPermit = new Message::Util::HostPermit;
193     $HostPermit->add_rule (<<'EOH');
194     Allow host=suika port=80
195     Deny host=suika
196     Allow host=suika.fam.cx port=80
197     Deny host=suika.fam.cx
198     Deny host=localhost
199     Deny host=*.localdomain
200     Deny ipv4=0.0.0.0/8
201     Deny ipv4=10.0.0.0/8
202     Deny ipv4=127.0.0.0/8
203     Deny ipv4=169.254.0.0/16
204     Deny ipv4=172.0.0.0/11
205     Deny ipv4=192.0.2.0/24
206     Deny ipv4=192.88.99.0/24
207     Deny ipv4=192.168.0.0/16
208     Deny ipv4=198.18.0.0/15
209     Deny ipv4=224.0.0.0/4
210     Deny ipv4=255.255.255.255/32
211     Deny ipv6=0::0/0
212     Allow host=*
213     EOH
214     return $HostPermit;
215     } # host_permit
216     }
217    
218     sub _get_document ($$$$$) {
219     my ($self, $cgi => $result => $out, $url_o) = @_;
220    
221     unless ($self->host_permit->check ($url_o->uri_host, $url_o->uri_port || 80)) {
222     $self->{error_status_text} = 'Connection to the host is forbidden';
223 wakaba 1.8 bless $self, 'WebHACC::Input::Error';
224 wakaba 1.6 return $self;
225     }
226    
227     my $ua = WDCC::LWPUA->new;
228     $ua->{wdcc_dom} = Message::DOM::DOMImplementation->new;
229     $ua->{wdcc_host_permit} = $self->host_permit;
230     $ua->agent ('Mozilla'); ## TODO: for now.
231     $ua->parse_head (0);
232     $ua->protocols_allowed ([qw/http/]);
233     $ua->max_size (1000_000);
234     my $req = HTTP::Request->new (GET => $url_o->uri_reference);
235     $req->header ('Accept-Encoding' => 'identity, *; q=0');
236     my $res = $ua->request ($req);
237     ## TODO: 401 sets |is_success| true.
238     ## TODO: Don't follow redirect if error-page=true
239     if ($res->is_success or $cgi->get_parameter ('error-page')) {
240     $self->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
241     my $new_url = $res->request->uri;
242     $self->add_url ($new_url) if $new_url ne $self->url;
243    
244     ## TODO: More strict parsing...
245     my $ct = $self->{http_content_type_bytes} = $res->header ('Content-Type');
246     if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
247     $self->{charset} = lc $1;
248     $self->{charset} =~ tr/\\//d;
249     $self->{official_charset} = $self->{charset};
250     }
251    
252     my $input_charset = $cgi->get_parameter ('charset');
253     if (defined $input_charset and length $input_charset) {
254     $self->{charset_overridden}
255     = (not defined $self->{charset} or $self->{charset} ne $input_charset);
256     $self->{charset} = $input_charset;
257     }
258    
259     ## TODO: Support for HTTP Content-Encoding
260    
261     $self->{s} = ''.$res->content;
262     } else {
263     $self->add_url ($res->request->uri);
264     $self->{error_status_text} = $res->status_line;
265     bless $self, 'WebHACC::Input::HTTPError';
266     }
267    
268     $self->{header_field} = [];
269     $res->scan (sub {
270     push @{$self->{header_field}}, [$_[0], $_[1]];
271     });
272     $self->{header_status_code} = $res->code;
273     $self->{header_status_text} = $res->message;
274    
275     return $self;
276     } # _get_document
277    
278 wakaba 1.8 sub generate_transfer_sections ($$) {
279     my $self = shift;
280     my $result = shift;
281    
282     $result->layer_uncertain ('transfer');
283    
284     $self->generate_http_header_section ($result);
285     } # generate_transfer_sections
286    
287     sub generate_http_header_section ($$) {
288     my ($self, $result) = @_;
289    
290     return unless defined $self->{header_status_code} or
291     defined $self->{header_status_text} or
292     @{$self->{header_field} or []};
293    
294     my $out = $result->output;
295    
296     $out->start_section (id => 'source-header', title => 'HTTP Header');
297     $out->html (qq[<p><strong>Note</strong>: Due to the limitation of the
298     network library in use, the content of this section might
299     not be the real header.</p>
300    
301     <table><tbody>
302     ]);
303    
304     if (defined $self->{header_status_code}) {
305     $out->html (qq[<tr><th scope="row">Status code</th>]);
306     $out->start_tag ('td');
307     $out->code ($self->{header_status_code});
308     }
309     if (defined $self->{header_status_text}) {
310     $out->html (qq[<tr><th scope="row">Status text</th>]);
311     $out->start_tag ('td');
312     $out->code ($self->{header_status_text});
313     }
314    
315     for (@{$self->{header_field}}) {
316     $out->start_tag ('tr');
317     $out->start_tag ('th', scope => 'row');
318     $out->code ($_->[0]);
319     $out->start_tag ('td');
320     $out->code ($_->[1]);
321     }
322    
323     $out->end_tag ('table');
324    
325     $out->end_section;
326     } # generate_http_header_section
327    
328 wakaba 1.6 package WebHACC::Input::Text;
329     push our @ISA, 'WebHACC::Input';
330    
331     sub _get_document ($$$$) {
332     my ($self, $cgi => $result => $out) = @_;
333    
334     $self->add_url (q<thismessage:/>);
335     $self->{base_uri} = q<thismessage:/>;
336    
337     $self->{s} = ''.$cgi->get_parameter ('s');
338     $self->{charset} = ''.$cgi->get_parameter ('_charset_');
339     $self->{charset} =~ s/\s+//g;
340     $self->{charset} = 'utf-8' if $self->{charset} eq '';
341     $self->{official_charset} = $self->{charset};
342     $self->{header_field} = [];
343    
344     return $self;
345     } # _get_document
346    
347 wakaba 1.3 package WebHACC::Input::Subdocument;
348     push our @ISA, 'WebHACC::Input';
349    
350     sub new ($$) {
351     my $self = bless {}, shift;
352     $self->{subdocument_index} = shift;
353     return $self;
354     } # new
355    
356     sub id_prefix ($) {
357 wakaba 1.7 my $self = shift;
358     return $self->{parent_input}->id_prefix .
359     'subdoc-' . $self->{subdocument_index} . '-';
360 wakaba 1.3 } # id_prefix
361    
362     sub nested ($) { 1 }
363    
364     sub subdocument_index ($) {
365     return shift->{subdocument_index};
366     } # subdocument_index
367    
368 wakaba 1.4 sub full_subdocument_index ($) {
369     my $self = shift;
370     my $parent = $self->{parent_input}->full_subdocument_index;
371     if ($parent) {
372     return $parent . '.' . $self->{subdocument_index};
373     } else {
374     return $self->{subdocument_index};
375     }
376     } # full_subdocument_index
377    
378 wakaba 1.3 sub start_section ($$) {
379     my $self = shift;
380    
381     my $result = shift;
382     my $out = $result->output;
383    
384 wakaba 1.7 my $index = $self->subdocument_index;
385     $out->start_section (id => my $id = 'subdoc-' . $index . '-',
386 wakaba 1.5 title => qq[Subdocument #],
387     short_title => 'Sub #',
388 wakaba 1.7 role => 'subdoc',
389     text => $self->full_subdocument_index);
390     $out->script (q[ insertNavSections ('] . $out->input->id_prefix . $id . q[') ]);
391 wakaba 1.3 } # start_section
392    
393     sub end_section ($$) {
394     $_[1]->output->end_section;
395     } # end_section
396    
397     sub generate_info_section ($$) {
398     my $self = shift;
399    
400     my $result = shift;
401     my $out = $result->output;
402    
403     $out->start_section (id => 'document-info', title => 'Information');
404     $out->start_tag ('dl');
405    
406     $out->dt ('Internet Media Type');
407     $out->start_tag ('dd');
408     $out->code ($self->{media_type}, code => 'MIME', lang => 'en');
409    
410     if (defined $self->{container_node}) {
411     $out->dt ('Container Node');
412     $out->start_tag ('dd');
413     my $original_input = $out->input;
414     $out->input ($self->{parent_input});
415     $out->node_link ($self->{container_node});
416     $out->input ($original_input);
417     }
418    
419     $out->dt ('Base URL');
420     $out->start_tag ('dd');
421     $out->url ($self->{base_uri});
422    
423     $out->end_tag ('dl');
424     $out->end_section;
425     } # generate_info_section
426 wakaba 1.2
427     package WebHACC::Input::Error;
428     push our @ISA, 'WebHACC::Input';
429    
430     sub generate_transfer_sections ($$) {
431     my $self = shift;
432    
433     my $result = shift;
434     my $out = $result->output;
435    
436 wakaba 1.8 $out->start_section (role => 'transfer-errors');
437 wakaba 1.2
438     $out->start_tag ('dl');
439     $result->add_error (layer => 'transfer',
440     level => 'u',
441     type => 'resource retrieval error',
442     url => $self->{request_uri},
443     text => $self->{error_status_text});
444     $out->end_tag ('dl');
445    
446     $out->end_section;
447     } # generate_transfer_sections
448 wakaba 1.6
449     package WebHACC::Input::HTTPError;
450     push our @ISA, 'WebHACC::Input::Error', 'WebHACC::Input::HTTP';
451 wakaba 1.8
452     sub generate_transfer_sections ($$) {
453     my $self = shift;
454    
455     my $result = shift;
456    
457     $self->WebHACC::Input::Error->generate_transfer_sections ($result);
458     $self->WebHACC::Input::HTTP->generate_transfer_sections ($result);
459    
460     } # generate_transfer_sections
461 wakaba 1.6
462     package WebHACC::Input::UnsupportedURLSchemeError;
463     push our @ISA, 'WebHACC::Input::Error';
464    
465     sub _get_document ($$$$) {
466     my ($self, $cgi => $result => $out) = @_;
467    
468     $self->{error_status_text} = 'URL scheme not allowed';
469    
470     return $self;
471     } # _get_document
472    
473     package WDCC::LWPUA;
474     require LWP::UserAgent;
475     push our @ISA, 'LWP::UserAgent';
476    
477     sub redirect_ok {
478     my $ua = shift;
479     unless ($ua->SUPER::redirect_ok (@_)) {
480     return 0;
481     }
482    
483     my $uris = $_[1]->header ('Location');
484     return 0 unless $uris;
485     my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
486     unless ({
487     http => 1,
488     }->{lc $uri->uri_scheme}) { ## TODO: html5_url_scheme
489     return 0;
490     }
491     unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
492     return 0;
493     }
494     return 1;
495     } # redirect_ok
496 wakaba 1.2
497 wakaba 1.1 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24