/[suikacvs]/test/html-webhacc/WebHACC/Input.pm
Suika

Contents of /test/html-webhacc/WebHACC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (show annotations) (download)
Thu Aug 14 15:50:42 2008 UTC (16 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.7: +63 -50 lines
++ ChangeLog	14 Aug 2008 15:42:17 -0000
	* cc.cgi: Generate result summary sections for
	each subdocument.

	* error-description-source.xml: New entries to
	support localization of result sections.

2008-08-14  Wakaba  <wakaba@suika.fam.cx>

	* cc-style.css: Support for revised version of result summary
	section styling.

2008-08-14  Wakaba  <wakaba@suika.fam.cx>

++ html/WebHACC/Language/ChangeLog	14 Aug 2008 15:50:38 -0000
	* Base.pm, CSS.pm, CacheManifest.pm, DOM.pm, Default.pm,
	HTML.pm, WebIDL.pm, XML.pm: Set |layer_applicable|
	or |layer_uncertain| flag appropriately.

2008-08-14  Wakaba  <wakaba@suika.fam.cx>

++ html/WebHACC/ChangeLog	14 Aug 2008 15:48:38 -0000
	* Input.pm: Methods |generate_transfer_sections|
	and |generate_http_header_section| are moved to HTTP
	subclass, since they are irrelevant to non-HTTP inputs.
	(_get_document): Forbidden host error was not represented
	by WebHACC::Input::Error subclass.
	(WebHACC::Input::Error generate_transfer_sections): Use
	role name for the section.
	(WebHACC::Input::HTTPError generate_transfer_sections): New method
	added, since the main superclass, i.e. WebHACC::Input::Error,
	no longer dumps HTTP headers due to the change mentioned above.

	* Output.pm (start_section): New roles "transfer-errors" and "result".

	* Result.pm (parent_result): New attribute.
	(layer_applicable, layer_uncertain): New methods to set flags.
	(add_error): Natural language strings are now handled
	by the catalog mechanism.  Use new scoring mechanism.
	(generate_result_section): Use catalog for all natural
	language strings.  Table generation is now much more sophiscated
	that it was.  Support for subdoc result summary.  Support
	for the column of the number of informational message.  Support
	for "N/A" status.

2008-08-14  Wakaba  <wakaba@suika.fam.cx>

1 package WebHACC::Input;
2 use strict;
3
4 sub new ($) {
5 return bless {urls => []}, shift;
6 } # new
7
8 sub id_prefix ($) { '' }
9
10 sub nested ($) { 0 }
11
12 sub subdocument_index ($) { 0 }
13
14 sub full_subdocument_index ($) { 0 }
15
16 sub url ($) {
17 my $self = shift;
18 if (@{$self->{urls}}) {
19 return $self->{urls}->[-1];
20 } else {
21 return undef;
22 }
23 } # url
24
25 sub add_url ($$) {
26 my ($self, $url) = @_;
27 push @{$self->{urls}}, ''.$url;
28 } # add_url
29
30 sub urls ($) {
31 my $self = shift;
32 return [@{$self->{urls}}];
33 } # urls
34
35 sub get_document ($$$$) {
36 my $self = shift->new;
37
38 my ($cgi => $result => $out) = @_;
39
40 $out->input ($self);
41
42 require Encode;
43 my $url_s = Encode::decode ('utf-8', $cgi->get_parameter ('uri'));
44 my $url_o;
45 if (defined $url_s and length $url_s) {
46 require Message::DOM::DOMImplementation;
47 my $dom = Message::DOM::DOMImplementation->new;
48
49 $url_o = $dom->create_uri_reference ($url_s);
50 $url_o->uri_fragment (undef);
51
52 $self->add_url ($url_o->uri_reference);
53
54 my $url_scheme = lc $url_o->uri_scheme; ## TODO: html5_url_scheme
55 my $class = {
56 http => 'WebHACC::Input::HTTP',
57 }->{$url_scheme} || 'WebHACC::Input::UnsupportedURLSchemeError';
58 bless $self, $class;
59 } else {
60 bless $self, 'WebHACC::Input::Text';
61 }
62
63 $self->_get_document ($cgi => $result => $out, $url_o);
64
65 return $self unless defined $self->{s};
66
67 if (length $self->{s} > 1000_000) {
68 $self->{error_status_text} = 'Entity-body too large';
69 delete $self->{s};
70 bless $self, 'WebHACC::Input::Error';
71 return $self;
72 }
73
74 require Whatpm::ContentType;
75 ($self->{official_type}, $self->{media_type})
76 = Whatpm::ContentType->get_sniffed_type
77 (get_file_head => sub {
78 return substr $self->{s}, 0, shift;
79 },
80 http_content_type_byte => $self->{http_content_type_bytes},
81 supported_image_types => {});
82
83 my $input_format = $cgi->get_parameter ('i');
84 if (defined $input_format and length $input_format) {
85 $self->{media_type_overridden}
86 = (not defined $self->{media_type} or
87 $input_format ne $self->{media_type});
88 $self->{media_type} = $input_format;
89 }
90 if (defined $self->{s} and not defined $self->{media_type}) {
91 $self->{media_type} = 'text/html';
92 $self->{media_type_overridden} = 1;
93 }
94
95 if ($self->{media_type} eq 'text/xml') {
96 unless (defined $self->{charset}) {
97 $self->{charset} = 'us-ascii';
98 $self->{official_charset} = $self->{charset};
99 } elsif ($self->{charset_overridden} and $self->{charset} eq 'us-ascii') {
100 $self->{charset_overridden} = 0;
101 }
102 }
103
104 $self->{inner_html_element} = $cgi->get_parameter ('e');
105
106 return $self;
107 } # get_document
108
109 sub _get_document ($$$$) {
110 die "$0: _get_document of " . ref $_[0];
111 } # _get_document
112
113 sub generate_info_section ($$) {
114 my $self = shift;
115
116 my $result = shift;
117 my $out = $result->output;
118
119 $out->start_section (id => 'document-info', title => 'Information');
120 $out->start_tag ('dl');
121
122 my $urls = $self->urls;
123
124 $out->dt (@$urls == 1 ? 'URL' : 'URLs');
125 my $url = pop @$urls;
126 for (@$urls) {
127 $out->start_tag ('dd');
128 $out->url ($_);
129 }
130 $out->start_tag ('dd');
131 $out->url ($url, id => 'anchor-document-url');
132 $out->script (q[
133 document.title = '<'
134 + document.getElementById ('anchor-document-url').href + '> \\u2014 '
135 + document.title;
136 ]);
137
138 if (defined $self->{s}) {
139 $out->dt ('Base URL');
140 $out->start_tag ('dd');
141 $out->url ($self->{base_uri});
142
143 $out->dt ('Internet Media Type');
144 $out->start_tag ('dd');
145 $out->code ($self->{media_type}, class => 'MIME', lang => 'en');
146 if ($self->{media_type_overridden}) {
147 $out->nl_text ('... overridden');
148 } elsif (defined $self->{official_type}) {
149 if ($self->{media_type} eq $self->{official_type}) {
150 #
151 } else {
152 $out->nl_text ('... sniffed, official type is #',
153 text => $self->{official_type});
154 }
155 } else {
156 $out->nl_text ( '... sniffed');
157 }
158
159 $out->dt ('Character Encoding');
160 $out->start_tag ('dd');
161 if (defined $self->{charset}) {
162 $out->code ($self->{charset}, class => 'charset', lang => 'en');
163 } else {
164 $out->nl_text ('(unknown)');
165 }
166 $out->nl_text ('... overridden') if $self->{charset_overridden};
167
168 $out->dt ($self->{is_char_string} ? 'Character Length' : 'Byte Length');
169 ## TODO: formatting
170 $out->start_tag ('dd');
171 my $length = length $self->{s};
172 $out->text ($length . ' ');
173 $out->nl_text (($self->{is_char_string} ? 'character' : 'byte') .
174 ($length == 1 ? '' : 's'));
175 }
176
177 $out->end_tag ('dl');
178 $out->end_section;
179 } # generate_info_section
180
181 sub generate_transfer_sections ($$) { }
182
183 package WebHACC::Input::HTTP;
184 push our @ISA, 'WebHACC::Input';
185
186 {
187 my $HostPermit;
188 sub host_permit ($) {
189 return $HostPermit if $HostPermit;
190
191 require Message::Util::HostPermit;
192 $HostPermit = new Message::Util::HostPermit;
193 $HostPermit->add_rule (<<'EOH');
194 Allow host=suika port=80
195 Deny host=suika
196 Allow host=suika.fam.cx port=80
197 Deny host=suika.fam.cx
198 Deny host=localhost
199 Deny host=*.localdomain
200 Deny ipv4=0.0.0.0/8
201 Deny ipv4=10.0.0.0/8
202 Deny ipv4=127.0.0.0/8
203 Deny ipv4=169.254.0.0/16
204 Deny ipv4=172.0.0.0/11
205 Deny ipv4=192.0.2.0/24
206 Deny ipv4=192.88.99.0/24
207 Deny ipv4=192.168.0.0/16
208 Deny ipv4=198.18.0.0/15
209 Deny ipv4=224.0.0.0/4
210 Deny ipv4=255.255.255.255/32
211 Deny ipv6=0::0/0
212 Allow host=*
213 EOH
214 return $HostPermit;
215 } # host_permit
216 }
217
218 sub _get_document ($$$$$) {
219 my ($self, $cgi => $result => $out, $url_o) = @_;
220
221 unless ($self->host_permit->check ($url_o->uri_host, $url_o->uri_port || 80)) {
222 $self->{error_status_text} = 'Connection to the host is forbidden';
223 bless $self, 'WebHACC::Input::Error';
224 return $self;
225 }
226
227 my $ua = WDCC::LWPUA->new;
228 $ua->{wdcc_dom} = Message::DOM::DOMImplementation->new;
229 $ua->{wdcc_host_permit} = $self->host_permit;
230 $ua->agent ('Mozilla'); ## TODO: for now.
231 $ua->parse_head (0);
232 $ua->protocols_allowed ([qw/http/]);
233 $ua->max_size (1000_000);
234 my $req = HTTP::Request->new (GET => $url_o->uri_reference);
235 $req->header ('Accept-Encoding' => 'identity, *; q=0');
236 my $res = $ua->request ($req);
237 ## TODO: 401 sets |is_success| true.
238 ## TODO: Don't follow redirect if error-page=true
239 if ($res->is_success or $cgi->get_parameter ('error-page')) {
240 $self->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
241 my $new_url = $res->request->uri;
242 $self->add_url ($new_url) if $new_url ne $self->url;
243
244 ## TODO: More strict parsing...
245 my $ct = $self->{http_content_type_bytes} = $res->header ('Content-Type');
246 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
247 $self->{charset} = lc $1;
248 $self->{charset} =~ tr/\\//d;
249 $self->{official_charset} = $self->{charset};
250 }
251
252 my $input_charset = $cgi->get_parameter ('charset');
253 if (defined $input_charset and length $input_charset) {
254 $self->{charset_overridden}
255 = (not defined $self->{charset} or $self->{charset} ne $input_charset);
256 $self->{charset} = $input_charset;
257 }
258
259 ## TODO: Support for HTTP Content-Encoding
260
261 $self->{s} = ''.$res->content;
262 } else {
263 $self->add_url ($res->request->uri);
264 $self->{error_status_text} = $res->status_line;
265 bless $self, 'WebHACC::Input::HTTPError';
266 }
267
268 $self->{header_field} = [];
269 $res->scan (sub {
270 push @{$self->{header_field}}, [$_[0], $_[1]];
271 });
272 $self->{header_status_code} = $res->code;
273 $self->{header_status_text} = $res->message;
274
275 return $self;
276 } # _get_document
277
278 sub generate_transfer_sections ($$) {
279 my $self = shift;
280 my $result = shift;
281
282 $result->layer_uncertain ('transfer');
283
284 $self->generate_http_header_section ($result);
285 } # generate_transfer_sections
286
287 sub generate_http_header_section ($$) {
288 my ($self, $result) = @_;
289
290 return unless defined $self->{header_status_code} or
291 defined $self->{header_status_text} or
292 @{$self->{header_field} or []};
293
294 my $out = $result->output;
295
296 $out->start_section (id => 'source-header', title => 'HTTP Header');
297 $out->html (qq[<p><strong>Note</strong>: Due to the limitation of the
298 network library in use, the content of this section might
299 not be the real header.</p>
300
301 <table><tbody>
302 ]);
303
304 if (defined $self->{header_status_code}) {
305 $out->html (qq[<tr><th scope="row">Status code</th>]);
306 $out->start_tag ('td');
307 $out->code ($self->{header_status_code});
308 }
309 if (defined $self->{header_status_text}) {
310 $out->html (qq[<tr><th scope="row">Status text</th>]);
311 $out->start_tag ('td');
312 $out->code ($self->{header_status_text});
313 }
314
315 for (@{$self->{header_field}}) {
316 $out->start_tag ('tr');
317 $out->start_tag ('th', scope => 'row');
318 $out->code ($_->[0]);
319 $out->start_tag ('td');
320 $out->code ($_->[1]);
321 }
322
323 $out->end_tag ('table');
324
325 $out->end_section;
326 } # generate_http_header_section
327
328 package WebHACC::Input::Text;
329 push our @ISA, 'WebHACC::Input';
330
331 sub _get_document ($$$$) {
332 my ($self, $cgi => $result => $out) = @_;
333
334 $self->add_url (q<thismessage:/>);
335 $self->{base_uri} = q<thismessage:/>;
336
337 $self->{s} = ''.$cgi->get_parameter ('s');
338 $self->{charset} = ''.$cgi->get_parameter ('_charset_');
339 $self->{charset} =~ s/\s+//g;
340 $self->{charset} = 'utf-8' if $self->{charset} eq '';
341 $self->{official_charset} = $self->{charset};
342 $self->{header_field} = [];
343
344 return $self;
345 } # _get_document
346
347 package WebHACC::Input::Subdocument;
348 push our @ISA, 'WebHACC::Input';
349
350 sub new ($$) {
351 my $self = bless {}, shift;
352 $self->{subdocument_index} = shift;
353 return $self;
354 } # new
355
356 sub id_prefix ($) {
357 my $self = shift;
358 return $self->{parent_input}->id_prefix .
359 'subdoc-' . $self->{subdocument_index} . '-';
360 } # id_prefix
361
362 sub nested ($) { 1 }
363
364 sub subdocument_index ($) {
365 return shift->{subdocument_index};
366 } # subdocument_index
367
368 sub full_subdocument_index ($) {
369 my $self = shift;
370 my $parent = $self->{parent_input}->full_subdocument_index;
371 if ($parent) {
372 return $parent . '.' . $self->{subdocument_index};
373 } else {
374 return $self->{subdocument_index};
375 }
376 } # full_subdocument_index
377
378 sub start_section ($$) {
379 my $self = shift;
380
381 my $result = shift;
382 my $out = $result->output;
383
384 my $index = $self->subdocument_index;
385 $out->start_section (id => my $id = 'subdoc-' . $index . '-',
386 title => qq[Subdocument #],
387 short_title => 'Sub #',
388 role => 'subdoc',
389 text => $self->full_subdocument_index);
390 $out->script (q[ insertNavSections ('] . $out->input->id_prefix . $id . q[') ]);
391 } # start_section
392
393 sub end_section ($$) {
394 $_[1]->output->end_section;
395 } # end_section
396
397 sub generate_info_section ($$) {
398 my $self = shift;
399
400 my $result = shift;
401 my $out = $result->output;
402
403 $out->start_section (id => 'document-info', title => 'Information');
404 $out->start_tag ('dl');
405
406 $out->dt ('Internet Media Type');
407 $out->start_tag ('dd');
408 $out->code ($self->{media_type}, code => 'MIME', lang => 'en');
409
410 if (defined $self->{container_node}) {
411 $out->dt ('Container Node');
412 $out->start_tag ('dd');
413 my $original_input = $out->input;
414 $out->input ($self->{parent_input});
415 $out->node_link ($self->{container_node});
416 $out->input ($original_input);
417 }
418
419 $out->dt ('Base URL');
420 $out->start_tag ('dd');
421 $out->url ($self->{base_uri});
422
423 $out->end_tag ('dl');
424 $out->end_section;
425 } # generate_info_section
426
427 package WebHACC::Input::Error;
428 push our @ISA, 'WebHACC::Input';
429
430 sub generate_transfer_sections ($$) {
431 my $self = shift;
432
433 my $result = shift;
434 my $out = $result->output;
435
436 $out->start_section (role => 'transfer-errors');
437
438 $out->start_tag ('dl');
439 $result->add_error (layer => 'transfer',
440 level => 'u',
441 type => 'resource retrieval error',
442 url => $self->{request_uri},
443 text => $self->{error_status_text});
444 $out->end_tag ('dl');
445
446 $out->end_section;
447 } # generate_transfer_sections
448
449 package WebHACC::Input::HTTPError;
450 push our @ISA, 'WebHACC::Input::Error', 'WebHACC::Input::HTTP';
451
452 sub generate_transfer_sections ($$) {
453 my $self = shift;
454
455 my $result = shift;
456
457 $self->WebHACC::Input::Error->generate_transfer_sections ($result);
458 $self->WebHACC::Input::HTTP->generate_transfer_sections ($result);
459
460 } # generate_transfer_sections
461
462 package WebHACC::Input::UnsupportedURLSchemeError;
463 push our @ISA, 'WebHACC::Input::Error';
464
465 sub _get_document ($$$$) {
466 my ($self, $cgi => $result => $out) = @_;
467
468 $self->{error_status_text} = 'URL scheme not allowed';
469
470 return $self;
471 } # _get_document
472
473 package WDCC::LWPUA;
474 require LWP::UserAgent;
475 push our @ISA, 'LWP::UserAgent';
476
477 sub redirect_ok {
478 my $ua = shift;
479 unless ($ua->SUPER::redirect_ok (@_)) {
480 return 0;
481 }
482
483 my $uris = $_[1]->header ('Location');
484 return 0 unless $uris;
485 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
486 unless ({
487 http => 1,
488 }->{lc $uri->uri_scheme}) { ## TODO: html5_url_scheme
489 return 0;
490 }
491 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
492 return 0;
493 }
494 return 1;
495 } # redirect_ok
496
497 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24