/[suikacvs]/webroot/gate/2007/html/WebHACC/Input.pm
Suika

Contents of /webroot/gate/2007/html/WebHACC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (show annotations) (download)
Sat Aug 16 07:42:20 2008 UTC (15 years, 9 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.9: +1 -1 lines
++ ChangeLog	16 Aug 2008 07:38:01 -0000
	* cc-script.js: Support for #index- fragment identifiers.

	* cc-style.css: Prety styling for reformatted sources.
	Support for new version of manifest dump sections.

	* error-description-source.xml: Support for Whatpm::CacheManifest,
	Whatpm::CSS::SelectorsParser, Whatpm::CSS::MediaQueryParser,
	and Whatpm::CSS::Parser errors.  Support for l10n of cache
	manifest dump sections.

2008-08-16  Wakaba  <wakaba@suika.fam.cx>

++ html/WebHACC/Language/ChangeLog	16 Aug 2008 07:42:17 -0000
	* CSS.pm, CacheManifest.pm, HTML.pm, XML.pm: Use ->url attribute to
	obtain the URL of the document.

	* CacheManifest.pm (generate_structure_dump_section): It is
	now i18n'ed.  In addition, since URLs are tend to be long,
	tables for fallback entries are replaced by |dd| entries and
	paragraphs.  "No entry" message is now handled by catalog,
	rather than CSS.

2008-08-16  Wakaba  <wakaba@suika.fam.cx>

++ html/WebHACC/ChangeLog	16 Aug 2008 07:39:54 -0000
	* Input.pm (Subdocument new): Invoke superclass's new method
	such that |urls| attribute is initialized.

	* Result.pm (add_error): Use ->url attribute to obtain
	the URL of the document.  No longer output |text| argument,
	since all error types except for those used in the WebIDL module
	are now defined in the catalog.

2008-08-16  Wakaba  <wakaba@suika.fam.cx>

1 package WebHACC::Input;
2 use strict;
3
4 sub new ($) {
5 return bless {urls => []}, shift;
6 } # new
7
8 sub id_prefix ($) { '' }
9
10 sub nested ($) { 0 }
11
12 sub subdocument_index ($) { 0 }
13
14 sub full_subdocument_index ($) { 0 }
15
16 sub url ($) {
17 my $self = shift;
18 if (@{$self->{urls}}) {
19 return $self->{urls}->[-1];
20 } else {
21 return undef;
22 }
23 } # url
24
25 sub add_url ($$) {
26 my ($self, $url) = @_;
27 push @{$self->{urls}}, ''.$url;
28 } # add_url
29
30 sub urls ($) {
31 my $self = shift;
32 return [@{$self->{urls}}];
33 } # urls
34
35 sub get_document ($$$$) {
36 my $self = shift->new;
37
38 my ($cgi => $result => $out) = @_;
39
40 $out->input ($self);
41
42 require Encode;
43 my $url_s = Encode::decode ('utf-8', $cgi->get_parameter ('uri'));
44 my $url_o;
45 if (defined $url_s and length $url_s) {
46 require Message::DOM::DOMImplementation;
47 my $dom = Message::DOM::DOMImplementation->new;
48
49 $url_o = $dom->create_uri_reference ($url_s);
50 $url_o->uri_fragment (undef);
51
52 $self->add_url ($url_o->uri_reference);
53
54 my $url_scheme = lc $url_o->uri_scheme; ## TODO: html5_url_scheme
55 my $class = {
56 http => 'WebHACC::Input::HTTP',
57 }->{$url_scheme} || 'WebHACC::Input::UnsupportedURLSchemeError';
58 bless $self, $class;
59 } else {
60 bless $self, 'WebHACC::Input::Text';
61 }
62
63 $self->_get_document ($cgi => $result => $out, $url_o);
64
65 return $self unless defined $self->{s};
66
67 if (length $self->{s} > 1000_000) {
68 $self->{error_status_text} = 'Entity-body too large';
69 delete $self->{s};
70 bless $self, 'WebHACC::Input::Error';
71 return $self;
72 }
73
74 require Whatpm::ContentType;
75 ($self->{official_type}, $self->{media_type})
76 = Whatpm::ContentType->get_sniffed_type
77 (get_file_head => sub {
78 return substr $self->{s}, 0, shift;
79 },
80 http_content_type_byte => $self->{http_content_type_bytes},
81 supported_image_types => {});
82
83 my $input_format = $cgi->get_parameter ('i');
84 if (defined $input_format and length $input_format) {
85 $self->{media_type_overridden}
86 = (not defined $self->{media_type} or
87 $input_format ne $self->{media_type});
88 $self->{media_type} = $input_format;
89 }
90 if (defined $self->{s} and not defined $self->{media_type}) {
91 $self->{media_type} = 'text/html';
92 $self->{media_type_overridden} = 1;
93 }
94
95 if ($self->{media_type} eq 'text/xml') {
96 unless (defined $self->{charset}) {
97 $self->{charset} = 'us-ascii';
98 $self->{official_charset} = $self->{charset};
99 } elsif ($self->{charset_overridden} and $self->{charset} eq 'us-ascii') {
100 $self->{charset_overridden} = 0;
101 }
102 }
103
104 $self->{inner_html_element} = $cgi->get_parameter ('e');
105
106 return $self;
107 } # get_document
108
109 sub _get_document ($$$$) {
110 die "$0: _get_document of " . ref $_[0];
111 } # _get_document
112
113 sub generate_info_section ($$) {
114 my $self = shift;
115
116 my $result = shift;
117 my $out = $result->output;
118
119 $out->start_section (id => 'document-info', title => 'Information');
120 $out->start_tag ('dl');
121
122 my $urls = $self->urls;
123
124 $out->dt (@$urls == 1 ? 'URL' : 'URLs');
125 my $url = pop @$urls;
126 for (@$urls) {
127 $out->start_tag ('dd');
128 $out->url ($_);
129 }
130 $out->start_tag ('dd');
131 $out->url ($url, id => 'anchor-document-url');
132 $out->script (q[
133 document.title = '<'
134 + document.getElementById ('anchor-document-url').href + '> \\u2014 '
135 + document.title;
136 ]);
137
138 if (defined $self->{s}) {
139 $out->dt ('Base URL');
140 $out->start_tag ('dd');
141 $out->url ($self->{base_uri});
142
143 $out->dt ('Internet Media Type');
144 $out->start_tag ('dd');
145 $out->code ($self->{media_type}, class => 'MIME', lang => 'en');
146 if ($self->{media_type_overridden}) {
147 $out->nl_text ('... overridden');
148 } elsif (defined $self->{official_type}) {
149 if ($self->{media_type} eq $self->{official_type}) {
150 #
151 } else {
152 $out->nl_text ('... sniffed, official type is #',
153 text => $self->{official_type});
154 }
155 } else {
156 $out->nl_text ( '... sniffed');
157 }
158
159 $out->dt ('Character Encoding');
160 $out->start_tag ('dd');
161 if (defined $self->{charset}) {
162 $out->code ($self->{charset}, class => 'charset', lang => 'en');
163 } else {
164 $out->nl_text ('(unknown)');
165 }
166 $out->nl_text ('... overridden') if $self->{charset_overridden};
167
168 $out->dt ($self->{is_char_string} ? 'Character Length' : 'Byte Length');
169 ## TODO: formatting
170 $out->start_tag ('dd');
171 my $length = length $self->{s};
172 $out->text ($length . ' ');
173 $out->nl_text (($self->{is_char_string} ? 'character' : 'byte') .
174 ($length == 1 ? '' : 's'));
175 }
176
177 $out->end_tag ('dl');
178 $out->end_section;
179 } # generate_info_section
180
181 sub generate_transfer_sections ($$) { }
182
183 package WebHACC::Input::HTTP;
184 push our @ISA, 'WebHACC::Input';
185
186 {
187 my $HostPermit;
188 sub host_permit ($) {
189 return $HostPermit if $HostPermit;
190
191 require Message::Util::HostPermit;
192 $HostPermit = new Message::Util::HostPermit;
193 $HostPermit->add_rule (<<'EOH');
194 Allow host=suika port=80
195 Deny host=suika
196 Allow host=suika.fam.cx port=80
197 Deny host=suika.fam.cx
198 Deny host=localhost
199 Deny host=*.localdomain
200 Deny ipv4=0.0.0.0/8
201 Deny ipv4=10.0.0.0/8
202 Deny ipv4=127.0.0.0/8
203 Deny ipv4=169.254.0.0/16
204 Deny ipv4=172.0.0.0/11
205 Deny ipv4=192.0.2.0/24
206 Deny ipv4=192.88.99.0/24
207 Deny ipv4=192.168.0.0/16
208 Deny ipv4=198.18.0.0/15
209 Deny ipv4=224.0.0.0/4
210 Deny ipv4=255.255.255.255/32
211 Deny ipv6=0::0/0
212 Allow host=*
213 EOH
214 return $HostPermit;
215 } # host_permit
216 }
217
218 sub _get_document ($$$$$) {
219 my ($self, $cgi => $result => $out, $url_o) = @_;
220
221 unless ($self->host_permit->check ($url_o->uri_host, $url_o->uri_port || 80)) {
222 $self->{error_status_text} = 'Connection to the host is forbidden';
223 bless $self, 'WebHACC::Input::Error';
224 return $self;
225 }
226
227 my $ua = WDCC::LWPUA->new;
228 $ua->{wdcc_dom} = Message::DOM::DOMImplementation->new;
229 $ua->{wdcc_host_permit} = $self->host_permit;
230 $ua->agent ('Mozilla'); ## TODO: for now.
231 $ua->parse_head (0);
232 $ua->protocols_allowed ([qw/http/]);
233 $ua->max_size (1000_000);
234 my $req = HTTP::Request->new (GET => $url_o->uri_reference);
235 $req->header ('Accept-Encoding' => 'identity, *; q=0');
236 my $res = $ua->request ($req);
237 ## TODO: 401 sets |is_success| true.
238 ## TODO: Don't follow redirect if error-page=true
239 if ($res->is_success or $cgi->get_parameter ('error-page')) {
240 $self->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
241 my $new_url = $res->request->uri;
242 $self->add_url ($new_url) if $new_url ne $self->url;
243
244 ## TODO: More strict parsing...
245 my $ct = $self->{http_content_type_bytes} = $res->header ('Content-Type');
246 if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
247 $self->{charset} = lc $1;
248 $self->{charset} =~ tr/\\//d;
249 $self->{official_charset} = $self->{charset};
250 }
251
252 my $input_charset = $cgi->get_parameter ('charset');
253 if (defined $input_charset and length $input_charset) {
254 $self->{charset_overridden}
255 = (not defined $self->{charset} or $self->{charset} ne $input_charset);
256 $self->{charset} = $input_charset;
257 }
258
259 ## TODO: Support for HTTP Content-Encoding
260
261 $self->{s} = ''.$res->content;
262 } else {
263 $self->add_url ($res->request->uri);
264 $self->{error_status_text} = $res->status_line;
265 bless $self, 'WebHACC::Input::HTTPError';
266 }
267
268 $self->{header_field} = [];
269 $res->scan (sub {
270 push @{$self->{header_field}}, [$_[0], $_[1]];
271 });
272 $self->{header_status_code} = $res->code;
273 $self->{header_status_text} = $res->message;
274
275 return $self;
276 } # _get_document
277
278 sub generate_transfer_sections ($$) {
279 my $self = shift;
280 my $result = shift;
281
282 $result->layer_uncertain ('transfer');
283
284 $self->generate_http_header_section ($result);
285 } # generate_transfer_sections
286
287 sub generate_http_header_section ($$) {
288 my ($self, $result) = @_;
289
290 return unless defined $self->{header_status_code} or
291 defined $self->{header_status_text} or
292 @{$self->{header_field} or []};
293
294 my $out = $result->output;
295
296 $out->start_section (id => 'source-header', title => 'HTTP Header');
297 $out->html (qq[<p><strong>Note</strong>: Due to the limitation of the
298 network library in use, the content of this section might
299 not be the real header.</p>
300
301 <table><tbody>
302 ]);
303
304 if (defined $self->{header_status_code}) {
305 $out->html (qq[<tr><th scope="row">Status code</th>]);
306 $out->start_tag ('td');
307 $out->code ($self->{header_status_code});
308 }
309 if (defined $self->{header_status_text}) {
310 $out->html (qq[<tr><th scope="row">Status text</th>]);
311 $out->start_tag ('td');
312 $out->code ($self->{header_status_text});
313 }
314
315 for (@{$self->{header_field}}) {
316 $out->start_tag ('tr');
317 $out->start_tag ('th', scope => 'row');
318 $out->code ($_->[0]);
319 $out->start_tag ('td');
320 $out->code ($_->[1]);
321 }
322
323 $out->end_tag ('table');
324
325 $out->end_section;
326 } # generate_http_header_section
327
328 package WebHACC::Input::Text;
329 push our @ISA, 'WebHACC::Input';
330
331 sub _get_document ($$$$) {
332 my ($self, $cgi => $result => $out) = @_;
333
334 $self->add_url (q<thismessage:/>);
335 $self->{base_uri} = q<thismessage:/>;
336
337 $self->{s} = ''.$cgi->get_parameter ('s');
338 $self->{charset} = ''.$cgi->get_parameter ('_charset_');
339 $self->{charset} =~ s/\s+//g;
340 $self->{charset} = 'utf-8' if $self->{charset} eq '';
341 $self->{official_charset} = $self->{charset};
342 $self->{header_field} = [];
343
344 return $self;
345 } # _get_document
346
347 package WebHACC::Input::Subdocument;
348 push our @ISA, 'WebHACC::Input';
349
350 sub new ($$) {
351 my $self = shift->SUPER::new;
352 $self->{subdocument_index} = shift;
353 return $self;
354 } # new
355
356 sub id_prefix ($) {
357 my $self = shift;
358 return $self->{parent_input}->id_prefix .
359 'subdoc-' . $self->{subdocument_index} . '-';
360 } # id_prefix
361
362 sub nested ($) { 1 }
363
364 sub subdocument_index ($) {
365 return shift->{subdocument_index};
366 } # subdocument_index
367
368 sub full_subdocument_index ($) {
369 my $self = shift;
370 my $parent = $self->{parent_input}->full_subdocument_index;
371 if ($parent) {
372 return $parent . '.' . $self->{subdocument_index};
373 } else {
374 return $self->{subdocument_index};
375 }
376 } # full_subdocument_index
377
378 sub start_section ($$) {
379 my $self = shift;
380
381 my $result = shift;
382 my $out = $result->output;
383
384 my $index = $self->subdocument_index;
385 $out->start_section (id => my $id = 'subdoc-' . $index . '-',
386 title => qq[Subdocument #],
387 short_title => 'Sub #',
388 role => 'subdoc',
389 text => $self->full_subdocument_index);
390 $out->script (q[ insertNavSections ('] . $out->input->id_prefix . $id . q[') ]);
391 } # start_section
392
393 sub end_section ($$) {
394 $_[1]->output->end_section;
395 } # end_section
396
397 sub generate_info_section ($$) {
398 my $self = shift;
399
400 my $result = shift;
401 my $out = $result->output;
402
403 $out->start_section (id => 'document-info', title => 'Information');
404 $out->start_tag ('dl');
405
406 $out->dt ('Internet Media Type');
407 $out->start_tag ('dd');
408 $out->code ($self->{media_type}, code => 'MIME', lang => 'en');
409
410 if (defined $self->{container_node}) {
411 $out->dt ('Container Node');
412 $out->start_tag ('dd');
413 my $original_input = $out->input;
414 $out->input ($self->{parent_input});
415 $out->node_link ($self->{container_node});
416 $out->input ($original_input);
417 }
418
419 $out->dt ('Base URL');
420 $out->start_tag ('dd');
421 $out->url ($self->{base_uri});
422
423 $out->end_tag ('dl');
424 $out->end_section;
425 } # generate_info_section
426
427 package WebHACC::Input::Error;
428 push our @ISA, 'WebHACC::Input';
429
430 sub generate_transfer_sections ($$) {
431 my $self = shift;
432
433 my $result = shift;
434 my $out = $result->output;
435
436 $out->start_section (role => 'transfer-errors');
437 $out->start_error_list (role => 'transfer-errors');
438
439 $result->layer_applicable ('transfer');
440 $result->add_error (layer => 'transfer',
441 level => 'u',
442 type => 'resource retrieval error',
443 url => $self->{request_uri},
444 text => $self->{error_status_text});
445
446 $out->end_error_list (role => 'transfer-errors');
447 $out->end_section;
448 } # generate_transfer_sections
449
450 package WebHACC::Input::HTTPError;
451 push our @ISA, 'WebHACC::Input::Error', 'WebHACC::Input::HTTP';
452
453 sub generate_transfer_sections ($$) {
454 my $self = shift;
455
456 my $result = shift;
457
458 $self->WebHACC::Input::Error::generate_transfer_sections ($result);
459 $self->WebHACC::Input::HTTP::generate_transfer_sections ($result);
460
461 } # generate_transfer_sections
462
463 package WebHACC::Input::UnsupportedURLSchemeError;
464 push our @ISA, 'WebHACC::Input::Error';
465
466 sub _get_document ($$$$) {
467 my ($self, $cgi => $result => $out) = @_;
468
469 $self->{error_status_text} = 'URL scheme not allowed';
470
471 return $self;
472 } # _get_document
473
474 package WDCC::LWPUA;
475 require LWP::UserAgent;
476 push our @ISA, 'LWP::UserAgent';
477
478 sub redirect_ok {
479 my $ua = shift;
480 unless ($ua->SUPER::redirect_ok (@_)) {
481 return 0;
482 }
483
484 my $uris = $_[1]->header ('Location');
485 return 0 unless $uris;
486 my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
487 unless ({
488 http => 1,
489 }->{lc $uri->uri_scheme}) { ## TODO: html5_url_scheme
490 return 0;
491 }
492 unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
493 return 0;
494 }
495 return 1;
496 } # redirect_ok
497
498 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24