/[suikacvs]/test/html-webhacc/WebHACC/Input.pm
Suika

Diff of /test/html-webhacc/WebHACC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.3 by wakaba, Mon Jul 21 05:24:32 2008 UTC revision 1.10 by wakaba, Sat Aug 16 07:42:20 2008 UTC
# Line 2  package WebHACC::Input; Line 2  package WebHACC::Input;
2  use strict;  use strict;
3    
4  sub new ($) {  sub new ($) {
5    return bless {}, shift;    return bless {urls => []}, shift;
6  } # new  } # new
7    
8  sub id_prefix ($) { '' }  sub id_prefix ($) { '' }
# Line 11  sub nested ($) { 0 } Line 11  sub nested ($) { 0 }
11    
12  sub subdocument_index ($) { 0 }  sub subdocument_index ($) { 0 }
13    
14    sub full_subdocument_index ($) { 0 }
15    
16    sub url ($) {
17      my $self = shift;
18      if (@{$self->{urls}}) {
19        return $self->{urls}->[-1];
20      } else {
21        return undef;
22      }
23    } # url
24    
25    sub add_url ($$) {
26      my ($self, $url) = @_;
27      push @{$self->{urls}}, ''.$url;
28    } # add_url
29    
30    sub urls ($) {
31      my $self = shift;
32      return [@{$self->{urls}}];
33    } # urls
34    
35    sub get_document ($$$$) {
36      my $self = shift->new;
37    
38      my ($cgi => $result => $out) = @_;
39    
40      $out->input ($self);
41    
42      require Encode;
43      my $url_s = Encode::decode ('utf-8', $cgi->get_parameter ('uri'));
44      my $url_o;
45      if (defined $url_s and length $url_s) {
46        require Message::DOM::DOMImplementation;
47        my $dom = Message::DOM::DOMImplementation->new;
48        
49        $url_o = $dom->create_uri_reference ($url_s);
50        $url_o->uri_fragment (undef);
51    
52        $self->add_url ($url_o->uri_reference);
53    
54        my $url_scheme = lc $url_o->uri_scheme; ## TODO: html5_url_scheme
55        my $class = {
56          http => 'WebHACC::Input::HTTP',
57        }->{$url_scheme} || 'WebHACC::Input::UnsupportedURLSchemeError';
58        bless $self, $class;
59      } else {
60        bless $self, 'WebHACC::Input::Text';
61      }
62    
63      $self->_get_document ($cgi => $result => $out, $url_o);
64    
65      return $self unless defined $self->{s};
66    
67      if (length $self->{s} > 1000_000) {
68        $self->{error_status_text} = 'Entity-body too large';
69        delete $self->{s};
70        bless $self, 'WebHACC::Input::Error';
71        return $self;
72      }
73    
74      require Whatpm::ContentType;
75      ($self->{official_type}, $self->{media_type})
76            = Whatpm::ContentType->get_sniffed_type
77                (get_file_head => sub {
78                   return substr $self->{s}, 0, shift;
79                 },
80                 http_content_type_byte => $self->{http_content_type_bytes},
81                 supported_image_types => {});
82    
83      my $input_format = $cgi->get_parameter ('i');
84      if (defined $input_format and length $input_format) {
85        $self->{media_type_overridden}
86            = (not defined $self->{media_type} or
87               $input_format ne $self->{media_type});
88        $self->{media_type} = $input_format;
89      }
90      if (defined $self->{s} and not defined $self->{media_type}) {
91        $self->{media_type} = 'text/html';
92        $self->{media_type_overridden} = 1;
93      }
94    
95      if ($self->{media_type} eq 'text/xml') {
96        unless (defined $self->{charset}) {
97          $self->{charset} = 'us-ascii';
98          $self->{official_charset} = $self->{charset};
99        } elsif ($self->{charset_overridden} and $self->{charset} eq 'us-ascii') {
100          $self->{charset_overridden} = 0;
101        }
102      }
103    
104      $self->{inner_html_element} = $cgi->get_parameter ('e');
105    
106      return $self;
107    } # get_document
108    
109    sub _get_document ($$$$) {
110      die "$0: _get_document of " . ref $_[0];
111    } # _get_document
112    
113  sub generate_info_section ($$) {  sub generate_info_section ($$) {
114    my $self = shift;    my $self = shift;
115        
# Line 20  sub generate_info_section ($$) { Line 119  sub generate_info_section ($$) {
119    $out->start_section (id => 'document-info', title => 'Information');    $out->start_section (id => 'document-info', title => 'Information');
120    $out->start_tag ('dl');    $out->start_tag ('dl');
121    
122    $out->dt ('Request URL');    my $urls = $self->urls;
   $out->start_tag ('dd');  
   $out->url ($self->{request_uri});  
123    
124    $out->dt ('Document URL'); ## TODO: HTML5 "document's address"?    $out->dt (@$urls == 1 ? 'URL' : 'URLs');
125      my $url = pop @$urls;
126      for (@$urls) {
127        $out->start_tag ('dd');
128        $out->url ($_);
129      }
130    $out->start_tag ('dd');    $out->start_tag ('dd');
131    $out->url ($self->{uri}, id => 'anchor-document-url');    $out->url ($url, id => 'anchor-document-url');
132    $out->script (q[    $out->script (q[
133        document.title = '<'        document.title = '<'
134            + document.getElementById ('anchor-document-url').href + '> \\u2014 '            + document.getElementById ('anchor-document-url').href + '> \\u2014 '
# Line 42  sub generate_info_section ($$) { Line 144  sub generate_info_section ($$) {
144      $out->start_tag ('dd');      $out->start_tag ('dd');
145      $out->code ($self->{media_type}, class => 'MIME', lang => 'en');      $out->code ($self->{media_type}, class => 'MIME', lang => 'en');
146      if ($self->{media_type_overridden}) {      if ($self->{media_type_overridden}) {
147        $out->html (' <em>(overridden)</em>');        $out->nl_text ('... overridden');
148      } elsif (defined $self->{official_type}) {      } elsif (defined $self->{official_type}) {
149        if ($self->{media_type} eq $self->{official_type}) {        if ($self->{media_type} eq $self->{official_type}) {
150          #          #
151        } else {        } else {
152          $out->html (' <em>(sniffed; official type is: ');          $out->nl_text ('... sniffed, official type is #',
153          $out->code ($self->{official_type}, class => 'MIME', lang => 'en');                         text => $self->{official_type});
         $out->html (')</em>');  
154        }        }
155      } else {      } else {
156        $out->html ( '<em>(sniffed)</em>');        $out->nl_text ( '... sniffed');
157      }      }
158    
159      $out->dt ('Character Encoding');      $out->dt ('Character Encoding');
# Line 60  sub generate_info_section ($$) { Line 161  sub generate_info_section ($$) {
161      if (defined $self->{charset}) {      if (defined $self->{charset}) {
162        $out->code ($self->{charset}, class => 'charset', lang => 'en');        $out->code ($self->{charset}, class => 'charset', lang => 'en');
163      } else {      } else {
164        $out->text ('(none)');        $out->nl_text ('(unknown)');
165      }      }
166      $out->html (' <em>overridden</em>') if $self->{charset_overridden};      $out->nl_text ('... overridden') if $self->{charset_overridden};
167    
168      $out->dt ($self->{is_char_string} ? 'Character Length' : 'Byte Length');      $out->dt ($self->{is_char_string} ? 'Character Length' : 'Byte Length');
169      ## TODO: formatting      ## TODO: formatting
170      $out->start_tag ('dd');      $out->start_tag ('dd');
171      my $length = length $self->{s};      my $length = length $self->{s};
172      $out->text ($length . ($self->{is_char_string} ? ' character' : ' byte') .      $out->text ($length . ' ');
173                  ($length == 1 ? '' : 's'));      $out->nl_text (($self->{is_char_string} ? 'character' : 'byte') .
174                       ($length == 1 ? '' : 's'));
175    }    }
176    
177    $out->end_tag ('dl');    $out->end_tag ('dl');
178    $out->end_section;    $out->end_section;
179  } # generate_info_section  } # generate_info_section
180    
181    sub generate_transfer_sections ($$) { }
182    
183    package WebHACC::Input::HTTP;
184    push our @ISA, 'WebHACC::Input';
185    
186    {
187    my $HostPermit;
188    sub host_permit ($) {
189      return $HostPermit if $HostPermit;
190      
191      require Message::Util::HostPermit;
192      $HostPermit = new Message::Util::HostPermit;
193      $HostPermit->add_rule (<<'EOH');
194    Allow host=suika port=80
195    Deny host=suika
196    Allow host=suika.fam.cx port=80
197    Deny host=suika.fam.cx
198    Deny host=localhost
199    Deny host=*.localdomain
200    Deny ipv4=0.0.0.0/8
201    Deny ipv4=10.0.0.0/8
202    Deny ipv4=127.0.0.0/8
203    Deny ipv4=169.254.0.0/16
204    Deny ipv4=172.0.0.0/11
205    Deny ipv4=192.0.2.0/24
206    Deny ipv4=192.88.99.0/24
207    Deny ipv4=192.168.0.0/16
208    Deny ipv4=198.18.0.0/15
209    Deny ipv4=224.0.0.0/4
210    Deny ipv4=255.255.255.255/32
211    Deny ipv6=0::0/0
212    Allow host=*
213    EOH
214      return $HostPermit;
215    } # host_permit
216    }
217    
218    sub _get_document ($$$$$) {
219      my ($self, $cgi => $result => $out, $url_o) = @_;
220    
221      unless ($self->host_permit->check ($url_o->uri_host, $url_o->uri_port || 80)) {
222        $self->{error_status_text} = 'Connection to the host is forbidden';
223        bless $self, 'WebHACC::Input::Error';
224        return $self;
225      }
226    
227      my $ua = WDCC::LWPUA->new;
228      $ua->{wdcc_dom} = Message::DOM::DOMImplementation->new;
229      $ua->{wdcc_host_permit} = $self->host_permit;
230      $ua->agent ('Mozilla'); ## TODO: for now.
231      $ua->parse_head (0);
232      $ua->protocols_allowed ([qw/http/]);
233      $ua->max_size (1000_000);
234      my $req = HTTP::Request->new (GET => $url_o->uri_reference);
235      $req->header ('Accept-Encoding' => 'identity, *; q=0');
236      my $res = $ua->request ($req);
237      ## TODO: 401 sets |is_success| true.
238      ## TODO: Don't follow redirect if error-page=true
239      if ($res->is_success or $cgi->get_parameter ('error-page')) {
240        $self->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
241        my $new_url = $res->request->uri;
242        $self->add_url ($new_url) if $new_url ne $self->url;
243        
244        ## TODO: More strict parsing...
245        my $ct = $self->{http_content_type_bytes} = $res->header ('Content-Type');
246        if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
247          $self->{charset} = lc $1;
248          $self->{charset} =~ tr/\\//d;
249          $self->{official_charset} = $self->{charset};
250        }
251        
252        my $input_charset = $cgi->get_parameter ('charset');
253        if (defined $input_charset and length $input_charset) {
254          $self->{charset_overridden}
255              = (not defined $self->{charset} or $self->{charset} ne $input_charset);
256          $self->{charset} = $input_charset;
257        }
258    
259        ## TODO: Support for HTTP Content-Encoding
260        
261        $self->{s} = ''.$res->content;
262      } else {
263        $self->add_url ($res->request->uri);
264        $self->{error_status_text} = $res->status_line;
265        bless $self, 'WebHACC::Input::HTTPError';
266      }
267                  
268      $self->{header_field} = [];
269      $res->scan (sub {
270        push @{$self->{header_field}}, [$_[0], $_[1]];
271      });
272      $self->{header_status_code} = $res->code;
273      $self->{header_status_text} = $res->message;
274    
275      return $self;
276    } # _get_document
277    
278  sub generate_transfer_sections ($$) {  sub generate_transfer_sections ($$) {
279    my $self = shift;    my $self = shift;
280    my $result = shift;    my $result = shift;
281    
282      $result->layer_uncertain ('transfer');
283        
284    $self->generate_http_header_section ($result);    $self->generate_http_header_section ($result);
285  } # generate_transfer_sections  } # generate_transfer_sections
# Line 124  not be the real header.</p> Line 325  not be the real header.</p>
325    $out->end_section;    $out->end_section;
326  } # generate_http_header_section  } # generate_http_header_section
327    
328    package WebHACC::Input::Text;
329    push our @ISA, 'WebHACC::Input';
330    
331    sub _get_document ($$$$) {
332      my ($self, $cgi => $result => $out) = @_;
333      
334      $self->add_url (q<thismessage:/>);
335      $self->{base_uri} = q<thismessage:/>;
336        
337      $self->{s} = ''.$cgi->get_parameter ('s');
338      $self->{charset} = ''.$cgi->get_parameter ('_charset_');
339      $self->{charset} =~ s/\s+//g;
340      $self->{charset} = 'utf-8' if $self->{charset} eq '';
341      $self->{official_charset} = $self->{charset};
342      $self->{header_field} = [];
343    
344      return $self;
345    } # _get_document
346    
347  package WebHACC::Input::Subdocument;  package WebHACC::Input::Subdocument;
348  push our @ISA, 'WebHACC::Input';  push our @ISA, 'WebHACC::Input';
349    
350  sub new ($$) {  sub new ($$) {
351    my $self = bless {}, shift;    my $self = shift->SUPER::new;
352    $self->{subdocument_index} = shift;    $self->{subdocument_index} = shift;
353    return $self;    return $self;
354  } # new  } # new
355    
356  sub id_prefix ($) {  sub id_prefix ($) {
357    return 'subdoc-' . shift->{subdocument_index} . '-';    my $self = shift;
358      return $self->{parent_input}->id_prefix .
359          'subdoc-' . $self->{subdocument_index} . '-';
360  } # id_prefix  } # id_prefix
361    
362  sub nested ($) { 1 }  sub nested ($) { 1 }
# Line 143  sub subdocument_index ($) { Line 365  sub subdocument_index ($) {
365    return shift->{subdocument_index};    return shift->{subdocument_index};
366  } # subdocument_index  } # subdocument_index
367    
368    sub full_subdocument_index ($) {
369      my $self = shift;
370      my $parent = $self->{parent_input}->full_subdocument_index;
371      if ($parent) {
372        return $parent . '.' . $self->{subdocument_index};
373      } else {
374        return $self->{subdocument_index};
375      }
376    } # full_subdocument_index
377    
378  sub start_section ($$) {  sub start_section ($$) {
379    my $self = shift;    my $self = shift;
380    
381    my $result = shift;    my $result = shift;
382    my $out = $result->output;    my $out = $result->output;
383    
384    $out->start_section (id => $self->id_prefix,    my $index = $self->subdocument_index;
385                         title => qq[Subdocument #] . $self->subdocument_index,    $out->start_section (id => my $id = 'subdoc-' . $index . '-',
386                         short_title => 'Sub #' . $self->subdocument_index);                         title => qq[Subdocument #],
387                           short_title => 'Sub #',
388                           role => 'subdoc',
389                           text => $self->full_subdocument_index);
390      $out->script (q[ insertNavSections ('] . $out->input->id_prefix . $id . q[') ]);
391  } # start_section  } # start_section
392    
393  sub end_section ($$) {  sub end_section ($$) {
# Line 193  push our @ISA, 'WebHACC::Input'; Line 429  push our @ISA, 'WebHACC::Input';
429    
430  sub generate_transfer_sections ($$) {  sub generate_transfer_sections ($$) {
431    my $self = shift;    my $self = shift;
   
   $self->SUPER::generate_transfer_sections (@_);  
432        
433    my $result = shift;    my $result = shift;
434    my $out = $result->output;    my $out = $result->output;
435    
436    $out->start_section (id => 'transfer-errors', title => 'Transfer Errors');    $out->start_section (role => 'transfer-errors');
437      $out->start_error_list (role => 'transfer-errors');
438    
439    $out->start_tag ('dl');    $result->layer_applicable ('transfer');
440    $result->add_error (layer => 'transfer',    $result->add_error (layer => 'transfer',
441                        level => 'u',                        level => 'u',
442                        type => 'resource retrieval error',                        type => 'resource retrieval error',
443                        url => $self->{request_uri},                        url => $self->{request_uri},
444                        text => $self->{error_status_text});                        text => $self->{error_status_text});
   $out->end_tag ('dl');  
445    
446      $out->end_error_list (role => 'transfer-errors');
447    $out->end_section;    $out->end_section;
448  } # generate_transfer_sections  } # generate_transfer_sections
449    
450    package WebHACC::Input::HTTPError;
451    push our @ISA, 'WebHACC::Input::Error', 'WebHACC::Input::HTTP';
452    
453    sub generate_transfer_sections ($$) {
454      my $self = shift;
455      
456      my $result = shift;
457    
458      $self->WebHACC::Input::Error::generate_transfer_sections ($result);
459      $self->WebHACC::Input::HTTP::generate_transfer_sections ($result);
460    
461    } # generate_transfer_sections
462    
463    package WebHACC::Input::UnsupportedURLSchemeError;
464    push our @ISA, 'WebHACC::Input::Error';
465    
466    sub _get_document ($$$$) {
467      my ($self, $cgi => $result => $out) = @_;
468      
469      $self->{error_status_text} = 'URL scheme not allowed';
470    
471      return $self;
472    } # _get_document
473    
474    package WDCC::LWPUA;
475    require LWP::UserAgent;
476    push our @ISA, 'LWP::UserAgent';
477    
478    sub redirect_ok {
479      my $ua = shift;
480      unless ($ua->SUPER::redirect_ok (@_)) {
481        return 0;
482      }
483    
484      my $uris = $_[1]->header ('Location');
485      return 0 unless $uris;
486      my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
487      unless ({
488               http => 1,
489              }->{lc $uri->uri_scheme}) { ## TODO: html5_url_scheme
490        return 0;
491      }
492      unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
493        return 0;
494      }
495      return 1;
496    } # redirect_ok
497    
498  1;  1;

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.10

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24