/[suikacvs]/test/html-webhacc/WebHACC/Input.pm
Suika

Diff of /test/html-webhacc/WebHACC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.5 by wakaba, Mon Jul 21 12:56:34 2008 UTC revision 1.6 by wakaba, Sat Jul 26 11:27:25 2008 UTC
# Line 2  package WebHACC::Input; Line 2  package WebHACC::Input;
2  use strict;  use strict;
3    
4  sub new ($) {  sub new ($) {
5    return bless {}, shift;    return bless {urls => []}, shift;
6  } # new  } # new
7    
8  sub id_prefix ($) { '' }  sub id_prefix ($) { '' }
# Line 13  sub subdocument_index ($) { 0 } Line 13  sub subdocument_index ($) { 0 }
13    
14  sub full_subdocument_index ($) { 0 }  sub full_subdocument_index ($) { 0 }
15    
16    sub url ($) {
17      my $self = shift;
18      if (@{$self->{urls}}) {
19        return $self->{urls}->[-1];
20      } else {
21        return undef;
22      }
23    } # url
24    
25    sub add_url ($$) {
26      my ($self, $url) = @_;
27      push @{$self->{urls}}, ''.$url;
28    } # add_url
29    
30    sub urls ($) {
31      my $self = shift;
32      return [@{$self->{urls}}];
33    } # urls
34    
35    sub get_document ($$$$) {
36      my $self = shift->new;
37    
38      my ($cgi => $result => $out) = @_;
39    
40      $out->input ($self);
41    
42      require Encode;
43      my $url_s = Encode::decode ('utf-8', $cgi->get_parameter ('uri'));
44      my $url_o;
45      if (defined $url_s and length $url_s) {
46        require Message::DOM::DOMImplementation;
47        my $dom = Message::DOM::DOMImplementation->new;
48        
49        $url_o = $dom->create_uri_reference ($url_s);
50        $url_o->uri_fragment (undef);
51    
52        $self->add_url ($url_o->uri_reference);
53    
54        my $url_scheme = lc $url_o->uri_scheme; ## TODO: html5_url_scheme
55        my $class = {
56          http => 'WebHACC::Input::HTTP',
57        }->{$url_scheme} || 'WebHACC::Input::UnsupportedURLSchemeError';
58        bless $self, $class;
59      } else {
60        bless $self, 'WebHACC::Input::Text';
61      }
62    
63      $self->_get_document ($cgi => $result => $out, $url_o);
64    
65      return $self unless defined $self->{s};
66    
67      if (length $self->{s} > 1000_000) {
68        $self->{error_status_text} = 'Entity-body too large';
69        delete $self->{s};
70        bless $self, 'WebHACC::Input::Error';
71        return $self;
72      }
73    
74      require Whatpm::ContentType;
75      ($self->{official_type}, $self->{media_type})
76            = Whatpm::ContentType->get_sniffed_type
77                (get_file_head => sub {
78                   return substr $self->{s}, 0, shift;
79                 },
80                 http_content_type_byte => $self->{http_content_type_bytes},
81                 supported_image_types => {});
82    
83      my $input_format = $cgi->get_parameter ('i');
84      if (defined $input_format and length $input_format) {
85        $self->{media_type_overridden}
86            = (not defined $self->{media_type} or
87               $input_format ne $self->{media_type});
88        $self->{media_type} = $input_format;
89      }
90      if (defined $self->{s} and not defined $self->{media_type}) {
91        $self->{media_type} = 'text/html';
92        $self->{media_type_overridden} = 1;
93      }
94    
95      if ($self->{media_type} eq 'text/xml') {
96        unless (defined $self->{charset}) {
97          $self->{charset} = 'us-ascii';
98          $self->{official_charset} = $self->{charset};
99        } elsif ($self->{charset_overridden} and $self->{charset} eq 'us-ascii') {
100          $self->{charset_overridden} = 0;
101        }
102      }
103    
104      $self->{inner_html_element} = $cgi->get_parameter ('e');
105    
106      return $self;
107    } # get_document
108    
109    sub _get_document ($$$$) {
110      die "$0: _get_document of " . ref $_[0];
111    } # _get_document
112    
113  sub generate_info_section ($$) {  sub generate_info_section ($$) {
114    my $self = shift;    my $self = shift;
115        
# Line 22  sub generate_info_section ($$) { Line 119  sub generate_info_section ($$) {
119    $out->start_section (id => 'document-info', title => 'Information');    $out->start_section (id => 'document-info', title => 'Information');
120    $out->start_tag ('dl');    $out->start_tag ('dl');
121    
122    $out->dt ('Request URL');    my $urls = $self->urls;
   $out->start_tag ('dd');  
   $out->url ($self->{request_uri});  
123    
124    $out->dt ('Document URL'); ## TODO: HTML5 "document's address"?    $out->dt (@$urls == 1 ? 'URL' : 'URLs');
125      my $url = pop @$urls;
126      for (@$urls) {
127        $out->start_tag ('dd');
128        $out->url ($_);
129      }
130    $out->start_tag ('dd');    $out->start_tag ('dd');
131    $out->url ($self->{uri}, id => 'anchor-document-url');    $out->url ($url, id => 'anchor-document-url');
132    $out->script (q[    $out->script (q[
133        document.title = '<'        document.title = '<'
134            + document.getElementById ('anchor-document-url').href + '> \\u2014 '            + document.getElementById ('anchor-document-url').href + '> \\u2014 '
# Line 126  not be the real header.</p> Line 226  not be the real header.</p>
226    $out->end_section;    $out->end_section;
227  } # generate_http_header_section  } # generate_http_header_section
228    
229    package WebHACC::Input::HTTP;
230    push our @ISA, 'WebHACC::Input';
231    
232    {
233    my $HostPermit;
234    sub host_permit ($) {
235      return $HostPermit if $HostPermit;
236      
237      require Message::Util::HostPermit;
238      $HostPermit = new Message::Util::HostPermit;
239      $HostPermit->add_rule (<<'EOH');
240    Allow host=suika port=80
241    Deny host=suika
242    Allow host=suika.fam.cx port=80
243    Deny host=suika.fam.cx
244    Deny host=localhost
245    Deny host=*.localdomain
246    Deny ipv4=0.0.0.0/8
247    Deny ipv4=10.0.0.0/8
248    Deny ipv4=127.0.0.0/8
249    Deny ipv4=169.254.0.0/16
250    Deny ipv4=172.0.0.0/11
251    Deny ipv4=192.0.2.0/24
252    Deny ipv4=192.88.99.0/24
253    Deny ipv4=192.168.0.0/16
254    Deny ipv4=198.18.0.0/15
255    Deny ipv4=224.0.0.0/4
256    Deny ipv4=255.255.255.255/32
257    Deny ipv6=0::0/0
258    Allow host=*
259    EOH
260      return $HostPermit;
261    } # host_permit
262    }
263    
264    sub _get_document ($$$$$) {
265      my ($self, $cgi => $result => $out, $url_o) = @_;
266    
267      unless ($self->host_permit->check ($url_o->uri_host, $url_o->uri_port || 80)) {
268        $self->{error_status_text} = 'Connection to the host is forbidden';
269        return $self;
270      }
271    
272      my $ua = WDCC::LWPUA->new;
273      $ua->{wdcc_dom} = Message::DOM::DOMImplementation->new;
274      $ua->{wdcc_host_permit} = $self->host_permit;
275      $ua->agent ('Mozilla'); ## TODO: for now.
276      $ua->parse_head (0);
277      $ua->protocols_allowed ([qw/http/]);
278      $ua->max_size (1000_000);
279      my $req = HTTP::Request->new (GET => $url_o->uri_reference);
280      $req->header ('Accept-Encoding' => 'identity, *; q=0');
281      my $res = $ua->request ($req);
282      ## TODO: 401 sets |is_success| true.
283      ## TODO: Don't follow redirect if error-page=true
284      if ($res->is_success or $cgi->get_parameter ('error-page')) {
285        $self->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
286        my $new_url = $res->request->uri;
287        $self->add_url ($new_url) if $new_url ne $self->url;
288        
289        ## TODO: More strict parsing...
290        my $ct = $self->{http_content_type_bytes} = $res->header ('Content-Type');
291        if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
292          $self->{charset} = lc $1;
293          $self->{charset} =~ tr/\\//d;
294          $self->{official_charset} = $self->{charset};
295        }
296        
297        my $input_charset = $cgi->get_parameter ('charset');
298        if (defined $input_charset and length $input_charset) {
299          $self->{charset_overridden}
300              = (not defined $self->{charset} or $self->{charset} ne $input_charset);
301          $self->{charset} = $input_charset;
302        }
303    
304        ## TODO: Support for HTTP Content-Encoding
305        
306        $self->{s} = ''.$res->content;
307      } else {
308        $self->add_url ($res->request->uri);
309        $self->{error_status_text} = $res->status_line;
310        bless $self, 'WebHACC::Input::HTTPError';
311      }
312                  
313      $self->{header_field} = [];
314      $res->scan (sub {
315        push @{$self->{header_field}}, [$_[0], $_[1]];
316      });
317      $self->{header_status_code} = $res->code;
318      $self->{header_status_text} = $res->message;
319    
320      return $self;
321    } # _get_document
322    
323    package WebHACC::Input::Text;
324    push our @ISA, 'WebHACC::Input';
325    
326    sub _get_document ($$$$) {
327      my ($self, $cgi => $result => $out) = @_;
328      
329      $self->add_url (q<thismessage:/>);
330      $self->{base_uri} = q<thismessage:/>;
331        
332      $self->{s} = ''.$cgi->get_parameter ('s');
333      $self->{charset} = ''.$cgi->get_parameter ('_charset_');
334      $self->{charset} =~ s/\s+//g;
335      $self->{charset} = 'utf-8' if $self->{charset} eq '';
336      $self->{official_charset} = $self->{charset};
337      $self->{header_field} = [];
338    
339      return $self;
340    } # _get_document
341    
342  package WebHACC::Input::Subdocument;  package WebHACC::Input::Subdocument;
343  push our @ISA, 'WebHACC::Input';  push our @ISA, 'WebHACC::Input';
344    
# Line 226  sub generate_transfer_sections ($$) { Line 439  sub generate_transfer_sections ($$) {
439    $out->end_section;    $out->end_section;
440  } # generate_transfer_sections  } # generate_transfer_sections
441    
442    package WebHACC::Input::HTTPError;
443    push our @ISA, 'WebHACC::Input::Error', 'WebHACC::Input::HTTP';
444    
445    package WebHACC::Input::UnsupportedURLSchemeError;
446    push our @ISA, 'WebHACC::Input::Error';
447    
448    sub _get_document ($$$$) {
449      my ($self, $cgi => $result => $out) = @_;
450      
451      $self->{error_status_text} = 'URL scheme not allowed';
452    
453      return $self;
454    } # _get_document
455    
456    package WDCC::LWPUA;
457    require LWP::UserAgent;
458    push our @ISA, 'LWP::UserAgent';
459    
460    sub redirect_ok {
461      my $ua = shift;
462      unless ($ua->SUPER::redirect_ok (@_)) {
463        return 0;
464      }
465    
466      my $uris = $_[1]->header ('Location');
467      return 0 unless $uris;
468      my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
469      unless ({
470               http => 1,
471              }->{lc $uri->uri_scheme}) { ## TODO: html5_url_scheme
472        return 0;
473      }
474      unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
475        return 0;
476      }
477      return 1;
478    } # redirect_ok
479    
480  1;  1;

Legend:
Removed from v.1.5  
changed lines
  Added in v.1.6

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24