/[suikacvs]/test/html-webhacc/WebHACC/Input.pm
Suika

Diff of /test/html-webhacc/WebHACC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1 by wakaba, Sun Jul 20 14:58:24 2008 UTC revision 1.10 by wakaba, Sat Aug 16 07:42:20 2008 UTC
# Line 2  package WebHACC::Input; Line 2  package WebHACC::Input;
2  use strict;  use strict;
3    
4  sub new ($) {  sub new ($) {
5    return bless {id_prefix => ''}, shift;    return bless {urls => []}, shift;
6  } # new  } # new
7    
8  sub id_prefix ($;$) {  sub id_prefix ($) { '' }
9    if (@_ > 1) {  
10      if (defined $_[1]) {  sub nested ($) { 0 }
11        $_[0]->{id_prefix} = ''.$_[1];  
12      } else {  sub subdocument_index ($) { 0 }
13        $_[0]->{id_prefix} = '';  
14    sub full_subdocument_index ($) { 0 }
15    
16    sub url ($) {
17      my $self = shift;
18      if (@{$self->{urls}}) {
19        return $self->{urls}->[-1];
20      } else {
21        return undef;
22      }
23    } # url
24    
25    sub add_url ($$) {
26      my ($self, $url) = @_;
27      push @{$self->{urls}}, ''.$url;
28    } # add_url
29    
30    sub urls ($) {
31      my $self = shift;
32      return [@{$self->{urls}}];
33    } # urls
34    
35    sub get_document ($$$$) {
36      my $self = shift->new;
37    
38      my ($cgi => $result => $out) = @_;
39    
40      $out->input ($self);
41    
42      require Encode;
43      my $url_s = Encode::decode ('utf-8', $cgi->get_parameter ('uri'));
44      my $url_o;
45      if (defined $url_s and length $url_s) {
46        require Message::DOM::DOMImplementation;
47        my $dom = Message::DOM::DOMImplementation->new;
48        
49        $url_o = $dom->create_uri_reference ($url_s);
50        $url_o->uri_fragment (undef);
51    
52        $self->add_url ($url_o->uri_reference);
53    
54        my $url_scheme = lc $url_o->uri_scheme; ## TODO: html5_url_scheme
55        my $class = {
56          http => 'WebHACC::Input::HTTP',
57        }->{$url_scheme} || 'WebHACC::Input::UnsupportedURLSchemeError';
58        bless $self, $class;
59      } else {
60        bless $self, 'WebHACC::Input::Text';
61      }
62    
63      $self->_get_document ($cgi => $result => $out, $url_o);
64    
65      return $self unless defined $self->{s};
66    
67      if (length $self->{s} > 1000_000) {
68        $self->{error_status_text} = 'Entity-body too large';
69        delete $self->{s};
70        bless $self, 'WebHACC::Input::Error';
71        return $self;
72      }
73    
74      require Whatpm::ContentType;
75      ($self->{official_type}, $self->{media_type})
76            = Whatpm::ContentType->get_sniffed_type
77                (get_file_head => sub {
78                   return substr $self->{s}, 0, shift;
79                 },
80                 http_content_type_byte => $self->{http_content_type_bytes},
81                 supported_image_types => {});
82    
83      my $input_format = $cgi->get_parameter ('i');
84      if (defined $input_format and length $input_format) {
85        $self->{media_type_overridden}
86            = (not defined $self->{media_type} or
87               $input_format ne $self->{media_type});
88        $self->{media_type} = $input_format;
89      }
90      if (defined $self->{s} and not defined $self->{media_type}) {
91        $self->{media_type} = 'text/html';
92        $self->{media_type_overridden} = 1;
93      }
94    
95      if ($self->{media_type} eq 'text/xml') {
96        unless (defined $self->{charset}) {
97          $self->{charset} = 'us-ascii';
98          $self->{official_charset} = $self->{charset};
99        } elsif ($self->{charset_overridden} and $self->{charset} eq 'us-ascii') {
100          $self->{charset_overridden} = 0;
101      }      }
102    }    }
103    
104    return $_[0]->{id_prefix};    $self->{inner_html_element} = $cgi->get_parameter ('e');
105  } # id_prefix  
106      return $self;
107    } # get_document
108    
109  sub nested ($;$) {  sub _get_document ($$$$) {
110    if (@_ > 1) {    die "$0: _get_document of " . ref $_[0];
111      if ($_[1]) {  } # _get_document
112        $_[0]->{nested} = 1;  
113    sub generate_info_section ($$) {
114      my $self = shift;
115      
116      my $result = shift;
117      my $out = $result->output;
118    
119      $out->start_section (id => 'document-info', title => 'Information');
120      $out->start_tag ('dl');
121    
122      my $urls = $self->urls;
123    
124      $out->dt (@$urls == 1 ? 'URL' : 'URLs');
125      my $url = pop @$urls;
126      for (@$urls) {
127        $out->start_tag ('dd');
128        $out->url ($_);
129      }
130      $out->start_tag ('dd');
131      $out->url ($url, id => 'anchor-document-url');
132      $out->script (q[
133          document.title = '<'
134              + document.getElementById ('anchor-document-url').href + '> \\u2014 '
135              + document.title;
136      ]);
137    
138      if (defined $self->{s}) {
139        $out->dt ('Base URL');
140        $out->start_tag ('dd');
141        $out->url ($self->{base_uri});
142        
143        $out->dt ('Internet Media Type');
144        $out->start_tag ('dd');
145        $out->code ($self->{media_type}, class => 'MIME', lang => 'en');
146        if ($self->{media_type_overridden}) {
147          $out->nl_text ('... overridden');
148        } elsif (defined $self->{official_type}) {
149          if ($self->{media_type} eq $self->{official_type}) {
150            #
151          } else {
152            $out->nl_text ('... sniffed, official type is #',
153                           text => $self->{official_type});
154          }
155      } else {      } else {
156        delete $_[0]->{nested};        $out->nl_text ( '... sniffed');
157      }      }
158    
159        $out->dt ('Character Encoding');
160        $out->start_tag ('dd');
161        if (defined $self->{charset}) {
162          $out->code ($self->{charset}, class => 'charset', lang => 'en');
163        } else {
164          $out->nl_text ('(unknown)');
165        }
166        $out->nl_text ('... overridden') if $self->{charset_overridden};
167    
168        $out->dt ($self->{is_char_string} ? 'Character Length' : 'Byte Length');
169        ## TODO: formatting
170        $out->start_tag ('dd');
171        my $length = length $self->{s};
172        $out->text ($length . ' ');
173        $out->nl_text (($self->{is_char_string} ? 'character' : 'byte') .
174                       ($length == 1 ? '' : 's'));
175    }    }
176    
177    return $_[0]->{nested};    $out->end_tag ('dl');
178  } # nested    $out->end_section;
179    } # generate_info_section
180    
181    sub generate_transfer_sections ($$) { }
182    
183    package WebHACC::Input::HTTP;
184    push our @ISA, 'WebHACC::Input';
185    
186    {
187    my $HostPermit;
188    sub host_permit ($) {
189      return $HostPermit if $HostPermit;
190      
191      require Message::Util::HostPermit;
192      $HostPermit = new Message::Util::HostPermit;
193      $HostPermit->add_rule (<<'EOH');
194    Allow host=suika port=80
195    Deny host=suika
196    Allow host=suika.fam.cx port=80
197    Deny host=suika.fam.cx
198    Deny host=localhost
199    Deny host=*.localdomain
200    Deny ipv4=0.0.0.0/8
201    Deny ipv4=10.0.0.0/8
202    Deny ipv4=127.0.0.0/8
203    Deny ipv4=169.254.0.0/16
204    Deny ipv4=172.0.0.0/11
205    Deny ipv4=192.0.2.0/24
206    Deny ipv4=192.88.99.0/24
207    Deny ipv4=192.168.0.0/16
208    Deny ipv4=198.18.0.0/15
209    Deny ipv4=224.0.0.0/4
210    Deny ipv4=255.255.255.255/32
211    Deny ipv6=0::0/0
212    Allow host=*
213    EOH
214      return $HostPermit;
215    } # host_permit
216    }
217    
218    sub _get_document ($$$$$) {
219      my ($self, $cgi => $result => $out, $url_o) = @_;
220    
221      unless ($self->host_permit->check ($url_o->uri_host, $url_o->uri_port || 80)) {
222        $self->{error_status_text} = 'Connection to the host is forbidden';
223        bless $self, 'WebHACC::Input::Error';
224        return $self;
225      }
226    
227      my $ua = WDCC::LWPUA->new;
228      $ua->{wdcc_dom} = Message::DOM::DOMImplementation->new;
229      $ua->{wdcc_host_permit} = $self->host_permit;
230      $ua->agent ('Mozilla'); ## TODO: for now.
231      $ua->parse_head (0);
232      $ua->protocols_allowed ([qw/http/]);
233      $ua->max_size (1000_000);
234      my $req = HTTP::Request->new (GET => $url_o->uri_reference);
235      $req->header ('Accept-Encoding' => 'identity, *; q=0');
236      my $res = $ua->request ($req);
237      ## TODO: 401 sets |is_success| true.
238      ## TODO: Don't follow redirect if error-page=true
239      if ($res->is_success or $cgi->get_parameter ('error-page')) {
240        $self->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
241        my $new_url = $res->request->uri;
242        $self->add_url ($new_url) if $new_url ne $self->url;
243        
244        ## TODO: More strict parsing...
245        my $ct = $self->{http_content_type_bytes} = $res->header ('Content-Type');
246        if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
247          $self->{charset} = lc $1;
248          $self->{charset} =~ tr/\\//d;
249          $self->{official_charset} = $self->{charset};
250        }
251        
252        my $input_charset = $cgi->get_parameter ('charset');
253        if (defined $input_charset and length $input_charset) {
254          $self->{charset_overridden}
255              = (not defined $self->{charset} or $self->{charset} ne $input_charset);
256          $self->{charset} = $input_charset;
257        }
258    
259        ## TODO: Support for HTTP Content-Encoding
260        
261        $self->{s} = ''.$res->content;
262      } else {
263        $self->add_url ($res->request->uri);
264        $self->{error_status_text} = $res->status_line;
265        bless $self, 'WebHACC::Input::HTTPError';
266      }
267                  
268      $self->{header_field} = [];
269      $res->scan (sub {
270        push @{$self->{header_field}}, [$_[0], $_[1]];
271      });
272      $self->{header_status_code} = $res->code;
273      $self->{header_status_text} = $res->message;
274    
275      return $self;
276    } # _get_document
277    
278    sub generate_transfer_sections ($$) {
279      my $self = shift;
280      my $result = shift;
281    
282      $result->layer_uncertain ('transfer');
283      
284      $self->generate_http_header_section ($result);
285    } # generate_transfer_sections
286    
287    sub generate_http_header_section ($$) {
288      my ($self, $result) = @_;
289      
290      return unless defined $self->{header_status_code} or
291          defined $self->{header_status_text} or
292          @{$self->{header_field} or []};
293    
294      my $out = $result->output;
295      
296      $out->start_section (id => 'source-header', title => 'HTTP Header');
297      $out->html (qq[<p><strong>Note</strong>: Due to the limitation of the
298    network library in use, the content of this section might
299    not be the real header.</p>
300    
301    <table><tbody>
302    ]);
303    
304      if (defined $self->{header_status_code}) {
305        $out->html (qq[<tr><th scope="row">Status code</th>]);
306        $out->start_tag ('td');
307        $out->code ($self->{header_status_code});
308      }
309      if (defined $self->{header_status_text}) {
310        $out->html (qq[<tr><th scope="row">Status text</th>]);
311        $out->start_tag ('td');
312        $out->code ($self->{header_status_text});
313      }
314      
315      for (@{$self->{header_field}}) {
316        $out->start_tag ('tr');
317        $out->start_tag ('th', scope => 'row');
318        $out->code ($_->[0]);
319        $out->start_tag ('td');
320        $out->code ($_->[1]);
321      }
322    
323      $out->end_tag ('table');
324    
325      $out->end_section;
326    } # generate_http_header_section
327    
328    package WebHACC::Input::Text;
329    push our @ISA, 'WebHACC::Input';
330    
331    sub _get_document ($$$$) {
332      my ($self, $cgi => $result => $out) = @_;
333      
334      $self->add_url (q<thismessage:/>);
335      $self->{base_uri} = q<thismessage:/>;
336        
337      $self->{s} = ''.$cgi->get_parameter ('s');
338      $self->{charset} = ''.$cgi->get_parameter ('_charset_');
339      $self->{charset} =~ s/\s+//g;
340      $self->{charset} = 'utf-8' if $self->{charset} eq '';
341      $self->{official_charset} = $self->{charset};
342      $self->{header_field} = [];
343    
344      return $self;
345    } # _get_document
346    
347    package WebHACC::Input::Subdocument;
348    push our @ISA, 'WebHACC::Input';
349    
350    sub new ($$) {
351      my $self = shift->SUPER::new;
352      $self->{subdocument_index} = shift;
353      return $self;
354    } # new
355    
356    sub id_prefix ($) {
357      my $self = shift;
358      return $self->{parent_input}->id_prefix .
359          'subdoc-' . $self->{subdocument_index} . '-';
360    } # id_prefix
361    
362    sub nested ($) { 1 }
363    
364    sub subdocument_index ($) {
365      return shift->{subdocument_index};
366    } # subdocument_index
367    
368    sub full_subdocument_index ($) {
369      my $self = shift;
370      my $parent = $self->{parent_input}->full_subdocument_index;
371      if ($parent) {
372        return $parent . '.' . $self->{subdocument_index};
373      } else {
374        return $self->{subdocument_index};
375      }
376    } # full_subdocument_index
377    
378    sub start_section ($$) {
379      my $self = shift;
380    
381      my $result = shift;
382      my $out = $result->output;
383    
384      my $index = $self->subdocument_index;
385      $out->start_section (id => my $id = 'subdoc-' . $index . '-',
386                           title => qq[Subdocument #],
387                           short_title => 'Sub #',
388                           role => 'subdoc',
389                           text => $self->full_subdocument_index);
390      $out->script (q[ insertNavSections ('] . $out->input->id_prefix . $id . q[') ]);
391    } # start_section
392    
393    sub end_section ($$) {
394      $_[1]->output->end_section;
395    } # end_section
396    
397    sub generate_info_section ($$) {
398      my $self = shift;
399    
400      my $result = shift;
401      my $out = $result->output;
402    
403      $out->start_section (id => 'document-info', title => 'Information');
404      $out->start_tag ('dl');
405    
406      $out->dt ('Internet Media Type');
407      $out->start_tag ('dd');
408      $out->code ($self->{media_type}, code => 'MIME', lang => 'en');
409    
410      if (defined $self->{container_node}) {
411        $out->dt ('Container Node');
412        $out->start_tag ('dd');
413        my $original_input = $out->input;
414        $out->input ($self->{parent_input});
415        $out->node_link ($self->{container_node});
416        $out->input ($original_input);
417      }
418    
419      $out->dt ('Base URL');
420      $out->start_tag ('dd');
421      $out->url ($self->{base_uri});
422    
423      $out->end_tag ('dl');
424      $out->end_section;
425    } # generate_info_section
426    
427    package WebHACC::Input::Error;
428    push our @ISA, 'WebHACC::Input';
429    
430    sub generate_transfer_sections ($$) {
431      my $self = shift;
432      
433      my $result = shift;
434      my $out = $result->output;
435    
436      $out->start_section (role => 'transfer-errors');
437      $out->start_error_list (role => 'transfer-errors');
438    
439      $result->layer_applicable ('transfer');
440      $result->add_error (layer => 'transfer',
441                          level => 'u',
442                          type => 'resource retrieval error',
443                          url => $self->{request_uri},
444                          text => $self->{error_status_text});
445    
446      $out->end_error_list (role => 'transfer-errors');
447      $out->end_section;
448    } # generate_transfer_sections
449    
450    package WebHACC::Input::HTTPError;
451    push our @ISA, 'WebHACC::Input::Error', 'WebHACC::Input::HTTP';
452    
453    sub generate_transfer_sections ($$) {
454      my $self = shift;
455      
456      my $result = shift;
457    
458      $self->WebHACC::Input::Error::generate_transfer_sections ($result);
459      $self->WebHACC::Input::HTTP::generate_transfer_sections ($result);
460    
461    } # generate_transfer_sections
462    
463    package WebHACC::Input::UnsupportedURLSchemeError;
464    push our @ISA, 'WebHACC::Input::Error';
465    
466    sub _get_document ($$$$) {
467      my ($self, $cgi => $result => $out) = @_;
468      
469      $self->{error_status_text} = 'URL scheme not allowed';
470    
471      return $self;
472    } # _get_document
473    
474    package WDCC::LWPUA;
475    require LWP::UserAgent;
476    push our @ISA, 'LWP::UserAgent';
477    
478    sub redirect_ok {
479      my $ua = shift;
480      unless ($ua->SUPER::redirect_ok (@_)) {
481        return 0;
482      }
483    
484      my $uris = $_[1]->header ('Location');
485      return 0 unless $uris;
486      my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
487      unless ({
488               http => 1,
489              }->{lc $uri->uri_scheme}) { ## TODO: html5_url_scheme
490        return 0;
491      }
492      unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
493        return 0;
494      }
495      return 1;
496    } # redirect_ok
497    
498  1;  1;

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.10

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24