/[suikacvs]/test/html-webhacc/WebHACC/Input.pm
Suika

Diff of /test/html-webhacc/WebHACC/Input.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1 by wakaba, Sun Jul 20 14:58:24 2008 UTC revision 1.7 by wakaba, Thu Aug 14 09:16:52 2008 UTC
# Line 2  package WebHACC::Input; Line 2  package WebHACC::Input;
2  use strict;  use strict;
3    
4  sub new ($) {  sub new ($) {
5    return bless {id_prefix => ''}, shift;    return bless {urls => []}, shift;
6  } # new  } # new
7    
8  sub id_prefix ($;$) {  sub id_prefix ($) { '' }
9    if (@_ > 1) {  
10      if (defined $_[1]) {  sub nested ($) { 0 }
11        $_[0]->{id_prefix} = ''.$_[1];  
12      } else {  sub subdocument_index ($) { 0 }
13        $_[0]->{id_prefix} = '';  
14    sub full_subdocument_index ($) { 0 }
15    
16    sub url ($) {
17      my $self = shift;
18      if (@{$self->{urls}}) {
19        return $self->{urls}->[-1];
20      } else {
21        return undef;
22      }
23    } # url
24    
25    sub add_url ($$) {
26      my ($self, $url) = @_;
27      push @{$self->{urls}}, ''.$url;
28    } # add_url
29    
30    sub urls ($) {
31      my $self = shift;
32      return [@{$self->{urls}}];
33    } # urls
34    
35    sub get_document ($$$$) {
36      my $self = shift->new;
37    
38      my ($cgi => $result => $out) = @_;
39    
40      $out->input ($self);
41    
42      require Encode;
43      my $url_s = Encode::decode ('utf-8', $cgi->get_parameter ('uri'));
44      my $url_o;
45      if (defined $url_s and length $url_s) {
46        require Message::DOM::DOMImplementation;
47        my $dom = Message::DOM::DOMImplementation->new;
48        
49        $url_o = $dom->create_uri_reference ($url_s);
50        $url_o->uri_fragment (undef);
51    
52        $self->add_url ($url_o->uri_reference);
53    
54        my $url_scheme = lc $url_o->uri_scheme; ## TODO: html5_url_scheme
55        my $class = {
56          http => 'WebHACC::Input::HTTP',
57        }->{$url_scheme} || 'WebHACC::Input::UnsupportedURLSchemeError';
58        bless $self, $class;
59      } else {
60        bless $self, 'WebHACC::Input::Text';
61      }
62    
63      $self->_get_document ($cgi => $result => $out, $url_o);
64    
65      return $self unless defined $self->{s};
66    
67      if (length $self->{s} > 1000_000) {
68        $self->{error_status_text} = 'Entity-body too large';
69        delete $self->{s};
70        bless $self, 'WebHACC::Input::Error';
71        return $self;
72      }
73    
74      require Whatpm::ContentType;
75      ($self->{official_type}, $self->{media_type})
76            = Whatpm::ContentType->get_sniffed_type
77                (get_file_head => sub {
78                   return substr $self->{s}, 0, shift;
79                 },
80                 http_content_type_byte => $self->{http_content_type_bytes},
81                 supported_image_types => {});
82    
83      my $input_format = $cgi->get_parameter ('i');
84      if (defined $input_format and length $input_format) {
85        $self->{media_type_overridden}
86            = (not defined $self->{media_type} or
87               $input_format ne $self->{media_type});
88        $self->{media_type} = $input_format;
89      }
90      if (defined $self->{s} and not defined $self->{media_type}) {
91        $self->{media_type} = 'text/html';
92        $self->{media_type_overridden} = 1;
93      }
94    
95      if ($self->{media_type} eq 'text/xml') {
96        unless (defined $self->{charset}) {
97          $self->{charset} = 'us-ascii';
98          $self->{official_charset} = $self->{charset};
99        } elsif ($self->{charset_overridden} and $self->{charset} eq 'us-ascii') {
100          $self->{charset_overridden} = 0;
101      }      }
102    }    }
103    
104    return $_[0]->{id_prefix};    $self->{inner_html_element} = $cgi->get_parameter ('e');
 } # id_prefix  
105    
106  sub nested ($;$) {    return $self;
107    if (@_ > 1) {  } # get_document
108      if ($_[1]) {  
109        $_[0]->{nested} = 1;  sub _get_document ($$$$) {
110      die "$0: _get_document of " . ref $_[0];
111    } # _get_document
112    
113    sub generate_info_section ($$) {
114      my $self = shift;
115      
116      my $result = shift;
117      my $out = $result->output;
118    
119      $out->start_section (id => 'document-info', title => 'Information');
120      $out->start_tag ('dl');
121    
122      my $urls = $self->urls;
123    
124      $out->dt (@$urls == 1 ? 'URL' : 'URLs');
125      my $url = pop @$urls;
126      for (@$urls) {
127        $out->start_tag ('dd');
128        $out->url ($_);
129      }
130      $out->start_tag ('dd');
131      $out->url ($url, id => 'anchor-document-url');
132      $out->script (q[
133          document.title = '<'
134              + document.getElementById ('anchor-document-url').href + '> \\u2014 '
135              + document.title;
136      ]);
137    
138      if (defined $self->{s}) {
139        $out->dt ('Base URL');
140        $out->start_tag ('dd');
141        $out->url ($self->{base_uri});
142        
143        $out->dt ('Internet Media Type');
144        $out->start_tag ('dd');
145        $out->code ($self->{media_type}, class => 'MIME', lang => 'en');
146        if ($self->{media_type_overridden}) {
147          $out->nl_text ('... overridden');
148        } elsif (defined $self->{official_type}) {
149          if ($self->{media_type} eq $self->{official_type}) {
150            #
151          } else {
152            $out->nl_text ('... sniffed, official type is #',
153                           text => $self->{official_type});
154          }
155        } else {
156          $out->nl_text ( '... sniffed');
157        }
158    
159        $out->dt ('Character Encoding');
160        $out->start_tag ('dd');
161        if (defined $self->{charset}) {
162          $out->code ($self->{charset}, class => 'charset', lang => 'en');
163      } else {      } else {
164        delete $_[0]->{nested};        $out->nl_text ('(unknown)');
165      }      }
166        $out->nl_text ('... overridden') if $self->{charset_overridden};
167    
168        $out->dt ($self->{is_char_string} ? 'Character Length' : 'Byte Length');
169        ## TODO: formatting
170        $out->start_tag ('dd');
171        my $length = length $self->{s};
172        $out->text ($length . ' ');
173        $out->nl_text (($self->{is_char_string} ? 'character' : 'byte') .
174                       ($length == 1 ? '' : 's'));
175      }
176    
177      $out->end_tag ('dl');
178      $out->end_section;
179    } # generate_info_section
180    
181    sub generate_transfer_sections ($$) {
182      my $self = shift;
183      my $result = shift;
184      
185      $self->generate_http_header_section ($result);
186    } # generate_transfer_sections
187    
188    sub generate_http_header_section ($$) {
189      my ($self, $result) = @_;
190      
191      return unless defined $self->{header_status_code} or
192          defined $self->{header_status_text} or
193          @{$self->{header_field} or []};
194    
195      my $out = $result->output;
196      
197      $out->start_section (id => 'source-header', title => 'HTTP Header');
198      $out->html (qq[<p><strong>Note</strong>: Due to the limitation of the
199    network library in use, the content of this section might
200    not be the real header.</p>
201    
202    <table><tbody>
203    ]);
204    
205      if (defined $self->{header_status_code}) {
206        $out->html (qq[<tr><th scope="row">Status code</th>]);
207        $out->start_tag ('td');
208        $out->code ($self->{header_status_code});
209    }    }
210      if (defined $self->{header_status_text}) {
211        $out->html (qq[<tr><th scope="row">Status text</th>]);
212        $out->start_tag ('td');
213        $out->code ($self->{header_status_text});
214      }
215      
216      for (@{$self->{header_field}}) {
217        $out->start_tag ('tr');
218        $out->start_tag ('th', scope => 'row');
219        $out->code ($_->[0]);
220        $out->start_tag ('td');
221        $out->code ($_->[1]);
222      }
223    
224      $out->end_tag ('table');
225    
226      $out->end_section;
227    } # generate_http_header_section
228    
229    package WebHACC::Input::HTTP;
230    push our @ISA, 'WebHACC::Input';
231    
232    {
233    my $HostPermit;
234    sub host_permit ($) {
235      return $HostPermit if $HostPermit;
236      
237      require Message::Util::HostPermit;
238      $HostPermit = new Message::Util::HostPermit;
239      $HostPermit->add_rule (<<'EOH');
240    Allow host=suika port=80
241    Deny host=suika
242    Allow host=suika.fam.cx port=80
243    Deny host=suika.fam.cx
244    Deny host=localhost
245    Deny host=*.localdomain
246    Deny ipv4=0.0.0.0/8
247    Deny ipv4=10.0.0.0/8
248    Deny ipv4=127.0.0.0/8
249    Deny ipv4=169.254.0.0/16
250    Deny ipv4=172.0.0.0/11
251    Deny ipv4=192.0.2.0/24
252    Deny ipv4=192.88.99.0/24
253    Deny ipv4=192.168.0.0/16
254    Deny ipv4=198.18.0.0/15
255    Deny ipv4=224.0.0.0/4
256    Deny ipv4=255.255.255.255/32
257    Deny ipv6=0::0/0
258    Allow host=*
259    EOH
260      return $HostPermit;
261    } # host_permit
262    }
263    
264    return $_[0]->{nested};  sub _get_document ($$$$$) {
265  } # nested    my ($self, $cgi => $result => $out, $url_o) = @_;
266    
267      unless ($self->host_permit->check ($url_o->uri_host, $url_o->uri_port || 80)) {
268        $self->{error_status_text} = 'Connection to the host is forbidden';
269        return $self;
270      }
271    
272      my $ua = WDCC::LWPUA->new;
273      $ua->{wdcc_dom} = Message::DOM::DOMImplementation->new;
274      $ua->{wdcc_host_permit} = $self->host_permit;
275      $ua->agent ('Mozilla'); ## TODO: for now.
276      $ua->parse_head (0);
277      $ua->protocols_allowed ([qw/http/]);
278      $ua->max_size (1000_000);
279      my $req = HTTP::Request->new (GET => $url_o->uri_reference);
280      $req->header ('Accept-Encoding' => 'identity, *; q=0');
281      my $res = $ua->request ($req);
282      ## TODO: 401 sets |is_success| true.
283      ## TODO: Don't follow redirect if error-page=true
284      if ($res->is_success or $cgi->get_parameter ('error-page')) {
285        $self->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and <base>. ## TODO: Use our own code!
286        my $new_url = $res->request->uri;
287        $self->add_url ($new_url) if $new_url ne $self->url;
288        
289        ## TODO: More strict parsing...
290        my $ct = $self->{http_content_type_bytes} = $res->header ('Content-Type');
291        if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
292          $self->{charset} = lc $1;
293          $self->{charset} =~ tr/\\//d;
294          $self->{official_charset} = $self->{charset};
295        }
296        
297        my $input_charset = $cgi->get_parameter ('charset');
298        if (defined $input_charset and length $input_charset) {
299          $self->{charset_overridden}
300              = (not defined $self->{charset} or $self->{charset} ne $input_charset);
301          $self->{charset} = $input_charset;
302        }
303    
304        ## TODO: Support for HTTP Content-Encoding
305        
306        $self->{s} = ''.$res->content;
307      } else {
308        $self->add_url ($res->request->uri);
309        $self->{error_status_text} = $res->status_line;
310        bless $self, 'WebHACC::Input::HTTPError';
311      }
312                  
313      $self->{header_field} = [];
314      $res->scan (sub {
315        push @{$self->{header_field}}, [$_[0], $_[1]];
316      });
317      $self->{header_status_code} = $res->code;
318      $self->{header_status_text} = $res->message;
319    
320      return $self;
321    } # _get_document
322    
323    package WebHACC::Input::Text;
324    push our @ISA, 'WebHACC::Input';
325    
326    sub _get_document ($$$$) {
327      my ($self, $cgi => $result => $out) = @_;
328      
329      $self->add_url (q<thismessage:/>);
330      $self->{base_uri} = q<thismessage:/>;
331        
332      $self->{s} = ''.$cgi->get_parameter ('s');
333      $self->{charset} = ''.$cgi->get_parameter ('_charset_');
334      $self->{charset} =~ s/\s+//g;
335      $self->{charset} = 'utf-8' if $self->{charset} eq '';
336      $self->{official_charset} = $self->{charset};
337      $self->{header_field} = [];
338    
339      return $self;
340    } # _get_document
341    
342    package WebHACC::Input::Subdocument;
343    push our @ISA, 'WebHACC::Input';
344    
345    sub new ($$) {
346      my $self = bless {}, shift;
347      $self->{subdocument_index} = shift;
348      return $self;
349    } # new
350    
351    sub id_prefix ($) {
352      my $self = shift;
353      return $self->{parent_input}->id_prefix .
354          'subdoc-' . $self->{subdocument_index} . '-';
355    } # id_prefix
356    
357    sub nested ($) { 1 }
358    
359    sub subdocument_index ($) {
360      return shift->{subdocument_index};
361    } # subdocument_index
362    
363    sub full_subdocument_index ($) {
364      my $self = shift;
365      my $parent = $self->{parent_input}->full_subdocument_index;
366      if ($parent) {
367        return $parent . '.' . $self->{subdocument_index};
368      } else {
369        return $self->{subdocument_index};
370      }
371    } # full_subdocument_index
372    
373    sub start_section ($$) {
374      my $self = shift;
375    
376      my $result = shift;
377      my $out = $result->output;
378    
379      my $index = $self->subdocument_index;
380      $out->start_section (id => my $id = 'subdoc-' . $index . '-',
381                           title => qq[Subdocument #],
382                           short_title => 'Sub #',
383                           role => 'subdoc',
384                           text => $self->full_subdocument_index);
385      $out->script (q[ insertNavSections ('] . $out->input->id_prefix . $id . q[') ]);
386    } # start_section
387    
388    sub end_section ($$) {
389      $_[1]->output->end_section;
390    } # end_section
391    
392    sub generate_info_section ($$) {
393      my $self = shift;
394    
395      my $result = shift;
396      my $out = $result->output;
397    
398      $out->start_section (id => 'document-info', title => 'Information');
399      $out->start_tag ('dl');
400    
401      $out->dt ('Internet Media Type');
402      $out->start_tag ('dd');
403      $out->code ($self->{media_type}, code => 'MIME', lang => 'en');
404    
405      if (defined $self->{container_node}) {
406        $out->dt ('Container Node');
407        $out->start_tag ('dd');
408        my $original_input = $out->input;
409        $out->input ($self->{parent_input});
410        $out->node_link ($self->{container_node});
411        $out->input ($original_input);
412      }
413    
414      $out->dt ('Base URL');
415      $out->start_tag ('dd');
416      $out->url ($self->{base_uri});
417    
418      $out->end_tag ('dl');
419      $out->end_section;
420    } # generate_info_section
421    
422    package WebHACC::Input::Error;
423    push our @ISA, 'WebHACC::Input';
424    
425    sub generate_transfer_sections ($$) {
426      my $self = shift;
427    
428      $self->SUPER::generate_transfer_sections (@_);
429      
430      my $result = shift;
431      my $out = $result->output;
432    
433      $out->start_section (id => 'transfer-errors', title => 'Transfer Errors');
434    
435      $out->start_tag ('dl');
436      $result->add_error (layer => 'transfer',
437                          level => 'u',
438                          type => 'resource retrieval error',
439                          url => $self->{request_uri},
440                          text => $self->{error_status_text});
441      $out->end_tag ('dl');
442    
443      $out->end_section;
444    } # generate_transfer_sections
445    
446    package WebHACC::Input::HTTPError;
447    push our @ISA, 'WebHACC::Input::Error', 'WebHACC::Input::HTTP';
448    
449    package WebHACC::Input::UnsupportedURLSchemeError;
450    push our @ISA, 'WebHACC::Input::Error';
451    
452    sub _get_document ($$$$) {
453      my ($self, $cgi => $result => $out) = @_;
454      
455      $self->{error_status_text} = 'URL scheme not allowed';
456    
457      return $self;
458    } # _get_document
459    
460    package WDCC::LWPUA;
461    require LWP::UserAgent;
462    push our @ISA, 'LWP::UserAgent';
463    
464    sub redirect_ok {
465      my $ua = shift;
466      unless ($ua->SUPER::redirect_ok (@_)) {
467        return 0;
468      }
469    
470      my $uris = $_[1]->header ('Location');
471      return 0 unless $uris;
472      my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris);
473      unless ({
474               http => 1,
475              }->{lc $uri->uri_scheme}) { ## TODO: html5_url_scheme
476        return 0;
477      }
478      unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) {
479        return 0;
480      }
481      return 1;
482    } # redirect_ok
483    
484  1;  1;

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.7

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24