--- test/html-webhacc/cc.cgi 2008/07/21 12:56:33 1.59 +++ test/html-webhacc/cc.cgi 2008/07/26 11:27:25 1.60 @@ -33,16 +33,14 @@ $out->html_header; $out->unset_flush; - my $input = get_input_document ($http); - $out->input ($input); - require WebHACC::Result; my $result = WebHACC::Result->new; - $result->output ($out); $result->{conforming_min} = 1; $result->{conforming_max} = 1; + $result->output ($out); - $out->html (''); + require WebHACC::Input; + my $input = WebHACC::Input->get_document ($http => $result => $out); check_and_print ($input => $result => $out); @@ -123,191 +121,6 @@ $out->input ($original_input); } # check_and_print -sub get_input_document ($) { - my $http = shift; - - require Message::DOM::DOMImplementation; - my $dom = Message::DOM::DOMImplementation->new; - - require Encode; - my $request_uri = Encode::decode ('utf-8', $http->get_parameter ('uri')); - my $r = WebHACC::Input->new; - if (defined $request_uri and length $request_uri) { - my $uri = $dom->create_uri_reference ($request_uri); - unless ({ - http => 1, - }->{lc $uri->uri_scheme}) { - $r = WebHACC::Input::Error->new; - $r->{uri} = $request_uri; - $r->{request_uri} = $request_uri; - $r->{error_status_text} = 'URL scheme not allowed'; - } - - require Message::Util::HostPermit; - my $host_permit = new Message::Util::HostPermit; - $host_permit->add_rule (<check ($uri->uri_host, $uri->uri_port || 80)) { - my $r = WebHACC::Input::Error->new; - $r->{uri} = $request_uri; - $r->{request_uri} = $request_uri; - $r->{error_status_text} = 'Connection to the host is forbidden'; - return $r; - } - - require LWP::UserAgent; - my $ua = WDCC::LWPUA->new; - $ua->{wdcc_dom} = $dom; - $ua->{wdcc_host_permit} = $host_permit; - $ua->agent ('Mozilla'); ## TODO: for now. - $ua->parse_head (0); - $ua->protocols_allowed ([qw/http/]); - $ua->max_size (1000_000); - my $req = HTTP::Request->new (GET => $request_uri); - $req->header ('Accept-Encoding' => 'identity, *; q=0'); - my $res = $ua->request ($req); - ## TODO: 401 sets |is_success| true. - if ($res->is_success or $http->get_parameter ('error-page')) { - $r->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and . ## TODO: Use our own code! - $r->{uri} = $res->request->uri; - $r->{request_uri} = $request_uri; - - ## TODO: More strict parsing... - my $ct = $res->header ('Content-Type'); - if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) { - $r->{charset} = lc $1; - $r->{charset} =~ tr/\\//d; - $r->{official_charset} = $r->{charset}; - } - - my $input_charset = $http->get_parameter ('charset'); - if (defined $input_charset and length $input_charset) { - $r->{charset_overridden} - = (not defined $r->{charset} or $r->{charset} ne $input_charset); - $r->{charset} = $input_charset; - } - - ## TODO: Support for HTTP Content-Encoding - - $r->{s} = ''.$res->content; - - require Whatpm::ContentType; - ($r->{official_type}, $r->{media_type}) - = Whatpm::ContentType->get_sniffed_type - (get_file_head => sub { - return substr $r->{s}, 0, shift; - }, - http_content_type_byte => $ct, - has_http_content_encoding => - defined $res->header ('Content-Encoding'), - supported_image_types => {}); - } else { - $r->{uri} = $res->request->uri; - $r->{request_uri} = $request_uri; - $r->{error_status_text} = $res->status_line; - } - - $r->{header_field} = []; - $res->scan (sub { - push @{$r->{header_field}}, [$_[0], $_[1]]; - }); - $r->{header_status_code} = $res->code; - $r->{header_status_text} = $res->message; - } else { - $r->{s} = ''.$http->get_parameter ('s'); - $r->{uri} = q; - $r->{request_uri} = q; - $r->{base_uri} = q; - $r->{charset} = ''.$http->get_parameter ('_charset_'); - $r->{charset} =~ s/\s+//g; - $r->{charset} = 'utf-8' if $r->{charset} eq ''; - $r->{official_charset} = $r->{charset}; - $r->{header_field} = []; - - require Whatpm::ContentType; - ($r->{official_type}, $r->{media_type}) - = Whatpm::ContentType->get_sniffed_type - (get_file_head => sub { - return substr $r->{s}, 0, shift; - }, - http_content_type_byte => undef, - has_http_content_encoding => 0, - supported_image_types => {}); - } - - my $input_format = $http->get_parameter ('i'); - if (defined $input_format and length $input_format) { - $r->{media_type_overridden} - = (not defined $r->{media_type} or $input_format ne $r->{media_type}); - $r->{media_type} = $input_format; - } - if (defined $r->{s} and not defined $r->{media_type}) { - $r->{media_type} = 'text/html'; - $r->{media_type_overridden} = 1; - } - - if ($r->{media_type} eq 'text/xml') { - unless (defined $r->{charset}) { - $r->{charset} = 'us-ascii'; - $r->{official_charset} = $r->{charset}; - } elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') { - $r->{charset_overridden} = 0; - } - } - - if (length $r->{s} > 1000_000) { - $r->{error_status_text} = 'Entity-body too large'; - delete $r->{s}; - return $r; - } - - $r->{inner_html_element} = $http->get_parameter ('e'); - - return $r; -} # get_input_document - -package WDCC::LWPUA; -BEGIN { push our @ISA, 'LWP::UserAgent'; } - -sub redirect_ok { - my $ua = shift; - unless ($ua->SUPER::redirect_ok (@_)) { - return 0; - } - - my $uris = $_[1]->header ('Location'); - return 0 unless $uris; - my $uri = $ua->{wdcc_dom}->create_uri_reference ($uris); - unless ({ - http => 1, - }->{lc $uri->uri_scheme}) { - return 0; - } - unless ($ua->{wdcc_host_permit}->check ($uri->uri_host, $uri->uri_port || 80)) { - return 0; - } - return 1; -} # redirect_ok - =head1 AUTHOR Wakaba . @@ -321,4 +134,4 @@ =cut -## $Date: 2008/07/21 12:56:33 $ +## $Date: 2008/07/26 11:27:25 $