package WebHACC::Input;
use strict;
sub new ($) {
return bless {urls => []}, shift;
} # new
sub id_prefix ($) { '' }
sub nested ($) { 0 }
sub subdocument_index ($) { 0 }
sub full_subdocument_index ($) { 0 }
sub url ($) {
my $self = shift;
if (@{$self->{urls}}) {
return $self->{urls}->[-1];
} else {
return undef;
}
} # url
sub add_url ($$) {
my ($self, $url) = @_;
push @{$self->{urls}}, ''.$url;
} # add_url
sub urls ($) {
my $self = shift;
return [@{$self->{urls}}];
} # urls
sub get_document ($$$$) {
my $self = shift->new;
my ($cgi => $result => $out) = @_;
$out->input ($self);
require Encode;
my $url_s = Encode::decode ('utf-8', $cgi->get_parameter ('uri'));
my $url_o;
if (defined $url_s and length $url_s) {
require Message::DOM::DOMImplementation;
my $dom = Message::DOM::DOMImplementation->new;
$url_o = $dom->create_uri_reference ($url_s);
$url_o->uri_fragment (undef);
$self->add_url ($url_o->uri_reference);
my $url_scheme = lc $url_o->uri_scheme; ## TODO: html5_url_scheme
my $class = {
http => 'WebHACC::Input::HTTP',
}->{$url_scheme} || 'WebHACC::Input::UnsupportedURLSchemeError';
bless $self, $class;
} else {
bless $self, 'WebHACC::Input::Text';
}
$self->_get_document ($cgi => $result => $out, $url_o);
return $self unless defined $self->{s};
if (length $self->{s} > 1000_000) {
$self->{error_status_text} = 'Entity-body too large';
delete $self->{s};
bless $self, 'WebHACC::Input::Error';
return $self;
}
require Whatpm::ContentType;
($self->{official_type}, $self->{media_type})
= Whatpm::ContentType->get_sniffed_type
(get_file_head => sub {
return substr $self->{s}, 0, shift;
},
http_content_type_byte => $self->{http_content_type_bytes},
supported_image_types => {});
my $input_format = $cgi->get_parameter ('i');
if (defined $input_format and length $input_format) {
$self->{media_type_overridden}
= (not defined $self->{media_type} or
$input_format ne $self->{media_type});
$self->{media_type} = $input_format;
}
if (defined $self->{s} and not defined $self->{media_type}) {
$self->{media_type} = 'text/html';
$self->{media_type_overridden} = 1;
}
if ($self->{media_type} eq 'text/xml') {
unless (defined $self->{charset}) {
$self->{charset} = 'us-ascii';
$self->{official_charset} = $self->{charset};
} elsif ($self->{charset_overridden} and $self->{charset} eq 'us-ascii') {
$self->{charset_overridden} = 0;
}
}
$self->{inner_html_element} = $cgi->get_parameter ('e');
return $self;
} # get_document
sub _get_document ($$$$) {
die "$0: _get_document of " . ref $_[0];
} # _get_document
sub generate_info_section ($$) {
my $self = shift;
my $result = shift;
my $out = $result->output;
$out->start_section (id => 'document-info', title => 'Information');
$out->start_tag ('dl');
my $urls = $self->urls;
$out->dt (@$urls == 1 ? 'URL' : 'URLs');
my $url = pop @$urls;
for (@$urls) {
$out->start_tag ('dd');
$out->url ($_);
}
$out->start_tag ('dd');
$out->url ($url, id => 'anchor-document-url');
$out->script (q[
document.title = '<'
+ document.getElementById ('anchor-document-url').href + '> \\u2014 '
+ document.title;
]);
if (defined $self->{s}) {
$out->dt ('Base URL');
$out->start_tag ('dd');
$out->url ($self->{base_uri});
$out->dt ('Internet Media Type');
$out->start_tag ('dd');
$out->code ($self->{media_type}, class => 'MIME', lang => 'en');
if ($self->{media_type_overridden}) {
$out->nl_text ('... overridden');
} elsif (defined $self->{official_type}) {
if ($self->{media_type} eq $self->{official_type}) {
#
} else {
$out->nl_text ('... sniffed, official type is #',
text => $self->{official_type});
}
} else {
$out->nl_text ( '... sniffed');
}
$out->dt ('Character Encoding');
$out->start_tag ('dd');
if (defined $self->{charset}) {
$out->code ($self->{charset}, class => 'charset', lang => 'en');
} else {
$out->nl_text ('(unknown)');
}
$out->nl_text ('... overridden') if $self->{charset_overridden};
$out->dt ($self->{is_char_string} ? 'Character Length' : 'Byte Length');
## TODO: formatting
$out->start_tag ('dd');
my $length = length $self->{s};
$out->text ($length . ' ');
$out->nl_text (($self->{is_char_string} ? 'character' : 'byte') .
($length == 1 ? '' : 's'));
}
$out->end_tag ('dl');
$out->end_section;
} # generate_info_section
sub generate_transfer_sections ($$) { }
package WebHACC::Input::HTTP;
push our @ISA, 'WebHACC::Input';
{
my $HostPermit;
sub host_permit ($) {
return $HostPermit if $HostPermit;
require Message::Util::HostPermit;
$HostPermit = new Message::Util::HostPermit;
$HostPermit->add_rule (<<'EOH');
Allow host=suika port=80
Deny host=suika
Allow host=suika.fam.cx port=80
Deny host=suika.fam.cx
Deny host=localhost
Deny host=*.localdomain
Deny ipv4=0.0.0.0/8
Deny ipv4=10.0.0.0/8
Deny ipv4=127.0.0.0/8
Deny ipv4=169.254.0.0/16
Deny ipv4=172.0.0.0/11
Deny ipv4=192.0.2.0/24
Deny ipv4=192.88.99.0/24
Deny ipv4=192.168.0.0/16
Deny ipv4=198.18.0.0/15
Deny ipv4=224.0.0.0/4
Deny ipv4=255.255.255.255/32
Deny ipv6=0::0/0
Allow host=*
EOH
return $HostPermit;
} # host_permit
}
sub _get_document ($$$$$) {
my ($self, $cgi => $result => $out, $url_o) = @_;
unless ($self->host_permit->check ($url_o->uri_host, $url_o->uri_port || 80)) {
$self->{error_status_text} = 'Connection to the host is forbidden';
bless $self, 'WebHACC::Input::Error';
return $self;
}
my $ua = WDCC::LWPUA->new;
$ua->{wdcc_dom} = Message::DOM::DOMImplementation->new;
$ua->{wdcc_host_permit} = $self->host_permit;
$ua->agent ('Mozilla'); ## TODO: for now.
$ua->parse_head (0);
$ua->protocols_allowed ([qw/http/]);
$ua->max_size (1000_000);
my $req = HTTP::Request->new (GET => $url_o->uri_reference);
$req->header ('Accept-Encoding' => 'identity, *; q=0');
my $res = $ua->request ($req);
## TODO: 401 sets |is_success| true.
## TODO: Don't follow redirect if error-page=true
if ($res->is_success or $cgi->get_parameter ('error-page')) {
$self->{base_uri} = $res->base; ## NOTE: It does check |Content-Base|, |Content-Location|, and
Note: Due to the limitation of the network library in use, the content of this section might not be the real header.
Status code | ]); $out->start_tag ('td'); $out->code ($self->{header_status_code}); } if (defined $self->{header_status_text}) { $out->html (qq[
---|
Status text | ]); $out->start_tag ('td'); $out->code ($self->{header_status_text}); } for (@{$self->{header_field}}) { $out->start_tag ('tr'); $out->start_tag ('th', scope => 'row'); $out->code ($_->[0]); $out->start_tag ('td'); $out->code ($_->[1]); } $out->end_tag ('table'); $out->end_section; } # generate_http_header_section package WebHACC::Input::Text; push our @ISA, 'WebHACC::Input'; sub _get_document ($$$$) { my ($self, $cgi => $result => $out) = @_; $self->add_url (q