--- test/html-webhacc/cc.cgi 2007/11/11 06:57:16 1.24
+++ test/html-webhacc/cc.cgi 2007/11/18 11:05:12 1.26
@@ -52,7 +52,6 @@
$| = 0;
my $input = get_input_document ($http, $dom);
- my $inner_html_element = $http->get_parameter ('e');
my $char_length = 0;
my %time;
@@ -62,7 +61,12 @@
Request URI
<@{[htescape $input->{request_uri}]}>
Document URI
- <@{[htescape $input->{uri}]}>
+ <@{[htescape $input->{uri}]}>
+
]; # no yet
push @nav, ['#document-info' => 'Information'];
@@ -74,7 +78,7 @@
<@{[htescape $input->{base_uri}]}>
Internet Media Type
@{[htescape $input->{media_type}]}
- @{[$input->{media_type_overridden} ? '(overridden)' : '']}
+ @{[$input->{media_type_overridden} ? '(overridden)' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '(sniffed; official type is: '.htescape ($input->{official_type}).'
)' : '(sniffed)']}
Character Encoding
@{[defined $input->{charset} ? ''.htescape ($input->{charset}).'
' : '(none)']}
@{[$input->{charset_overridden} ? '(overridden)' : '']}
@@ -93,7 +97,8 @@
if ($input->{media_type} eq 'text/html') {
($doc, $el) = print_syntax_error_html_section ($input, $result);
- print_source_string_section (\($input->{s}), $input->{charset});
+ print_source_string_section
+ (\($input->{s}), $input->{charset} || $doc->input_encoding);
} elsif ({
'text/xml' => 1,
'application/atom+xml' => 1,
@@ -247,8 +252,9 @@
my $doc = $dom->create_document;
my $el;
+ my $inner_html_element = $http->get_parameter ('e');
if (defined $inner_html_element and length $inner_html_element) {
- $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
+ $input->{charset} ||= 'windows-1252'; ## TODO: for now.
my $time1 = time;
my $t = Encode::decode ($input->{charset}, $input->{s});
$time{decode} = time - $time1;
@@ -264,6 +270,8 @@
($input->{charset}, $input->{s} => $doc, $onerror);
$time{parse_html} = time - $time1;
}
+ $doc->manakai_charset ($input->{official_charset})
+ if defined $input->{official_charset};
print STDOUT qq[];
@@ -304,6 +312,8 @@
my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
($fh => $dom, $onerror, charset => $input->{charset});
$time{parse_xml} = time - $time1;
+ $doc->manakai_charset ($input->{official_charset})
+ if defined $input->{official_charset};
print STDOUT qq[];
@@ -889,7 +899,8 @@
sub load_text_catalog ($) {
my $lang = shift; # MUST be a canonical lang name
- open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
+ open my $file, '<:utf8', "cc-msg.$lang.txt"
+ or die "$0: cc-msg.$lang.txt: $!";
while (<$file>) {
if (s/^([^;]+);([^;]*);//) {
my ($type, $cls, $msg) = ($1, $2, $_);
@@ -997,12 +1008,10 @@
## TODO: More strict parsing...
my $ct = $res->header ('Content-Type');
- if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {
- $r->{media_type} = lc $1;
- }
if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
$r->{charset} = lc $1;
$r->{charset} =~ tr/\\//d;
+ $r->{official_charset} = $r->{charset};
}
my $input_charset = $http->get_parameter ('charset');
@@ -1010,9 +1019,22 @@
$r->{charset_overridden}
= (not defined $r->{charset} or $r->{charset} ne $input_charset);
$r->{charset} = $input_charset;
- }
+ }
+
+ ## TODO: Support for HTTP Content-Encoding
$r->{s} = ''.$res->content;
+
+ require Whatpm::ContentType;
+ ($r->{official_type}, $r->{media_type})
+ = Whatpm::ContentType->get_sniffed_type
+ (get_file_head => sub {
+ return substr $r->{s}, 0, shift;
+ },
+ http_content_type_byte => $ct,
+ has_http_content_encoding =>
+ defined $res->header ('Content-Encoding'),
+ supported_image_types => {});
} else {
$r->{uri} = $res->request->uri;
$r->{request_uri} = $request_uri;
@@ -1033,7 +1055,18 @@
$r->{charset} = ''.$http->get_parameter ('_charset_');
$r->{charset} =~ s/\s+//g;
$r->{charset} = 'utf-8' if $r->{charset} eq '';
+ $r->{official_charset} = $r->{charset};
$r->{header_field} = [];
+
+ require Whatpm::ContentType;
+ ($r->{official_type}, $r->{media_type})
+ = Whatpm::ContentType->get_sniffed_type
+ (get_file_head => sub {
+ return substr $r->{s}, 0, shift;
+ },
+ http_content_type_byte => undef,
+ has_http_content_encoding => 0,
+ supported_image_types => {});
}
my $input_format = $http->get_parameter ('i');
@@ -1050,6 +1083,7 @@
if ($r->{media_type} eq 'text/xml') {
unless (defined $r->{charset}) {
$r->{charset} = 'us-ascii';
+ $r->{official_charset} = $r->{charset};
} elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
$r->{charset_overridden} = 0;
}
@@ -1100,4 +1134,4 @@
=cut
-## $Date: 2007/11/11 06:57:16 $
+## $Date: 2007/11/18 11:05:12 $