--- test/html-webhacc/cc.cgi 2007/11/11 06:57:16 1.24
+++ test/html-webhacc/cc.cgi 2007/11/23 12:08:32 1.29
@@ -52,7 +52,6 @@
$| = 0;
my $input = get_input_document ($http, $dom);
- my $inner_html_element = $http->get_parameter ('e');
my $char_length = 0;
my %time;
@@ -62,7 +61,12 @@
Request URI
<@{[htescape $input->{request_uri}]}>
Document URI
- <@{[htescape $input->{uri}]}>
+ <@{[htescape $input->{uri}]}>
+
]; # no yet
push @nav, ['#document-info' => 'Information'];
@@ -74,7 +78,7 @@
<@{[htescape $input->{base_uri}]}>
Internet Media Type
@{[htescape $input->{media_type}]}
- @{[$input->{media_type_overridden} ? '(overridden)' : '']}
+ @{[$input->{media_type_overridden} ? '(overridden)' : defined $input->{official_type} ? $input->{media_type} eq $input->{official_type} ? '' : '(sniffed; official type is: '.htescape ($input->{official_type}).'
)' : '(sniffed)']}
Character Encoding
@{[defined $input->{charset} ? ''.htescape ($input->{charset}).'
' : '(none)']}
@{[$input->{charset_overridden} ? '(overridden)' : '']}
@@ -93,7 +97,8 @@
if ($input->{media_type} eq 'text/html') {
($doc, $el) = print_syntax_error_html_section ($input, $result);
- print_source_string_section (\($input->{s}), $input->{charset});
+ print_source_string_section
+ (\($input->{s}), $input->{charset} || $doc->input_encoding);
} elsif ({
'text/xml' => 1,
'application/atom+xml' => 1,
@@ -247,8 +252,9 @@
my $doc = $dom->create_document;
my $el;
+ my $inner_html_element = $http->get_parameter ('e');
if (defined $inner_html_element and length $inner_html_element) {
- $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
+ $input->{charset} ||= 'windows-1252'; ## TODO: for now.
my $time1 = time;
my $t = Encode::decode ($input->{charset}, $input->{s});
$time{decode} = time - $time1;
@@ -264,6 +270,8 @@
($input->{charset}, $input->{s} => $doc, $onerror);
$time{parse_html} = time - $time1;
}
+ $doc->manakai_charset ($input->{official_charset})
+ if defined $input->{official_charset};
print STDOUT qq[];
@@ -304,6 +312,8 @@
my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
($fh => $dom, $onerror, charset => $input->{charset});
$time{parse_xml} = time - $time1;
+ $doc->manakai_charset ($input->{official_charset})
+ if defined $input->{official_charset};
print STDOUT qq[];
@@ -393,7 +403,7 @@
$r .= '';
for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
@{$child->attributes}) {
- $r .= qq[] . htescape ($attr->[0]) . '
= '; ## ISSUE: case?
+ $r .= qq[] . htescape ($attr->[0]) . '
= '; ## ISSUE: case?
$r .= '' . htescape ($attr->[1]) . '
'; ## TODO: children
}
$r .= '
';
@@ -414,6 +424,21 @@
} elsif ($nt == $child->DOCUMENT_NODE) {
$r .= qq'Document';
$r .= qq[];
+ my $cp = $child->manakai_charset;
+ if (defined $cp) {
+ $r .= qq[charset
parameter = ];
+ $r .= htescape ($cp) . qq[
];
+ }
+ $r .= qq[inputEncoding
= ];
+ my $ie = $child->input_encoding;
+ if (defined $ie) {
+ $r .= qq[@{[htescape ($ie)]}
];
+ if ($child->manakai_has_bom) {
+ $r .= qq[ (with BOM
)];
+ }
+ } else {
+ $r .= qq[(null
)];
+ }
$r .= qq[- @{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}
];
$r .= qq[- @{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}
];
unless ($child->manakai_is_html) {
@@ -889,7 +914,8 @@
sub load_text_catalog ($) {
my $lang = shift; # MUST be a canonical lang name
- open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
+ open my $file, '<:utf8', "cc-msg.$lang.txt"
+ or die "$0: cc-msg.$lang.txt: $!";
while (<$file>) {
if (s/^([^;]+);([^;]*);//) {
my ($type, $cls, $msg) = ($1, $2, $_);
@@ -902,6 +928,7 @@
sub get_text ($) {
my ($type, $level, $node) = @_;
$type = $level . ':' . $type if defined $level;
+ $level = 'm' unless defined $level;
my @arg;
{
if (defined $Msg->{$type}) {
@@ -926,13 +953,13 @@
? htescape ($node->owner_element->manakai_local_name)
: ''
}ge;
- return ($type, $Msg->{$type}->[0], $msg);
+ return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
} elsif ($type =~ s/:([^:]*)$//) {
unshift @arg, $1;
redo;
}
}
- return ($type, '', htescape ($_[0]));
+ return ($type, 'level-'.$level, htescape ($_[0]));
} # get_text
}
@@ -988,6 +1015,7 @@
$ua->protocols_allowed ([qw/http/]);
$ua->max_size (1000_000);
my $req = HTTP::Request->new (GET => $request_uri);
+ $req->header ('Accept-Encoding' => 'identity, *; q=0');
my $res = $ua->request ($req);
## TODO: 401 sets |is_success| true.
if ($res->is_success or $http->get_parameter ('error-page')) {
@@ -997,12 +1025,10 @@
## TODO: More strict parsing...
my $ct = $res->header ('Content-Type');
- if (defined $ct and $ct =~ m#^([0-9A-Za-z._+-]+/[0-9A-Za-z._+-]+)#) {
- $r->{media_type} = lc $1;
- }
if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
$r->{charset} = lc $1;
$r->{charset} =~ tr/\\//d;
+ $r->{official_charset} = $r->{charset};
}
my $input_charset = $http->get_parameter ('charset');
@@ -1010,9 +1036,22 @@
$r->{charset_overridden}
= (not defined $r->{charset} or $r->{charset} ne $input_charset);
$r->{charset} = $input_charset;
- }
+ }
+
+ ## TODO: Support for HTTP Content-Encoding
$r->{s} = ''.$res->content;
+
+ require Whatpm::ContentType;
+ ($r->{official_type}, $r->{media_type})
+ = Whatpm::ContentType->get_sniffed_type
+ (get_file_head => sub {
+ return substr $r->{s}, 0, shift;
+ },
+ http_content_type_byte => $ct,
+ has_http_content_encoding =>
+ defined $res->header ('Content-Encoding'),
+ supported_image_types => {});
} else {
$r->{uri} = $res->request->uri;
$r->{request_uri} = $request_uri;
@@ -1033,7 +1072,18 @@
$r->{charset} = ''.$http->get_parameter ('_charset_');
$r->{charset} =~ s/\s+//g;
$r->{charset} = 'utf-8' if $r->{charset} eq '';
+ $r->{official_charset} = $r->{charset};
$r->{header_field} = [];
+
+ require Whatpm::ContentType;
+ ($r->{official_type}, $r->{media_type})
+ = Whatpm::ContentType->get_sniffed_type
+ (get_file_head => sub {
+ return substr $r->{s}, 0, shift;
+ },
+ http_content_type_byte => undef,
+ has_http_content_encoding => 0,
+ supported_image_types => {});
}
my $input_format = $http->get_parameter ('i');
@@ -1050,6 +1100,7 @@
if ($r->{media_type} eq 'text/xml') {
unless (defined $r->{charset}) {
$r->{charset} = 'us-ascii';
+ $r->{official_charset} = $r->{charset};
} elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
$r->{charset_overridden} = 0;
}
@@ -1100,4 +1151,4 @@
=cut
-## $Date: 2007/11/11 06:57:16 $
+## $Date: 2007/11/23 12:08:32 $