--- test/html-webhacc/cc.cgi 2007/11/18 05:30:03 1.25
+++ test/html-webhacc/cc.cgi 2007/11/23 12:08:32 1.29
@@ -52,7 +52,6 @@
$| = 0;
my $input = get_input_document ($http, $dom);
- my $inner_html_element = $http->get_parameter ('e');
my $char_length = 0;
my %time;
@@ -253,8 +252,9 @@
my $doc = $dom->create_document;
my $el;
+ my $inner_html_element = $http->get_parameter ('e');
if (defined $inner_html_element and length $inner_html_element) {
- $input->{charset} ||= 'ISO-8859-1'; ## TODO: for now.
+ $input->{charset} ||= 'windows-1252'; ## TODO: for now.
my $time1 = time;
my $t = Encode::decode ($input->{charset}, $input->{s});
$time{decode} = time - $time1;
@@ -270,6 +270,8 @@
($input->{charset}, $input->{s} => $doc, $onerror);
$time{parse_html} = time - $time1;
}
+ $doc->manakai_charset ($input->{official_charset})
+ if defined $input->{official_charset};
print STDOUT qq[];
@@ -310,6 +312,8 @@
my $doc = Message::DOM::XMLParserTemp->parse_byte_stream
($fh => $dom, $onerror, charset => $input->{charset});
$time{parse_xml} = time - $time1;
+ $doc->manakai_charset ($input->{official_charset})
+ if defined $input->{official_charset};
print STDOUT qq[];
@@ -399,7 +403,7 @@
$r .= '
';
for my $attr (sort {$a->[0] cmp $b->[0]} map { [$_->name, $_->value, $_->namespace_uri, 'node-'.refaddr $_] }
@{$child->attributes}) {
- $r .= qq[] . htescape ($attr->[0]) . '
= '; ## ISSUE: case?
+ $r .= qq[] . htescape ($attr->[0]) . '
= '; ## ISSUE: case?
$r .= '' . htescape ($attr->[1]) . '
'; ## TODO: children
}
$r .= '
';
@@ -420,6 +424,21 @@
} elsif ($nt == $child->DOCUMENT_NODE) {
$r .= qq'Document';
$r .= qq[];
+ my $cp = $child->manakai_charset;
+ if (defined $cp) {
+ $r .= qq[charset
parameter = ];
+ $r .= htescape ($cp) . qq[
];
+ }
+ $r .= qq[inputEncoding
= ];
+ my $ie = $child->input_encoding;
+ if (defined $ie) {
+ $r .= qq[@{[htescape ($ie)]}
];
+ if ($child->manakai_has_bom) {
+ $r .= qq[ (with BOM
)];
+ }
+ } else {
+ $r .= qq[(null
)];
+ }
$r .= qq[- @{[scalar get_text ('manakaiIsHTML:'.($child->manakai_is_html?1:0))]}
];
$r .= qq[- @{[scalar get_text ('manakaiCompatMode:'.$child->manakai_compat_mode)]}
];
unless ($child->manakai_is_html) {
@@ -895,7 +914,8 @@
sub load_text_catalog ($) {
my $lang = shift; # MUST be a canonical lang name
- open my $file, '<', "cc-msg.$lang.txt" or die "$0: cc-msg.$lang.txt: $!";
+ open my $file, '<:utf8', "cc-msg.$lang.txt"
+ or die "$0: cc-msg.$lang.txt: $!";
while (<$file>) {
if (s/^([^;]+);([^;]*);//) {
my ($type, $cls, $msg) = ($1, $2, $_);
@@ -908,6 +928,7 @@
sub get_text ($) {
my ($type, $level, $node) = @_;
$type = $level . ':' . $type if defined $level;
+ $level = 'm' unless defined $level;
my @arg;
{
if (defined $Msg->{$type}) {
@@ -932,13 +953,13 @@
? htescape ($node->owner_element->manakai_local_name)
: ''
}ge;
- return ($type, $Msg->{$type}->[0], $msg);
+ return ($type, 'level-' . $level . ' ' . $Msg->{$type}->[0], $msg);
} elsif ($type =~ s/:([^:]*)$//) {
unshift @arg, $1;
redo;
}
}
- return ($type, '', htescape ($_[0]));
+ return ($type, 'level-'.$level, htescape ($_[0]));
} # get_text
}
@@ -994,6 +1015,7 @@
$ua->protocols_allowed ([qw/http/]);
$ua->max_size (1000_000);
my $req = HTTP::Request->new (GET => $request_uri);
+ $req->header ('Accept-Encoding' => 'identity, *; q=0');
my $res = $ua->request ($req);
## TODO: 401 sets |is_success| true.
if ($res->is_success or $http->get_parameter ('error-page')) {
@@ -1006,6 +1028,7 @@
if (defined $ct and $ct =~ /;\s*charset\s*=\s*"?([^\s;"]+)"?/i) {
$r->{charset} = lc $1;
$r->{charset} =~ tr/\\//d;
+ $r->{official_charset} = $r->{charset};
}
my $input_charset = $http->get_parameter ('charset');
@@ -1049,6 +1072,7 @@
$r->{charset} = ''.$http->get_parameter ('_charset_');
$r->{charset} =~ s/\s+//g;
$r->{charset} = 'utf-8' if $r->{charset} eq '';
+ $r->{official_charset} = $r->{charset};
$r->{header_field} = [];
require Whatpm::ContentType;
@@ -1076,6 +1100,7 @@
if ($r->{media_type} eq 'text/xml') {
unless (defined $r->{charset}) {
$r->{charset} = 'us-ascii';
+ $r->{official_charset} = $r->{charset};
} elsif ($r->{charset_overridden} and $r->{charset} eq 'us-ascii') {
$r->{charset_overridden} = 0;
}
@@ -1126,4 +1151,4 @@
=cut
-## $Date: 2007/11/18 05:30:03 $
+## $Date: 2007/11/23 12:08:32 $