package Whatpm::Charset::DecodeHandle;
use strict;
my $XML_AUTO_CHARSET = q;
my $IANA_CHARSET = q;
my $PERL_CHARSET = q;
my $XML_CHARSET = q;
## ->create_decode_handle ($charset_uri, $byte_stream, $onerror)
sub create_decode_handle ($$$;$) {
my $csdef = $Whatpm::Charset::CharsetDef->{$_[1]};
my $obj = {
character_queue => [],
filehandle => $_[2],
charset => $_[1],
byte_buffer => '',
onerror => $_[3] || sub {},
};
if ($csdef->{uri}->{$XML_AUTO_CHARSET} or
$obj->{charset} eq $XML_AUTO_CHARSET) {
my $b = ''; # UTF-8 w/o BOM
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
$obj->{input_encoding} = 'UTF-8';
if (read $obj->{filehandle}, $b, 256) {
no warnings "substr";
no warnings "uninitialized";
if (substr ($b, 0, 1) eq "<") {
if (substr ($b, 1, 1) eq "?") { # ASCII8
if ($b =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii8} or $csdef->{bom_required}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
$obj->{input_encoding} = 'UTF-8';
}
if (defined $csdef->{no_bom_variant}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant}};
}
} elsif (substr ($b, 1, 1) eq "\x00") {
if (substr ($b, 2, 2) eq "?\x00") { # ASCII16LE
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii16} or $csdef->{ascii16be} or
$csdef->{bom_required}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
$obj->{input_encoding} = 'UTF-8';
}
if (defined $csdef->{no_bom_variant16le}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};
}
} elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii32} or
$csdef->{ascii32endian1234} or
$csdef->{ascii32endian2143} or
$csdef->{ascii32endian3412} or
$csdef->{bom_required}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
$obj->{input_encoding} = 'UTF-8';
}
if (defined $csdef->{no_bom_variant32endian4321}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};
}
}
}
} elsif (substr ($b, 0, 3) eq "\xEF\xBB\xBF") { # UTF8
$obj->{has_bom} = 1;
substr ($b, 0, 3) = '';
my $c = $b;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{utf8_encoding_scheme} or
not $csdef->{bom_allowed}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
$obj->{input_encoding} = 'UTF-8';
}
if (defined $csdef->{no_bom_variant}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant}};
}
} elsif (substr ($b, 0, 2) eq "\x00<") {
if (substr ($b, 2, 2) eq "\x00?") { # ASCII16BE
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii16} or $csdef->{ascii16le} or
$csdef->{bom_required}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
$obj->{input_encoding} = 'UTF-8';
}
if (defined $csdef->{no_bom_variant16be}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};
}
} elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii32} or
$csdef->{ascii32endian1234} or
$csdef->{ascii32endian2143} or
$csdef->{ascii32endian4321} or
$csdef->{bom_required}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
$obj->{input_encoding} = 'UTF-8';
}
if (defined $csdef->{no_bom_variant32endian3412}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};
}
}
} elsif (substr ($b, 0, 2) eq "\xFE\xFF") {
if (substr ($b, 2, 2) eq "\x00<") { # ASCII16BE
$obj->{has_bom} = 1;
substr ($b, 0, 2) = '';
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii16} or
$csdef->{ascii16le} or
not $csdef->{bom_allowed}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
$obj->{input_encoding} = 'UTF-16';
}
if (defined $csdef->{no_bom_variant16be}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};
}
} elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
$obj->{has_bom} = 1;
substr ($b, 0, 4) = '';
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii32} or
$csdef->{ascii32endian1234} or
$csdef->{ascii32endian2143} or
$csdef->{ascii32endian4321} or
not $csdef->{bom_allowed}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
$obj->{input_encoding} = 'UTF-16';
$obj->{byte_buffer} .= "\x00\x00";
}
if (defined $csdef->{no_bom_variant32endian3412}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
$obj->{input_encoding} = 'UTF-16';
substr ($b, 0, 2) = '';
$obj->{has_bom} = 1;
}
} elsif (substr ($b, 0, 2) eq "\xFF\xFE") {
if (substr ($b, 2, 2) eq "<\x00") { # ASCII16LE
$obj->{has_bom} = 1;
substr ($b, 0, 2) = '';
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii16} or
$csdef->{ascii16be} or
not $csdef->{bom_allowed}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
$obj->{input_encoding} = 'UTF-16';
}
if (defined $csdef->{no_bom_variant16le}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};
}
} elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
$obj->{has_bom} = 1;
substr ($b, 0, 4) = '';
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii32} or
$csdef->{ascii32endian1234} or
$csdef->{ascii32endian2143} or
$csdef->{ascii32endian3412} or
not $csdef->{bom_allowed}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
$obj->{input_encoding} = 'UTF-16';
$obj->{byte_buffer} .= "\x00\x00";
}
if (defined $csdef->{no_bom_variant32endian4321}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
$obj->{input_encoding} = 'UTF-16';
substr ($b, 0, 2) = '';
$obj->{has_bom} = 1;
}
} elsif (substr ($b, 0, 2) eq "\x00\x00") {
if (substr ($b, 2, 2) eq "\x00<") { # ASCII32Endian1234
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii32} or
$csdef->{ascii32endian2143} or
$csdef->{ascii32endian3412} or
$csdef->{ascii32endian4321} or
$csdef->{bom_required}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
$obj->{input_encoding} = 'UTF-8';
}
if (defined $csdef->{no_bom_variant32endian1234}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};
}
} elsif (substr ($b, 2, 2) eq "<\x00") { # ASCII32Endian2143
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii32} or
$csdef->{ascii32endian1234} or
$csdef->{ascii32endian3412} or
$csdef->{ascii32endian4321} or
$csdef->{bom_required}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
$obj->{input_encoding} = 'UTF-8';
}
if (defined $csdef->{no_bom_variant32endian2143}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};
}
} elsif (substr ($b, 2, 2) eq "\xFE\xFF") { # ASCII32Endian1234
$obj->{has_bom} = 1;
substr ($b, 0, 4) = '';
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii32} or
$csdef->{ascii32endian2143} or
$csdef->{ascii32endian3412} or
$csdef->{ascii32endian4321} or
$csdef->{bom_required}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
$obj->{input_encoding} = 'UTF-8';
$obj->{has_bom} = 0;
$obj->{byte_buffer} .= "\x00\x00\xFE\xFF";
}
if (defined $csdef->{no_bom_variant32endian1234}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};
}
} elsif (substr ($b, 2, 2) eq "\xFF\xFE") { # ASCII32Endian2143
$obj->{has_bom} = 1;
substr ($b, 0, 4) = '';
my $c = $b; $c =~ tr/\x00//d;
if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
encoding\s*=\s*["']([^"']*)/x) {
$obj->{input_encoding} = $1;
my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
$csdef = $Whatpm::Charset::CharsetDef->{$uri};
if (not $csdef->{ascii32} or
$csdef->{ascii32endian1234} or
$csdef->{ascii32endian3412} or
$csdef->{ascii32endian4321} or
$csdef->{bom_required}) {
$obj->{onerror}->(undef, 'charset-name-mismatch-error',
charset_uri => $uri,
charset_name => $obj->{input_encoding});
}
} else {
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
$obj->{input_encoding} = 'UTF-8';
$obj->{has_bom} = 0;
$obj->{byte_buffer} .= "\x00\x00\xFF\xFE";
}
if (defined $csdef->{no_bom_variant32endian2143}) {
$csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};
}
}
# \x4C\x6F\xA7\x94 EBCDIC
} # buffer
$obj->{byte_buffer} .= $b;
} # read
} elsif ($csdef->{uri}->{$XML_CHARSET.'utf-8'}) {
## BOM is optional.
my $b = '';
if (read $obj->{filehandle}, $b, 3) {
if ($b eq "\xEF\xBB\xBF") {
$obj->{has_bom} = 1;
} else {
$obj->{byte_buffer} .= $b;
}
}
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; # UTF-8 w/o BOM
} elsif ($csdef->{uri}->{$XML_CHARSET.'utf-16'}) {
## BOM is mandated.
my $b = '';
if (read $obj->{filehandle}, $b, 2) {
if ($b eq "\xFE\xFF") {
$obj->{has_bom} = 1; # UTF-16BE w/o BOM
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
} elsif ($b eq "\xFF\xFE") {
$obj->{has_bom} = 1; # UTF-16LE w/o BOM
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
} else {
$obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
$obj->{has_bom} = 0;
$obj->{byte_buffer} .= $b; # UTF-16BE w/o BOM
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
}
} else {
$obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
$obj->{has_bom} = 0; # UTF-16BE w/o BOM
$csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
}
}
if ($csdef->{uri}->{$XML_CHARSET.'iso-2022-jp'}) {
$obj->{state_2440} = 'gl-jis-1997-swapped';
$obj->{state_2442} = 'gl-jis-1997';
$obj->{state} = 'state_2842';
require Encode::GLJIS1997Swapped;
require Encode::GLJIS1997;
if (Encode::find_encoding ($obj->{state_2440}) and
Encode::find_encoding ($obj->{state_2442})) {
return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP';
}
} elsif ($csdef->{uri}->{$IANA_CHARSET.'iso-2022-jp'}) {
$obj->{state_2440} = 'gl-jis-1978';
$obj->{state_2442} = 'gl-jis-1983';
$obj->{state} = 'state_2842';
require Encode::GLJIS1978;
require Encode::GLJIS1983;
if (Encode::find_encoding ($obj->{state_2440}) and
Encode::find_encoding ($obj->{state_2442})) {
return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP';
}
} elsif (defined $csdef->{perl_name}->[0]) {
if ($csdef->{uri}->{$XML_CHARSET.'euc-jp'} or
$csdef->{uri}->{$IANA_CHARSET.'euc-jp'}) {
$obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
require Encode::EUCJP1997;
if (Encode::find_encoding ($obj->{perl_encoding_name})) {
return bless $obj, 'Whatpm::Charset::DecodeHandle::EUCJP';
}
} elsif ($csdef->{uri}->{$XML_CHARSET.'shift_jis'} or
$csdef->{uri}->{$IANA_CHARSET.'shift_jis'}) {
$obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
require Encode::ShiftJIS1997;
if (Encode::find_encoding ($obj->{perl_encoding_name})) {
return bless $obj, 'Whatpm::Charset::DecodeHandle::ShiftJIS';
}
} elsif ($csdef->{is_block_safe}) {
$obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
require Encode;
if (Encode::find_encoding ($obj->{perl_encoding_name})) {
return bless $obj, 'Whatpm::Charset::DecodeHandle::Encode';
}
}
}
$obj->{onerror}->(undef, 'charset-not-supported-error',
charset_uri => $obj->{charset});
return undef;
} # create_decode_handle
sub name_to_uri ($$$) {
my $domain = $_[1];
my $name = lc $_[2];
if ($domain eq 'ietf') {
return $IANA_CHARSET . $name;
} elsif ($domain eq 'xml') {
if ({
'utf-8' => 1,
'utf-16' => 1,
'iso-10646-ucs-2' => 1,
'iso-10646-ucs-4' => 1,
'iso-8859-1' => 1,
'iso-8859-2' => 1,
'iso-8859-3' => 1,
'iso-8859-4' => 1,
'iso-8859-5' => 1,
'iso-8859-6' => 1,
'iso-8859-7' => 1,
'iso-8859-8' => 1,
'iso-8859-9' => 1,
'iso-8859-10' => 1,
'iso-8859-11' => 1,
'iso-8859-13' => 1,
'iso-8859-14' => 1,
'iso-8859-15' => 1,
'iso-8859-16' => 1,
'iso-2022-jp' => 1,
'shift_jis' => 1,
'euc-jp' => 1,
}->{$name}) {
return $XML_CHARSET . $name;
}
my $uri = $IANA_CHARSET . $name;
return $uri if $Whatpm::Charset::CharsetDef->{$uri};
return $XML_CHARSET . $name;
} else {
return undef;
}
} # name_to_uri
sub uri_to_name ($$$) {
my (undef, $domain, $uri) = @_;
if ($domain eq 'xml') {
my $v = $Whatpm::Charset::CharsetDef->{$uri}->{xml_name};
return $v if defined $v;
if (substr ($uri, 0, length $XML_CHARSET) eq $XML_CHARSET) {
return substr ($uri, length $XML_CHARSET);
}
$domain = 'ietf'; ## TODO: XML encoding name has smaller range
}
if ($domain eq 'ietf') {
my $v = $Whatpm::Charset::CharsetDef->{$uri}->{iana_name};
return $v->[0] if defined $v;
if (substr ($uri, 0, length $IANA_CHARSET) eq $IANA_CHARSET) {
return substr ($uri, length $IANA_CHARSET);
}
}
return undef;
} # uri_to_name
require IO::Handle;
package Whatpm::Charset::DecodeHandle::ByteBuffer;
sub new ($$) {
my $self = bless {
buffer => '',
}, shift;
$self->{filehandle} = shift;
return $self;
} # new
sub read {
my $self = shift;
my $pos = length $self->{buffer};
my $r = $self->{filehandle}->read ($self->{buffer}, $_[1], $pos);
substr ($_[0], $_[2]) = substr ($self->{buffer}, $pos);
return $r;
} # read
sub close { $_[0]->{filehandle}->close }
package Whatpm::Charset::DecodeHandle::Encode;
sub charset ($) { $_[0]->{charset} }
sub close ($) { $_[0]->{filehandle}->close }
sub getc ($) {
my $self = $_[0];
return shift @{$self->{character_queue}} if @{$self->{character_queue}};
my $error;
if ($self->{continue}) {
if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
length $self->{byte_buffer})) {
#
} else {
$error = 1;
}
$self->{continue} = 0;
} elsif (512 > length $self->{byte_buffer}) {
$self->{filehandle}->read ($self->{byte_buffer}, 256,
length $self->{byte_buffer});
}
my $r;
unless ($error) {
my $string = Encode::decode ($self->{perl_encoding_name},
$self->{byte_buffer},
Encode::FB_QUIET ());
if (length $string) {
push @{$self->{character_queue}}, split //, $string;
$r = shift @{$self->{character_queue}};
if (length $self->{byte_buffer}) {
$self->{continue} = 1;
}
} else {
if (length $self->{byte_buffer}) {
$error = 1;
} else {
$r = undef;
}
}
}
if ($error) {
$r = substr $self->{byte_buffer}, 0, 1, '';
$self->{onerror}->($self, 'illegal-octets-error', octets => \$r);
}
return $r;
} # getc
sub has_bom ($) { $_[0]->{has_bom} }
sub input_encoding ($) {
my $v = $_[0]->{input_encoding};
return $v if defined $v;
my $uri = $_[0]->{charset};
if (defined $uri) {
return Whatpm::Charset::DecodeHandle->uri_to_name (xml => $uri);
}
return undef;
} # input_encoding
sub onerror ($;$) {
if (@_ > 1) {
$_[0]->{onerror} = $_[1];
}
return $_[0]->{onerror};
} # onerror
sub ungetc ($$) {
unshift @{$_[0]->{character_queue}}, chr int ($_[1] or 0);
} # ungetc
package Whatpm::Charset::DecodeHandle::EUCJP;
push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
sub getc ($) {
my $self = $_[0];
return shift @{$self->{character_queue}} if @{$self->{character_queue}};
my $error;
if ($self->{continue}) {
if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
length $self->{byte_buffer})) {
#
} else {
$error = 1;
}
$self->{continue} = 0;
} elsif (512 > length $self->{byte_buffer}) {
$self->{filehandle}->read ($self->{byte_buffer}, 256,
length $self->{byte_buffer});
}
my $r;
unless ($error) {
my $string = Encode::decode ($self->{perl_encoding_name},
$self->{byte_buffer},
Encode::FB_QUIET ());
if (length $string) {
push @{$self->{character_queue}}, split //, $string;
$r = shift @{$self->{character_queue}};
if (length $self->{byte_buffer}) {
$self->{continue} = 1;
}
} else {
if (length $self->{byte_buffer}) {
$error = 1;
} else {
$r = undef;
}
}
}
if ($error) {
$r = substr $self->{byte_buffer}, 0, 1, '';
my $etype = 'illegal-octets-error';
if ($r =~ /^[\xA1-\xFE]/) {
if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
$r .= $1;
$etype = 'unassigned-code-point-error';
}
} elsif ($r eq "\x8F") {
if ($self->{byte_buffer} =~ s/^([\xA1-\xFE][\xA1-\xFE]?)//) {
$r .= $1;
$etype = 'unassigned-code-point-error' if length $1 == 2;
}
} elsif ($r eq "\x8E") {
if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
$r .= $1;
$etype = 'unassigned-code-point-error';
}
} elsif ($r eq "\xA0" or $r eq "\xFF") {
$etype = 'unassigned-code-point-error';
}
$self->{onerror}->($self, $etype, octets => \$r);
}
return $r;
} # getc
package Whatpm::Charset::DecodeHandle::ISO2022JP;
push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
sub getc ($) {
my $self = $_[0];
return shift @{$self->{character_queue}} if @{$self->{character_queue}};
my $r;
A: {
my $error;
if ($self->{continue}) {
if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
length $self->{byte_buffer})) {
#
} else {
$error = 1;
}
$self->{continue} = 0;
} elsif (512 > length $self->{byte_buffer}) {
$self->{filehandle}->read ($self->{byte_buffer}, 256,
length $self->{byte_buffer});
}
unless ($error) {
if ($self->{byte_buffer} =~ s/^\x1B(\x24[\x40\x42]|\x28[\x42\x4A])//) {
$self->{state} = {
"\x24\x40" => 'state_2440',
"\x24\x42" => 'state_2442',
"\x28\x42" => 'state_2842',
"\x28\x4A" => 'state_284A',
}->{$1};
redo A;
} elsif ($self->{state} eq 'state_2842') { # IRV
if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {
push @{$self->{character_queue}}, split //, $1;
$r = shift @{$self->{character_queue}};
} else {
if (length $self->{byte_buffer}) {
$error = 1;
} else {
$r = undef;
}
}
} elsif ($self->{state} eq 'state_284A') { # 0201
if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {
my $v = $1;
$v =~ tr/\x5C\x7E/\xA5\x{203E}/;
push @{$self->{character_queue}}, split //, $v;
$r = shift @{$self->{character_queue}};
} else {
if (length $self->{byte_buffer}) {
$error = 1;
} else {
$r = undef;
$self->{onerror}->($self, 'invalid-state-error',
state => $self->{state});
}
}
} elsif ($self->{state} eq 'state_2442') { # 1983
my $v = Encode::decode ($self->{state_2442},
$self->{byte_buffer},
Encode::FB_QUIET ());
if (length $v) {
push @{$self->{character_queue}}, split //, $v;
$r = shift @{$self->{character_queue}};
} else {
if (length $self->{byte_buffer}) {
$error = 1;
} else {
$r = undef;
$self->{onerror}->($self, 'invalid-state-error',
state => $self->{state});
}
}
} elsif ($self->{state} eq 'state_2440') { # 1978
my $v = Encode::decode ($self->{state_2440},
$self->{byte_buffer},
Encode::FB_QUIET ());
if (length $v) {
push @{$self->{character_queue}}, split //, $v;
$r = shift @{$self->{character_queue}};
} else {
if (length $self->{byte_buffer}) {
$error = 1;
} else {
$r = undef;
$self->{onerror}->($self, 'invalid-state-error',
state => $self->{state});
}
}
} else {
$error = 1;
}
}
if ($error) {
$r = substr $self->{byte_buffer}, 0, 1, '';
my $etype = 'illegal-octets-error';
if (($self->{state} eq 'state_2442' or
$self->{state} eq 'state_2440') and
$r =~ /^[\x21-\x7E]/ and
$self->{byte_buffer} =~ s/^([\x21-\x7E])//) {
$r .= $1;
$etype = 'unassigned-code-point-error';
} elsif ($r eq "\x1B" and
$self->{byte_buffer} =~ s/^\(H//) { # Old 0201
$r .= "(H";
$self->{state} = 'state_284A';
}
$self->{onerror}->($self, $etype, octets => \$r);
}
} # A
return $r;
} # getc
package Whatpm::Charset::DecodeHandle::ShiftJIS;
push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
sub getc ($) {
my $self = $_[0];
return shift @{$self->{character_queue}} if @{$self->{character_queue}};
my $error;
if ($self->{continue}) {
if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
length $self->{byte_buffer})) {
#
} else {
$error = 1;
}
$self->{continue} = 0;
} elsif (512 > length $self->{byte_buffer}) {
$self->{filehandle}->read ($self->{byte_buffer}, 256,
length $self->{byte_buffer});
}
my $r;
unless ($error) {
my $string = Encode::decode ($self->{perl_encoding_name},
$self->{byte_buffer},
Encode::FB_QUIET ());
if (length $string) {
push @{$self->{character_queue}}, split //, $string;
$r = shift @{$self->{character_queue}};
if (length $self->{byte_buffer}) {
$self->{continue} = 1;
}
} else {
if (length $self->{byte_buffer}) {
$error = 1;
} else {
$r = undef;
}
}
}
if ($error) {
$r = substr $self->{byte_buffer}, 0, 1, '';
my $etype = 'illegal-octets-error';
if ($r =~ /^[\x81-\x9F\xE0-\xEF]/) {
if ($self->{byte_buffer} =~ s/(.)//s) {
$r .= $1; # not limited to \x40-\xFC - \x7F
$etype = 'unassigned-code-point-error';
}
} elsif ($r =~ /^[\x80\xA0\xF0-\xFF]/) {
$etype = 'unassigned-code-point-error';
}
$self->{onerror}->($self, $etype, octets => \$r);
}
return $r;
} # getc
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us-ascii'} =
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us'} =
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso646-us'} =
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:cp367'} =
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ibm367'} =
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1986'} =
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1968'} =
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-ir-6'} =
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:csascii'} =
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_646.irv:1991'} =
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ascii'} = {ascii8 =>
'1',
is_block_safe =>
'1',
ietf_name =>
['ansi_x3.4-1968',
'ansi_x3.4-1986',
'ascii',
'cp367',
'csascii',
'ibm367',
'iso-ir-6',
'iso646-us',
'iso_646.irv:1991',
'us',
'us-ascii',
'us-ascii'],
mime_name =>
'us-ascii',
perl_name =>
['ascii',
'iso-646-us',
'us-ascii'],
utf8_encoding_scheme =>
'1',
'uri',
{'urn:x-suika-fam-cx:charset:ansi_x3.4-1968',
'1',
'urn:x-suika-fam-cx:charset:ansi_x3.4-1986',
'1',
'urn:x-suika-fam-cx:charset:ascii',
'1',
'urn:x-suika-fam-cx:charset:cp367',
'1',
'urn:x-suika-fam-cx:charset:csascii',
'1',
'urn:x-suika-fam-cx:charset:ibm367',
'1',
'urn:x-suika-fam-cx:charset:iso-ir-6',
'1',
'urn:x-suika-fam-cx:charset:iso646-us',
'1',
'urn:x-suika-fam-cx:charset:iso_646.irv:1991',
'1',
'urn:x-suika-fam-cx:charset:us',
'1',
'urn:x-suika-fam-cx:charset:us-ascii',
'1'},
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl'} = {perl_name =>
['ascii-ctrl'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl',
'1'}};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null'} = {perl_name =>
['null'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null',
'1'}};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8'} = {ascii8 =>
'1',
bom_allowed =>
'1',
no_bom_variant =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
utf8_encoding_scheme =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8',
'1'},
xml_name => 'UTF-8',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279'} = {ascii8 =>
'1',
bom_allowed =>
'1',
no_bom_variant =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
utf8_encoding_scheme =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279',
'1'}};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8'} = {
ascii8 => 1,
is_block_safe =>
'1',
perl_name =>
['utf-8'],
utf8_encoding_scheme =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',
'1'}};
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-8'} = {
ascii8 => 1,
bom_allowed =>
'1',
no_bom_variant =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',
ietf_name =>
['utf-8'],
mime_name =>
'utf-8',
utf8_encoding_scheme =>
'1',
'uri',
{'urn:x-suika-fam-cx:charset:utf-8',
'1'},
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8'} = {ascii8 =>
'1',
is_block_safe =>
'1',
perl_name =>
['utf8'],
utf8_encoding_scheme =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
'1'}};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16'} = {
ascii16 => 1,
bom_allowed =>
'1',
bom_required =>
'1',
no_bom_variant =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
no_bom_variant16be =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
no_bom_variant16le =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
perl_name =>
['utf-16'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16',
'1'},
xml_name => 'UTF-16',
};
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16'} = {
ascii16 => 1,
bom_allowed =>
'1',
no_bom_variant =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
no_bom_variant16be =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
no_bom_variant16le =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
ietf_name =>
['utf-16'],
mime_name =>
'utf-16',
'uri',
{'urn:x-suika-fam-cx:charset:utf-16',
'1'},
};
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16be'} = {
ascii16 => 1,
ascii16be => 1,
bom_allowed =>
'1',
no_bom_variant =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
no_bom_variant16be =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
ietf_name =>
['utf-16be'],
mime_name =>
'utf-16be',
'uri',
{'urn:x-suika-fam-cx:charset:utf-16be',
'1'},
};
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16le'} = {
ascii16 => 1,
ascii16le => 1,
bom_allowed =>
'1',
no_bom_variant =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
no_bom_variant16le =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
ietf_name =>
['utf-16le'],
mime_name =>
'utf-16le',
'uri',
{'urn:x-suika-fam-cx:charset:utf-16le',
'1'},
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be'} = {
ascii16 => 1,
ascii16be => 1,
is_block_safe =>
'1',
perl_name =>
['utf-16be'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
'1'}};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le'} = {
ascii16 => 1,
ascii16le => 1,
is_block_safe =>
'1',
perl_name =>
['utf-16le'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
'1'}};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-2'} = {
ascii16 => 1,
bom_allowed =>
'1',
no_bom_variant =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
no_bom_variant16be =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',
no_bom_variant16le =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
ietf_name =>
['csunicode',
'iso-10646-ucs-2'],
mime_name =>
'iso-10646-ucs-2',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2',
'1',
'urn:x-suika-fam-cx:charset:iso-10646-ucs-2',
'1'},
xml_name => 'ISO-10646-UCS-2',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be'} = {
ascii16 => 1,
ascii16be => 1,
is_block_safe =>
'1',
perl_name =>
['ucs-2be'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',
'1'}};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le'} = {
ascii16 => 1,
ascii16le => 1,
is_block_safe =>
'1',
perl_name =>
['ucs-2le'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
'1'}};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-4'} = {
ascii32 => 1,
bom_allowed =>
'1',
no_bom_variant =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
no_bom_variant32endian1234 =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',
no_bom_variant32endian4321 =>
'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
ietf_name =>
['csucs4',
'iso-10646-ucs-4'],
mime_name =>
'iso-10646-ucs-4',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4',
'1',
'urn:x-suika-fam-cx:charset:iso-10646-ucs-4',
'1'},
xml_name => 'ISO-10646-UCS-4',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be'} = {
ascii32 => 1,
ascii32endian1234 => 1,
is_block_safe =>
'1',
perl_name =>
['ucs-4be',
'utf-32be'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',
'1'}};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le'} = {
ascii32 => 1,
ascii32endian4321 => 1,
is_block_safe =>
'1',
perl_name =>
['ucs-4le',
'utf-32le'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
'1'}};
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_8859-1:1987'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1'} = {ascii8 =>
'1',
is_block_safe =>
'1',
ietf_name =>
['cp819',
'csisolatin1',
'ibm819',
'iso-8859-1',
'iso-8859-1',
'iso-ir-100',
'iso_8859-1',
'iso_8859-1:1987',
'l1',
'latin1'],
mime_name =>
'iso-8859-1',
perl_name =>
['iso-8859-1',
'latin1'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1',
'1',
'urn:x-suika-fam-cx:charset:iso_8859-1:1987',
'1'},
xml_name => 'ISO-8859-1',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2',
'1'},
xml_name => 'ISO-8859-2',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3',
'1'},
xml_name => 'ISO-8859-3',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4',
'1'},
xml_name => 'ISO-8859-4',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5',
'1'},
xml_name => 'ISO-8859-5',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6',
'1'},
xml_name => 'ISO-8859-6',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7',
'1'},
xml_name => 'ISO-8859-7',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8',
'1'},
xml_name => 'ISO-8859-8',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9',
'1'},
xml_name => 'ISO-8859-9',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10',
'1'},
xml_name => 'ISO-8859-10',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11',
'1'},
xml_name => 'ISO-8859-11',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13',
'1'},
xml_name => 'ISO-8859-13',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14',
'1'},
xml_name => 'ISO-8859-14',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15',
'1'},
xml_name => 'ISO-8859-15',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16'} = {ascii8 =>
'1',
is_block_safe =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16',
'1'},
xml_name => 'ISO-8859-16',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp'} = {ascii8 =>
'1',
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp',
'1'},
xml_name => 'ISO-2022-JP',
};
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-2022-jp'} = {ascii8 =>
'1',
ietf_name =>
['csiso2022jp',
'iso-2022-jp',
'iso-2022-jp'],
mime_name =>
'iso-2022-jp',
'uri',
{'urn:x-suika-fam-cx:charset:iso-2022-jp',
'1'},
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp'} = {ascii8 =>
'1',
perl_name =>
['iso-2022-jp'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp',
'1'}};
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:shift_jis'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis'} = {ascii8 =>
'1',
is_block_safe =>
'1',
ietf_name =>
['csshiftjis',
'ms_kanji',
'shift_jis',
'shift_jis'],
mime_name =>
'shift_jis',
perl_name =>
['shift-jis-1997'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis',
'1',
'urn:x-suika-fam-cx:charset:shift_jis',
'1'},
xml_name => 'Shift_JIS',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis'} = {ascii8 =>
'1',
is_block_safe =>
'1',
perl_name =>
['shiftjis',
'sjis'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis',
'1'}};
$Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:euc-jp'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese'} = {ascii8 =>
'1',
is_block_safe =>
'1',
ietf_name =>
['cseucpkdfmtjapanese',
'euc-jp',
'euc-jp',
'extended_unix_code_packed_format_for_japanese'],
mime_name =>
'euc-jp',
perl_name =>
['euc-jp-1997'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp',
'1',
'urn:x-suika-fam-cx:charset:euc-jp',
'1',
'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese',
'1'},
xml_name => 'EUC-JP',
};
$Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp'} = {ascii8 =>
'1',
is_block_safe =>
'1',
perl_name =>
['euc-jp',
'ujis'],
'uri',
{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp',
'1'}};
1;
## $Date: 2008/05/17 12:29:24 $