package Whatpm::Charset::DecodeHandle; use strict; my $XML_AUTO_CHARSET = q; my $IANA_CHARSET = q; my $PERL_CHARSET = q; my $XML_CHARSET = q; ## ->create_decode_handle ($charset_uri, $byte_stream, $onerror) sub create_decode_handle ($$$;$) { my $csdef = $Whatpm::Charset::CharsetDef->{$_[1]}; my $obj = { character_queue => [], filehandle => $_[2], charset => $_[1], byte_buffer => '', onerror => $_[3] || sub {}, }; if ($csdef->{uri}->{$XML_AUTO_CHARSET} or $obj->{charset} eq $XML_AUTO_CHARSET) { my $b = ''; # UTF-8 w/o BOM $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; $obj->{input_encoding} = 'UTF-8'; if (read $obj->{filehandle}, $b, 256) { no warnings "substr"; no warnings "uninitialized"; if (substr ($b, 0, 1) eq "<") { if (substr ($b, 1, 1) eq "?") { # ASCII8 if ($b =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii8} or $csdef->{bom_required}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; $obj->{input_encoding} = 'UTF-8'; } if (defined $csdef->{no_bom_variant}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant}}; } } elsif (substr ($b, 1, 1) eq "\x00") { if (substr ($b, 2, 2) eq "?\x00") { # ASCII16LE my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii16} or $csdef->{ascii16be} or $csdef->{bom_required}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; $obj->{input_encoding} = 'UTF-8'; } if (defined $csdef->{no_bom_variant16le}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16le}}; } } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321 my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii32} or $csdef->{ascii32endian1234} or $csdef->{ascii32endian2143} or $csdef->{ascii32endian3412} or $csdef->{bom_required}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; $obj->{input_encoding} = 'UTF-8'; } if (defined $csdef->{no_bom_variant32endian4321}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}}; } } } } elsif (substr ($b, 0, 3) eq "\xEF\xBB\xBF") { # UTF8 $obj->{has_bom} = 1; substr ($b, 0, 3) = ''; my $c = $b; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{utf8_encoding_scheme} or not $csdef->{bom_allowed}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; $obj->{input_encoding} = 'UTF-8'; } if (defined $csdef->{no_bom_variant}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant}}; } } elsif (substr ($b, 0, 2) eq "\x00<") { if (substr ($b, 2, 2) eq "\x00?") { # ASCII16BE my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii16} or $csdef->{ascii16le} or $csdef->{bom_required}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; $obj->{input_encoding} = 'UTF-8'; } if (defined $csdef->{no_bom_variant16be}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16be}}; } } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412 my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii32} or $csdef->{ascii32endian1234} or $csdef->{ascii32endian2143} or $csdef->{ascii32endian4321} or $csdef->{bom_required}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; $obj->{input_encoding} = 'UTF-8'; } if (defined $csdef->{no_bom_variant32endian3412}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}}; } } } elsif (substr ($b, 0, 2) eq "\xFE\xFF") { if (substr ($b, 2, 2) eq "\x00<") { # ASCII16BE $obj->{has_bom} = 1; substr ($b, 0, 2) = ''; my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii16} or $csdef->{ascii16le} or not $csdef->{bom_allowed}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'}; $obj->{input_encoding} = 'UTF-16'; } if (defined $csdef->{no_bom_variant16be}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16be}}; } } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412 $obj->{has_bom} = 1; substr ($b, 0, 4) = ''; my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii32} or $csdef->{ascii32endian1234} or $csdef->{ascii32endian2143} or $csdef->{ascii32endian4321} or not $csdef->{bom_allowed}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'}; $obj->{input_encoding} = 'UTF-16'; $obj->{byte_buffer} .= "\x00\x00"; } if (defined $csdef->{no_bom_variant32endian3412}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}}; } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'}; $obj->{input_encoding} = 'UTF-16'; substr ($b, 0, 2) = ''; $obj->{has_bom} = 1; } } elsif (substr ($b, 0, 2) eq "\xFF\xFE") { if (substr ($b, 2, 2) eq "<\x00") { # ASCII16LE $obj->{has_bom} = 1; substr ($b, 0, 2) = ''; my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii16} or $csdef->{ascii16be} or not $csdef->{bom_allowed}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'}; $obj->{input_encoding} = 'UTF-16'; } if (defined $csdef->{no_bom_variant16le}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16le}}; } } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321 $obj->{has_bom} = 1; substr ($b, 0, 4) = ''; my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii32} or $csdef->{ascii32endian1234} or $csdef->{ascii32endian2143} or $csdef->{ascii32endian3412} or not $csdef->{bom_allowed}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'}; $obj->{input_encoding} = 'UTF-16'; $obj->{byte_buffer} .= "\x00\x00"; } if (defined $csdef->{no_bom_variant32endian4321}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}}; } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'}; $obj->{input_encoding} = 'UTF-16'; substr ($b, 0, 2) = ''; $obj->{has_bom} = 1; } } elsif (substr ($b, 0, 2) eq "\x00\x00") { if (substr ($b, 2, 2) eq "\x00<") { # ASCII32Endian1234 my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii32} or $csdef->{ascii32endian2143} or $csdef->{ascii32endian3412} or $csdef->{ascii32endian4321} or $csdef->{bom_required}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; $obj->{input_encoding} = 'UTF-8'; } if (defined $csdef->{no_bom_variant32endian1234}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}}; } } elsif (substr ($b, 2, 2) eq "<\x00") { # ASCII32Endian2143 my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii32} or $csdef->{ascii32endian1234} or $csdef->{ascii32endian3412} or $csdef->{ascii32endian4321} or $csdef->{bom_required}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; $obj->{input_encoding} = 'UTF-8'; } if (defined $csdef->{no_bom_variant32endian2143}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}}; } } elsif (substr ($b, 2, 2) eq "\xFE\xFF") { # ASCII32Endian1234 $obj->{has_bom} = 1; substr ($b, 0, 4) = ''; my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii32} or $csdef->{ascii32endian2143} or $csdef->{ascii32endian3412} or $csdef->{ascii32endian4321} or $csdef->{bom_required}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; $obj->{input_encoding} = 'UTF-8'; $obj->{has_bom} = 0; $obj->{byte_buffer} .= "\x00\x00\xFE\xFF"; } if (defined $csdef->{no_bom_variant32endian1234}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}}; } } elsif (substr ($b, 2, 2) eq "\xFF\xFE") { # ASCII32Endian2143 $obj->{has_bom} = 1; substr ($b, 0, 4) = ''; my $c = $b; $c =~ tr/\x00//d; if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)? encoding\s*=\s*["']([^"']*)/x) { $obj->{input_encoding} = $1; my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding}); $csdef = $Whatpm::Charset::CharsetDef->{$uri}; if (not $csdef->{ascii32} or $csdef->{ascii32endian1234} or $csdef->{ascii32endian3412} or $csdef->{ascii32endian4321} or $csdef->{bom_required}) { $obj->{onerror}->(undef, 'charset-name-mismatch-error', charset_uri => $uri, charset_name => $obj->{input_encoding}); } } else { $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; $obj->{input_encoding} = 'UTF-8'; $obj->{has_bom} = 0; $obj->{byte_buffer} .= "\x00\x00\xFF\xFE"; } if (defined $csdef->{no_bom_variant32endian2143}) { $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}}; } } # \x4C\x6F\xA7\x94 EBCDIC } # buffer $obj->{byte_buffer} .= $b; } # read } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-8'}) { ## BOM is optional. my $b = ''; if (read $obj->{filehandle}, $b, 3) { if ($b eq "\xEF\xBB\xBF") { $obj->{has_bom} = 1; } else { $obj->{byte_buffer} .= $b; } } $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; # UTF-8 w/o BOM } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-16'}) { ## BOM is mandated. my $b = ''; if (read $obj->{filehandle}, $b, 2) { if ($b eq "\xFE\xFF") { $obj->{has_bom} = 1; # UTF-16BE w/o BOM $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'}; } elsif ($b eq "\xFF\xFE") { $obj->{has_bom} = 1; # UTF-16LE w/o BOM $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'}; } else { $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset}); $obj->{has_bom} = 0; $obj->{byte_buffer} .= $b; # UTF-16BE w/o BOM $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'}; } } else { $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset}); $obj->{has_bom} = 0; # UTF-16BE w/o BOM $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'}; } } if ($csdef->{uri}->{$XML_CHARSET.'iso-2022-jp'}) { $obj->{state_2440} = 'gl-jis-1997-swapped'; $obj->{state_2442} = 'gl-jis-1997'; $obj->{state} = 'state_2842'; require Encode::GLJIS1997Swapped; require Encode::GLJIS1997; if (Encode::find_encoding ($obj->{state_2440}) and Encode::find_encoding ($obj->{state_2442})) { return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP'; } } elsif ($csdef->{uri}->{$IANA_CHARSET.'iso-2022-jp'}) { $obj->{state_2440} = 'gl-jis-1978'; $obj->{state_2442} = 'gl-jis-1983'; $obj->{state} = 'state_2842'; require Encode::GLJIS1978; require Encode::GLJIS1983; if (Encode::find_encoding ($obj->{state_2440}) and Encode::find_encoding ($obj->{state_2442})) { return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP'; } } elsif (defined $csdef->{perl_name}->[0]) { if ($csdef->{uri}->{$XML_CHARSET.'euc-jp'} or $csdef->{uri}->{$IANA_CHARSET.'euc-jp'}) { $obj->{perl_encoding_name} = $csdef->{perl_name}->[0]; require Encode::EUCJP1997; if (Encode::find_encoding ($obj->{perl_encoding_name})) { return bless $obj, 'Whatpm::Charset::DecodeHandle::EUCJP'; } } elsif ($csdef->{uri}->{$XML_CHARSET.'shift_jis'} or $csdef->{uri}->{$IANA_CHARSET.'shift_jis'}) { $obj->{perl_encoding_name} = $csdef->{perl_name}->[0]; require Encode::ShiftJIS1997; if (Encode::find_encoding ($obj->{perl_encoding_name})) { return bless $obj, 'Whatpm::Charset::DecodeHandle::ShiftJIS'; } } elsif ($csdef->{is_block_safe}) { $obj->{perl_encoding_name} = $csdef->{perl_name}->[0]; require Encode; if (Encode::find_encoding ($obj->{perl_encoding_name})) { return bless $obj, 'Whatpm::Charset::DecodeHandle::Encode'; } } } $obj->{onerror}->(undef, 'charset-not-supported-error', charset_uri => $obj->{charset}); return undef; } # create_decode_handle sub name_to_uri ($$$) { my $domain = $_[1]; my $name = lc $_[2]; if ($domain eq 'ietf') { return $IANA_CHARSET . $name; } elsif ($domain eq 'xml') { if ({ 'utf-8' => 1, 'utf-16' => 1, 'iso-10646-ucs-2' => 1, 'iso-10646-ucs-4' => 1, 'iso-8859-1' => 1, 'iso-8859-2' => 1, 'iso-8859-3' => 1, 'iso-8859-4' => 1, 'iso-8859-5' => 1, 'iso-8859-6' => 1, 'iso-8859-7' => 1, 'iso-8859-8' => 1, 'iso-8859-9' => 1, 'iso-8859-10' => 1, 'iso-8859-11' => 1, 'iso-8859-13' => 1, 'iso-8859-14' => 1, 'iso-8859-15' => 1, 'iso-8859-16' => 1, 'iso-2022-jp' => 1, 'shift_jis' => 1, 'euc-jp' => 1, }->{$name}) { return $XML_CHARSET . $name; } my $uri = $IANA_CHARSET . $name; return $uri if $Whatpm::Charset::CharsetDef->{$uri}; return $XML_CHARSET . $name; } else { return undef; } } # name_to_uri sub uri_to_name ($$$) { my (undef, $domain, $uri) = @_; if ($domain eq 'xml') { my $v = $Whatpm::Charset::CharsetDef->{$uri}->{xml_name}; return $v if defined $v; if (substr ($uri, 0, length $XML_CHARSET) eq $XML_CHARSET) { return substr ($uri, length $XML_CHARSET); } $domain = 'ietf'; ## TODO: XML encoding name has smaller range } if ($domain eq 'ietf') { my $v = $Whatpm::Charset::CharsetDef->{$uri}->{iana_name}; return $v->[0] if defined $v; if (substr ($uri, 0, length $IANA_CHARSET) eq $IANA_CHARSET) { return substr ($uri, length $IANA_CHARSET); } } return undef; } # uri_to_name require IO::Handle; package Whatpm::Charset::DecodeHandle::ByteBuffer; sub new ($$) { my $self = bless { buffer => '', }, shift; $self->{filehandle} = shift; return $self; } # new sub read { my $self = shift; my $pos = length $self->{buffer}; my $r = $self->{filehandle}->read ($self->{buffer}, $_[1], $pos); substr ($_[0], $_[2]) = substr ($self->{buffer}, $pos); return $r; } # read sub close { $_[0]->{filehandle}->close } package Whatpm::Charset::DecodeHandle::Encode; sub charset ($) { $_[0]->{charset} } sub close ($) { $_[0]->{filehandle}->close } sub getc ($) { my $self = $_[0]; return shift @{$self->{character_queue}} if @{$self->{character_queue}}; my $error; if ($self->{continue}) { if ($self->{filehandle}->read ($self->{byte_buffer}, 256, length $self->{byte_buffer})) { # } else { $error = 1; } $self->{continue} = 0; } elsif (512 > length $self->{byte_buffer}) { $self->{filehandle}->read ($self->{byte_buffer}, 256, length $self->{byte_buffer}); } my $r; unless ($error) { my $string = Encode::decode ($self->{perl_encoding_name}, $self->{byte_buffer}, Encode::FB_QUIET ()); if (length $string) { push @{$self->{character_queue}}, split //, $string; $r = shift @{$self->{character_queue}}; if (length $self->{byte_buffer}) { $self->{continue} = 1; } } else { if (length $self->{byte_buffer}) { $error = 1; } else { $r = undef; } } } if ($error) { $r = substr $self->{byte_buffer}, 0, 1, ''; $self->{onerror}->($self, 'illegal-octets-error', octets => \$r); } return $r; } # getc sub has_bom ($) { $_[0]->{has_bom} } sub input_encoding ($) { my $v = $_[0]->{input_encoding}; return $v if defined $v; my $uri = $_[0]->{charset}; if (defined $uri) { return Whatpm::Charset::DecodeHandle->uri_to_name (xml => $uri); } return undef; } # input_encoding sub onerror ($;$) { if (@_ > 1) { $_[0]->{onerror} = $_[1]; } return $_[0]->{onerror}; } # onerror sub ungetc ($$) { unshift @{$_[0]->{character_queue}}, chr int ($_[1] or 0); } # ungetc package Whatpm::Charset::DecodeHandle::EUCJP; push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode'; sub getc ($) { my $self = $_[0]; return shift @{$self->{character_queue}} if @{$self->{character_queue}}; my $error; if ($self->{continue}) { if ($self->{filehandle}->read ($self->{byte_buffer}, 256, length $self->{byte_buffer})) { # } else { $error = 1; } $self->{continue} = 0; } elsif (512 > length $self->{byte_buffer}) { $self->{filehandle}->read ($self->{byte_buffer}, 256, length $self->{byte_buffer}); } my $r; unless ($error) { my $string = Encode::decode ($self->{perl_encoding_name}, $self->{byte_buffer}, Encode::FB_QUIET ()); if (length $string) { push @{$self->{character_queue}}, split //, $string; $r = shift @{$self->{character_queue}}; if (length $self->{byte_buffer}) { $self->{continue} = 1; } } else { if (length $self->{byte_buffer}) { $error = 1; } else { $r = undef; } } } if ($error) { $r = substr $self->{byte_buffer}, 0, 1, ''; my $etype = 'illegal-octets-error'; if ($r =~ /^[\xA1-\xFE]/) { if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) { $r .= $1; $etype = 'unassigned-code-point-error'; } } elsif ($r eq "\x8F") { if ($self->{byte_buffer} =~ s/^([\xA1-\xFE][\xA1-\xFE]?)//) { $r .= $1; $etype = 'unassigned-code-point-error' if length $1 == 2; } } elsif ($r eq "\x8E") { if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) { $r .= $1; $etype = 'unassigned-code-point-error'; } } elsif ($r eq "\xA0" or $r eq "\xFF") { $etype = 'unassigned-code-point-error'; } $self->{onerror}->($self, $etype, octets => \$r); } return $r; } # getc package Whatpm::Charset::DecodeHandle::ISO2022JP; push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode'; sub getc ($) { my $self = $_[0]; return shift @{$self->{character_queue}} if @{$self->{character_queue}}; my $r; A: { my $error; if ($self->{continue}) { if ($self->{filehandle}->read ($self->{byte_buffer}, 256, length $self->{byte_buffer})) { # } else { $error = 1; } $self->{continue} = 0; } elsif (512 > length $self->{byte_buffer}) { $self->{filehandle}->read ($self->{byte_buffer}, 256, length $self->{byte_buffer}); } unless ($error) { if ($self->{byte_buffer} =~ s/^\x1B(\x24[\x40\x42]|\x28[\x42\x4A])//) { $self->{state} = { "\x24\x40" => 'state_2440', "\x24\x42" => 'state_2442', "\x28\x42" => 'state_2842', "\x28\x4A" => 'state_284A', }->{$1}; redo A; } elsif ($self->{state} eq 'state_2842') { # IRV if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) { push @{$self->{character_queue}}, split //, $1; $r = shift @{$self->{character_queue}}; } else { if (length $self->{byte_buffer}) { $error = 1; } else { $r = undef; } } } elsif ($self->{state} eq 'state_284A') { # 0201 if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) { my $v = $1; $v =~ tr/\x5C\x7E/\xA5\x{203E}/; push @{$self->{character_queue}}, split //, $v; $r = shift @{$self->{character_queue}}; } else { if (length $self->{byte_buffer}) { $error = 1; } else { $r = undef; $self->{onerror}->($self, 'invalid-state-error', state => $self->{state}); } } } elsif ($self->{state} eq 'state_2442') { # 1983 my $v = Encode::decode ($self->{state_2442}, $self->{byte_buffer}, Encode::FB_QUIET ()); if (length $v) { push @{$self->{character_queue}}, split //, $v; $r = shift @{$self->{character_queue}}; } else { if (length $self->{byte_buffer}) { $error = 1; } else { $r = undef; $self->{onerror}->($self, 'invalid-state-error', state => $self->{state}); } } } elsif ($self->{state} eq 'state_2440') { # 1978 my $v = Encode::decode ($self->{state_2440}, $self->{byte_buffer}, Encode::FB_QUIET ()); if (length $v) { push @{$self->{character_queue}}, split //, $v; $r = shift @{$self->{character_queue}}; } else { if (length $self->{byte_buffer}) { $error = 1; } else { $r = undef; $self->{onerror}->($self, 'invalid-state-error', state => $self->{state}); } } } else { $error = 1; } } if ($error) { $r = substr $self->{byte_buffer}, 0, 1, ''; my $etype = 'illegal-octets-error'; if (($self->{state} eq 'state_2442' or $self->{state} eq 'state_2440') and $r =~ /^[\x21-\x7E]/ and $self->{byte_buffer} =~ s/^([\x21-\x7E])//) { $r .= $1; $etype = 'unassigned-code-point-error'; } elsif ($r eq "\x1B" and $self->{byte_buffer} =~ s/^\(H//) { # Old 0201 $r .= "(H"; $self->{state} = 'state_284A'; } $self->{onerror}->($self, $etype, octets => \$r); } } # A return $r; } # getc package Whatpm::Charset::DecodeHandle::ShiftJIS; push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode'; sub getc ($) { my $self = $_[0]; return shift @{$self->{character_queue}} if @{$self->{character_queue}}; my $error; if ($self->{continue}) { if ($self->{filehandle}->read ($self->{byte_buffer}, 256, length $self->{byte_buffer})) { # } else { $error = 1; } $self->{continue} = 0; } elsif (512 > length $self->{byte_buffer}) { $self->{filehandle}->read ($self->{byte_buffer}, 256, length $self->{byte_buffer}); } my $r; unless ($error) { my $string = Encode::decode ($self->{perl_encoding_name}, $self->{byte_buffer}, Encode::FB_QUIET ()); if (length $string) { push @{$self->{character_queue}}, split //, $string; $r = shift @{$self->{character_queue}}; if (length $self->{byte_buffer}) { $self->{continue} = 1; } } else { if (length $self->{byte_buffer}) { $error = 1; } else { $r = undef; } } } if ($error) { $r = substr $self->{byte_buffer}, 0, 1, ''; my $etype = 'illegal-octets-error'; if ($r =~ /^[\x81-\x9F\xE0-\xEF]/) { if ($self->{byte_buffer} =~ s/(.)//s) { $r .= $1; # not limited to \x40-\xFC - \x7F $etype = 'unassigned-code-point-error'; } } elsif ($r =~ /^[\x80\xA0\xF0-\xFF]/) { $etype = 'unassigned-code-point-error'; } $self->{onerror}->($self, $etype, octets => \$r); } return $r; } # getc $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us-ascii'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso646-us'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:cp367'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ibm367'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1986'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1968'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-ir-6'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:csascii'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_646.irv:1991'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ascii'} = {ascii8 => '1', is_block_safe => '1', ietf_name => ['ansi_x3.4-1968', 'ansi_x3.4-1986', 'ascii', 'cp367', 'csascii', 'ibm367', 'iso-ir-6', 'iso646-us', 'iso_646.irv:1991', 'us', 'us-ascii', 'us-ascii'], mime_name => 'us-ascii', perl_name => ['ascii', 'iso-646-us', 'us-ascii'], utf8_encoding_scheme => '1', 'uri', {'urn:x-suika-fam-cx:charset:ansi_x3.4-1968', '1', 'urn:x-suika-fam-cx:charset:ansi_x3.4-1986', '1', 'urn:x-suika-fam-cx:charset:ascii', '1', 'urn:x-suika-fam-cx:charset:cp367', '1', 'urn:x-suika-fam-cx:charset:csascii', '1', 'urn:x-suika-fam-cx:charset:ibm367', '1', 'urn:x-suika-fam-cx:charset:iso-ir-6', '1', 'urn:x-suika-fam-cx:charset:iso646-us', '1', 'urn:x-suika-fam-cx:charset:iso_646.irv:1991', '1', 'urn:x-suika-fam-cx:charset:us', '1', 'urn:x-suika-fam-cx:charset:us-ascii', '1'}, }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl'} = {perl_name => ['ascii-ctrl'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl', '1'}}; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null'} = {perl_name => ['null'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null', '1'}}; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8'} = {ascii8 => '1', bom_allowed => '1', no_bom_variant => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8', utf8_encoding_scheme => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8', '1'}, xml_name => 'UTF-8', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279'} = {ascii8 => '1', bom_allowed => '1', no_bom_variant => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8', utf8_encoding_scheme => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279', '1'}}; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8'} = { ascii8 => 1, is_block_safe => '1', perl_name => ['utf-8'], utf8_encoding_scheme => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8', '1'}}; $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-8'} = { ascii8 => 1, bom_allowed => '1', no_bom_variant => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8', ietf_name => ['utf-8'], mime_name => 'utf-8', utf8_encoding_scheme => '1', 'uri', {'urn:x-suika-fam-cx:charset:utf-8', '1'}, }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8'} = {ascii8 => '1', is_block_safe => '1', perl_name => ['utf8'], utf8_encoding_scheme => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8', '1'}}; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16'} = { ascii16 => 1, bom_allowed => '1', bom_required => '1', no_bom_variant => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le', no_bom_variant16be => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be', no_bom_variant16le => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le', perl_name => ['utf-16'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16', '1'}, xml_name => 'UTF-16', }; $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16'} = { ascii16 => 1, bom_allowed => '1', no_bom_variant => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le', no_bom_variant16be => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be', no_bom_variant16le => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le', ietf_name => ['utf-16'], mime_name => 'utf-16', 'uri', {'urn:x-suika-fam-cx:charset:utf-16', '1'}, }; $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16be'} = { ascii16 => 1, ascii16be => 1, bom_allowed => '1', no_bom_variant => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be', no_bom_variant16be => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be', ietf_name => ['utf-16be'], mime_name => 'utf-16be', 'uri', {'urn:x-suika-fam-cx:charset:utf-16be', '1'}, }; $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16le'} = { ascii16 => 1, ascii16le => 1, bom_allowed => '1', no_bom_variant => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le', no_bom_variant16le => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le', ietf_name => ['utf-16le'], mime_name => 'utf-16le', 'uri', {'urn:x-suika-fam-cx:charset:utf-16le', '1'}, }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be'} = { ascii16 => 1, ascii16be => 1, is_block_safe => '1', perl_name => ['utf-16be'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be', '1'}}; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le'} = { ascii16 => 1, ascii16le => 1, is_block_safe => '1', perl_name => ['utf-16le'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le', '1'}}; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-2'} = { ascii16 => 1, bom_allowed => '1', no_bom_variant => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le', no_bom_variant16be => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be', no_bom_variant16le => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le', ietf_name => ['csunicode', 'iso-10646-ucs-2'], mime_name => 'iso-10646-ucs-2', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2', '1', 'urn:x-suika-fam-cx:charset:iso-10646-ucs-2', '1'}, xml_name => 'ISO-10646-UCS-2', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be'} = { ascii16 => 1, ascii16be => 1, is_block_safe => '1', perl_name => ['ucs-2be'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be', '1'}}; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le'} = { ascii16 => 1, ascii16le => 1, is_block_safe => '1', perl_name => ['ucs-2le'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le', '1'}}; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-4'} = { ascii32 => 1, bom_allowed => '1', no_bom_variant => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le', no_bom_variant32endian1234 => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be', no_bom_variant32endian4321 => 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le', ietf_name => ['csucs4', 'iso-10646-ucs-4'], mime_name => 'iso-10646-ucs-4', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4', '1', 'urn:x-suika-fam-cx:charset:iso-10646-ucs-4', '1'}, xml_name => 'ISO-10646-UCS-4', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be'} = { ascii32 => 1, ascii32endian1234 => 1, is_block_safe => '1', perl_name => ['ucs-4be', 'utf-32be'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be', '1'}}; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le'} = { ascii32 => 1, ascii32endian4321 => 1, is_block_safe => '1', perl_name => ['ucs-4le', 'utf-32le'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le', '1'}}; $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_8859-1:1987'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1'} = {ascii8 => '1', is_block_safe => '1', ietf_name => ['cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-8859-1', 'iso-ir-100', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1'], mime_name => 'iso-8859-1', perl_name => ['iso-8859-1', 'latin1'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1', '1', 'urn:x-suika-fam-cx:charset:iso_8859-1:1987', '1'}, xml_name => 'ISO-8859-1', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2', '1'}, xml_name => 'ISO-8859-2', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3', '1'}, xml_name => 'ISO-8859-3', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4', '1'}, xml_name => 'ISO-8859-4', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5', '1'}, xml_name => 'ISO-8859-5', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6', '1'}, xml_name => 'ISO-8859-6', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7', '1'}, xml_name => 'ISO-8859-7', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8', '1'}, xml_name => 'ISO-8859-8', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9', '1'}, xml_name => 'ISO-8859-9', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10', '1'}, xml_name => 'ISO-8859-10', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11', '1'}, xml_name => 'ISO-8859-11', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13', '1'}, xml_name => 'ISO-8859-13', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14', '1'}, xml_name => 'ISO-8859-14', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15', '1'}, xml_name => 'ISO-8859-15', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16'} = {ascii8 => '1', is_block_safe => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16', '1'}, xml_name => 'ISO-8859-16', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp'} = {ascii8 => '1', 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp', '1'}, xml_name => 'ISO-2022-JP', }; $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-2022-jp'} = {ascii8 => '1', ietf_name => ['csiso2022jp', 'iso-2022-jp', 'iso-2022-jp'], mime_name => 'iso-2022-jp', 'uri', {'urn:x-suika-fam-cx:charset:iso-2022-jp', '1'}, }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp'} = {ascii8 => '1', perl_name => ['iso-2022-jp'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp', '1'}}; $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:shift_jis'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis'} = {ascii8 => '1', is_block_safe => '1', ietf_name => ['csshiftjis', 'ms_kanji', 'shift_jis', 'shift_jis'], mime_name => 'shift_jis', perl_name => ['shift-jis-1997'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis', '1', 'urn:x-suika-fam-cx:charset:shift_jis', '1'}, xml_name => 'Shift_JIS', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis'} = {ascii8 => '1', is_block_safe => '1', perl_name => ['shiftjis', 'sjis'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis', '1'}}; $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:euc-jp'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese'} = {ascii8 => '1', is_block_safe => '1', ietf_name => ['cseucpkdfmtjapanese', 'euc-jp', 'euc-jp', 'extended_unix_code_packed_format_for_japanese'], mime_name => 'euc-jp', perl_name => ['euc-jp-1997'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp', '1', 'urn:x-suika-fam-cx:charset:euc-jp', '1', 'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese', '1'}, xml_name => 'EUC-JP', }; $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp'} = {ascii8 => '1', is_block_safe => '1', perl_name => ['euc-jp', 'ujis'], 'uri', {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp', '1'}}; 1; ## $Date: 2008/05/17 12:29:24 $