101 |
'iso-2022-jp-3-plane1' => \&_name_8bit_iso2022, |
'iso-2022-jp-3-plane1' => \&_name_8bit_iso2022, |
102 |
'iso-2022-kr' => \&_name_8bit_iso2022, |
'iso-2022-kr' => \&_name_8bit_iso2022, |
103 |
'iso-8859-1' => \&_name_8bit_iso2022, |
'iso-8859-1' => \&_name_8bit_iso2022, |
|
'iso-10646-j-1' => \&_name_utf16be, |
|
|
'iso-10646-ucs-2' => \&_name_utf16be, |
|
|
'iso-10646-ucs-4' => \&_name_utf32be, |
|
|
'iso-10646-ucs-basic' => \&_name_utf16be, |
|
|
'iso-10646-unicode-latin1' => \&_name_utf16be, |
|
104 |
jis_x0201 => \&_name_shift_jis, |
jis_x0201 => \&_name_shift_jis, |
105 |
junet => \&_name_8bit_iso2022, |
junet => \&_name_8bit_iso2022, |
106 |
'x-junet8' => \&_name_net_ascii_8bit, |
'x-junet8' => \&_name_net_ascii_8bit, |
110 |
'x-sjis' => \&_name_shift_jis, |
'x-sjis' => \&_name_shift_jis, |
111 |
'us-ascii' => \&_name_net_ascii_8bit, |
'us-ascii' => \&_name_net_ascii_8bit, |
112 |
'utf-8' => \&_name_net_ascii_8bit, |
'utf-8' => \&_name_net_ascii_8bit, |
|
'utf-16be' => \&_name_utf16be, |
|
|
'utf-32be' => \&_name_utf32be, |
|
113 |
); |
); |
114 |
|
|
115 |
my %_IsMimeText; |
my %_IsMimeText; |
220 |
} |
} |
221 |
|
|
222 |
sub name_minimumize ($$) { |
sub name_minimumize ($$) { |
223 |
|
require Message::MIME::Charset::MinName; |
224 |
my ($charset, $s) = (lc shift, shift); |
my ($charset, $s) = (lc shift, shift); |
225 |
if (ref $CHARSET{$charset}->{name_minimumizer} eq 'CODE') { |
if (ref $CHARSET{$charset}->{name_minimumizer} eq 'CODE') { |
226 |
return &{$CHARSET{$charset}->{name_minimumizer}} ($charset, $s); |
return &{$CHARSET{$charset}->{name_minimumizer}} ($charset, $s); |
227 |
|
} elsif (ref $Message::MIME::Charset::MinName::MIN{$charset}) { |
228 |
|
return &{$Message::MIME::Charset::MinName::MIN{$charset}} ($charset, $s); |
229 |
} elsif (ref $_MINIMUMIZER{$charset}) { |
} elsif (ref $_MINIMUMIZER{$charset}) { |
230 |
return &{$_MINIMUMIZER{$charset}} ($charset, $s); |
return &{$_MINIMUMIZER{$charset}} ($charset, $s); |
231 |
} elsif (ref $CHARSET{'*undef'}->{name_minimumizer} eq 'CODE') { |
} elsif (ref $CHARSET{'*undef'}->{name_minimumizer} eq 'CODE') { |
463 |
} |
} |
464 |
} |
} |
465 |
|
|
|
## _name_utf16be ($charset, $octets)
##
## Picks a charset label for a string of big-endian 16-bit code units
## and returns it as the list (charset => $label).  The first argument
## is discarded.  Every pattern below ends with a lookahead requiring
## an even number of octets up to the end of the string, so matches
## are pinned to code-unit boundaries counted from the end.
##
## Labels, in order of testing:
##   utf-16be                  a high surrogate directly followed by a
##                             low surrogate is present
##   iso-10646-ucs-2           some unit has a non-zero high octet and
##                             some unit falls outside the ranges
##                             accepted by the third pattern
##   iso-10646-j-1             some unit has a non-zero high octet and
##                             all units are within those ranges
##   iso-10646-unicode-latin1  all high octets are zero and some low
##                             octet is 0x80 or above
##   iso-10646-ucs-basic       anything else (including the empty string)
sub _name_utf16be ($$) {
  my (undef, $octets) = @_;

  ## Surrogate pair: only UTF-16 proper can label this.
  return (charset => 'utf-16be')
    if $octets =~ /[\xD8-\xDB][\x00-\xFF][\xDC-\xDF][\x00-\xFF]
                   (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx;

  ## Every code unit has a zero high octet: choose between the two
  ## 8-bit-range labels.
  unless ($octets =~ /[\x01-\xFF][\x00-\xFF]
                      (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx) {
    return (charset => 'iso-10646-unicode-latin1')
      if $octets =~ /\x00[\x80-\xFF]
                     (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx;
    return (charset => 'iso-10646-ucs-basic');
  }

  ## At least one unit lies above U+00FF.  The pattern looks for a unit
  ## outside the accepted rows/ranges; those ranges are presumably the
  ## ISO-10646-J-1 repertoire -- TODO confirm against the RFC.
  ## NOTE(review): the in-pattern note cites RFC 1816; the RFC that
  ## defines ISO-10646-J-1 appears to be RFC 1815, and "ambitious"
  ## presumably means "ambiguous" -- confirm before relying on it.
  return (charset => 'iso-10646-ucs-2')
    if $octets =~ /([^\x00\x03\x04\x23\x25\x30\xFE\xFF]
                    [\x00-\xFF] # ^\x20\x22\x4E-\x9F\xF9\xFA
                   |\x03[^\x00-\x6F\xD0-\xFF]
                   #|\x20[^\x00-\x6F]
                   |\x25[^\x00-\x7F]
                   |\xFE[^\x30-\x4F]
                   |\xFF[^\x00-\xEF]
                   ## note 1 of RFC 1816 is ambitious, so block entire
                   ## is excepted
                   |\x30[\x00-\x3F]
                   )
                   (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx;

  return (charset => 'iso-10646-j-1');
}
|
|
|
|
|
## _name_utf32be ($charset, $octets)
##
## Picks a charset label for a string of big-endian 32-bit code units
## and returns it as the list (charset => $label).  The first argument
## is discarded.
##
## The label is 'iso-10646-ucs-4' when some 4-octet unit (aligned from
## the end of the string) has a first octet of 0x01-0x7F, or a first
## octet of 0x00 with a second octet of 0x11 or above -- i.e. a value
## above 0x10FFFF.  Otherwise it is 'utf-32be'.  Units whose first
## octet is 0x80 or above are not tested by this pattern.
sub _name_utf32be ($$) {
  my (undef, $octets) = @_;

  my $beyond_utf32 = $octets =~ /
      ([\x01-\x7F][\x00-\xFF]{3}
      |\x00[\x11-\xFF][\x00-\xFF][\x00-\xFF])
      (?=(?:[\x00-\xFF]{4})*\z)/sx;

  return (charset => ($beyond_utf32 ? 'iso-10646-ucs-4' : 'utf-32be'));
}
|
|
|
|
466 |
## _utf8_on ($string)
##
## Calls Encode::_utf8_on on the caller's own variable (via the $_[0]
## alias, so the argument is modified in place) when $Encode::VERSION
## is true; otherwise does nothing.  The value of the expression below
## is the sub's return value.
sub _utf8_on ($) {
  $Encode::VERSION and Encode::_utf8_on ($_[0]);
}