33 |
|
|
34 |
encoder => sub { $_[1] }, |
encoder => sub { $_[1] }, |
35 |
decoder => sub { $_[1] }, |
decoder => sub { $_[1] }, |
36 |
name_minimumizer => sub { |
name_minimumizer => \&_charset_name_of_junet8, |
|
shift; my $s = shift; |
|
|
return (charset => 'unknown-8bit') if $s =~ /[\x80-\xFF]/; |
|
|
(charset => 'us-ascii'); |
|
|
}, |
|
37 |
|
|
38 |
mime_text => 1, |
mime_text => 1, |
39 |
}; |
}; |
43 |
|
|
44 |
encoder => sub { $_[1] }, |
encoder => sub { $_[1] }, |
45 |
decoder => sub { $_[1] }, |
decoder => sub { $_[1] }, |
46 |
name_minimumizer => sub { |
name_minimumizer => \&_charset_name_of_junet8, |
|
shift; my $s = shift; |
|
|
return (charset => 'unknown-8bit') if $s =~ /[\x80-\xFF]/; |
|
|
return (charset => 'us-ascii') unless $s =~ /[\x1B\x0E\x0F]/; |
|
|
return (charset => 'iso-2022-jp') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^\x40B]|\x1B\x28[^BJ]|\x0E|\x0F/; |
|
|
return (charset => 'iso-2022-jp-1') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^\x40B\x28]|\x1B\x28[^BJ]|\x1B\x24\x28[^D]|\x0E|\x0F/; |
|
|
return (charset => 'iso-2022-jp-3-plane1') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^B\x28]|\x1B\x28[^B]|\x1B\x24\x28[^O]|\x0E|\x0F/; |
|
|
return (charset => 'iso-2022-jp-3') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^B\x28]|\x1B\x28[^B]|\x1B\x24\x28[^OP]|\x0E|\x0F/; |
|
|
return (charset => 'iso-2022-kr') unless $s =~ /\x1B[^\x24]|\x1B\x24[^\x29]|\x1B\x24\x29C/; |
|
|
return (charset => 'iso-2022-cn') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^B]|\x1B\x28[^A]|\x1B\x24\x28[^GH]|\x0E|\x0F/; |
|
|
(charset => 'iso-2022-int-1'); |
|
|
}, |
|
47 |
|
|
48 |
mime_text => 1, |
mime_text => 1, |
49 |
}; |
}; |
115 |
$charset; |
$charset; |
116 |
} |
} |
117 |
|
|
118 |
|
# Pick the narrowest charset label that fits an octet string.
#
# Used as a name_minimumizer callback: the first argument is discarded and
# the second is the string to inspect.  Always returns the two-element list
# (charset => NAME).
#
# The string is tested against a ladder of patterns.  Each pattern describes
# what is NOT permitted in a given charset (an escape sequence outside that
# charset's repertoire, or SO/SI where locking shifts are not used); the
# first charset whose pattern does not match wins.
#
# No prototype is declared: the sub consumes two arguments, so the former
# "($)" was misleading (calls through a code reference never saw it anyway).
sub _charset_name_of_junet8 {
  shift;                        # first argument is not used
  my $s = shift;
  $s = '' unless defined $s;    # undef is treated as the empty string

  # No ESC, no SO/SI, no 8-bit octet: plain ASCII.
  return (charset => 'us-ascii') unless $s =~ /[\x1B\x0E\x0F\x80-\xFF]/;

  # 8-bit octets present: choose between the 8-bit labels only.
  if ($s =~ /[\x80-\xFF]/) {
    if ($s =~ /[\xC0-\xFD][\x80-\xBF]*[\x80-\x8F]/) {
      # Lead octet followed by continuation octets; with ESC present the
      # data is labelled x-junet8, otherwise utf-8.
      return (charset => ($s =~ /\x1B/ ? 'x-junet8' : 'utf-8'));
    }
    return (charset => ($s =~ /\x1B/ ? 'x-ctext' : 'iso-8859-1'));
  }

  # 7-bit data containing ESC and/or SO/SI from here on.

  # Only ESC $ @, ESC $ B, ESC ( B, ESC ( J allowed; no SO/SI.
  return (charset => 'iso-2022-jp')
    unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^\x40B]|\x1B\x28[^BJ]|\x0E|\x0F/;

  # As above, plus ESC $ ( D.
  return (charset => 'iso-2022-jp-1')
    unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^\x40B\x28]|\x1B\x28[^BJ]|\x1B\x24\x28[^D]|\x0E|\x0F/;

  # ESC $ B, ESC ( B, ESC $ ( O.
  return (charset => 'iso-2022-jp-3-plane1')
    unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^B\x28]|\x1B\x28[^B]|\x1B\x24\x28[^O]|\x0E|\x0F/;

  # As above, plus ESC $ ( P.
  return (charset => 'iso-2022-jp-3')
    unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^B\x28]|\x1B\x28[^B]|\x1B\x24\x28[^OP]|\x0E|\x0F/;

  # Only ESC $ ) C allowed; SO/SI are permitted.  The last alternative must
  # reject every final byte EXCEPT "C"; matching "C" itself (as the code
  # previously did) excluded genuine ISO-2022-KR data and let other
  # ESC $ ) x designations through under this label.
  return (charset => 'iso-2022-kr')
    unless $s =~ /\x1B[^\x24]|\x1B\x24[^\x29]|\x1B\x24\x29[^C]/;

  # ESC N, ESC $ ) A, ESC $ ) G, ESC $ * H; SO/SI are permitted.
  return (charset => 'iso-2022-cn')
    unless $s =~ /\x1B[^\x4E\x24]|\x1B\x24[^\x29\x2A]|\x1B\x24\x29[^AG]|\x1B\x24\x2A[^H]/;

  # As above, plus ESC O, ESC $ ) E and ESC $ * I..M.
  return (charset => 'iso-2022-cn-ext')
    unless $s =~ /\x1B[^\x4E\x4F\x24]|\x1B\x24[^\x29\x2A]|\x1B\x24\x29[^AEG]|\x1B\x24\x2A[^HIJKLM]/;

  # ESC $ @/A/B, ESC $ ( C/D, ESC ( B/J, ESC . A/F, ESC N; no SO/SI.
  return (charset => 'iso-2022-jp-2')
    unless $s =~ /\x1B[^\x24\x28\x2E\x4E]|\x1B\x24[^\x40AB\x28]|\x1B\x24\x28[^CD]|\x1B\x28[^BJ]|\x1B\x2E[^AF]|\x0E|\x0F/;

  # ESC $ @/A/B, ESC $ ( D/G/H, ESC $ ) C, ESC ( B/J, ESC - A/F.
  return (charset => 'iso-2022-int-1')
    unless $s =~ /\x1B[^\x24\x28\x2D]|\x1B\x24[^\x40AB\x28\x29]|\x1B\x24\x28[^DGH]|\x1B\x24\x29[^C]|\x1B\x28[^BJ]|\x1B\x2D[^AF]/;

  # Nothing narrower fits.
  return (charset => 'x-iso-2022');
}
145 |
|
|
146 |
=head1 LICENSE |
=head1 LICENSE |
147 |
|
|
148 |
Copyright 2002 wakaba E<lt>w@suika.fam.cxE<gt>. |
Copyright 2002 wakaba E<lt>w@suika.fam.cxE<gt>. |