1 |
wakaba |
1.1 |
|
2 |
|
|
=head1 NAME
|
3 |
|
|
|
4 |
|
|
Message::MIME::Charset Perl module
|
5 |
|
|
|
6 |
|
|
=head1 DESCRIPTION
|
7 |
|
|
|
8 |
|
|
Perl module for MIME charset.
|
9 |
|
|
|
10 |
|
|
=cut
|
11 |
|
|
|
12 |
|
|
package Message::MIME::Charset;
|
13 |
|
|
use strict;
|
14 |
|
|
use vars qw(%ENCODER %DECODER %N11NTABLE %REG $VERSION);
|
15 |
wakaba |
1.8 |
$VERSION=do{my @r=(q$Revision: 1.7 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
|
16 |
wakaba |
1.1 |
|
17 |
wakaba |
1.4 |
our %CHARSET;
|
18 |
|
|
|
19 |
|
|
$CHARSET{'*DEFAULT'} = {
|
20 |
|
|
preferred_name => '',
|
21 |
|
|
|
22 |
|
|
encoder => sub { $_[1] },
|
23 |
|
|
decoder => sub { $_[1] },
|
24 |
|
|
|
25 |
|
|
mime_text => 1, ## Suitability in use as MIME text/* charset
|
26 |
|
|
#accept_cte => [qw/7bit .../],
|
27 |
|
|
cte_7bit_preferred => 'quoted-printable',
|
28 |
|
|
};
|
29 |
|
|
$CHARSET{'*default'} = $CHARSET{'*DEFAULT'};
|
30 |
|
|
|
31 |
|
|
$CHARSET{'us-ascii'} = {
|
32 |
|
|
preferred_name => 'us-ascii',
|
33 |
|
|
|
34 |
|
|
encoder => sub { $_[1] },
|
35 |
|
|
decoder => sub { $_[1] },
|
36 |
wakaba |
1.8 |
name_minimumizer => \&_charset_name_of_junet8,
|
37 |
wakaba |
1.5 |
|
38 |
|
|
mime_text => 1,
|
39 |
wakaba |
1.4 |
};
|
40 |
|
|
|
41 |
|
|
$CHARSET{'iso-2022-int-1'} = {
|
42 |
|
|
preferred_name => 'iso-2022-int-1',
|
43 |
|
|
|
44 |
|
|
encoder => sub { $_[1] },
|
45 |
|
|
decoder => sub { $_[1] },
|
46 |
wakaba |
1.8 |
name_minimumizer => \&_charset_name_of_junet8,
|
47 |
wakaba |
1.5 |
|
48 |
|
|
mime_text => 1,
|
49 |
wakaba |
1.4 |
};
|
50 |
|
|
|
51 |
|
|
$CHARSET{'unknown-8bit'} = {
|
52 |
|
|
preferred_name => 'unknown-8bit',
|
53 |
|
|
|
54 |
|
|
encoder => sub { $_[1] },
|
55 |
|
|
decoder => sub { $_[1] },
|
56 |
|
|
|
57 |
|
|
mime_text => 0,
|
58 |
wakaba |
1.6 |
cte_7bit_preferred => 'base64',
|
59 |
wakaba |
1.4 |
};
|
60 |
|
|
$CHARSET{'x-unknown'} = $CHARSET{'unknown-8bit'};
|
61 |
|
|
|
62 |
|
|
sub make_charset ($%) {
|
63 |
|
|
my $name = shift;
|
64 |
|
|
return unless $name; ## Note: charset "0" is not supported.
|
65 |
|
|
my %definition = @_;
|
66 |
wakaba |
1.6 |
|
67 |
|
|
$definition{preferred_name} ||= $name;
|
68 |
wakaba |
1.4 |
if ($definition{preferred_name} ne $name
|
69 |
|
|
&& ref $CHARSET{$definition{preferred_name}}) {
|
70 |
|
|
## New charset is an alias of defined charset,
|
71 |
|
|
$CHARSET{$name} = $CHARSET{$definition{preferred_name}};
|
72 |
|
|
return;
|
73 |
|
|
} elsif ($definition{alias_of} && ref $CHARSET{$definition{alias_of}}) {
|
74 |
|
|
## New charset is an alias of defined charset,
|
75 |
|
|
$CHARSET{$name} = $CHARSET{$definition{alias_of}};
|
76 |
|
|
return;
|
77 |
|
|
}
|
78 |
|
|
$CHARSET{$name} = \%definition;
|
79 |
|
|
|
80 |
|
|
## Set default values
|
81 |
wakaba |
1.6 |
#$definition{encoder} ||= sub { $_[1] };
|
82 |
|
|
#$definition{decoder} ||= sub { $_[1] };
|
83 |
wakaba |
1.4 |
|
84 |
|
|
$definition{mime_text} = 0 unless defined $definition{mime_text};
|
85 |
|
|
$definition{cte_7bit_preferred} = 'base64'
|
86 |
|
|
unless defined $definition{cte_7bit_preferred};
|
87 |
|
|
}
|
88 |
wakaba |
1.1 |
|
89 |
|
|
sub encode ($$) {
|
90 |
|
|
my ($charset, $s) = (lc shift, shift);
|
91 |
wakaba |
1.4 |
if (ref $CHARSET{$charset}->{encoder}) {
|
92 |
wakaba |
1.5 |
return (&{$CHARSET{$charset}->{encoder}} ($charset, $s), success => 1);
|
93 |
wakaba |
1.1 |
}
|
94 |
wakaba |
1.5 |
($s, success => 0);
|
95 |
wakaba |
1.1 |
}
|
96 |
|
|
|
97 |
|
|
sub decode ($$) {
|
98 |
|
|
my ($charset, $s) = (lc shift, shift);
|
99 |
wakaba |
1.4 |
if (ref $CHARSET{$charset}->{decoder}) {
|
100 |
|
|
return (&{$CHARSET{$charset}->{decoder}} ($charset, $s), 1);
|
101 |
wakaba |
1.1 |
}
|
102 |
wakaba |
1.2 |
($s, 0);
|
103 |
wakaba |
1.1 |
}
|
104 |
|
|
|
105 |
|
|
sub name_normalize ($) {
|
106 |
|
|
my $name = lc shift;
|
107 |
wakaba |
1.4 |
$CHARSET{$name}->{preferred_name} || $name;
|
108 |
wakaba |
1.1 |
}
|
109 |
|
|
|
110 |
wakaba |
1.6 |
sub name_minimumize ($$) {
|
111 |
|
|
my ($charset, $s) = (lc shift, shift);
|
112 |
|
|
if (ref $CHARSET{$charset}->{name_minimumizer}) {
|
113 |
|
|
return &{$CHARSET{$charset}->{name_minimumizer}} ($charset, $s);
|
114 |
|
|
}
|
115 |
|
|
$charset;
|
116 |
|
|
}
|
117 |
|
|
|
118 |
wakaba |
1.8 |
sub _charset_name_of_junet8 ($) {
|
119 |
|
|
shift; my $s = shift;
|
120 |
|
|
return (charset => 'us-ascii') unless $s =~ /[\x1B\x0E\x0F\x80-\xFF]/;
|
121 |
|
|
if ($s =~ /[\x80-\xFF]/) {
|
122 |
|
|
if ($s =~ /[\xC0-\xFD][\x80-\xBF]*[\x80-\x8F]/) {
|
123 |
|
|
if ($s =~ /\x1B/) {
|
124 |
|
|
return (charset => 'x-junet8');
|
125 |
|
|
} else {
|
126 |
|
|
return (charset => 'utf-8');
|
127 |
|
|
}
|
128 |
|
|
} elsif ($s =~ /\x1B/) {
|
129 |
|
|
return (charset => 'x-ctext');
|
130 |
|
|
} else {
|
131 |
|
|
return (charset => 'iso-8859-1');
|
132 |
|
|
}
|
133 |
|
|
}
|
134 |
|
|
return (charset => 'iso-2022-jp') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^\x40B]|\x1B\x28[^BJ]|\x0E|\x0F/;
|
135 |
|
|
return (charset => 'iso-2022-jp-1') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^\x40B\x28]|\x1B\x28[^BJ]|\x1B\x24\x28[^D]|\x0E|\x0F/;
|
136 |
|
|
return (charset => 'iso-2022-jp-3-plane1') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^B\x28]|\x1B\x28[^B]|\x1B\x24\x28[^O]|\x0E|\x0F/;
|
137 |
|
|
return (charset => 'iso-2022-jp-3') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^B\x28]|\x1B\x28[^B]|\x1B\x24\x28[^OP]|\x0E|\x0F/;
|
138 |
|
|
return (charset => 'iso-2022-kr') unless $s =~ /\x1B[^\x24]|\x1B\x24[^\x29]|\x1B\x24\x29C/;
|
139 |
|
|
return (charset => 'iso-2022-cn') unless $s =~ /\x1B[^\x4E\x24]|\x1B\x24[^\x29\x2A]|\x1B\x24\x29[^AG]|\x1B\x24\x2A[^H]/;
|
140 |
|
|
return (charset => 'iso-2022-cn-ext') unless $s =~ /\x1B[^\x4E\x4F\x24]|\x1B\x24[^\x29\x2A]|\x1B\x24\x29[^AEG]|\x1B\x24\x2A[^HIJKLM]/;
|
141 |
|
|
return (charset => 'iso-2022-jp-2') unless $s =~ /\x1B[^\x24\x28\x2E\x4E]|\x1B\x24[^\x40AB\x28]|\x1B\x24\x28[^CD]|\x1B\x28[^BJ]|\x1B\x2E[^AF]|\x0E|\x0F/;
|
142 |
|
|
return (charset => 'iso-2022-int-1') unless $s =~ /\x1B[^\x24\x28\x2D]|\x1B\x24[^\x40AB\x28\x29]|\x1B\x24\x28[^DGH]|\x1B\x24\x29[^C]|\x1B\x28[^BJ]|\x1B\x2D[^AF]/;
|
143 |
|
|
(charset => 'x-iso-2022');
|
144 |
|
|
}
|
145 |
|
|
|
146 |
wakaba |
1.1 |
=head1 LICENSE
|
147 |
|
|
|
148 |
|
|
Copyright 2002 wakaba E<lt>w@suika.fam.cxE<gt>.
|
149 |
|
|
|
150 |
|
|
This program is free software; you can redistribute it and/or modify
|
151 |
|
|
it under the terms of the GNU General Public License as published by
|
152 |
|
|
the Free Software Foundation; either version 2 of the License, or
|
153 |
|
|
(at your option) any later version.
|
154 |
|
|
|
155 |
|
|
This program is distributed in the hope that it will be useful,
|
156 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
157 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
158 |
|
|
GNU General Public License for more details.
|
159 |
|
|
|
160 |
|
|
You should have received a copy of the GNU General Public License
|
161 |
|
|
along with this program; see the file COPYING. If not, write to
|
162 |
|
|
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
163 |
|
|
Boston, MA 02111-1307, USA.
|
164 |
|
|
|
165 |
|
|
=head1 CHANGE
|
166 |
|
|
|
167 |
|
|
See F<ChangeLog>.
|
168 |
wakaba |
1.8 |
$Date: 2002/06/11 12:59:27 $
|
169 |
wakaba |
1.1 |
|
170 |
|
|
=cut
|
171 |
|
|
|
172 |
|
|
1;
|