14 |
use vars qw(%ENCODER %DECODER %N11NTABLE %REG $VERSION); |
use vars qw(%ENCODER %DECODER %N11NTABLE %REG $VERSION); |
15 |
$VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
$VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
16 |
|
|
17 |
%ENCODER = ( |
our %CHARSET; |
18 |
'*DEFAULT' => sub {$_[1]}, |
|
19 |
'us-ascii' => sub {$_[1]}, |
$CHARSET{'*DEFAULT'} = { |
20 |
'unknown-8bit' => sub {$_[1]}, |
preferred_name => '', |
21 |
); |
|
22 |
|
encoder => sub { $_[1] }, |
23 |
%DECODER = ( |
decoder => sub { $_[1] }, |
24 |
'*DEFAULT' => sub {$_[1]}, |
|
25 |
'us-ascii' => sub {$_[1]}, |
mime_text => 1, ## Suitability in use as MIME text/* charset |
26 |
'unknown-8bit' => sub {$_[1]}, |
#accept_cte => [qw/7bit .../], |
27 |
); |
cte_7bit_preferred => 'quoted-printable', |
28 |
|
}; |
29 |
## Charset name normalization |
$CHARSET{'*default'} = $CHARSET{'*DEFAULT'}; |
30 |
%N11NTABLE = ( |
|
31 |
'euc' => 'euc-jp', ## ... |
$CHARSET{'us-ascii'} = { |
32 |
'jis' => 'iso-2022-jp', ## Really? |
preferred_name => 'us-ascii', |
33 |
'shift-jis' => 'shift_jis', |
|
34 |
'shift-jisx0213' => 'shift_jisx0213', |
encoder => sub { $_[1] }, |
35 |
'x-big5' => 'big5', |
decoder => sub { $_[1] }, |
36 |
'x-x-big5' => 'big5', |
}; |
37 |
'x-euc' => 'euc-jp', ## ... |
|
38 |
'x-euc-jp' => 'euc-jp', |
$CHARSET{'iso-2022-int-1'} = { |
39 |
'x-gbk' => 'gbk', |
preferred_name => 'iso-2022-int-1', |
40 |
'x-gbk2k' => 'gb18030', |
|
41 |
'x-x-gbk' => 'gbk', |
encoder => sub { $_[1] }, |
42 |
'x-sjis' => 'shift_jis', |
decoder => sub { $_[1] }, |
43 |
); |
}; |
44 |
|
|
45 |
|
$CHARSET{'unknown-8bit'} = { |
46 |
|
preferred_name => 'unknown-8bit', |
47 |
|
|
48 |
|
encoder => sub { $_[1] }, |
49 |
|
decoder => sub { $_[1] }, |
50 |
|
|
51 |
|
mime_text => 0, |
52 |
|
cte_7bit_preferred => 'quoted-printable', |
53 |
|
}; |
54 |
|
$CHARSET{'x-unknown'} = $CHARSET{'unknown-8bit'}; |
55 |
|
|
56 |
|
sub make_charset ($%) { |
57 |
|
my $name = shift; |
58 |
|
return unless $name; ## Note: charset "0" is not supported. |
59 |
|
my %definition = @_; |
60 |
|
if ($definition{preferred_name} ne $name |
61 |
|
&& ref $CHARSET{$definition{preferred_name}}) { |
62 |
|
## New charset is an alias of defined charset, |
63 |
|
$CHARSET{$name} = $CHARSET{$definition{preferred_name}}; |
64 |
|
return; |
65 |
|
} elsif ($definition{alias_of} && ref $CHARSET{$definition{alias_of}}) { |
66 |
|
## New charset is an alias of defined charset, |
67 |
|
$CHARSET{$name} = $CHARSET{$definition{alias_of}}; |
68 |
|
return; |
69 |
|
} |
70 |
|
$CHARSET{$name} = \%definition; |
71 |
|
|
72 |
|
## Set default values |
73 |
|
$definition{preferred_name} ||= $name; |
74 |
|
|
75 |
|
$definition{encoder} ||= sub { $_[1] }; |
76 |
|
$definition{decoder} ||= sub { $_[1] }; |
77 |
|
|
78 |
|
$definition{mime_text} = 0 unless defined $definition{mime_text}; |
79 |
|
$definition{cte_7bit_preferred} = 'base64' |
80 |
|
unless defined $definition{cte_7bit_preferred}; |
81 |
|
} |
82 |
|
|
83 |
sub encode ($$) { |
sub encode ($$) { |
84 |
my ($charset, $s) = (lc shift, shift); |
my ($charset, $s) = (lc shift, shift); |
85 |
if (ref $ENCODER{$charset}) { |
if (ref $CHARSET{$charset}->{encoder}) { |
86 |
return (&{$ENCODER{$charset}} ($charset, $s), 1); |
return (&{$CHARSET{$charset}->{encoder}} ($charset, $s), 1); |
87 |
} |
} |
88 |
($s, 0); |
($s, 0); |
89 |
} |
} |
90 |
|
|
91 |
sub decode ($$) { |
sub decode ($$) { |
92 |
my ($charset, $s) = (lc shift, shift); |
my ($charset, $s) = (lc shift, shift); |
93 |
if (ref $DECODER{$charset}) { |
if (ref $CHARSET{$charset}->{decoder}) { |
94 |
return (&{$DECODER{$charset}} ($charset, $s), 1); |
return (&{$CHARSET{$charset}->{decoder}} ($charset, $s), 1); |
95 |
} |
} |
96 |
($s, 0); |
($s, 0); |
97 |
} |
} |
98 |
|
|
99 |
sub name_normalize ($) { |
sub name_normalize ($) { |
100 |
my $name = lc shift; |
my $name = lc shift; |
101 |
$N11NTABLE{$name} || $name; |
$CHARSET{$name}->{preferred_name} || $name; |
102 |
} |
} |
103 |
|
|
104 |
=head1 LICENSE |
=head1 LICENSE |