/[suikacvs]/messaging/manakai/lib/Message/MIME/Charset.pm
Suika

Contents of /messaging/manakai/lib/Message/MIME/Charset.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (hide annotations) (download)
Sun Jun 16 10:45:54 2002 UTC (22 years, 5 months ago) by wakaba
Branch: MAIN
Changes since 1.7: +32 -19 lines
2002-06-16  wakaba <w@suika.fam.cx>

	* Header.pm (_n11n_field_name): Check namespace definition's
	case_sensible option.
	* Entity.pm (_add_ua): Removed.  (Moved to Message::Field::UA.)

1 wakaba 1.1
2     =head1 NAME
3    
4     Message::MIME::Charset Perl module
5    
6     =head1 DESCRIPTION
7    
8     Perl module for MIME charset.
9    
10     =cut
11    
12     package Message::MIME::Charset;
13     use strict;
14     use vars qw(%ENCODER %DECODER %N11NTABLE %REG $VERSION);
15 wakaba 1.8 $VERSION=do{my @r=(q$Revision: 1.7 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
16 wakaba 1.1
17 wakaba 1.4 our %CHARSET;
18    
19     $CHARSET{'*DEFAULT'} = {
20     preferred_name => '',
21    
22     encoder => sub { $_[1] },
23     decoder => sub { $_[1] },
24    
25     mime_text => 1, ## Suitability in use as MIME text/* charset
26     #accept_cte => [qw/7bit .../],
27     cte_7bit_preferred => 'quoted-printable',
28     };
29     $CHARSET{'*default'} = $CHARSET{'*DEFAULT'};
30    
31     $CHARSET{'us-ascii'} = {
32     preferred_name => 'us-ascii',
33    
34     encoder => sub { $_[1] },
35     decoder => sub { $_[1] },
36 wakaba 1.8 name_minimumizer => \&_charset_name_of_junet8,
37 wakaba 1.5
38     mime_text => 1,
39 wakaba 1.4 };
40    
41     $CHARSET{'iso-2022-int-1'} = {
42     preferred_name => 'iso-2022-int-1',
43    
44     encoder => sub { $_[1] },
45     decoder => sub { $_[1] },
46 wakaba 1.8 name_minimumizer => \&_charset_name_of_junet8,
47 wakaba 1.5
48     mime_text => 1,
49 wakaba 1.4 };
50    
51     $CHARSET{'unknown-8bit'} = {
52     preferred_name => 'unknown-8bit',
53    
54     encoder => sub { $_[1] },
55     decoder => sub { $_[1] },
56    
57     mime_text => 0,
58 wakaba 1.6 cte_7bit_preferred => 'base64',
59 wakaba 1.4 };
60     $CHARSET{'x-unknown'} = $CHARSET{'unknown-8bit'};
61    
62     sub make_charset ($%) {
63     my $name = shift;
64     return unless $name; ## Note: charset "0" is not supported.
65     my %definition = @_;
66 wakaba 1.6
67     $definition{preferred_name} ||= $name;
68 wakaba 1.4 if ($definition{preferred_name} ne $name
69     && ref $CHARSET{$definition{preferred_name}}) {
70     ## New charset is an alias of defined charset,
71     $CHARSET{$name} = $CHARSET{$definition{preferred_name}};
72     return;
73     } elsif ($definition{alias_of} && ref $CHARSET{$definition{alias_of}}) {
74     ## New charset is an alias of defined charset,
75     $CHARSET{$name} = $CHARSET{$definition{alias_of}};
76     return;
77     }
78     $CHARSET{$name} = \%definition;
79    
80     ## Set default values
81 wakaba 1.6 #$definition{encoder} ||= sub { $_[1] };
82     #$definition{decoder} ||= sub { $_[1] };
83 wakaba 1.4
84     $definition{mime_text} = 0 unless defined $definition{mime_text};
85     $definition{cte_7bit_preferred} = 'base64'
86     unless defined $definition{cte_7bit_preferred};
87     }
88 wakaba 1.1
89     sub encode ($$) {
90     my ($charset, $s) = (lc shift, shift);
91 wakaba 1.4 if (ref $CHARSET{$charset}->{encoder}) {
92 wakaba 1.5 return (&{$CHARSET{$charset}->{encoder}} ($charset, $s), success => 1);
93 wakaba 1.1 }
94 wakaba 1.5 ($s, success => 0);
95 wakaba 1.1 }
96    
97     sub decode ($$) {
98     my ($charset, $s) = (lc shift, shift);
99 wakaba 1.4 if (ref $CHARSET{$charset}->{decoder}) {
100     return (&{$CHARSET{$charset}->{decoder}} ($charset, $s), 1);
101 wakaba 1.1 }
102 wakaba 1.2 ($s, 0);
103 wakaba 1.1 }
104    
105     sub name_normalize ($) {
106     my $name = lc shift;
107 wakaba 1.4 $CHARSET{$name}->{preferred_name} || $name;
108 wakaba 1.1 }
109    
110 wakaba 1.6 sub name_minimumize ($$) {
111     my ($charset, $s) = (lc shift, shift);
112     if (ref $CHARSET{$charset}->{name_minimumizer}) {
113     return &{$CHARSET{$charset}->{name_minimumizer}} ($charset, $s);
114     }
115     $charset;
116     }
117    
118 wakaba 1.8 sub _charset_name_of_junet8 ($) {
119     shift; my $s = shift;
120     return (charset => 'us-ascii') unless $s =~ /[\x1B\x0E\x0F\x80-\xFF]/;
121     if ($s =~ /[\x80-\xFF]/) {
122     if ($s =~ /[\xC0-\xFD][\x80-\xBF]*[\x80-\x8F]/) {
123     if ($s =~ /\x1B/) {
124     return (charset => 'x-junet8');
125     } else {
126     return (charset => 'utf-8');
127     }
128     } elsif ($s =~ /\x1B/) {
129     return (charset => 'x-ctext');
130     } else {
131     return (charset => 'iso-8859-1');
132     }
133     }
134     return (charset => 'iso-2022-jp') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^\x40B]|\x1B\x28[^BJ]|\x0E|\x0F/;
135     return (charset => 'iso-2022-jp-1') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^\x40B\x28]|\x1B\x28[^BJ]|\x1B\x24\x28[^D]|\x0E|\x0F/;
136     return (charset => 'iso-2022-jp-3-plane1') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^B\x28]|\x1B\x28[^B]|\x1B\x24\x28[^O]|\x0E|\x0F/;
137     return (charset => 'iso-2022-jp-3') unless $s =~ /\x1B[^\x24\x28]|\x1B\x24[^B\x28]|\x1B\x28[^B]|\x1B\x24\x28[^OP]|\x0E|\x0F/;
138     return (charset => 'iso-2022-kr') unless $s =~ /\x1B[^\x24]|\x1B\x24[^\x29]|\x1B\x24\x29C/;
139     return (charset => 'iso-2022-cn') unless $s =~ /\x1B[^\x4E\x24]|\x1B\x24[^\x29\x2A]|\x1B\x24\x29[^AG]|\x1B\x24\x2A[^H]/;
140     return (charset => 'iso-2022-cn-ext') unless $s =~ /\x1B[^\x4E\x4F\x24]|\x1B\x24[^\x29\x2A]|\x1B\x24\x29[^AEG]|\x1B\x24\x2A[^HIJKLM]/;
141     return (charset => 'iso-2022-jp-2') unless $s =~ /\x1B[^\x24\x28\x2E\x4E]|\x1B\x24[^\x40AB\x28]|\x1B\x24\x28[^CD]|\x1B\x28[^BJ]|\x1B\x2E[^AF]|\x0E|\x0F/;
142     return (charset => 'iso-2022-int-1') unless $s =~ /\x1B[^\x24\x28\x2D]|\x1B\x24[^\x40AB\x28\x29]|\x1B\x24\x28[^DGH]|\x1B\x24\x29[^C]|\x1B\x28[^BJ]|\x1B\x2D[^AF]/;
143     (charset => 'x-iso-2022');
144     }
145    
146 wakaba 1.1 =head1 LICENSE
147    
148     Copyright 2002 wakaba E<lt>w@suika.fam.cxE<gt>.
149    
150     This program is free software; you can redistribute it and/or modify
151     it under the terms of the GNU General Public License as published by
152     the Free Software Foundation; either version 2 of the License, or
153     (at your option) any later version.
154    
155     This program is distributed in the hope that it will be useful,
156     but WITHOUT ANY WARRANTY; without even the implied warranty of
157     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
158     GNU General Public License for more details.
159    
160     You should have received a copy of the GNU General Public License
161     along with this program; see the file COPYING. If not, write to
162     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
163     Boston, MA 02111-1307, USA.
164    
165     =head1 CHANGE
166    
167     See F<ChangeLog>.
168 wakaba 1.8 $Date: 2002/06/11 12:59:27 $
169 wakaba 1.1
170     =cut
171    
172     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24