/[suikacvs]/messaging/manakai/lib/Message/MIME/Charset/Encode.pm
Suika

Contents of /messaging/manakai/lib/Message/MIME/Charset/Encode.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (show annotations) (download)
Sun Dec 29 03:04:53 2002 UTC (23 years, 6 months ago) by wakaba
Branch: MAIN
CVS Tags: before-dis2-200411, manakai-release-0-3-2, manakai-release-0-3-1, manakai-release-0-4-0, manakai-200612, HEAD
Branch point for: branch-suikawiki-1, experimental-xml-parser-200401
Changes since 1.4: +28 -11 lines
en_quoted_string, en_phrase: New

1
2 =head1 NAME
3
4 Message::MIME::Charset::Encode --- Message-pm: Encode module plug-in for Message::* Perl Modules
5
6 =head1 DESCRIPTION
7
8 Message::* therselves don't convert coding systems of parts of
9 messages, but have mechanism to define to call external functions.
10 This module provides such macros for Encode modules.
11
12 This module is part of Message::* Perl Modules.
13
14 =head1 USAGE
15
16 use Message::MIME::Charset::Encode;
17
18 =cut
19
20 package Message::MIME::Charset::Encode;
21 use strict;
22 use vars qw(%CODE $VERSION);
23 $VERSION=do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
24
25 require Message::MIME::Charset;
26 require Encode;
27
28 $CODE{internal} = 'utf-8';
29
30 =head1 $Message::MIME::Charset::Encode::CODE{input} = $perl_charset_name
31 =head1 $Message::MIME::Charset::Encode::CODE{output} = $perl_charset_name
32
33 Perl Encode module's name of '*default' charset.
34 You should change these value if necessary.
35
36 =cut
37
38 $CODE{input} = '7bit-jis';
39 $CODE{output} = '7bit-jis';
40
41 require Encode::Alias;
42 Encode::Alias::define_alias( qr/^(?:x-)?mac[-_]?(\w+)$/i => '"mac$1"' );
43 Encode::Alias::define_alias( qr/^macintosh$/i => '"macroman"' );
44 Encode::Alias::define_alias( qr/^windows[-_]?31j$/i => '"cp932"' );
45 unless ($Encode::EUCFixed::VERSION) {
46 Encode::Alias::define_alias( qr/^cseucfixwidjapanese$/i => '"extended_unix_code_fixed_width_for_japanese"' );
47 }
48 unless ($Encode::HZ::VERSION) {
49 Encode::Alias::define_alias( qr/^hz-gb-2312$/i => '"hz"' );
50 }
51 unless ($Encode::UTF1::VERSION) {
52 Encode::Alias::define_alias( qr/^csiso10646utf1$/i => '"iso-10646-utf-1"' );
53 Encode::Alias::define_alias( qr/^utf-?1$/i => '"iso-10646-utf-1"' );
54 }
55 unless ($Encode::UTF7::VERSION) {
56 Encode::Alias::define_alias( qr/^(?:x-)?unicode-.-.-utf-?7$/i => '"utf-7"' );
57 Encode::Alias::define_alias( qr/^csunicode11utf7$/i => '"utf-7"' );
58 Encode::Alias::define_alias( qr/^cp65000$/i => '"utf-7"' );
59 }
60
61 my %_PerlName2IanaName = qw(
62 7bit-jis iso-2022-jp-1
63 adobestandardencoding adobe-standard-encoding
64 adobesymbol adobe-symbol-encoding
65 ascii-ctrl us-ascii
66 cp37 ibm037
67 cp932 windows-31j cp936 gbk cp949 windows-949
68 cp1250 windows-1250 cp1251 windows-1251
69 cp1252 windows-1252 cp1253 windows-1253
70 cp1254 windows-1254 cp1255 windows-1255
71 cp1256 windows-1256 cp1257 windows-1257
72 cp1258 windows-1258
73 euc-cn gb2312
74 gsm0338 gsm-default-alphabet
75 hz hz-gb-2312
76 iso-8859-11 tis-620
77 macarabic x-mac-arabic maccentraleurroman x-mac-centralroman
78 maccyrillic x-mac-cyrillic macgreek x-mac-greek
79 machebrew x-mac-hebrew macicelandic x-mac-icelandic
80 macroman macintosh macturkish x-mac-turkish
81 macukrainian x-mac-ukrainian macchinesesimp x-mac-chinesesimp
82 macjapanese x-mac-japanese mackorean x-mac-korean
83 shiftjis shift_jis shiftjisx0213 shift_jisx0213
84 ucs-2be iso-10646-ucs-2 ucs-4be iso-10646-ucs-4
85 ucs-2le utf-16le ucs-2 utf-16
86 );
87 # MacCroatian
88 # MacFarsi
89 # MacRomanian
90 # MacRumanian
91 # MacSami
92 # MacThai
93
94 sub import ($;%) {
95 shift;
96 Message::MIME::Charset::make_charset ('*undef' =>
97 encoder => sub {
98 my ($name, $s) = @_;
99 $name = $CODE{output} if $name =~ /\*/;
100 my $e = Encode::find_encoding ($name);
101 unless ($e) {
102 Message::MIME::Charset::_utf8_off ($s);
103 return ($s, success => 0);
104 }
105 return ($e->encode ($s), success => 1);
106 },
107 decoder => sub {
108 my ($name, $s) = @_;
109 $name = $CODE{input} if $name =~ /\*/;
110 #unless ($name) {
111 # use Encode::Guess qw/utf-8 iso-8859-1 iso-2022-jp/;
112 # $name = Encode::Guess->guess ($s);
113 # return ($name->decode ($s), success => 1) if ref $name;
114 #}
115 return ($s, success => 0) unless my $e = Encode::find_encoding ($name);
116 if (Message::MIME::Charset::is_utf8 ($s)) {
117 if ($name eq 'utf-8') {
118 return ($s, success => 1);
119 } else {
120 $s = Encode::encode ('iso-8859-1', $s, Encode::XMLCREF ());
121 return ($e->decode ($s), success => 1);
122 }
123 } else {
124 return ($e->decode ($s), success => 1);
125 }
126 },
127 preferred_name => \&_preferred_name,
128 );
129 Message::MIME::Charset::make_charset ('*default' => alias_of => '*undef');
130 Message::MIME::Charset::make_charset (extended_unix_code_fixed_width_for_japanese =>
131 encoder => sub { &_encoder ('EUCFixed', 'EUCFixed', @_) },
132 decoder => sub { &_decoder ('EUCFixed', 'EUCFixed', @_) },
133 );
134 Message::MIME::Charset::make_charset ('x-iso2022jp-cp932' =>
135 encoder => sub { &_encoder ('ISO2022::CP932', 'ISO2022::CP932', @_) },
136 decoder => sub { &_decoder ('ISO2022::CP932', 'ISO2022::CP932', @_) },
137 );
138 Message::MIME::Charset::make_charset ('iso-10646-utf-1' =>
139 encoder => sub { &_encoder ('Unicode::UTF1', 'Unicode::UTF1', @_) },
140 decoder => sub { &_decoder ('Unicode::UTF1', 'Unicode::UTF1', @_) },
141 );
142 Message::MIME::Charset::make_charset ('utf-7' =>
143 encoder => sub { &_encoder ('Unicode::UTF7', 'Unicode::UTF7', @_) },
144 decoder => sub { &_decoder ('Unicode::UTF7', 'Unicode::UTF7', @_) },
145 );
146 Message::MIME::Charset::make_charset ('x-imap4-modified-utf7' =>
147 encoder => sub { &_encoder ('Unicode::UTF7', 'Unicode::UTF7::IMAP', @_) },
148 decoder => sub { &_decoder ('Unicode::UTF7', 'Unicode::UTF7::IMAP', @_) },
149 );
150 }
151
152 sub _encoder ($$@) {
153 no strict 'refs';
154 my $p1 = shift;
155 my $p2 = shift;
156 if (!${'Encode::'.$p2.'::VERSION'} && !eval qq{use Encode::$p1}) {
157 my $s = shift;
158 Message::MIME::Charset::_utf8_off ($s);
159 return ($s, success => 0);
160 }
161 return (Encode::encode ($_[0],$_[1],$_[2]), success => 1);
162 }
163 sub _decoder ($@) {
164 no strict 'refs';
165 my $p = shift;
166 return ($_[1], success => 0)
167 if !${'Encode::'.$p.'::VERSION'} && !eval qq{use Encode::$p};
168 if (Message::MIME::Charset::is_utf8 ($_[1])) {
169 my $s = Encode::encode ('iso-8859-1', $_[1]);
170 return (Encode::decode ($_[0],$s,$_[2]), success => 1);
171 } else {
172 return (Encode::decode ($_[0],$_[1],$_[2]), success => 1);
173 }
174 }
175 sub _preferred_name ($) {
176 my $name = shift;
177 my $perlname = lc Encode::resolve_alias ($name);
178 $_PerlName2IanaName{$perlname} || $perlname || $name;
179 }
180
181 =head1 EXAMPLE
182
183 use Message::MIME::Charset::Encode;
184 $Message::MIME::Charset::Encode::CODE{input} = 'euc-jp';
185 $Message::MIME::Charset::Encode::CODE{output} = 'iso-2022-jp';
186 require Message::Entity;
187 #...
188
189 =head1 SEE ALSO
190
191 Message::MIME::Charset
192
193 Message::Entity
194
195 Encode
196
197 =head1 LICENSE
198
199 Copyright 2002 Wakaba <w@suika.fam.cx>
200
201 This program is free software; you can redistribute it and/or modify
202 it under the terms of the GNU General Public License as published by
203 the Free Software Foundation; either version 2 of the License, or
204 (at your option) any later version.
205
206 This program is distributed in the hope that it will be useful,
207 but WITHOUT ANY WARRANTY; without even the implied warranty of
208 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
209 GNU General Public License for more details.
210
211 You should have received a copy of the GNU General Public License
212 along with this program; see the file COPYING. If not, write to
213 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
214 Boston, MA 02111-1307, USA.
215
216 =head1 CHANGE
217
218 See F<ChangeLog>.
219 $Date: 2002/08/29 12:30:46 $
220
221 =cut
222
223 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24