MIME/Charset/Encode.pm


=head1 NAME

Message::MIME::Charset::Encode --- Message-pm: Encode module plug-in for Message::* Perl Modules

=head1 DESCRIPTION

Message::* therselves don't convert coding systems of parts of
messages, but have mechanism to define to call external functions.
This module provides such macros for Encode modules.

This module is part of Message::* Perl Modules.

=head1 USAGE

  use Message::MIME::Charset::Encode;

=cut

package Message::MIME::Charset::Encode;
use strict;
use vars qw(%CODE $VERSION);
$VERSION=do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};

require Message::MIME::Charset;
require Encode;

$CODE{internal} = 'utf-8';

=head1 $Message::MIME::Charset::Encode::CODE{input} = $perl_charset_name
=head1 $Message::MIME::Charset::Encode::CODE{output} = $perl_charset_name

Perl Encode module's name of '*default' charset.
You should change these value if necessary.

=cut

$CODE{input} = '7bit-jis';
$CODE{output} = '7bit-jis';

require Encode::Alias;
Encode::Alias::define_alias( qr/^(?:x-)?mac[-_]?(\w+)$/i => '"mac$1"' );
Encode::Alias::define_alias( qr/^macintosh$/i => '"macroman"' );
Encode::Alias::define_alias( qr/^windows[-_]?31j$/i => '"cp932"' );
unless ($Encode::EUCFixed::VERSION) {
  Encode::Alias::define_alias( qr/^cseucfixwidjapanese$/i => '"extended_unix_code_fixed_width_for_japanese"' );
}
unless ($Encode::HZ::VERSION) {
  Encode::Alias::define_alias( qr/^hz-gb-2312$/i => '"hz"' );
}
unless ($Encode::UTF1::VERSION) {
  Encode::Alias::define_alias( qr/^csiso10646utf1$/i => '"iso-10646-utf-1"' );
  Encode::Alias::define_alias( qr/^utf-?1$/i => '"iso-10646-utf-1"' );
}
unless ($Encode::UTF7::VERSION) {
  Encode::Alias::define_alias( qr/^(?:x-)?unicode-.-.-utf-?7$/i => '"utf-7"' );
  Encode::Alias::define_alias( qr/^csunicode11utf7$/i => '"utf-7"' );
  Encode::Alias::define_alias( qr/^cp65000$/i => '"utf-7"' );
}

my %_PerlName2IanaName = qw(
        7bit-jis        iso-2022-jp-1
        adobestandardencoding   adobe-standard-encoding
        adobesymbol     adobe-symbol-encoding
        ascii-ctrl      us-ascii
        cp37    ibm037
        cp932   windows-31j     cp936   gbk     cp949   windows-949
        cp1250  windows-1250    cp1251  windows-1251
        cp1252  windows-1252    cp1253  windows-1253
        cp1254  windows-1254    cp1255  windows-1255
        cp1256  windows-1256    cp1257  windows-1257
        cp1258  windows-1258
        euc-cn  gb2312
        gsm0338 gsm-default-alphabet
        hz      hz-gb-2312
        iso-8859-11     tis-620
        macarabic       x-mac-arabic    maccentraleurroman      x-mac-centralroman
        maccyrillic     x-mac-cyrillic  macgreek        x-mac-greek
        machebrew       x-mac-hebrew    macicelandic    x-mac-icelandic
        macroman        macintosh       macturkish      x-mac-turkish
        macukrainian    x-mac-ukrainian macchinesesimp  x-mac-chinesesimp
        macjapanese     x-mac-japanese  mackorean       x-mac-korean
        shiftjis        shift_jis       shiftjisx0213   shift_jisx0213
        ucs-2be iso-10646-ucs-2 ucs-4be iso-10646-ucs-4
        ucs-2le utf-16le        ucs-2   utf-16
);
#       MacCroatian     
#       MacFarsi        
#       MacRomanian     
#       MacRumanian     
#       MacSami 
#       MacThai 

sub import ($;%) {
  shift;
  Message::MIME::Charset::make_charset ('*undef' =>
    encoder     => sub {
      my ($name, $s) = @_;
      $name = $CODE{output} if $name =~ /\*/;
      my $e = Encode::find_encoding ($name);
      unless ($e) {
        Message::MIME::Charset::_utf8_off ($s);
        return ($s, success => 0);
      }
      return ($e->encode ($s), success => 1);
    },
    decoder     => sub {
      my ($name, $s) = @_;
      $name = $CODE{input} if $name =~ /\*/;
      #unless ($name) {
      #  use Encode::Guess qw/utf-8 iso-8859-1 iso-2022-jp/;
      #  $name = Encode::Guess->guess ($s);
      #  return ($name->decode ($s), success => 1) if ref $name;
      #}
      return ($s, success => 0) unless my $e = Encode::find_encoding ($name);
      if (Message::MIME::Charset::is_utf8 ($s)) {
        if ($name eq 'utf-8') {
          return ($s, success => 1);
        } else {
          $s = Encode::encode ('iso-8859-1', $s, Encode::XMLCREF ());
          return ($e->decode ($s), success => 1);
        }
      } else {
        return ($e->decode ($s), success => 1);
      }
    },
    preferred_name      => \&_preferred_name,
  );
  Message::MIME::Charset::make_charset ('*default' => alias_of => '*undef');
  Message::MIME::Charset::make_charset (extended_unix_code_fixed_width_for_japanese =>
    encoder     => sub { &_encoder ('EUCFixed', 'EUCFixed', @_) },
    decoder     => sub { &_decoder ('EUCFixed', 'EUCFixed', @_) },
  );
  Message::MIME::Charset::make_charset ('x-iso2022jp-cp932' =>
    encoder     => sub { &_encoder ('ISO2022::CP932', 'ISO2022::CP932', @_) },
    decoder     => sub { &_decoder ('ISO2022::CP932', 'ISO2022::CP932', @_) },
  );
  Message::MIME::Charset::make_charset ('iso-10646-utf-1' =>
    encoder     => sub { &_encoder ('Unicode::UTF1', 'Unicode::UTF1', @_) },
    decoder     => sub { &_decoder ('Unicode::UTF1', 'Unicode::UTF1', @_) },
  );
  Message::MIME::Charset::make_charset ('utf-7' =>
    encoder     => sub { &_encoder ('Unicode::UTF7', 'Unicode::UTF7', @_) },
    decoder     => sub { &_decoder ('Unicode::UTF7', 'Unicode::UTF7', @_) },
  );
  Message::MIME::Charset::make_charset ('x-imap4-modified-utf7' =>
    encoder     => sub { &_encoder ('Unicode::UTF7', 'Unicode::UTF7::IMAP', @_) },
    decoder     => sub { &_decoder ('Unicode::UTF7', 'Unicode::UTF7::IMAP', @_) },
  );
}

sub _encoder ($$@) {
  no strict 'refs';
  my $p1 = shift;
  my $p2 = shift;
  if (!${'Encode::'.$p2.'::VERSION'} && !eval qq{use Encode::$p1}) {
    my $s = shift;
    Message::MIME::Charset::_utf8_off ($s);
    return ($s, success => 0);
  }
  return (Encode::encode ($_[0],$_[1],$_[2]), success => 1);
}
sub _decoder ($@) {
  no strict 'refs';
  my $p = shift;
  return ($_[1], success => 0)
    if !${'Encode::'.$p.'::VERSION'} && !eval qq{use Encode::$p};
  if (Message::MIME::Charset::is_utf8 ($_[1])) {
    my $s = Encode::encode ('iso-8859-1', $_[1]);
    return (Encode::decode ($_[0],$s,$_[2]), success => 1);
  } else {
    return (Encode::decode ($_[0],$_[1],$_[2]), success => 1);
  }
}
sub _preferred_name ($) {
      my $name = shift;
      my $perlname = lc Encode::resolve_alias ($name);
      $_PerlName2IanaName{$perlname} || $perlname || $name;
}

=head1 EXAMPLE

  use Message::MIME::Charset::Encode;
  $Message::MIME::Charset::Encode::CODE{input} = 'euc-jp';
  $Message::MIME::Charset::Encode::CODE{output} = 'iso-2022-jp';
  require Message::Entity;
  #...

=head1 SEE ALSO

Message::MIME::Charset

Message::Entity

Encode

=head1 LICENSE

Copyright 2002 Wakaba <w@suika.fam.cx>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.

=head1 CHANGE

See F<ChangeLog>.
$Date: 2002/08/29 12:30:46 $

=cut

1;
1
2	=head1 NAME
3
4	Message::MIME::Charset::Encode --- Message-pm: Encode module plug-in for Message::* Perl Modules
5
6	=head1 DESCRIPTION
7
8	Message::* therselves don't convert coding systems of parts of
9	messages, but have mechanism to define to call external functions.
10	This module provides such macros for Encode modules.
11
12	This module is part of Message::* Perl Modules.
13
14	=head1 USAGE
15
16	use Message::MIME::Charset::Encode;
17
18	=cut
19
20	package Message::MIME::Charset::Encode;
21	use strict;
22	use vars qw(%CODE $VERSION);
23	$VERSION=do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
24
25	require Message::MIME::Charset;
26	require Encode;
27
28	$CODE{internal} = 'utf-8';
29
30	=head1 $Message::MIME::Charset::Encode::CODE{input} = $perl_charset_name
31	=head1 $Message::MIME::Charset::Encode::CODE{output} = $perl_charset_name
32
33	Perl Encode module's name of '*default' charset.
34	You should change these value if necessary.
35
36	=cut
37
38	$CODE{input} = '7bit-jis';
39	$CODE{output} = '7bit-jis';
40
41	require Encode::Alias;
42	Encode::Alias::define_alias( qr/^(?:x-)?mac[-_]?(\w+)$/i => '"mac$1"' );
43	Encode::Alias::define_alias( qr/^macintosh$/i => '"macroman"' );
44	Encode::Alias::define_alias( qr/^windows[-_]?31j$/i => '"cp932"' );
45	unless ($Encode::EUCFixed::VERSION) {
46	Encode::Alias::define_alias( qr/^cseucfixwidjapanese$/i => '"extended_unix_code_fixed_width_for_japanese"' );
47	}
48	unless ($Encode::HZ::VERSION) {
49	Encode::Alias::define_alias( qr/^hz-gb-2312$/i => '"hz"' );
50	}
51	unless ($Encode::UTF1::VERSION) {
52	Encode::Alias::define_alias( qr/^csiso10646utf1$/i => '"iso-10646-utf-1"' );
53	Encode::Alias::define_alias( qr/^utf-?1$/i => '"iso-10646-utf-1"' );
54	}
55	unless ($Encode::UTF7::VERSION) {
56	Encode::Alias::define_alias( qr/^(?:x-)?unicode-.-.-utf-?7$/i => '"utf-7"' );
57	Encode::Alias::define_alias( qr/^csunicode11utf7$/i => '"utf-7"' );
58	Encode::Alias::define_alias( qr/^cp65000$/i => '"utf-7"' );
59	}
60
61	my %_PerlName2IanaName = qw(
62	7bit-jis iso-2022-jp-1
63	adobestandardencoding adobe-standard-encoding
64	adobesymbol adobe-symbol-encoding
65	ascii-ctrl us-ascii
66	cp37 ibm037
67	cp932 windows-31j cp936 gbk cp949 windows-949
68	cp1250 windows-1250 cp1251 windows-1251
69	cp1252 windows-1252 cp1253 windows-1253
70	cp1254 windows-1254 cp1255 windows-1255
71	cp1256 windows-1256 cp1257 windows-1257
72	cp1258 windows-1258
73	euc-cn gb2312
74	gsm0338 gsm-default-alphabet
75	hz hz-gb-2312
76	iso-8859-11 tis-620
77	macarabic x-mac-arabic maccentraleurroman x-mac-centralroman
78	maccyrillic x-mac-cyrillic macgreek x-mac-greek
79	machebrew x-mac-hebrew macicelandic x-mac-icelandic
80	macroman macintosh macturkish x-mac-turkish
81	macukrainian x-mac-ukrainian macchinesesimp x-mac-chinesesimp
82	macjapanese x-mac-japanese mackorean x-mac-korean
83	shiftjis shift_jis shiftjisx0213 shift_jisx0213
84	ucs-2be iso-10646-ucs-2 ucs-4be iso-10646-ucs-4
85	ucs-2le utf-16le ucs-2 utf-16
86	);
87	# MacCroatian
88	# MacFarsi
89	# MacRomanian
90	# MacRumanian
91	# MacSami
92	# MacThai
93
94	sub import ($;%) {
95	shift;
96	Message::MIME::Charset::make_charset ('*undef' =>
97	encoder => sub {
98	my ($name, $s) = @_;
99	$name = $CODE{output} if $name =~ /\*/;
100	my $e = Encode::find_encoding ($name);
101	unless ($e) {
102	Message::MIME::Charset::_utf8_off ($s);
103	return ($s, success => 0);
104	}
105	return ($e->encode ($s), success => 1);
106	},
107	decoder => sub {
108	my ($name, $s) = @_;
109	$name = $CODE{input} if $name =~ /\*/;
110	#unless ($name) {
111	# use Encode::Guess qw/utf-8 iso-8859-1 iso-2022-jp/;
112	# $name = Encode::Guess->guess ($s);
113	# return ($name->decode ($s), success => 1) if ref $name;
114	#}
115	return ($s, success => 0) unless my $e = Encode::find_encoding ($name);
116	if (Message::MIME::Charset::is_utf8 ($s)) {
117	if ($name eq 'utf-8') {
118	return ($s, success => 1);
119	} else {
120	$s = Encode::encode ('iso-8859-1', $s, Encode::XMLCREF ());
121	return ($e->decode ($s), success => 1);
122	}
123	} else {
124	return ($e->decode ($s), success => 1);
125	}
126	},
127	preferred_name => \&_preferred_name,
128	);
129	Message::MIME::Charset::make_charset ('default' => alias_of => 'undef');
130	Message::MIME::Charset::make_charset (extended_unix_code_fixed_width_for_japanese =>
131	encoder => sub { &_encoder ('EUCFixed', 'EUCFixed', @_) },
132	decoder => sub { &_decoder ('EUCFixed', 'EUCFixed', @_) },
133	);
134	Message::MIME::Charset::make_charset ('x-iso2022jp-cp932' =>
135	encoder => sub { &_encoder ('ISO2022::CP932', 'ISO2022::CP932', @_) },
136	decoder => sub { &_decoder ('ISO2022::CP932', 'ISO2022::CP932', @_) },
137	);
138	Message::MIME::Charset::make_charset ('iso-10646-utf-1' =>
139	encoder => sub { &_encoder ('Unicode::UTF1', 'Unicode::UTF1', @_) },
140	decoder => sub { &_decoder ('Unicode::UTF1', 'Unicode::UTF1', @_) },
141	);
142	Message::MIME::Charset::make_charset ('utf-7' =>
143	encoder => sub { &_encoder ('Unicode::UTF7', 'Unicode::UTF7', @_) },
144	decoder => sub { &_decoder ('Unicode::UTF7', 'Unicode::UTF7', @_) },
145	);
146	Message::MIME::Charset::make_charset ('x-imap4-modified-utf7' =>
147	encoder => sub { &_encoder ('Unicode::UTF7', 'Unicode::UTF7::IMAP', @_) },
148	decoder => sub { &_decoder ('Unicode::UTF7', 'Unicode::UTF7::IMAP', @_) },
149	);
150	}
151
152	sub _encoder ($$@) {
153	no strict 'refs';
154	my $p1 = shift;
155	my $p2 = shift;
156	if (!${'Encode::'.$p2.'::VERSION'} && !eval qq{use Encode::$p1}) {
157	my $s = shift;
158	Message::MIME::Charset::_utf8_off ($s);
159	return ($s, success => 0);
160	}
161	return (Encode::encode ($_[0],$_[1],$_[2]), success => 1);
162	}
163	sub _decoder ($@) {
164	no strict 'refs';
165	my $p = shift;
166	return ($_[1], success => 0)
167	if !${'Encode::'.$p.'::VERSION'} && !eval qq{use Encode::$p};
168	if (Message::MIME::Charset::is_utf8 ($_[1])) {
169	my $s = Encode::encode ('iso-8859-1', $_[1]);
170	return (Encode::decode ($_[0],$s,$_[2]), success => 1);
171	} else {
172	return (Encode::decode ($_[0],$_[1],$_[2]), success => 1);
173	}
174	}
175	sub _preferred_name ($) {
176	my $name = shift;
177	my $perlname = lc Encode::resolve_alias ($name);
178	$_PerlName2IanaName{$perlname} \|\| $perlname \|\| $name;
179	}
180
181	=head1 EXAMPLE
182
183	use Message::MIME::Charset::Encode;
184	$Message::MIME::Charset::Encode::CODE{input} = 'euc-jp';
185	$Message::MIME::Charset::Encode::CODE{output} = 'iso-2022-jp';
186	require Message::Entity;
187	#...
188
189	=head1 SEE ALSO
190
191	Message::MIME::Charset
192
193	Message::Entity
194
195	Encode
196
197	=head1 LICENSE
198
199	Copyright 2002 Wakaba <w@suika.fam.cx>
200
201	This program is free software; you can redistribute it and/or modify
202	it under the terms of the GNU General Public License as published by
203	the Free Software Foundation; either version 2 of the License, or
204	(at your option) any later version.
205
206	This program is distributed in the hope that it will be useful,
207	but WITHOUT ANY WARRANTY; without even the implied warranty of
208	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
209	GNU General Public License for more details.
210
211	You should have received a copy of the GNU General Public License
212	along with this program; see the file COPYING. If not, write to
213	the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
214	Boston, MA 02111-1307, USA.
215
216	=head1 CHANGE
217
218	See F<ChangeLog>.
219	$Date: 2002/08/29 12:30:46 $
220
221	=cut
222
223	1;