/[suikacvs]/messaging/manakai/lib/Message/MIME/Charset.pm
Suika

Contents of /messaging/manakai/lib/Message/MIME/Charset.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.18 - (hide annotations) (download)
Sat Dec 28 09:07:05 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.17: +115 -21 lines
Encoding encoded-word is supported

1 wakaba 1.1
2     =head1 NAME
3    
4 wakaba 1.18 Message::MIME::Charset --- Message-pm: Coded character sets support
5 wakaba 1.1
6     =head1 DESCRIPTION
7    
8 wakaba 1.18 This module provides some abstracted functions to handle string
9     in various character codes with (as-far-as-possiblly-) coding system
10     independent implemention.
11    
12     Note that this module is not only used to implement MIME charset mechanism
13     but also used to support non-MIME schemes of character encoding.
14    
15     This module is part of Message::* Perl Modules.
16 wakaba 1.1
17     =cut
18    
19 wakaba 1.10 ## NOTE: You should not require/use other module (even it
20     ## is part of Message::* Perl Modules) as far as possible,
21     ## to be able to use this module (M::M::Charset) from
22     ## other (non-Message::*) modules.
23    
24 wakaba 1.1 package Message::MIME::Charset;
25     use strict;
26 wakaba 1.12 use vars qw(%CHARSET %MSNAME2IANANAME %REG $VERSION);
27 wakaba 1.18 $VERSION=do{my @r=(q$Revision: 1.17 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
28 wakaba 1.1
29 wakaba 1.9 &_builtin_charset;
30     sub _builtin_charset () {
31 wakaba 1.4
32     $CHARSET{'*DEFAULT'} = {
33     preferred_name => '',
34    
35     encoder => sub { $_[1] },
36     decoder => sub { $_[1] },
37    
38     mime_text => 1, ## Suitability in use as MIME text/* charset
39     #accept_cte => [qw/7bit .../],
40     cte_7bit_preferred => 'quoted-printable',
41     };
42     $CHARSET{'*default'} = $CHARSET{'*DEFAULT'};
43    
44     $CHARSET{'us-ascii'} = {
45     preferred_name => 'us-ascii',
46    
47     encoder => sub { $_[1] },
48     decoder => sub { $_[1] },
49 wakaba 1.5
50     mime_text => 1,
51 wakaba 1.13 cte_7bit_preferred => 'quoted-printable',
52 wakaba 1.18
53     divide_string => \&_divide_string_1,
54     cte_header_preferred => 'q',
55    
56     is_representable_in => sub { $_[1] =~ /[^\x00-\x7F]/ ? 0 : 1 },
57 wakaba 1.4 };
58    
59     $CHARSET{'iso-2022-int-1'} = {
60     preferred_name => 'iso-2022-int-1',
61    
62     encoder => sub { $_[1] },
63     decoder => sub { $_[1] },
64 wakaba 1.5
65     mime_text => 1,
66 wakaba 1.18 cte_7bit_preferred => 'quoted-printable',
67     cte_header_preferred => 'q',
68 wakaba 1.4 };
69    
70     $CHARSET{'unknown-8bit'} = {
71     preferred_name => 'unknown-8bit',
72    
73     encoder => sub { $_[1] },
74     decoder => sub { $_[1] },
75    
76 wakaba 1.13 mime_text => 1,
77     cte_7bit_preferred => 'base64',
78 wakaba 1.18
79     divide_string => \&_divide_string_1,
80 wakaba 1.13 };
81    
82     $CHARSET{'x-unknown'} = {
83     preferred_name => 'x-unknown',
84    
85     encoder => sub { $_[1] },
86     decoder => sub { $_[1] },
87    
88     mime_text => 0,
89     cte_7bit_preferred => 'base64',
90 wakaba 1.18
91     divide_string => \&_divide_string_1,
92 wakaba 1.13 };
93    
94     $CHARSET{'*undef'} = {
95     preferred_name => '',
96    
97     #encoder => sub { $_[1] },
98     #decoder => sub { $_[1] },
99    
100 wakaba 1.4 mime_text => 0,
101 wakaba 1.6 cte_7bit_preferred => 'base64',
102 wakaba 1.4 };
103    
104 wakaba 1.18 $CHARSET{'*internal'} = {
105     preferred_name => '',
106    
107     #encoder => sub { $_[1] },
108     #decoder => sub { $_[1] },
109    
110     mime_text => 0,
111     cte_7bit_preferred => 'base64',
112     };
113    
114     $CHARSET{'*default_value'} = { ## Dummy charset for default property value
115     perl_name => undef,
116     preferred_name => undef,
117     mime_text => 0,
118     cte_7bit_preferred => 'base64',
119     cte_header_preferred => '*auto',
120     };
121    
122 wakaba 1.9 } # /builtin_charset
123    
124     my %_MINIMUMIZER = (
125     'euc-jp' => \&_name_euc_japan,
126     'euc-jisx0213' => \&_name_euc_japan,
127     'euc-jisx0213-plane1' => \&_name_euc_japan,
128     'x-euc-jisx0213-packed' => \&_name_euc_japan,
129 wakaba 1.11 'x-iso-2022' => \&_name_8bit_iso2022,
130     'iso-2022-cn' => \&_name_8bit_iso2022,
131     'iso-2022-cn-ext' => \&_name_8bit_iso2022,
132 wakaba 1.9 'iso-2022-int-1' => \&_name_net_ascii_8bit,
133 wakaba 1.11 'iso-2022-jp' => \&_name_8bit_iso2022,
134     'iso-2022-jp-1' => \&_name_8bit_iso2022,
135     'iso-2022-jp-2' => \&_name_8bit_iso2022,
136     'iso-2022-jp-3' => \&_name_8bit_iso2022,
137     'iso-2022-jp-3-plane1' => \&_name_8bit_iso2022,
138     'iso-2022-kr' => \&_name_8bit_iso2022,
139     'iso-8859-1' => \&_name_8bit_iso2022,
140 wakaba 1.9 jis_x0201 => \&_name_shift_jis,
141 wakaba 1.18 'x-iso-2022-7bit' => \&_name_8bit_iso2022,
142     'x-iso-2022-7bit-utf-8' => \&_name_net_ascii_8bit,
143 wakaba 1.9 shift_jis => \&_name_shift_jis,
144     shift_jisx0213 => \&_name_shift_jis,
145     'shift_jisx0213-plane1' => \&_name_shift_jis,
146     'x-sjis' => \&_name_shift_jis,
147     'us-ascii' => \&_name_net_ascii_8bit,
148     'utf-8' => \&_name_net_ascii_8bit,
149     );
150    
151 wakaba 1.14 my %_IsMimeText;
152     for (qw(
153     adobe-standard-encoding adobe-symbol-encoding
154     big5 big5-eten big5-hkscs
155     cp950
156     gbk gb18030
157     euc-jp euc-jisx0213 euc-kr euc-tw
158     hp-roman8
159     hz-gb-2312
160     ibm437
161 wakaba 1.18 x-iso-2022-7bit x-iso-2022-7bit-utf-8 x-iso-2022
162 wakaba 1.14 iso-2022-cn iso-2022-cn-ext
163     iso-2022-int-1
164     iso-2022-jp iso-2022-jp-1 iso-2022-jp-2 iso-2022-jp-3
165 wakaba 1.15 x-iso2022jp-cp932
166 wakaba 1.14 iso-2022-kr
167     iso-8859-1 iso-8859-2 iso-8859-3
168     iso-8859-4 iso-8859-5 iso-8859-6
169     iso-8859-7 iso-8859-8 iso-8859-9
170     iso-8859-10 iso-8859-12 iso-8859-13
171     iso-8859-14 iso-8859-15 iso-8859-16
172     jis_encoding
173     koi8-r koi8-u
174     x-mac-arabic x-mac-centralroman x-mac-cyrillic x-mac-greek
175     x-mac-hebrew x-mac-icelandic macintosh x-mac-turkish
176     x-mac-ukrainian x-mac-chinesesimp x-mac-japanese x-mac-korean
177     shift_jis shift_jisx0213 x-sjis
178     tis-620
179     unicode-1-1-utf-7 unicode-1-1-utf-8
180     unicode-2-0-utf-7 unicode-2-0-utf-8
181     utf-7 utf-8 utf-9
182     viscii
183     windows-1250 windows-1251 windows-1252 windows-1253
184     windows-1254 windows-1255 windows-1256 windows-1257
185     windows-1258 windows-31j windows-949
186     )) { $_IsMimeText{$_} = 1 }
187    
188 wakaba 1.12 %MSNAME2IANANAME = (
189     'iso-2022-jp' => 'x-iso2022jp-cp932',
190     'ks_c_5601-1987' => 'windows-949',
191     );
192    
193 wakaba 1.4 sub make_charset ($%) {
194     my $name = shift;
195     return unless $name; ## Note: charset "0" is not supported.
196     my %definition = @_;
197 wakaba 1.6
198     $definition{preferred_name} ||= $name;
199 wakaba 1.4 if ($definition{preferred_name} ne $name
200     && ref $CHARSET{$definition{preferred_name}}) {
201     ## New charset is an alias of defined charset,
202     $CHARSET{$name} = $CHARSET{$definition{preferred_name}};
203     return;
204     } elsif ($definition{alias_of} && ref $CHARSET{$definition{alias_of}}) {
205     ## New charset is an alias of defined charset,
206     $CHARSET{$name} = $CHARSET{$definition{alias_of}};
207     return;
208     }
209     $CHARSET{$name} = \%definition;
210    
211     ## Set default values
212 wakaba 1.6 #$definition{encoder} ||= sub { $_[1] };
213     #$definition{decoder} ||= sub { $_[1] };
214 wakaba 1.4
215     $definition{mime_text} = 0 unless defined $definition{mime_text};
216     $definition{cte_7bit_preferred} = 'base64'
217     unless defined $definition{cte_7bit_preferred};
218     }
219 wakaba 1.1
220     sub encode ($$) {
221     my ($charset, $s) = (lc shift, shift);
222 wakaba 1.13 my $c = ref $CHARSET{$charset}->{encoder}? $charset: '*undef';
223     if (ref $CHARSET{$c}->{encoder}) {
224     my ($t, %r) = &{$CHARSET{$c}->{encoder}} ($charset, $s);
225     unless (defined $r{success}) {
226     $r{success} = 1;
227     }
228     return ($t, %r);
229 wakaba 1.1 }
230 wakaba 1.5 ($s, success => 0);
231 wakaba 1.1 }
232    
233     sub decode ($$) {
234     my ($charset, $s) = (lc shift, shift);
235 wakaba 1.13 my $c = ref $CHARSET{$charset}->{decoder}? $charset: '*undef';
236     if (ref $CHARSET{$c}->{decoder}) {
237     my ($t, %r) = &{$CHARSET{$c}->{decoder}} ($charset, $s);
238     unless (defined $r{success}) {
239     $r{success} = 1;
240     }
241     return ($t, %r);
242 wakaba 1.1 }
243 wakaba 1.13 ($s, success => 0);
244 wakaba 1.1 }
245    
246     sub name_normalize ($) {
247     my $name = lc shift;
248 wakaba 1.14 if (ref $CHARSET{$name}->{preferred_name} eq 'CODE') {
249     return &{ $CHARSET{$name}->{preferred_name} } ($name);
250     } elsif ($CHARSET{$name}->{preferred_name}) {
251     return $CHARSET{$name}->{preferred_name};
252     } elsif (ref $CHARSET{'*undef'}->{preferred_name} eq 'CODE') {
253     return &{ $CHARSET{'*undef'}->{preferred_name} } ($name);
254     }
255     $name;
256 wakaba 1.1 }
257    
258 wakaba 1.18 sub name_minimumize ($$;$) {
259 wakaba 1.17 require Message::MIME::Charset::MinName;
260 wakaba 1.18 my ($charset, $s, $option) = (lc shift, @_);
261 wakaba 1.9 if (ref $CHARSET{$charset}->{name_minimumizer} eq 'CODE') {
262 wakaba 1.6 return &{$CHARSET{$charset}->{name_minimumizer}} ($charset, $s);
263 wakaba 1.17 } elsif (ref $Message::MIME::Charset::MinName::MIN{$charset}) {
264 wakaba 1.18 return &{$Message::MIME::Charset::MinName::MIN{$charset}} ($charset, $s, $option);
265 wakaba 1.9 } elsif (ref $_MINIMUMIZER{$charset}) {
266 wakaba 1.18 return &{$_MINIMUMIZER{$charset}} ($charset, $s, $option);
267 wakaba 1.13 } elsif (ref $CHARSET{'*undef'}->{name_minimumizer} eq 'CODE') {
268 wakaba 1.18 return &{$CHARSET{'*undef'}->{name_minimumizer}} ($charset, $s, $option);
269 wakaba 1.9 }
270     (charset => $charset);
271     }
272    
273 wakaba 1.12 sub msname2iananame ($) {
274     my $mscharset = shift;
275     $MSNAME2IANANAME{$mscharset} || $mscharset;
276     }
277    
278 wakaba 1.9 sub _name_7bit_iso2022 ($$) {shift;
279     my $s = shift;
280     if ($s =~ /[\x0E\x0F\x1B]/) {
281     return (charset => 'iso-2022-jp')
282     unless $s =~ /\x1B[^\x24\x28]
283     |\x1B\x24[^\x40B]
284     |\x1B\x28[^BJ]
285     |\x0E|\x0F/x;
286     return (charset => 'iso-2022-jp-1')
287     unless $s =~ /\x1B[^\x24\x28]
288     |\x1B\x24[^\x40B\x28]
289     |\x1B\x24\x28[^D]
290     |\x1B\x28[^BJ]
291     |\x0E|\x0F/x;
292     return (charset => 'iso-2022-jp-3-plane1')
293     unless $s =~ /\x1B[^\x24\x28]
294     |\x1B\x24[^\x28] #[^B\x28]
295     |\x1B\x24\x28[^O]
296     |\x1B\x28[^B]
297     |\x0E|\x0F/x;
298     return (charset => 'iso-2022-jp-3')
299     unless $s =~ /\x1B[^\x24\x28]
300     |\x1B\x24[^\x28] #[^B\x28]
301     |\x1B\x24\x28[^OP]
302     |\x1B\x28[^B]
303     |\x0E|\x0F/x;
304     return (charset => 'iso-2022-kr')
305     unless $s =~ /\x1B[^\x24]
306     |\x1B\x24[^\x29]
307     |\x1B\x24\x29[^C]/x;
308     return (charset => 'iso-2022-jp-2')
309     unless $s =~ /\x1B[^\x24\x28\x2E\x4E]
310     |\x1B\x24[^\x40AB\x28]
311     |\x1B\x24\x28[^CD]
312     |\x1B\x28[^BJ]
313     |\x1B\x2E[^AF]
314     |\x0E|\x0F/x;
315     return (charset => 'iso-2022-cn')
316     unless $s =~ /\x1B[^\x4E\x24]
317     |\x1B\x24[^\x29\x2A]
318     |\x1B\x24\x29[^AG]
319     |\x1B\x24\x2A[^H]/x;
320     return (charset => 'iso-2022-cn-ext')
321     unless $s =~ /\x1B[^\x4E\x4F\x24]
322     |\x1B\x24[^\x29\x2A]
323     |\x1B\x24\x29[^AEG]
324     |\x1B\x24\x2A[^HIJKLM]/x;
325     return (charset => 'iso-2022-int-1')
326     unless $s =~ /\x1B[^\x24\x28\x2D]
327     |\x1B\x24[^\x40AB\x28\x29]
328     |\x1B\x24\x28[^DGH]
329     |\x1B\x24\x29[^C]
330     |\x1B\x28[^BJ]
331     |\x1B\x2D[^AF]/x;
332 wakaba 1.18 return (charset => 'x-iso-2022-7bit')
333 wakaba 1.9 unless $s =~ /\x1B[^\x24\x28\x2C]
334     |\x1B\x24[^\x28\x2C\x40-\x42]
335     |\x1B\x24[\x28\x2C][^\x20-\x7E]
336     |\x1B\x24[\x28\x2C][\x20-\x2F]+[^\x30-\x7E]
337     |\x1B[\x28\x2C][^\x20-\x7E]
338     |\x1B[\x28\x2C][\x20-\x2F]+[^\x30-\x7E]
339     |\x0E|\x0F/x;
340     return (charset => 'x-iso-2022');
341     } else {
342     return (charset => 'us-ascii');
343 wakaba 1.6 }
344     }
345    
346 wakaba 1.9 sub _name_net_ascii_8bit ($) {
347     my $name = shift; my $s = shift;
348 wakaba 1.8 return (charset => 'us-ascii') unless $s =~ /[\x1B\x0E\x0F\x80-\xFF]/;
349     if ($s =~ /[\x80-\xFF]/) {
350 wakaba 1.18 if ($s =~ /[\xC0-\xFD][\x80-\xBF]*[\x80-\xBF]/) {
351 wakaba 1.8 if ($s =~ /\x1B/) {
352 wakaba 1.18 return (charset => 'x-iso-2022-7bit'); ## iso-2022-7bit + UTF-8
353 wakaba 1.8 } else {
354     return (charset => 'utf-8');
355     }
356     } elsif ($s =~ /\x1B/) {
357 wakaba 1.9 return (charset => 'x-iso-2022'); ## 8bit ISO 2022
358     } else {
359     return (charset => 'iso-8859-1');
360     }
361     } else { ## 7bit ISO 2022
362     return _name_7bit_iso2022 ($name, $s);
363     }
364     }
365    
366 wakaba 1.11 sub _name_8bit_iso2022 ($$) {
367 wakaba 1.9 my $name = shift; my $s = shift;
368     return (charset => 'us-ascii') unless $s =~ /[\x1B\x0E\x0F\x80-\xFF]/;
369     if ($s =~ /[\x80-\xFF]/) {
370     if ($s =~ /\x1B/) {
371     return (charset => 'x-iso-2022'); ## 8bit ISO 2022
372 wakaba 1.8 } else {
373     return (charset => 'iso-8859-1');
374     }
375 wakaba 1.9 } else { ## 7bit ISO 2022
376     return _name_7bit_iso2022 ($name, $s);
377     }
378     }
379    
380     ## Not completed.
381     ## TODO: gb18030, cn-gb-12345
382     ## TODO: _name_euc_gbf (cn-gb-12345, gb2312)
383     sub _name_euc_gb ($$) {
384     my $name = shift; my $s = shift;
385     if ($s =~ /[\x80-\xFF]/) {
386     if ($s =~ /
387     (?:\G|[\x00-\x3F\x7F\x80\xFF])
388     (?:[\xA1-\xA9\xB0-\xFE][\xA1-\xFE]
389     |[\x40-\x7E])*
390     (?:
391     [\x81-\xA0\xAA-\xAF][\x40-\xFE]
392     |[\xA1-\xFE][\x40-\xA0]
393     )
394     /x) {
395     (charset => 'gbk');
396     } elsif ($s =~ /
397     (?:\xA2[\xA1-\xAA]
398     |\xA6[\xE0-\xF5]
399     |\xA8[\xBB-\xC0]
400     )
401     (?=(?:[\xA1-\xFE][\xA1-\xFE])*(?:[\x00-\xA0\xFF]|\z))
402     /x) {
403     (charset => 'gbk');
404     } elsif ($s =~ /
405     (?:\xA3\xE7|\xA7[\xDD-\xF2]
406     |\xA8[\xBB-\xC0]
407     |[\xAA-\xAF\xF8-\xFE][\xA1-\xFE]
408     )
409     (?=(?:[\xA1-\xFE][\xA1-\xFE])*(?:[\x00-\xA0\xFF]|\z))
410     /x) {
411     (charset => 'cn-gb-isoir165', 'charset-edition' => 1992);
412     } elsif ($s =~ /\xEF\xF1 ## Typo bug of GB 2312
413     (?=(?:[\xA1-\xFE][\xA1-\xFE])*(?:[\x00-\xA0\xFF]|\z))
414     /x) {
415     (charset => 'gb2312');
416     } else {
417     (charset => 'gb2312', 'charset-edition' => 1980);
418     }
419     } elsif ($s =~ /[\x0E\x0F]/) {
420     (charset => 'gb2312'); ## Actually, this is not "gb2312"
421     } else {
422 wakaba 1.11 _name_7bit_iso2022 ($name, $s);
423 wakaba 1.9 }
424     }
425    
426     sub _name_euc_japan ($$) {
427     my $name = shift; my $s = shift;
428     if ($s =~ /[\x80-\xFF]/) {
429     if ($s =~ /\x8F[\xA1\xA3-\xA5\xA8\xAC-\xAF\xEE-\xFE][\xA1-\xFE]/) {
430     if ($s =~ /\x8F[\xA2\xA6\xA7\xA9-\xAB\xB0-\xED][\xA1-\xFE]/) {
431     ## JIS X 0213 plane 2 + JIS X 0212
432     (charset => 'x-euc-jisx0213-packed');
433     } else {
434     (charset => 'euc-jisx0213');
435     }
436     } elsif ($s =~ m{(?<![\x8E\x8F]) ## Not G2/G3 character
437     (?: ## JIS X 0213:2000
438     [\xA9-\xAF\xF5-\xFE][\xA1-\xFE]
439     |\xA2[\xAF-\xB9\xC2-\xC9\xD1-\xDB\xE9-\xF1\xFA-\xFD]
440     |\xA3[\xA1-\xAF\xBA-\xC0\xDB-\xE0\xFB-\xFE]
441     |\xA4[\xF4-\xFE]|\xA5[\xF7-\xFE]
442     |\xA6[\xB9-\xC0\xD9-\xFE]|\xA7[\xC2-\xD0\xF2-\xFE]
443     |\xA8[\xC1-\xFE]|\xCF[\xD4-\xFE]|\xF4[\xA7-\xFE]
444     )
445     (?=(?:[\xA1-\xFE][\xA1-\xFE])*(?:[\x00-\xA0\xFF]|\z))}x) {
446     if ($s =~ /\x8F/) { ## JIS X 0213 plane 1 + JIS X 0212
447     (charset => 'x-euc-jisx0213-packed');
448     } else {
449     (charset => 'euc-jisx0213-plane1');
450     }
451     } else {
452     (charset => 'euc-jp');
453     }
454     } elsif ($s =~ /\x0E|\x0F|\x1B[\x4E\x4F]/) {
455     (charset => 'euc-jisx0213'); ## Actually, this is not euc-japan
456     } else {
457 wakaba 1.11 _name_7bit_iso2022 ($name, $s);
458 wakaba 1.9 }
459     }
460    
461     sub _name_shift_jis ($$) {
462     my $name = shift; my $s = shift;
463     if ($s =~ /[\x80-\xFF]/) {
464     if ($s =~ /[\x0E\x0F\x1B]/) {
465     (charset => 'x-sjis');
466     } elsif ($s =~ /
467     (?:\G|[\x00-\x3F\x7F])
468     (?:[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC]
469     |[\x40-\x7E\xA1-\xDF])*
470     [\xF0-\xFC][\x40-\x7E\x80-\xFC]
471     /x) {
472     (charset => 'shift_jisx0213');
473     } elsif ($s =~ /
474     (?:\G|[\x00-\x3F\x7F])
475     (?:[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC]
476     |[\x40-\x7E\xA1-\xDF])*
477     (?:
478     [\x85-\x87\xEB-\xEF][\x40-\x7E\x80-\xFC]
479     |\x81[\xAD-\xB7\xC0-\xC7\xCF-\xD9\xE9-\xEF\xF8-\xFB]
480     |\x82[\x40-\x4E\x59-\x5F\x7A-\x80\x9B-\x9E\xF2-\xFC]
481     |\x83[\x97-\x9E\xB7-\xBE\xD7-\xFC]
482     |\x84[\x61-\x6F\x72-\x9E\xBF-\xFC]
483     |\x88[\x40-\x9E]|\x98[\x73-\x9E]|\xEA[\xA5-\xFC]
484     )
485     /x) {
486     (charset => 'shift_jisx0213-plane1');
487     } else {
488     (charset => 'shift_jis');
489     }
490     } elsif ($s =~ /[\x5C\x7E]/) {
491     if ($s =~ /\x1B\x0E\x0F/) {
492     (charset => 'x-sjis'); ## ISO 2022 with implied "ESC ( J"
493     ## BUG: "ESC ( B foobar\aaa ESC ( J aiueo" also matchs this
494     } else {
495     (charset => 'jis_x0201');
496     }
497     } else {
498 wakaba 1.11 _name_7bit_iso2022 ($name, $s);
499 wakaba 1.9 }
500     }
501    
502 wakaba 1.18 eval q{require Encode};
503 wakaba 1.13 sub _utf8_on ($) {
504     Encode::_utf8_on ($_[0]) if $Encode::VERSION;
505     }
506     sub _utf8_off ($) {
507     Encode::_utf8_off ($_[0]) if $Encode::VERSION;
508     }
509    
510 wakaba 1.14 sub is_mime_text ($) {
511     my $name = lc shift;
512     if (ref $CHARSET{$name}->{mime_text} eq 'CODE') {
513     return &{ $CHARSET{$name}->{mime_text} } ($name);
514     } elsif (defined $CHARSET{$name}->{mime_text}) {
515     return $CHARSET{$name}->{mime_text};
516     } elsif (defined $_IsMimeText{$name}) {
517     return $_IsMimeText{$name};
518     } elsif (ref $CHARSET{'*undef'}->{mime_text} eq 'CODE') {
519     return &{ $CHARSET{'*undef'}->{mime_text} } ($name);
520     }
521     0;
522     }
523    
524 wakaba 1.18 sub divide_string ($$;%) {
525     my ($charset, $string, %option) = @_;
526     $option{-max} ||= 70;
527     if (ref $CHARSET{$charset}->{divide_string}) {
528     return &{$CHARSET{$charset}->{divide_string}} ($charset, $string, \%option);
529     } else {
530     my @r; ## 12 = 3*4. Most of stateless codes are 1-4 octets per char.
531     my $l = int ($option{-max} / 12) * 12;
532     for my $i (0..int (length ($string) / $l)) {
533     push @r, substr ($string, $l*$i, $l);
534     }
535     return \@r;
536     }
537     }
538     sub _divide_string_1 ($%) {
539     my (undef, $string, $option) = @_;
540     my @r;
541     for my $i (0..int (length ($string) / $option->{-max})) {
542     push @r, substr ($string, $option->{-max}*$i, $option->{-max});
543     }
544     return \@r;
545     }
546    
547     sub get_property ($$) {
548     my ($property, $charset) = @_;
549     if (defined $CHARSET{$charset}->{$property}) {
550     return $CHARSET{$charset}->{$property};
551     } else {
552     return $CHARSET{'*default_value'}->{$property};
553     }
554     }
555    
556     =head1 {charset => $charset,...} = Message::MIME::Charset::get_interchange_charset ($charset, $string, {%option})
557    
558     Get charset name (for IANA name context) for information interchange.
559    
560     =cut
561    
562     sub get_interchange_charset ($$;$) {
563     my ($charset, $string, $option) = @_;
564    
565     {charset => $charset};
566     }
567    
568     =head1 1/0 = is_representable_in ($charset, $string, {%option})
569    
570     Return whether $string (encoded in *internal charset) is able to be
571     represented in the $charset.
572    
573     Options: Currently no option argument is available.
574    
575     =cut
576    
577     sub is_representable_in ($$;$) {
578     my ($charset, $string, $option) = @_;
579     if (ref $CHARSET{$charset}->{is_representable_in}) {
580     return &{$CHARSET{$charset}->{is_representable_in}} (@_);
581     } else {
582     return 0;
583     }
584     }
585    
586 wakaba 1.1 =head1 LICENSE
587    
588 wakaba 1.18 Copyright 2002 Wakaba <w@suika.fam.cx>
589 wakaba 1.1
590     This program is free software; you can redistribute it and/or modify
591     it under the terms of the GNU General Public License as published by
592     the Free Software Foundation; either version 2 of the License, or
593     (at your option) any later version.
594    
595     This program is distributed in the hope that it will be useful,
596     but WITHOUT ANY WARRANTY; without even the implied warranty of
597     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
598     GNU General Public License for more details.
599    
600     You should have received a copy of the GNU General Public License
601     along with this program; see the file COPYING. If not, write to
602     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
603     Boston, MA 02111-1307, USA.
604    
605     =cut
606    
607 wakaba 1.18 1; # $Date: $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24