/[suikacvs]/messaging/manakai/lib/Message/MIME/Charset.pm
Suika

Contents of /messaging/manakai/lib/Message/MIME/Charset.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.14 - (hide annotations) (download)
Mon Jul 22 07:48:01 2002 UTC (22 years, 4 months ago) by wakaba
Branch: MAIN
Changes since 1.13: +59 -3 lines
2002-07-22  Wakaba <w@suika.fam.cx>

	* Charset.pm:
	- (encode, decode): Reformed.  Return a hash as the second and
	later elements of the returned array.  (For now only the
	'success' key is used.)
	- (_utf8_on, _utf8_off): New procedures.
	- (%_IsMimeText): New hash.
	- (is_mime_text): New function.
	* EncodedWord.pm (_decode_eword): Fix for new 
	Message::MIME::Charset::decode.

1 wakaba 1.1
2     =head1 NAME
3    
4     Message::MIME::Charset Perl module
5    
6     =head1 DESCRIPTION
7    
8     Perl module for MIME charset.
9    
10     =cut
11    
12 wakaba 1.10 ## NOTE: As far as possible, do not require/use other modules
13     ## (even ones that are part of the Message::* Perl modules),
14     ## so that this module (M::M::Charset) can also be used from
15     ## other (non-Message::*) modules.
16    
17 wakaba 1.1 package Message::MIME::Charset;
18     use strict;
19 wakaba 1.12 use vars qw(%CHARSET %MSNAME2IANANAME %REG $VERSION);
20 wakaba 1.14 $VERSION=do{my @r=(q$Revision: 1.13 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
21 wakaba 1.1
## Register the built-in definitions while the module is being loaded.
&_builtin_charset;

## _builtin_charset ()
##   Stores the built-in definitions in %CHARSET:
##     '*DEFAULT' / '*default'  one shared definition under both keys,
##     'us-ascii', 'iso-2022-int-1', 'unknown-8bit', 'x-unknown'
##                              charsets whose encoder and decoder hand
##                              the text back unchanged,
##     '*undef'                 the entry the lookup functions of this
##                              module consult when a name has no usable
##                              definition (it has no encoder/decoder).
##   Returns the '*undef' definition (value of the last assignment).
sub _builtin_charset () {
  ## Conversion procedure: called as (charset name, text), returns text.
  my $as_is = sub { $_[1] };

  my %definition_of = (
    '*DEFAULT' => {
      preferred_name => '',
      encoder => $as_is,
      decoder => $as_is,
      mime_text => 1,	## Suitability in use as MIME text/* charset
      #accept_cte => [qw/7bit .../],
      cte_7bit_preferred => 'quoted-printable',
    },
    'us-ascii' => {
      preferred_name => 'us-ascii',
      encoder => $as_is,
      decoder => $as_is,
      mime_text => 1,
      cte_7bit_preferred => 'quoted-printable',
    },
    'iso-2022-int-1' => {
      preferred_name => 'iso-2022-int-1',
      encoder => $as_is,
      decoder => $as_is,
      mime_text => 1,
    },
    'unknown-8bit' => {
      preferred_name => 'unknown-8bit',
      encoder => $as_is,
      decoder => $as_is,
      mime_text => 1,
      cte_7bit_preferred => 'base64',
    },
    'x-unknown' => {
      preferred_name => 'x-unknown',
      encoder => $as_is,
      decoder => $as_is,
      mime_text => 0,
      cte_7bit_preferred => 'base64',
    },
  );
  @CHARSET{keys %definition_of} = values %definition_of;

  ## Both spellings refer to the very same definition hash.
  $CHARSET{'*default'} = $CHARSET{'*DEFAULT'};

  $CHARSET{'*undef'} = {
    preferred_name => '',
    #encoder => sub { $_[1] },
    #decoder => sub { $_[1] },
    mime_text => 0,
    cte_7bit_preferred => 'base64',
  };
} # /builtin_charset
87    
## Built-in name minimumizers, keyed by lowercase charset name.
## name_minimumize consults this table when the charset definition in
## %CHARSET supplies no name_minimumizer of its own.  Each value is a
## procedure called as (charset name, text) that returns a
## (charset => name, ...) list.
my %_MINIMUMIZER;

## EUC-based Japanese coded character sets
$_MINIMUMIZER{$_} = \&_name_euc_japan for qw(
  euc-jp euc-jisx0213 euc-jisx0213-plane1 x-euc-jisx0213-packed
);

## ISO/IEC 2022 based encodings (and ISO-8859-1)
$_MINIMUMIZER{$_} = \&_name_8bit_iso2022 for qw(
  x-iso-2022
  iso-2022-cn iso-2022-cn-ext
  iso-2022-jp iso-2022-jp-1 iso-2022-jp-2
  iso-2022-jp-3 iso-2022-jp-3-plane1
  iso-2022-kr
  iso-8859-1
  junet
);

## Encodings that are also probed for UTF-8 octet sequences
$_MINIMUMIZER{$_} = \&_name_net_ascii_8bit for qw(
  iso-2022-int-1 x-junet8 us-ascii utf-8
);

## Two-octet and four-octet ISO/IEC 10646 forms
$_MINIMUMIZER{$_} = \&_name_utf16be for qw(
  iso-10646-j-1 iso-10646-ucs-2 iso-10646-ucs-basic
  iso-10646-unicode-latin1 utf-16be
);
$_MINIMUMIZER{$_} = \&_name_utf32be for qw(
  iso-10646-ucs-4 utf-32be
);

## Shift JIS family
$_MINIMUMIZER{$_} = \&_name_shift_jis for qw(
  jis_x0201 shift_jis shift_jisx0213 shift_jisx0213-plane1 x-sjis
);
121    
## Charset names (lowercase) for which is_mime_text answers 1 when the
## charset definition in %CHARSET does not define mime_text itself.
my %_IsMimeText = map { ($_ => 1) } qw(
  adobe-standard-encoding adobe-symbol-encoding
  big5 big5-eten big5-hkscs
  cp950
  gbk gb18030
  euc-jp euc-jisx0213 euc-kr euc-tw
  hp-roman8
  hz-gb-2312
  ibm437
  iso-2022-cn iso-2022-cn-ext
  iso-2022-int-1
  iso-2022-jp iso-2022-jp-1 iso-2022-jp-2 iso-2022-jp-3
  iso-2022-kr
  iso-8859-1 iso-8859-2 iso-8859-3
  iso-8859-4 iso-8859-5 iso-8859-6
  iso-8859-7 iso-8859-8 iso-8859-9
  iso-8859-10 iso-8859-12 iso-8859-13
  iso-8859-14 iso-8859-15 iso-8859-16
  jis_encoding
  koi8-r koi8-u
  x-mac-arabic x-mac-centralroman x-mac-cyrillic x-mac-greek
  x-mac-hebrew x-mac-icelandic macintosh x-mac-turkish
  x-mac-ukrainian x-mac-chinesesimp x-mac-japanese x-mac-korean
  shift_jis shift_jisx0213 x-sjis
  tis-620
  unicode-1-1-utf-7 unicode-1-1-utf-8
  unicode-2-0-utf-7 unicode-2-0-utf-8
  utf-7 utf-8 utf-9
  viscii
  windows-1250 windows-1251 windows-1252 windows-1253
  windows-1254 windows-1255 windows-1256 windows-1257
  windows-1258 windows-31j windows-949
);
156    
## Name translation table read by msname2iananame: key is the name to
## look up, value is the name returned in its place.
## NOTE(review): judging by the identifiers, the keys are presumably
## names as written by Microsoft software -- confirm.
%MSNAME2IANANAME = ();
@MSNAME2IANANAME{'iso-2022-jp', 'ks_c_5601-1987'}
    = ('x-iso2022jp-cp932', 'windows-949');
161    
## make_charset ($name, key => value, ...)
##   Registers a charset definition in %CHARSET under $name.
##
##   - Does nothing when $name is false.
##   - preferred_name defaults to $name.
##   - When preferred_name differs from $name and is already
##     registered, or else when alias_of names a registered charset,
##     $name becomes another key for that existing definition and the
##     remaining key/value pairs are ignored.
##   - Otherwise the given pairs become the definition, with
##     mime_text defaulting to 0 and cte_7bit_preferred to 'base64'.
##
##   NOTE: $name is stored as given, while encode, decode and the
##   name_* functions of this module look names up in lowercase.
sub make_charset ($%) {
  my ($name, %definition) = @_;
  return unless $name;	## Note: charset "0" is not supported.

  $definition{preferred_name} ||= $name;

  ## Names of existing definitions this one may be an alias of,
  ## in order of precedence.
  my @alias_candidate;
  push @alias_candidate, $definition{preferred_name}
    if $definition{preferred_name} ne $name;
  push @alias_candidate, $definition{alias_of}
    if $definition{alias_of};
  for my $known (@alias_candidate) {
    next unless ref $CHARSET{$known};
    ## New charset is an alias of defined charset,
    $CHARSET{$name} = $CHARSET{$known};
    return;
  }

  ## Set default values
  #$definition{encoder} ||= sub { $_[1] };
  #$definition{decoder} ||= sub { $_[1] };
  $definition{mime_text} = 0 unless defined $definition{mime_text};

  $CHARSET{$name} = \%definition;

  $definition{cte_7bit_preferred} = 'base64'
    unless defined $definition{cte_7bit_preferred};
}
188 wakaba 1.1
## encode ($charset, $s)
##   Converts $s with the encoder registered for $charset (the name is
##   looked up in lowercase).  When that charset has no encoder, the
##   encoder of the '*undef' definition is used if there is one.
##
##   The encoder is called as (lowercased charset name, $s) and is
##   expected to return (text, key => value, ...).
##
##   Returns (text, key => value, ...).  The 'success' key is always
##   present: the encoder's own value if it set one, 1 if it did not,
##   and 0 (with $s returned unchanged) when no encoder was found.
##
##   The definition is fetched into a lexical before it is
##   dereferenced, so that asking about an unknown name does not
##   autovivify an empty $CHARSET{name} entry -- make_charset would
##   take such an entry for a defined charset when resolving aliases.
sub encode ($$) {
  my ($charset, $s) = (lc shift, shift);

  my $own = $CHARSET{$charset};
  my $encoder = ref ($own) ? $own->{encoder} : undef;
  unless (ref $encoder) {
    my $fallback = $CHARSET{'*undef'};
    $encoder = ref ($fallback) ? $fallback->{encoder} : undef;
  }

  if (ref $encoder) {
    my ($t, %r) = &$encoder ($charset, $s);
    $r{success} = 1 unless defined $r{success};
    return ($t, %r);
  }
  return ($s, success => 0);
}
201    
## decode ($charset, $s)
##   Converts $s with the decoder registered for $charset (the name is
##   looked up in lowercase).  When that charset has no decoder, the
##   decoder of the '*undef' definition is used if there is one.
##
##   The decoder is called as (lowercased charset name, $s) and is
##   expected to return (text, key => value, ...).
##
##   Returns (text, key => value, ...).  The 'success' key is always
##   present: the decoder's own value if it set one, 1 if it did not,
##   and 0 (with $s returned unchanged) when no decoder was found.
##
##   The definition is fetched into a lexical before it is
##   dereferenced, so that asking about an unknown name does not
##   autovivify an empty $CHARSET{name} entry -- make_charset would
##   take such an entry for a defined charset when resolving aliases.
sub decode ($$) {
  my ($charset, $s) = (lc shift, shift);

  my $own = $CHARSET{$charset};
  my $decoder = ref ($own) ? $own->{decoder} : undef;
  unless (ref $decoder) {
    my $fallback = $CHARSET{'*undef'};
    $decoder = ref ($fallback) ? $fallback->{decoder} : undef;
  }

  if (ref $decoder) {
    my ($t, %r) = &$decoder ($charset, $s);
    $r{success} = 1 unless defined $r{success};
    return ($t, %r);
  }
  return ($s, success => 0);
}
214    
## name_normalize ($name)
##   Returns the preferred name for charset $name (looked up in
##   lowercase):
##     1. the result of the charset's preferred_name procedure, if
##        preferred_name is a CODE reference (called with the
##        lowercased name),
##     2. else the charset's preferred_name, if true,
##     3. else the result of the preferred_name procedure of the
##        '*undef' definition, if that is a CODE reference,
##     4. else the lowercased name itself.
##
##   Definitions are only dereferenced when they exist, so an unknown
##   name does not autovivify an empty $CHARSET{name} entry.
sub name_normalize ($) {
  my $name = lc shift;

  my $own = ref ($CHARSET{$name})
      ? $CHARSET{$name}->{preferred_name} : undef;
  return $own->($name) if ref ($own) eq 'CODE';
  return $own if $own;

  my $fallback = ref ($CHARSET{'*undef'})
      ? $CHARSET{'*undef'}->{preferred_name} : undef;
  return $fallback->($name) if ref ($fallback) eq 'CODE';

  $name;
}
226    
## name_minimumize ($charset, $s)
##   Asks a "minimumizer" procedure for the charset name to label the
##   text $s with, given that $s is in $charset (looked up in
##   lowercase).  The procedure is called as (lowercased name, $s) and
##   its result -- a (charset => name, ...) list -- is returned.
##   Candidates, in order:
##     1. the charset's own name_minimumizer (a CODE reference),
##     2. the built-in table %_MINIMUMIZER,
##     3. the name_minimumizer of the '*undef' definition.
##   When none applies, (charset => lowercased name) is returned.
##
##   Definitions are only dereferenced when they exist, so an unknown
##   name does not autovivify an empty $CHARSET{name} entry.
sub name_minimumize ($$) {
  my ($charset, $s) = (lc shift, shift);

  my $own = ref ($CHARSET{$charset})
      ? $CHARSET{$charset}->{name_minimumizer} : undef;
  return $own->($charset, $s) if ref ($own) eq 'CODE';

  my $builtin = $_MINIMUMIZER{$charset};
  return $builtin->($charset, $s) if ref $builtin;

  my $fallback = ref ($CHARSET{'*undef'})
      ? $CHARSET{'*undef'}->{name_minimumizer} : undef;
  return $fallback->($charset, $s) if ref ($fallback) eq 'CODE';

  return (charset => $charset);
}
238    
## msname2iananame ($mscharset)
##   Returns the %MSNAME2IANANAME entry for $mscharset when there is a
##   true one, and $mscharset itself otherwise.  The name is looked up
##   exactly as given (no case folding).
sub msname2iananame ($) {
  my ($ms_name) = @_;
  my $mapped = $MSNAME2IANANAME{$ms_name};
  return $mapped ? $mapped : $ms_name;
}
243    
## _name_7bit_iso2022 ($name, $s)
##   Chooses a charset name for the text $s by the control characters
##   it contains.  The first argument is not used.
##
##   Text without ESC, SO or SI is reported as us-ascii.  Otherwise the
##   candidate names below are tried in order; each comes with a
##   pattern describing what the candidate does NOT admit (an escape
##   sequence outside its set, or SO/SI where the pattern lists them),
##   and the first candidate whose pattern does not match is returned.
##   x-iso-2022 is the answer when every candidate is ruled out.
##
##   Returns a (charset => name) list.
sub _name_7bit_iso2022 ($$) {
  my $s = $_[1];
  return (charset => 'us-ascii') unless $s =~ /[\x0E\x0F\x1B]/;

  my @candidate = (
    ['iso-2022-jp' =>
      qr/\x1B[^\x24\x28]
        |\x1B\x24[^\x40B]
        |\x1B\x28[^BJ]
        |\x0E|\x0F/x],
    ['iso-2022-jp-1' =>
      qr/\x1B[^\x24\x28]
        |\x1B\x24[^\x40B\x28]
        |\x1B\x24\x28[^D]
        |\x1B\x28[^BJ]
        |\x0E|\x0F/x],
    ['iso-2022-jp-3-plane1' =>
      qr/\x1B[^\x24\x28]
        |\x1B\x24[^\x28]	#[^B\x28]
        |\x1B\x24\x28[^O]
        |\x1B\x28[^B]
        |\x0E|\x0F/x],
    ['iso-2022-jp-3' =>
      qr/\x1B[^\x24\x28]
        |\x1B\x24[^\x28]	#[^B\x28]
        |\x1B\x24\x28[^OP]
        |\x1B\x28[^B]
        |\x0E|\x0F/x],
    ['iso-2022-kr' =>
      qr/\x1B[^\x24]
        |\x1B\x24[^\x29]
        |\x1B\x24\x29[^C]/x],
    ['iso-2022-jp-2' =>
      qr/\x1B[^\x24\x28\x2E\x4E]
        |\x1B\x24[^\x40AB\x28]
        |\x1B\x24\x28[^CD]
        |\x1B\x28[^BJ]
        |\x1B\x2E[^AF]
        |\x0E|\x0F/x],
    ['iso-2022-cn' =>
      qr/\x1B[^\x4E\x24]
        |\x1B\x24[^\x29\x2A]
        |\x1B\x24\x29[^AG]
        |\x1B\x24\x2A[^H]/x],
    ['iso-2022-cn-ext' =>
      qr/\x1B[^\x4E\x4F\x24]
        |\x1B\x24[^\x29\x2A]
        |\x1B\x24\x29[^AEG]
        |\x1B\x24\x2A[^HIJKLM]/x],
    ['iso-2022-int-1' =>
      qr/\x1B[^\x24\x28\x2D]
        |\x1B\x24[^\x40AB\x28\x29]
        |\x1B\x24\x28[^DGH]
        |\x1B\x24\x29[^C]
        |\x1B\x28[^BJ]
        |\x1B\x2D[^AF]/x],
    ['junet' =>
      qr/\x1B[^\x24\x28\x2C]
        |\x1B\x24[^\x28\x2C\x40-\x42]
        |\x1B\x24[\x28\x2C][^\x20-\x7E]
        |\x1B\x24[\x28\x2C][\x20-\x2F]+[^\x30-\x7E]
        |\x1B[\x28\x2C][^\x20-\x7E]
        |\x1B[\x28\x2C][\x20-\x2F]+[^\x30-\x7E]
        |\x0E|\x0F/x],
  );

  for my $rule (@candidate) {
    my ($charset, $not_admitted) = @$rule;
    return (charset => $charset) unless $s =~ $not_admitted;
  }
  return (charset => 'x-iso-2022');
}
311    
## _name_net_ascii_8bit ($name, $s)
##   Chooses a charset name for the text $s:
##     - no ESC, SO, SI or 8-bit octet          -> us-ascii
##     - controls but no 8-bit octet            -> whatever
##                                 _name_7bit_iso2022 ($name, $s) says
##     - 8-bit octets that look like UTF-8      -> utf-8, or x-junet8
##                                                 when ESC also occurs
##     - any other 8-bit octets                 -> iso-8859-1, or
##                                                 x-iso-2022 when ESC
##                                                 also occurs
##   Returns a (charset => name) list.
##
##   "Looks like UTF-8" means a lead octet (0xC0-0xFD) directly
##   followed by at least one continuation octet (0x80-0xBF).  The test
##   used to insist that the last continuation octet be in 0x80-0x8F,
##   so ordinary sequences such as C3 A9 were reported as iso-8859-1.
##   This is still a heuristic, not a validation of the whole text.
##
##   The prototype is ($$) to match the two arguments the body reads;
##   it used to be ($), which only went unnoticed because this
##   procedure is reached through code references in this file.
sub _name_net_ascii_8bit ($$) {
  my ($name, $s) = @_;
  return (charset => 'us-ascii') unless $s =~ /[\x1B\x0E\x0F\x80-\xFF]/;

  ## 7bit ISO 2022
  return _name_7bit_iso2022 ($name, $s) unless $s =~ /[\x80-\xFF]/;

  my $has_escape = ($s =~ /\x1B/);
  if ($s =~ /[\xC0-\xFD][\x80-\xBF]+/) {
    ## x-junet8 = junet + UTF-8
    return (charset => ($has_escape ? 'x-junet8' : 'utf-8'));
  }
  ## x-iso-2022 here means 8bit ISO 2022
  return (charset => ($has_escape ? 'x-iso-2022' : 'iso-8859-1'));
}
331    
## _name_8bit_iso2022 ($name, $s)
##   Chooses a charset name for the text $s:
##     - no ESC, SO, SI or 8-bit octet  -> us-ascii
##     - controls but no 8-bit octet    -> whatever
##                                 _name_7bit_iso2022 ($name, $s) says
##     - 8-bit octets together with ESC -> x-iso-2022 (8bit ISO 2022)
##     - 8-bit octets without ESC       -> iso-8859-1
##   Returns a (charset => name) list.
sub _name_8bit_iso2022 ($$) {
  my ($name, $s) = @_;
  return (charset => 'us-ascii') unless $s =~ /[\x1B\x0E\x0F\x80-\xFF]/;

  ## 7bit ISO 2022
  return _name_7bit_iso2022 ($name, $s) unless $s =~ /[\x80-\xFF]/;

  my $minimum = ($s =~ /\x1B/) ? 'x-iso-2022' : 'iso-8859-1';
  return (charset => $minimum);
}
345    
## Not completed.
## TODO: gb18030, cn-gb-12345
## TODO: _name_euc_gbf (cn-gb-12345, gb2312)
##
## _name_euc_gb ($name, $s)
##   Chooses a charset name for the text $s, testing octet patterns in
##   the order written below.  Returns a (charset => name) list, in
##   some cases with an additional 'charset-edition' => year pair.
##   Text without 8-bit octets and without SO/SI is handed to
##   _name_7bit_iso2022 ($name, $s).
sub _name_euc_gb ($$) {
  my ($name, $s) = @_;

  unless ($s =~ /[\x80-\xFF]/) {
    ## Actually, this is not "gb2312"
    return (charset => 'gb2312') if $s =~ /[\x0E\x0F]/;
    return _name_7bit_iso2022 ($name, $s);
  }

  ## An octet pair whose first or second octet is outside the
  ## 0xA1-0xFE range
  return (charset => 'gbk')
    if $s =~ /
      (?:\G|[\x00-\x3F\x7F\x80\xFF])
      (?:[\xA1-\xA9\xB0-\xFE][\xA1-\xFE]
        |[\x40-\x7E])*
      (?:
        [\x81-\xA0\xAA-\xAF][\x40-\xFE]
       |[\xA1-\xFE][\x40-\xA0]
      )
    /x;

  return (charset => 'gbk')
    if $s =~ /
      (?:\xA2[\xA1-\xAA]
        |\xA6[\xE0-\xF5]
        |\xA8[\xBB-\xC0]
      )
      (?=(?:[\xA1-\xFE][\xA1-\xFE])*(?:[\x00-\xA0\xFF]|\z))
    /x;

  return (charset => 'cn-gb-isoir165', 'charset-edition' => 1992)
    if $s =~ /
      (?:\xA3\xE7|\xA7[\xDD-\xF2]
        |\xA8[\xBB-\xC0]
        |[\xAA-\xAF\xF8-\xFE][\xA1-\xFE]
      )
      (?=(?:[\xA1-\xFE][\xA1-\xFE])*(?:[\x00-\xA0\xFF]|\z))
    /x;

  return (charset => 'gb2312')
    if $s =~ /\xEF\xF1	## Typo bug of GB 2312
      (?=(?:[\xA1-\xFE][\xA1-\xFE])*(?:[\x00-\xA0\xFF]|\z))
    /x;

  return (charset => 'gb2312', 'charset-edition' => 1980);
}
391    
## _name_euc_japan ($name, $s)
##   Chooses a charset name of the EUC-Japan family for the text $s,
##   by the octet patterns tested below.  Returns a (charset => name)
##   list.  Text without 8-bit octets, SO, SI, ESC N or ESC O is
##   handed to _name_7bit_iso2022 ($name, $s).
sub _name_euc_japan ($$) {
  my ($name, $s) = @_;

  unless ($s =~ /[\x80-\xFF]/) {
    ## Actually, this is not euc-japan
    return (charset => 'euc-jisx0213')
      if $s =~ /\x0E|\x0F|\x1B[\x4E\x4F]/;
    return _name_7bit_iso2022 ($name, $s);
  }

  if ($s =~ /\x8F[\xA1\xA3-\xA5\xA8\xAC-\xAF\xEE-\xFE][\xA1-\xFE]/) {
    ## JIS X 0213 plane 2 + JIS X 0212
    return (charset => 'x-euc-jisx0213-packed')
      if $s =~ /\x8F[\xA2\xA6\xA7\xA9-\xAB\xB0-\xED][\xA1-\xFE]/;
    return (charset => 'euc-jisx0213');
  }

  my $has_x0213_plane1 = ($s =~ m{(?<![\x8E\x8F])	## Not G2/G3 character
      (?:	## JIS X 0213:2000
        [\xA9-\xAF\xF5-\xFE][\xA1-\xFE]
       |\xA2[\xAF-\xB9\xC2-\xC9\xD1-\xDB\xE9-\xF1\xFA-\xFD]
       |\xA3[\xA1-\xAF\xBA-\xC0\xDB-\xE0\xFB-\xFE]
       |\xA4[\xF4-\xFE]|\xA5[\xF7-\xFE]
       |\xA6[\xB9-\xC0\xD9-\xFE]|\xA7[\xC2-\xD0\xF2-\xFE]
       |\xA8[\xC1-\xFE]|\xCF[\xD4-\xFE]|\xF4[\xA7-\xFE]
      )
      (?=(?:[\xA1-\xFE][\xA1-\xFE])*(?:[\x00-\xA0\xFF]|\z))}x);
  return (charset => 'euc-jp') unless $has_x0213_plane1;

  ## JIS X 0213 plane 1 + JIS X 0212
  return (charset => 'x-euc-jisx0213-packed') if $s =~ /\x8F/;
  return (charset => 'euc-jisx0213-plane1');
}
426    
## _name_shift_jis ($name, $s)
##   Chooses a charset name of the Shift JIS family for the text $s.
##   Returns a (charset => name) list.
##
##   With 8-bit octets:
##     - ESC, SO or SI present                        -> x-sjis
##     - a two-octet code with lead octet 0xF0-0xFC   -> shift_jisx0213
##     - a two-octet code matched by the second table
##       below                               -> shift_jisx0213-plane1
##     - otherwise                                    -> shift_jis
##   Without 8-bit octets:
##     - 0x5C or 0x7E present together with ESC, SO
##       or SI                                        -> x-sjis
##     - 0x5C or 0x7E present otherwise               -> jis_x0201
##     - otherwise            -> _name_7bit_iso2022 ($name, $s)
##
##   The 7-bit control test used to be /\x1B\x0E\x0F/, which matches
##   only the three octets ESC SO SI in a row, so text with escape
##   sequences was labelled jis_x0201.  It is now a character class,
##   like the control test of the 8-bit case.
sub _name_shift_jis ($$) {
  my ($name, $s) = @_;

  if ($s =~ /[\x80-\xFF]/) {
    return (charset => 'x-sjis') if $s =~ /[\x0E\x0F\x1B]/;

    return (charset => 'shift_jisx0213')
      if $s =~ /
        (?:\G|[\x00-\x3F\x7F])
        (?:[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC]
          |[\x40-\x7E\xA1-\xDF])*
        [\xF0-\xFC][\x40-\x7E\x80-\xFC]
      /x;

    return (charset => 'shift_jisx0213-plane1')
      if $s =~ /
        (?:\G|[\x00-\x3F\x7F])
        (?:[\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC]
          |[\x40-\x7E\xA1-\xDF])*
        (?:
          [\x85-\x87\xEB-\xEF][\x40-\x7E\x80-\xFC]
         |\x81[\xAD-\xB7\xC0-\xC7\xCF-\xD9\xE9-\xEF\xF8-\xFB]
         |\x82[\x40-\x4E\x59-\x5F\x7A-\x80\x9B-\x9E\xF2-\xFC]
         |\x83[\x97-\x9E\xB7-\xBE\xD7-\xFC]
         |\x84[\x61-\x6F\x72-\x9E\xBF-\xFC]
         |\x88[\x40-\x9E]|\x98[\x73-\x9E]|\xEA[\xA5-\xFC]
        )
      /x;

    return (charset => 'shift_jis');
  }

  if ($s =~ /[\x5C\x7E]/) {
    ## ISO 2022 with implied "ESC ( J"
    ## BUG: "ESC ( B foobar\aaa ESC ( J aiueo" also matches this
    return (charset => 'x-sjis') if $s =~ /[\x1B\x0E\x0F]/;
    return (charset => 'jis_x0201');
  }

  return _name_7bit_iso2022 ($name, $s);
}
467    
## _name_utf16be ($name, $s)
##   Chooses a charset name for the text $s, read as a sequence of
##   two-octet units.  The first argument is not used.  Every pattern
##   ends with a look-ahead requiring an even number of octets between
##   the match and the end of $s.
##     - a unit with first octet 0xD8-0xDB followed by a unit with
##       first octet 0xDC-0xDF             -> utf-16be
##     - else, no unit with a non-zero first octet:
##         a unit 0x00 0x80-0xFF present   -> iso-10646-unicode-latin1
##         otherwise                       -> iso-10646-ucs-basic
##     - else, a unit matched by the table below
##                                         -> iso-10646-ucs-2
##     - otherwise                         -> iso-10646-j-1
##   Returns a (charset => name) list.
##
##   NOTE(review): the comment inside the table cites "RFC 1816";
##   ISO-10646-J-1 is presumably the charset of RFC 1815 -- confirm.
sub _name_utf16be ($$) {
  my $s = $_[1];

  return (charset => 'utf-16be')
    if $s =~ /[\xD8-\xDB][\x00-\xFF][\xDC-\xDF][\x00-\xFF]
      (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx;

  unless ($s =~ /[\x01-\xFF][\x00-\xFF]
      (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx) {
    return (charset => 'iso-10646-unicode-latin1')
      if $s =~ /\x00[\x80-\xFF]
        (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx;
    return (charset => 'iso-10646-ucs-basic');
  }

  my $beyond_j1 = ($s =~ /([^\x00\x03\x04\x23\x25\x30\xFE\xFF]
      [\x00-\xFF]	# ^\x20\x22\x4E-\x9F\xF9\xFA
      |\x03[^\x00-\x6F\xD0-\xFF]
      #|\x20[^\x00-\x6F]
      |\x25[^\x00-\x7F]
      |\xFE[^\x30-\x4F]
      |\xFF[^\x00-\xEF]
      ## note 1 of RFC 1816 is ambiguous, so block entire
      ## is excepted
      |\x30[\x00-\x3F]
      )
      (?=(?:[\x00-\xFF][\x00-\xFF])*\z)/sx);
  return (charset => ($beyond_j1 ? 'iso-10646-ucs-2' : 'iso-10646-j-1'));
}
498    
## _name_utf32be ($name, $s)
##   Chooses a charset name for the text $s, read as a sequence of
##   four-octet units.  The first argument is not used.
##   Returns (charset => 'iso-10646-ucs-4') when $s contains a unit
##   whose first octet is 0x01-0x7F, or whose first octet is 0x00 and
##   second octet is 0x11-0xFF, with a multiple of four octets between
##   that unit and the end of $s; (charset => 'utf-32be') otherwise.
sub _name_utf32be ($$) {
  my $s = $_[1];
  my $beyond_utf32 = ($s =~ /
      ([\x01-\x7F][\x00-\xFF]{3}
      |\x00[\x11-\xFF][\x00-\xFF][\x00-\xFF])
      (?=(?:[\x00-\xFF]{4})*\z)/sx);
  return (charset => ($beyond_utf32 ? 'iso-10646-ucs-4' : 'utf-32be'));
}
510    
## _utf8_on ($string)
##   Calls Encode::_utf8_on on the caller's own variable (through the
##   @_ alias), but only when $Encode::VERSION is true, i.e. the Encode
##   module has been loaded by someone.  This module does not load
##   Encode itself.
sub _utf8_on ($) {
  $Encode::VERSION and Encode::_utf8_on ($_[0]);
}
## _utf8_off ($string)
##   Calls Encode::_utf8_off on the caller's own variable (through the
##   @_ alias), but only when $Encode::VERSION is true, i.e. the Encode
##   module has been loaded by someone.  This module does not load
##   Encode itself.
sub _utf8_off ($) {
  $Encode::VERSION and Encode::_utf8_off ($_[0]);
}
517    
## is_mime_text ($name)
##   Tells whether charset $name (looked up in lowercase) is suitable
##   as a MIME text/* charset.  Answers, in order of precedence:
##     1. the result of the charset's mime_text procedure, if
##        mime_text is a CODE reference (called with the lowercased
##        name),
##     2. else the charset's mime_text value, if defined,
##     3. else the entry of the built-in table %_IsMimeText, if any,
##     4. else the result of the mime_text procedure of the '*undef'
##        definition, if that is a CODE reference,
##     5. else 0.
##
##   Definitions are only dereferenced when they exist, so an unknown
##   name does not autovivify an empty $CHARSET{name} entry.
sub is_mime_text ($) {
  my $name = lc shift;

  my $own = ref ($CHARSET{$name})
      ? $CHARSET{$name}->{mime_text} : undef;
  return $own->($name) if ref ($own) eq 'CODE';
  return $own if defined $own;

  return $_IsMimeText{$name} if defined $_IsMimeText{$name};

  my $fallback = ref ($CHARSET{'*undef'})
      ? $CHARSET{'*undef'}->{mime_text} : undef;
  return $fallback->($name) if ref ($fallback) eq 'CODE';

  0;
}
531    
532 wakaba 1.1 =head1 LICENSE
533    
534     Copyright 2002 wakaba E<lt>w@suika.fam.cxE<gt>.
535    
536     This program is free software; you can redistribute it and/or modify
537     it under the terms of the GNU General Public License as published by
538     the Free Software Foundation; either version 2 of the License, or
539     (at your option) any later version.
540    
541     This program is distributed in the hope that it will be useful,
542     but WITHOUT ANY WARRANTY; without even the implied warranty of
543     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
544     GNU General Public License for more details.
545    
546     You should have received a copy of the GNU General Public License
547     along with this program; see the file COPYING. If not, write to
548     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
549     Boston, MA 02111-1307, USA.
550    
551     =head1 CHANGE
552    
553     See F<ChangeLog>.
554 wakaba 1.14 $Date: 2002/07/22 02:48:55 $
555 wakaba 1.1
556     =cut
557    
558     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24