/[suikacvs]/messaging/manakai/lib/Message/Charset/Info.pm
Suika

Contents of /messaging/manakai/lib/Message/Charset/Info.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations) (download)
Sat May 17 08:46:02 2008 UTC (16 years, 6 months ago) by wakaba
Branch: MAIN
Changes since 1.3: +219 -114 lines
*** empty log message ***

1 wakaba 1.1 package Message::Charset::Info;
2     use strict;
3 wakaba 1.4 our $VERSION=do{my @r=(q$Revision: 1.3 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
4 wakaba 1.1
## Name status flags (bit flags, combined with |).  They are used as the
## values of the |iana_names| hash and, for Perl encodings, of the
## |perl_names| hash.

## Perl encodings: name of a non-standard encoding/implementation.
sub UNREGISTERED_CHARSET_NAME () { 1 << 0 }

## Perl encodings: name of a standard encoding.
sub REGISTERED_CHARSET_NAME () { 1 << 1 }

## IANA names: the "Name:" field.
## Perl encodings: the canonical name.
sub PRIMARY_CHARSET_NAME () { 1 << 2 }

## IANA names: the "preferred MIME name".
sub PREFERRED_CHARSET_NAME () { 1 << 3 }

## Perl encodings: not a name of the encoding itself; the encoding for
## the name might be useful as a fallback when the correct encoding is
## not supported.
sub FALLBACK_ENCODING_IMPL () { 1 << 4 }

## Perl encodings: not a conforming implementation of the encoding,
## though it seems that the intention was to implement that encoding.
## Shares its bit with |FALLBACK_ENCODING_IMPL|.
sub NONCONFORMING_ENCODING_IMPL () { FALLBACK_ENCODING_IMPL }

## Perl encodings: supports error reporting via the |manakai_onerror|
## handler.
sub ERROR_REPORTING_ENCODING_IMPL () { 1 << 5 }

## Values for |iana_status| (bit flags).
sub STATUS_COMMON () { 1 << 0 }
sub STATUS_LIMITED_USE () { 1 << 1 }
sub STATUS_OBSOLETE () { 1 << 2 }

30    
31 wakaba 1.1 ## iana_names
32     ## is_html_ascii_superset: "superset of US-ASCII (specifically, ANSI_X3.4-1968)
33     ## for bytes in the range 0x09 - 0x0D, 0x20, 0x21, 0x22, 0x26, 0x27,
34     ## 0x2C - 0x3F, 0x41 - 0x5A, and 0x61 - 0x7A" [HTML5]
35     ## is_ebcdic_based
36    
37     ## ISSUE: Shift_JIS is a superset of US-ASCII? ISO-2022-JP is?
38     ## ISSUE: 0x5F (_) should be added to the range?
39    
## Charset objects keyed by one chosen name per charset.
my $Charset;

## Charset objects keyed by every known (lowercase) charset name.
our $IANACharset;

## Creates the charset object described by |$def| (blessed in place by
## |new|), stores it in |$Charset| under |$key| and in |$IANACharset|
## under every key of its |iana_names| hash, and returns it.  Taking the
## aliases from |iana_names| means each name is written only once, so the
## lookup table and the per-charset name list cannot drift apart.
my $register_charset = sub {
  my ($key, $def) = @_;
  my $info = __PACKAGE__->new ($def);
  $Charset->{$key} = $info;
  $IANACharset->{$_} = $info for keys %{$info->{iana_names}};
  return $info;
}; # $register_charset

$register_charset->('us-ascii', {
  iana_names => {
    'ansi_x3.4-1968' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-6' => REGISTERED_CHARSET_NAME,
    'ansi_x3.4-1986' => REGISTERED_CHARSET_NAME,
    'iso_646.irv:1991' => REGISTERED_CHARSET_NAME,
    'ascii' => REGISTERED_CHARSET_NAME,
    'iso646-us' => REGISTERED_CHARSET_NAME,
    'us-ascii' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'us' => REGISTERED_CHARSET_NAME,
    'ibm367' => REGISTERED_CHARSET_NAME,
    'cp367' => REGISTERED_CHARSET_NAME,
    'csascii' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('iso-8859-1', {
  iana_names => {
    'iso_8859-1:1987' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-100' => REGISTERED_CHARSET_NAME,
    'iso_8859-1' => REGISTERED_CHARSET_NAME,
    'iso-8859-1' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'latin1' => REGISTERED_CHARSET_NAME,
    'l1' => REGISTERED_CHARSET_NAME,
    'ibm819' => REGISTERED_CHARSET_NAME,
    'cp819' => REGISTERED_CHARSET_NAME,
    'csisolatin1' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('iso-8859-2', {
  iana_names => {
    'iso_8859-2:1987' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-101' => REGISTERED_CHARSET_NAME,
    'iso_8859-2' => REGISTERED_CHARSET_NAME,
    'iso-8859-2' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'latin2' => REGISTERED_CHARSET_NAME,
    'l2' => REGISTERED_CHARSET_NAME,
    'csisolatin2' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('iso-8859-3', {
  iana_names => {
    'iso_8859-3:1988' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-109' => REGISTERED_CHARSET_NAME,
    'iso_8859-3' => REGISTERED_CHARSET_NAME,
    'iso-8859-3' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'latin3' => REGISTERED_CHARSET_NAME,
    'l3' => REGISTERED_CHARSET_NAME,
    'csisolatin3' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('iso-8859-4', {
  iana_names => {
    'iso_8859-4:1988' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-110' => REGISTERED_CHARSET_NAME,
    'iso_8859-4' => REGISTERED_CHARSET_NAME,
    'iso-8859-4' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'latin4' => REGISTERED_CHARSET_NAME,
    'l4' => REGISTERED_CHARSET_NAME,
    'csisolatin4' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('iso-8859-5', {
  iana_names => {
    'iso_8859-5:1988' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-144' => REGISTERED_CHARSET_NAME,
    'iso_8859-5' => REGISTERED_CHARSET_NAME,
    'iso-8859-5' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'cyrillic' => REGISTERED_CHARSET_NAME,
    'csisolatincyrillic' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('iso-8859-6', {
  iana_names => {
    'iso_8859-6:1987' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-127' => REGISTERED_CHARSET_NAME,
    'iso_8859-6' => REGISTERED_CHARSET_NAME,
    'iso-8859-6' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'ecma-114' => REGISTERED_CHARSET_NAME,
    'asmo-708' => REGISTERED_CHARSET_NAME,
    'arabic' => REGISTERED_CHARSET_NAME,
    'csisolatinarabic' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
  ## NOTE: 3/0..3/9 have different semantics from U+0030..0039,
  ## but have same character names (maybe).
  ## NOTE: According to RFC 2046, charset left-hand half of "iso-8859-6"
  ## is same as "us-ascii".
});

$register_charset->('iso-8859-7', {
  iana_names => {
    'iso_8859-7:1987' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-126' => REGISTERED_CHARSET_NAME,
    'iso_8859-7' => REGISTERED_CHARSET_NAME,
    'iso-8859-7' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'elot_928' => REGISTERED_CHARSET_NAME,
    'ecma-118' => REGISTERED_CHARSET_NAME,
    'greek' => REGISTERED_CHARSET_NAME,
    'greek8' => REGISTERED_CHARSET_NAME,
    'csisolatingreek' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('iso-8859-8', {
  iana_names => {
    'iso_8859-8:1988' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-138' => REGISTERED_CHARSET_NAME,
    'iso_8859-8' => REGISTERED_CHARSET_NAME,
    'iso-8859-8' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'hebrew' => REGISTERED_CHARSET_NAME,
    'csisolatinhebrew' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('iso-8859-9', {
  iana_names => {
    'iso_8859-9:1989' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-148' => REGISTERED_CHARSET_NAME,
    'iso_8859-9' => REGISTERED_CHARSET_NAME,
    'iso-8859-9' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'latin5' => REGISTERED_CHARSET_NAME,
    'l5' => REGISTERED_CHARSET_NAME,
    'csisolatin5' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('iso-8859-10', {
  iana_names => {
    'iso-8859-10' => PRIMARY_CHARSET_NAME | PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-157' => REGISTERED_CHARSET_NAME,
    'l6' => REGISTERED_CHARSET_NAME,
    'iso_8859-10:1992' => REGISTERED_CHARSET_NAME,
    'csisolatin6' => REGISTERED_CHARSET_NAME,
    'latin6' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('iso_6937-2-add', {
  iana_names => {
    'iso_6937-2-add' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'iso-ir-142' => REGISTERED_CHARSET_NAME,
    'csisotextcomm' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('jis_x0201', {
  iana_names => {
    'jis_x0201' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'x0201' => REGISTERED_CHARSET_NAME,
    'cshalfwidthkatakana' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('jis_encoding', {
  iana_names => {
    'jis_encoding' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'csjisencoding' => REGISTERED_CHARSET_NAME,
  },
  ## NOTE: What is this?
});

$register_charset->('shift_jis', {
  iana_names => {
    'shift_jis' => PREFERRED_CHARSET_NAME | PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'ms_kanji' => REGISTERED_CHARSET_NAME,
    'csshiftjis' => REGISTERED_CHARSET_NAME,
  },
  mime_text_suitable => 1,
});

$register_charset->('x-sjis', {
  iana_names => {
    'x-sjis' => UNREGISTERED_CHARSET_NAME,
  },
  mime_text_suitable => 1,
});

## NOTE: "x-euc-jp" is not an alias of this object; it is registered
## as a charset of its own just below.
$register_charset->('euc-jp', {
  iana_names => {
    'extended_unix_code_packed_format_for_japanese' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'cseucpkdfmtjapanese' => REGISTERED_CHARSET_NAME,
    'euc-jp' => PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
  mime_text_suitable => 1,
});

$register_charset->('x-euc-jp', {
  iana_names => {
    'x-euc-jp' => UNREGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
  mime_text_suitable => 1,
});

$register_charset->('extended_unix_code_fixed_width_for_japanese', {
  iana_names => {
    'extended_unix_code_fixed_width_for_japanese' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'cseucfixwidjapanese' => REGISTERED_CHARSET_NAME,
  },
});

## TODO: ...

$register_charset->('euc-kr', {
  iana_names => {
    'euc-kr' => PRIMARY_CHARSET_NAME | PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'cseuckr' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});

$register_charset->('iso-2022-jp', {
  iana_names => {
    'iso-2022-jp' => PREFERRED_CHARSET_NAME | PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'csiso2022jp' => REGISTERED_CHARSET_NAME,
    'iso2022jp' => UNREGISTERED_CHARSET_NAME,
    'junet-code' => UNREGISTERED_CHARSET_NAME,
  },
  mime_text_suitable => 1,
});

$register_charset->('iso-2022-jp-2', {
  iana_names => {
    'iso-2022-jp-2' => PREFERRED_CHARSET_NAME | PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'csiso2022jp2' => REGISTERED_CHARSET_NAME,
  },
  mime_text_suitable => 1,
});

## TODO: ...

$register_charset->('utf-8', {
  iana_names => {
    'utf-8' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'x-utf-8' => UNREGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
  mime_text_suitable => 1,
});

$register_charset->('utf-8n', {
  iana_names => {
    'utf-8n' => UNREGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
  mime_text_suitable => 1,
});

## TODO: ...

$register_charset->('gbk', {
  iana_names => {
    'gbk' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'cp936' => REGISTERED_CHARSET_NAME,
    'ms936' => REGISTERED_CHARSET_NAME,
    'windows-936' => REGISTERED_CHARSET_NAME,
  },
  iana_status => STATUS_COMMON | STATUS_OBSOLETE,
  mime_text_suitable => 1,
});

$register_charset->('gb18030', {
  iana_names => {
    'gb18030' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
  },
  iana_status => STATUS_COMMON,
  mime_text_suitable => 1,
});

## TODO: ...

$register_charset->('utf-16be', {
  iana_names => {
    'utf-16be' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
  },
});

$register_charset->('utf-16le', {
  iana_names => {
    'utf-16le' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
  },
});

$register_charset->('utf-16', {
  iana_names => {
    'utf-16' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
  },
});

## TODO: ...

$register_charset->('windows-31j', {
  iana_names => {
    'windows-31j' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'cswindows31j' => REGISTERED_CHARSET_NAME,
  },
  iana_status => STATUS_LIMITED_USE, # maybe
  mime_text_suitable => 1,
});

$register_charset->('gb2312', {
  iana_names => {
    'gb2312' => PRIMARY_CHARSET_NAME | PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'csgb2312' => REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
  mime_text_suitable => 1,
});

$register_charset->('big5', {
  iana_names => {
    'big5' => PRIMARY_CHARSET_NAME | PREFERRED_CHARSET_NAME | REGISTERED_CHARSET_NAME,
    'csbig5' => REGISTERED_CHARSET_NAME,
  },
  mime_text_suitable => 1,
});

## TODO: ...

$register_charset->('big5-hkscs', {
  iana_names => {
    'big5-hkscs' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
  },
  mime_text_suitable => 1,
});

## TODO: ...

$register_charset->('windows-1252', {
  iana_names => {
    'windows-1252' => PRIMARY_CHARSET_NAME | REGISTERED_CHARSET_NAME,
  },
  is_html_ascii_superset => 1,
});
560 wakaba 1.1
561     ## TODO: ...
562    
## Constructor.  Blesses the given hash reference itself (no copy is
## made) into the invocant class and returns it.
sub new ($$) {
  my ($class, $self) = @_;
  bless $self, $class;
  return $self;
} # new
566    
## NOTE: A class method
##
## Returns the charset object stored in |$IANACharset| for the given
## name, matched ASCII case-insensitively.  For a name that has no entry
## yet, a new object whose only name is the lowercased name, flagged
## |UNREGISTERED_CHARSET_NAME|, is created, stored in |$IANACharset| and
## returned; later lookups of that name get the same object.
sub get_by_iana_name ($$) {
  (my $key = $_[1]) =~ tr/A-Z/a-z/; ## ASCII case-insensitive
  $IANACharset->{$key} ||= __PACKAGE__->new ({
    iana_names => {
      $key => UNREGISTERED_CHARSET_NAME,
    },
  });
  return $IANACharset->{$key};
} # get_by_iana_name
580    
## Selects a Perl |Encode| encoding object for this charset.
##
## Options (key/value pairs after the invocant):
##   allow_error_reporting - implementations flagged
##       |ERROR_REPORTING_ENCODING_IMPL| may be returned; they are then
##       tried before the plain implementations.
##   allow_fallback - implementations flagged |FALLBACK_ENCODING_IMPL|
##       may be returned and, as a last resort, whatever |Encode| finds
##       under one of the charset's IANA names.
##
## Returns the list |($encoding, $status)|.  |$status| is the flag set
## recorded in |perl_names|, or |FALLBACK_ENCODING_IMPL| for an encoding
## found through an IANA name.  Returns |(undef, 0)| if nothing usable
## is available.
##
## Candidate names are tried in sorted order so that the result does not
## depend on hash ordering when several implementations qualify.
sub get_perl_encoding ($;%) {
  my ($self, %opt) = @_;

  require Encode;

  my $perl_names = $self->{perl_names} || {};

  ## Returns |($encoding, $status)| for the first Perl name whose status
  ## is accepted by |$accept| and that |Encode| can load; an empty list
  ## if there is none.
  my $find = sub {
    my ($accept) = @_;
    for my $perl_name (sort keys %$perl_names) {
      my $perl_status = $perl_names->{$perl_name};
      next unless $accept->($perl_status);

      my $e = Encode::find_encoding ($perl_name);
      return ($e, $perl_status) if $e;
    }
    return;
  }; # $find

  if ($opt{allow_error_reporting}) {
    ## Error-reporting implementations that are NOT fallbacks.  (The
    ## fallback test used to be inverted here, which made conforming
    ## error-reporting implementations unreachable and let fallback
    ## ones through even without |allow_fallback|.)
    my ($e, $status) = $find->(sub {
      ($_[0] & ERROR_REPORTING_ENCODING_IMPL)
          and not ($_[0] & FALLBACK_ENCODING_IMPL);
    });
    return ($e, $status) if $e;
  }

  {
    ## Plain implementations: neither error-reporting nor fallback.
    my ($e, $status) = $find->(sub {
      not ($_[0] & ERROR_REPORTING_ENCODING_IMPL)
          and not ($_[0] & FALLBACK_ENCODING_IMPL);
    });
    return ($e, $status) if $e;
  }

  if ($opt{allow_fallback}) {
    ## Any fallback implementation, error-reporting or not.
    my ($e, $status) = $find->(sub { $_[0] & FALLBACK_ENCODING_IMPL });
    return ($e, $status) if $e;

    ## Last resort: let |Encode| resolve one of the IANA names itself.
    for my $iana_name (sort keys %{$self->{iana_names} or {}}) {
      my $found = Encode::find_encoding ($iana_name);
      return ($found, FALLBACK_ENCODING_IMPL) if $found;
    }
  }

  return (undef, 0);
} # get_perl_encoding
631    
## Returns one name from this charset's |iana_names|, chosen by status:
## a name flagged |PREFERRED_CHARSET_NAME| wins outright; otherwise a
## name flagged |PRIMARY_CHARSET_NAME|; otherwise a name flagged
## |REGISTERED_CHARSET_NAME|; otherwise any remaining name.  The result
## is false (undef) when there are no names at all.
sub get_iana_name ($) {
  my $self = shift;
  my $names = $self->{iana_names} || {};

  my ($primary, $other);
  for my $candidate (keys %$names) {
    my $flags = $names->{$candidate};

    return $candidate if $flags & PREFERRED_CHARSET_NAME;

    if ($flags & PRIMARY_CHARSET_NAME) {
      $primary = $candidate;
      next;
    }

    ## A registered name always replaces the current pick; an
    ## unregistered one is only used while nothing has been picked yet.
    $other = ($flags & REGISTERED_CHARSET_NAME)
        ? $candidate
        : ($other || $candidate);
  }

  return $primary || $other;
} # get_iana_name
652    
## NOTE: A non-method function
##
## True if the argument consists of 1 to 40 characters, all in the range
## U+0020..U+007E, and nothing else (no trailing newline either).
sub is_syntactically_valid_iana_charset_name ($) {
  my ($charset_name) = @_;
  return $charset_name =~ /\A[\x20-\x7E]{1,40}\z/;
} # is_syntactically_valid_iana_charset_name
658    
659     1;
660 wakaba 1.4 ## $Date: 2007/12/22 06:29:32 $
661 wakaba 1.1

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24