/[suikacvs]/markup/html/whatpm/Whatpm/Charset/DecodeHandle.pm
Suika

Contents of /markup/html/whatpm/Whatpm/Charset/DecodeHandle.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Sun Jul 15 12:41:51 2007 UTC (18 years ago) by wakaba
Branch: MAIN
++ whatpm/t/ChangeLog	15 Jul 2007 12:41:07 -0000
2007-07-15  Wakaba  <wakaba@suika.fam.cx>

	* Charset-DecodeHandler.t: New test script.

++ whatpm/Whatpm/ChangeLog	15 Jul 2007 08:22:21 -0000
	* Charset/: New directory.

2007-07-15  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/Charset/ChangeLog	15 Jul 2007 08:22:53 -0000
2007-07-15  Wakaba  <wakaba@suika.fam.cx>

	* DecodeHandle.pm: New Perl module (created
	from manakai's |Encode.dis|).

2007-07-15  Wakaba  <wakaba@suika.fam.cx>

	* ChangeLog: New file.

1 wakaba 1.1 package Whatpm::Charset::DecodeHandle;
2     use strict;
3    
4     my $XML_AUTO_CHARSET = q<http://suika.fam.cx/www/2006/03/xml-entity/>;
5     my $IANA_CHARSET = q<urn:x-suika-fam-cx:charset:>;
6     my $PERL_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.>;
7     my $XML_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.>;
8    
9     ## ->create_decode_handle ($charset_uri, $byte_stream, $onerror)
10     sub create_decode_handle ($$$;$) {
11     my $csdef = $Whatpm::Charset::CharsetDef->{$_[1]};
12     my $obj = {
13     character_queue => [],
14     filehandle => $_[2],
15     charset => $_[1],
16     byte_buffer => '',
17     onerror => $_[3] || sub {},
18     };
19     if ($csdef->{uri}->{$XML_AUTO_CHARSET} or
20     $obj->{charset} eq $XML_AUTO_CHARSET) {
21     my $b = ''; # UTF-8 w/o BOM
22     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
23     $obj->{input_encoding} = 'utf-8';
24     if (read $obj->{filehandle}, $b, 256) {
25     no warnings "substr";
26     no warnings "uninitialized";
27     if (substr ($b, 0, 1) eq "<") {
28     if (substr ($b, 1, 1) eq "?") { # ASCII8
29     if ($b =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
30     encoding\s*=\s*["']([^"']*)/x) {
31     $obj->{input_encoding} = lc $1;
32     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
33     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
34     if (not $csdef->{ascii8} or $csdef->{bom_required}) {
35     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
36     charset_uri => $uri,
37     charset_name => $obj->{input_encoding});
38     }
39     } else {
40     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
41     $obj->{input_encoding} = 'utf-8';
42     }
43     if (defined $csdef->{no_bom_variant}) {
44     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant}};
45     }
46     } elsif (substr ($b, 1, 1) eq "\x00") {
47     if (substr ($b, 2, 2) eq "?\x00") { # ASCII16LE
48     my $c = $b; $c =~ tr/\x00//d;
49     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
50     encoding\s*=\s*["']([^"']*)/x) {
51     $obj->{input_encoding} = lc $1;
52     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
53     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
54     if (not $csdef->{ascii16} or $csdef->{ascii16be} or
55     $csdef->{bom_required}) {
56     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
57     charset_uri => $uri,
58     charset_name => $obj->{input_encoding});
59     }
60     } else {
61     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
62     $obj->{input_encoding} = 'utf-8';
63     }
64     if (defined $csdef->{no_bom_variant16le}) {
65     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};
66     }
67     } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
68     my $c = $b; $c =~ tr/\x00//d;
69     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
70     encoding\s*=\s*["']([^"']*)/x) {
71     $obj->{input_encoding} = lc $1;
72     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
73     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
74     if (not $csdef->{ascii32} or
75     $csdef->{ascii32endian1234} or
76     $csdef->{ascii32endian2143} or
77     $csdef->{ascii32endian3412} or
78     $csdef->{bom_required}) {
79     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
80     charset_uri => $uri,
81     charset_name => $obj->{input_encoding});
82     }
83     } else {
84     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
85     $obj->{input_encoding} = 'utf-8';
86     }
87     if (defined $csdef->{no_bom_variant32endian4321}) {
88     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};
89     }
90     }
91     }
92     } elsif (substr ($b, 0, 3) eq "\xEF\xBB\xBF") { # UTF8
93     $obj->{has_bom} = 1;
94     substr ($b, 0, 3) = '';
95     my $c = $b;
96     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
97     encoding\s*=\s*["']([^"']*)/x) {
98     $obj->{input_encoding} = lc $1;
99     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
100     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
101     if (not $csdef->{utf8_encoding_scheme} or
102     not $csdef->{bom_allowed}) {
103     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
104     charset_uri => $uri,
105     charset_name => $obj->{input_encoding});
106     }
107     } else {
108     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
109     $obj->{input_encoding} = 'utf-8';
110     }
111     if (defined $csdef->{no_bom_variant}) {
112     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant}};
113     }
114     } elsif (substr ($b, 0, 2) eq "\x00<") {
115     if (substr ($b, 2, 2) eq "\x00?") { # ASCII16BE
116     my $c = $b; $c =~ tr/\x00//d;
117     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
118     encoding\s*=\s*["']([^"']*)/x) {
119     $obj->{input_encoding} = lc $1;
120     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
121     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
122     if (not $csdef->{ascii16} or $csdef->{ascii16le} or
123     $csdef->{bom_required}) {
124     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
125     charset_uri => $uri,
126     charset_name => $obj->{input_encoding});
127     }
128     } else {
129     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
130     $obj->{input_encoding} = 'utf-8';
131     }
132     if (defined $csdef->{no_bom_variant16be}) {
133     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};
134     }
135     } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
136     my $c = $b; $c =~ tr/\x00//d;
137     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
138     encoding\s*=\s*["']([^"']*)/x) {
139     $obj->{input_encoding} = lc $1;
140     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
141     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
142     if (not $csdef->{ascii32} or
143     $csdef->{ascii32endian1234} or
144     $csdef->{ascii32endian2143} or
145     $csdef->{ascii32endian4321} or
146     $csdef->{bom_required}) {
147     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
148     charset_uri => $uri,
149     charset_name => $obj->{input_encoding});
150     }
151     } else {
152     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
153     $obj->{input_encoding} = 'utf-8';
154     }
155     if (defined $csdef->{no_bom_variant32endian3412}) {
156     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};
157     }
158     }
159     } elsif (substr ($b, 0, 2) eq "\xFE\xFF") {
160     if (substr ($b, 2, 2) eq "\x00<") { # ASCII16BE
161     $obj->{has_bom} = 1;
162     substr ($b, 0, 2) = '';
163     my $c = $b; $c =~ tr/\x00//d;
164     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
165     encoding\s*=\s*["']([^"']*)/x) {
166     $obj->{input_encoding} = lc $1;
167     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
168     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
169     if (not $csdef->{ascii16} or
170     $csdef->{ascii16le} or
171     not $csdef->{bom_allowed}) {
172     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
173     charset_uri => $uri,
174     charset_name => $obj->{input_encoding});
175     }
176     } else {
177     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
178     $obj->{input_encoding} = 'utf-16';
179     }
180     if (defined $csdef->{no_bom_variant16be}) {
181     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};
182     }
183     } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
184     $obj->{has_bom} = 1;
185     substr ($b, 0, 4) = '';
186     my $c = $b; $c =~ tr/\x00//d;
187     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
188     encoding\s*=\s*["']([^"']*)/x) {
189     $obj->{input_encoding} = lc $1;
190     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
191     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
192     if (not $csdef->{ascii32} or
193     $csdef->{ascii32endian1234} or
194     $csdef->{ascii32endian2143} or
195     $csdef->{ascii32endian4321} or
196     not $csdef->{bom_allowed}) {
197     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
198     charset_uri => $uri,
199     charset_name => $obj->{input_encoding});
200     }
201     } else {
202     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
203     $obj->{input_encoding} = 'utf-16';
204     $obj->{byte_buffer} .= "\x00\x00";
205     }
206     if (defined $csdef->{no_bom_variant32endian3412}) {
207     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};
208     }
209     } else {
210     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
211     $obj->{input_encoding} = 'utf-16';
212     substr ($b, 0, 2) = '';
213     $obj->{has_bom} = 1;
214     }
215     } elsif (substr ($b, 0, 2) eq "\xFF\xFE") {
216     if (substr ($b, 2, 2) eq "<\x00") { # ASCII16LE
217     $obj->{has_bom} = 1;
218     substr ($b, 0, 2) = '';
219     my $c = $b; $c =~ tr/\x00//d;
220     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
221     encoding\s*=\s*["']([^"']*)/x) {
222     $obj->{input_encoding} = lc $1;
223     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
224     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
225     if (not $csdef->{ascii16} or
226     $csdef->{ascii16be} or
227     not $csdef->{bom_allowed}) {
228     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
229     charset_uri => $uri,
230     charset_name => $obj->{input_encoding});
231     }
232     } else {
233     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
234     $obj->{input_encoding} = 'utf-16';
235     }
236     if (defined $csdef->{no_bom_variant16le}) {
237     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};
238     }
239     } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
240     $obj->{has_bom} = 1;
241     substr ($b, 0, 4) = '';
242     my $c = $b; $c =~ tr/\x00//d;
243     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
244     encoding\s*=\s*["']([^"']*)/x) {
245     $obj->{input_encoding} = lc $1;
246     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
247     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
248     if (not $csdef->{ascii32} or
249     $csdef->{ascii32endian1234} or
250     $csdef->{ascii32endian2143} or
251     $csdef->{ascii32endian3412} or
252     not $csdef->{bom_allowed}) {
253     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
254     charset_uri => $uri,
255     charset_name => $obj->{input_encoding});
256     }
257     } else {
258     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
259     $obj->{input_encoding} = 'utf-16';
260     $obj->{byte_buffer} .= "\x00\x00";
261     }
262     if (defined $csdef->{no_bom_variant32endian4321}) {
263     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};
264     }
265     } else {
266     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
267     $obj->{input_encoding} = 'utf-16';
268     substr ($b, 0, 2) = '';
269     $obj->{has_bom} = 1;
270     }
271     } elsif (substr ($b, 0, 2) eq "\x00\x00") {
272     if (substr ($b, 2, 2) eq "\x00<") { # ASCII32Endian1234
273     my $c = $b; $c =~ tr/\x00//d;
274     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
275     encoding\s*=\s*["']([^"']*)/x) {
276     $obj->{input_encoding} = lc $1;
277     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
278     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
279     if (not $csdef->{ascii32} or
280     $csdef->{ascii32endian2143} or
281     $csdef->{ascii32endian3412} or
282     $csdef->{ascii32endian4321} or
283     $csdef->{bom_required}) {
284     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
285     charset_uri => $uri,
286     charset_name => $obj->{input_encoding});
287     }
288     } else {
289     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
290     $obj->{input_encoding} = 'utf-8';
291     }
292     if (defined $csdef->{no_bom_variant32endian1234}) {
293     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};
294     }
295     } elsif (substr ($b, 2, 2) eq "<\x00") { # ASCII32Endian2143
296     my $c = $b; $c =~ tr/\x00//d;
297     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
298     encoding\s*=\s*["']([^"']*)/x) {
299     $obj->{input_encoding} = lc $1;
300     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
301     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
302     if (not $csdef->{ascii32} or
303     $csdef->{ascii32endian1234} or
304     $csdef->{ascii32endian3412} or
305     $csdef->{ascii32endian4321} or
306     $csdef->{bom_required}) {
307     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
308     charset_uri => $uri,
309     charset_name => $obj->{input_encoding});
310     }
311     } else {
312     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
313     $obj->{input_encoding} = 'utf-8';
314     }
315     if (defined $csdef->{no_bom_variant32endian2143}) {
316     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};
317     }
318     } elsif (substr ($b, 2, 2) eq "\xFE\xFF") { # ASCII32Endian1234
319     $obj->{has_bom} = 1;
320     substr ($b, 0, 4) = '';
321     my $c = $b; $c =~ tr/\x00//d;
322     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
323     encoding\s*=\s*["']([^"']*)/x) {
324     $obj->{input_encoding} = lc $1;
325     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
326     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
327     if (not $csdef->{ascii32} or
328     $csdef->{ascii32endian2143} or
329     $csdef->{ascii32endian3412} or
330     $csdef->{ascii32endian4321} or
331     $csdef->{bom_required}) {
332     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
333     charset_uri => $uri,
334     charset_name => $obj->{input_encoding});
335     }
336     } else {
337     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
338     $obj->{input_encoding} = 'utf-8';
339     $obj->{has_bom} = 0;
340     $obj->{byte_buffer} .= "\x00\x00\xFE\xFF";
341     }
342     if (defined $csdef->{no_bom_variant32endian1234}) {
343     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};
344     }
345     } elsif (substr ($b, 2, 2) eq "\xFF\xFE") { # ASCII32Endian2143
346     $obj->{has_bom} = 1;
347     substr ($b, 0, 4) = '';
348     my $c = $b; $c =~ tr/\x00//d;
349     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
350     encoding\s*=\s*["']([^"']*)/x) {
351     $obj->{input_encoding} = lc $1;
352     my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
353     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
354     if (not $csdef->{ascii32} or
355     $csdef->{ascii32endian1234} or
356     $csdef->{ascii32endian3412} or
357     $csdef->{ascii32endian4321} or
358     $csdef->{bom_required}) {
359     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
360     charset_uri => $uri,
361     charset_name => $obj->{input_encoding});
362     }
363     } else {
364     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
365     $obj->{input_encoding} = 'utf-8';
366     $obj->{has_bom} = 0;
367     $obj->{byte_buffer} .= "\x00\x00\xFF\xFE";
368     }
369     if (defined $csdef->{no_bom_variant32endian2143}) {
370     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};
371     }
372     }
373     # \x4C\x6F\xA7\x94 EBCDIC
374     } # buffer
375     $obj->{byte_buffer} .= $b;
376     } # read
377     } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-8'}) {
378     ## BOM is optional.
379     my $b = '';
380     if (read $obj->{filehandle}, $b, 3) {
381     if ($b eq "\xEF\xBB\xBF") {
382     $obj->{has_bom} = 1;
383     } else {
384     $obj->{byte_buffer} .= $b;
385     }
386     }
387     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; # UTF-8 w/o BOM
388     } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-16'}) {
389     ## BOM is mandated.
390     my $b = '';
391     if (read $obj->{filehandle}, $b, 2) {
392     if ($b eq "\xFE\xFF") {
393     $obj->{has_bom} = 1; # UTF-16BE w/o BOM
394     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
395     } elsif ($b eq "\xFF\xFE") {
396     $obj->{has_bom} = 1; # UTF-16LE w/o BOM
397     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
398     } else {
399     $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
400     $obj->{has_bom} = 0;
401     $obj->{byte_buffer} .= $b; # UTF-16BE w/o BOM
402     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
403     }
404     } else {
405     $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
406     $obj->{has_bom} = 0; # UTF-16BE w/o BOM
407     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
408     }
409     }
410    
411     if ($csdef->{uri}->{$XML_CHARSET.'iso-2022-jp'}) {
412     $obj->{state_2440} = 'gl-jis-1997-swapped';
413     $obj->{state_2442} = 'gl-jis-1997';
414     $obj->{state} = 'state_2842';
415     require Encode::GLJIS1997Swapped;
416     require Encode::GLJIS1997;
417     if (Encode::find_encoding ($obj->{state_2440}) and
418     Encode::find_encoding ($obj->{state_2442})) {
419     return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP';
420     }
421     } elsif ($csdef->{uri}->{$IANA_CHARSET.'iso-2022-jp'}) {
422     $obj->{state_2440} = 'gl-jis-1978';
423     $obj->{state_2442} = 'gl-jis-1983';
424     $obj->{state} = 'state_2842';
425     require Encode::GLJIS1978;
426     require Encode::GLJIS1983;
427     if (Encode::find_encoding ($obj->{state_2440}) and
428     Encode::find_encoding ($obj->{state_2442})) {
429     return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP';
430     }
431     } elsif (defined $csdef->{perl_name}->[0]) {
432     if ($csdef->{uri}->{$XML_CHARSET.'euc-jp'} or
433     $csdef->{uri}->{$IANA_CHARSET.'euc-jp'}) {
434     $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
435     require Encode::EUCJP1997;
436     if (Encode::find_encoding ($obj->{perl_encoding_name})) {
437     return bless $obj, 'Whatpm::Charset::DecodeHandle::EUCJP';
438     }
439     } elsif ($csdef->{uri}->{$XML_CHARSET.'shift_jis'} or
440     $csdef->{uri}->{$IANA_CHARSET.'shift_jis'}) {
441     $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
442     require Encode::ShiftJIS1997;
443     if (Encode::find_encoding ($obj->{perl_encoding_name})) {
444     return bless $obj, 'Whatpm::Charset::DecodeHandle::ShiftJIS';
445     }
446     } elsif ($csdef->{is_block_safe}) {
447     $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
448     require Encode;
449     if (Encode::find_encoding ($obj->{perl_encoding_name})) {
450     return bless $obj, 'Whatpm::Charset::DecodeHandle::Encode';
451     }
452     }
453     }
454    
455     $obj->{onerror}->(undef, 'charset-not-supported-error',
456     charset_uri => $obj->{charset});
457     return undef;
458     } # create_decode_handle
459    
460     sub name_to_uri ($$$) {
461     my $domain = $_[1];
462     my $name = lc $_[2];
463    
464     if ($domain eq 'ietf') {
465     return $IANA_CHARSET . $name;
466     } elsif ($domain eq 'xml') {
467     if ({
468     'utf-8' => 1,
469     'utf-16' => 1,
470     'iso-10646-ucs-2' => 1,
471     'iso-10646-ucs-4' => 1,
472     'iso-8859-1' => 1,
473     'iso-8859-2' => 1,
474     'iso-8859-3' => 1,
475     'iso-8859-4' => 1,
476     'iso-8859-5' => 1,
477     'iso-8859-6' => 1,
478     'iso-8859-7' => 1,
479     'iso-8859-8' => 1,
480     'iso-8859-9' => 1,
481     'iso-8859-10' => 1,
482     'iso-8859-11' => 1,
483     'iso-8859-13' => 1,
484     'iso-8859-14' => 1,
485     'iso-8859-15' => 1,
486     'iso-8859-16' => 1,
487     'iso-2022-jp' => 1,
488     'shift_jis' => 1,
489     'euc-jp' => 1,
490     }->{$name}) {
491     return $XML_CHARSET . $name;
492     }
493    
494     my $uri = $IANA_CHARSET . $name;
495     return $uri if $Whatpm::Charset::CharsetDef->{$uri};
496    
497     return $XML_CHARSET . $name;
498     } else {
499     return undef;
500     }
501     } # name_to_uri
502    
503     sub uri_to_name ($$$) {
504     my (undef, $domain, $uri) = @_;
505     if ({
506     ietf => 1,
507     xml => 1,
508     }->{$domain}) {
509     my $r = $Whatpm::Charset::CharsetDef->{$uri}->{+{
510     ietf => 'ietf_name',
511     xml => 'xml_name',
512     }->{$domain}}->[0];
513     return $r if defined $r;
514    
515     if ($domain eq 'ietf' and
516     substr ($uri, 0, length $IANA_CHARSET) eq $IANA_CHARSET) {
517     return substr ($uri, length $IANA_CHARSET);
518     } elsif ($domain eq 'xml' and
519     substr ($uri, 0, length $XML_CHARSET) eq $XML_CHARSET) {
520     return substr ($uri, length $XML_CHARSET);
521     }
522     }
523    
524     return undef;
525     } # uri_to_name
526    
527     package Whatpm::Charset::DecodeHandle::Encode;
528    
529     sub charset ($) { $_[0]->{charset} }
530    
531     sub close ($) { CORE::close $_[0]->{filehandle} }
532    
533     sub getc ($) {
534     my $self = $_[0];
535     return shift @{$self->{character_queue}} if @{$self->{character_queue}};
536    
537     my $error;
538     if ($self->{continue}) {
539     if (read $self->{filehandle}, $self->{byte_buffer}, 256,
540     length $self->{byte_buffer}) {
541     #
542     } else {
543     $error = 1;
544     }
545     $self->{continue} = 0;
546     } elsif (512 > length $self->{byte_buffer}) {
547     read $self->{filehandle}, $self->{byte_buffer}, 256,
548     length $self->{byte_buffer};
549     }
550    
551     my $r;
552     unless ($error) {
553     my $string = Encode::decode ($self->{perl_encoding_name},
554     $self->{byte_buffer},
555     Encode::FB_QUIET ());
556     if (length $string) {
557     push @{$self->{character_queue}}, split //, $string;
558     $r = shift @{$self->{character_queue}};
559     if (length $self->{byte_buffer}) {
560     $self->{continue} = 1;
561     }
562     } else {
563     if (length $self->{byte_buffer}) {
564     $error = 1;
565     } else {
566     $r = undef;
567     }
568     }
569     }
570    
571     if ($error) {
572     $r = substr $self->{byte_buffer}, 0, 1, '';
573     $self->{onerror}->($self, 'illegal-octets-error', octets => \$r);
574     }
575    
576     return $r;
577     } # getc
578    
579     sub has_bom ($) { $_[0]->{has_bom} }
580    
581     sub input_encoding ($) { $_[0]->{input_encoding} }
582    
583     sub onerror ($;$) {
584     if (@_ > 1) {
585     $_[0]->{onerror} = $_[1];
586     }
587    
588     return $_[0]->{onerror};
589     } # onerror
590    
591     sub ungetc ($$) {
592     unshift @{$_[0]->{character_queue}}, chr int ($_[1] or 0);
593     } # ungetc
594    
595     package Whatpm::Charset::DecodeHandle::EUCJP;
596     push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
597    
598     sub getc ($) {
599     my $self = $_[0];
600     return shift @{$self->{character_queue}} if @{$self->{character_queue}};
601    
602     my $error;
603     if ($self->{continue}) {
604     if (read $self->{filehandle}, $self->{byte_buffer}, 256,
605     length $self->{byte_buffer}) {
606     #
607     } else {
608     $error = 1;
609     }
610     $self->{continue} = 0;
611     } elsif (512 > length $self->{byte_buffer}) {
612     read $self->{filehandle}, $self->{byte_buffer}, 256,
613     length $self->{byte_buffer};
614     }
615    
616     my $r;
617     unless ($error) {
618     my $string = Encode::decode ($self->{perl_encoding_name},
619     $self->{byte_buffer},
620     Encode::FB_QUIET ());
621     if (length $string) {
622     push @{$self->{character_queue}}, split //, $string;
623     $r = shift @{$self->{character_queue}};
624     if (length $self->{byte_buffer}) {
625     $self->{continue} = 1;
626     }
627     } else {
628     if (length $self->{byte_buffer}) {
629     $error = 1;
630     } else {
631     $r = undef;
632     }
633     }
634     }
635    
636     if ($error) {
637     $r = substr $self->{byte_buffer}, 0, 1, '';
638     my $etype = 'illegal-octets-error';
639     if ($r =~ /^[\xA1-\xFE]/) {
640     if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
641     $r .= $1;
642     $etype = 'unassigned-code-point-error';
643     }
644     } elsif ($r eq "\x8F") {
645     if ($self->{byte_buffer} =~ s/^([\xA1-\xFE][\xA1-\xFE]?)//) {
646     $r .= $1;
647     $etype = 'unassigned-code-point-error' if length $1 == 2;
648     }
649     } elsif ($r eq "\x8E") {
650     if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
651     $r .= $1;
652     $etype = 'unassigned-code-point-error';
653     }
654     } elsif ($r eq "\xA0" or $r eq "\xFF") {
655     $etype = 'unassigned-code-point-error';
656     }
657     $self->{onerror}->($self, $etype, octets => \$r);
658     }
659    
660     return $r;
661     } # getc
662    
663     package Whatpm::Charset::DecodeHandle::ISO2022JP;
664     push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
665    
666     sub getc ($) {
667     my $self = $_[0];
668     return shift @{$self->{character_queue}} if @{$self->{character_queue}};
669    
670     my $r;
671     A: {
672     my $error;
673     if ($self->{continue}) {
674     if (read $self->{filehandle}, $self->{byte_buffer}, 256,
675     length $self->{byte_buffer}) {
676     #
677     } else {
678     $error = 1;
679     }
680     $self->{continue} = 0;
681     } elsif (512 > length $self->{byte_buffer}) {
682     read $self->{filehandle}, $self->{byte_buffer}, 256,
683     length $self->{byte_buffer};
684     }
685    
686     unless ($error) {
687     if ($self->{byte_buffer} =~ s/^\x1B(\x24[\x40\x42]|\x28[\x42\x4A])//) {
688     $self->{state} = {
689     "\x24\x40" => 'state_2440',
690     "\x24\x42" => 'state_2442',
691     "\x28\x42" => 'state_2842',
692     "\x28\x4A" => 'state_284A',
693     }->{$1};
694     redo A;
695     } elsif ($self->{state} eq 'state_2842') { # IRV
696     if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {
697     push @{$self->{character_queue}}, split //, $1;
698     $r = shift @{$self->{character_queue}};
699     } else {
700     if (length $self->{byte_buffer}) {
701     $error = 1;
702     } else {
703     $r = undef;
704     }
705     }
706     } elsif ($self->{state} eq 'state_284A') { # 0201
707     if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {
708     my $v = $1;
709     $v =~ tr/\x5C\x7E/\xA5\x{203E}/;
710     push @{$self->{character_queue}}, split //, $v;
711     $r = shift @{$self->{character_queue}};
712     } else {
713     if (length $self->{byte_buffer}) {
714     $error = 1;
715     } else {
716     $r = undef;
717     $self->{onerror}->($self, 'invalid-state-error',
718     state => $self->{state});
719     }
720     }
721     } elsif ($self->{state} eq 'state_2442') { # 1983
722     my $v = Encode::decode ($self->{state_2442},
723     $self->{byte_buffer},
724     Encode::FB_QUIET ());
725     if (length $v) {
726     push @{$self->{character_queue}}, split //, $v;
727     $r = shift @{$self->{character_queue}};
728     } else {
729     if (length $self->{byte_buffer}) {
730     $error = 1;
731     } else {
732     $r = undef;
733     $self->{onerror}->($self, 'invalid-state-error',
734     state => $self->{state});
735     }
736     }
737     } elsif ($self->{state} eq 'state_2440') { # 1978
738     my $v = Encode::decode ($self->{state_2440},
739     $self->{byte_buffer},
740     Encode::FB_QUIET ());
741     if (length $v) {
742     push @{$self->{character_queue}}, split //, $v;
743     $r = shift @{$self->{character_queue}};
744     } else {
745     if (length $self->{byte_buffer}) {
746     $error = 1;
747     } else {
748     $r = undef;
749     $self->{onerror}->($self, 'invalid-state-error',
750     state => $self->{state});
751     }
752     }
753     } else {
754     $error = 1;
755     }
756     }
757    
758     if ($error) {
759     $r = substr $self->{byte_buffer}, 0, 1, '';
760     my $etype = 'illegal-octets-error';
761     if (($self->{state} eq 'state_2442' or
762     $self->{state} eq 'state_2440') and
763     $r =~ /^[\x21-\x7E]/ and
764     $self->{byte_buffer} =~ s/^([\x21-\x7E])//) {
765     $r .= $1;
766     $etype = 'unassigned-code-point-error';
767     } elsif ($r eq "\x1B" and
768     $self->{byte_buffer} =~ s/^\(H//) { # Old 0201
769     $r .= "(H";
770     $self->{state} = 'state_284A';
771     }
772     $self->{onerror}->($self, $etype, octets => \$r);
773     }
774     } # A
775    
776     return $r;
777     } # getc
778    
779     package Whatpm::Charset::DecodeHandle::ShiftJIS;
780     push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
781    
782     sub getc ($) {
783     my $self = $_[0];
784     return shift @{$self->{character_queue}} if @{$self->{character_queue}};
785    
786     my $error;
787     if ($self->{continue}) {
788     if (read $self->{filehandle}, $self->{byte_buffer}, 256,
789     length $self->{byte_buffer}) {
790     #
791     } else {
792     $error = 1;
793     }
794     $self->{continue} = 0;
795     } elsif (512 > length $self->{byte_buffer}) {
796     read $self->{filehandle}, $self->{byte_buffer}, 256,
797     length $self->{byte_buffer};
798     }
799    
800     my $r;
801     unless ($error) {
802     my $string = Encode::decode ($self->{perl_encoding_name},
803     $self->{byte_buffer},
804     Encode::FB_QUIET ());
805     if (length $string) {
806     push @{$self->{character_queue}}, split //, $string;
807     $r = shift @{$self->{character_queue}};
808     if (length $self->{byte_buffer}) {
809     $self->{continue} = 1;
810     }
811     } else {
812     if (length $self->{byte_buffer}) {
813     $error = 1;
814     } else {
815     $r = undef;
816     }
817     }
818     }
819    
820     if ($error) {
821     $r = substr $self->{byte_buffer}, 0, 1, '';
822     my $etype = 'illegal-octets-error';
823     if ($r =~ /^[\x81-\x9F\xE0-\xEF]/) {
824     if ($self->{byte_buffer} =~ s/(.)//s) {
825     $r .= $1; # not limited to \x40-\xFC - \x7F
826     $etype = 'unassigned-code-point-error';
827     }
828     } elsif ($r =~ /^[\x80\xA0\xF0-\xFF]/) {
829     $etype = 'unassigned-code-point-error';
830     }
831     $self->{onerror}->($self, $etype, octets => \$r);
832     }
833    
834     return $r;
835     } # getc
836    
837     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us-ascii'} =
838     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us'} =
839     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso646-us'} =
840     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:cp367'} =
841     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ibm367'} =
842     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1986'} =
843     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1968'} =
844     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-ir-6'} =
845     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:csascii'} =
846     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_646.irv:1991'} =
847     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ascii'} = {ascii8 =>
848     '1',
849     is_block_safe =>
850     '1',
851     ietf_name =>
852     ['ansi_x3.4-1968',
853     'ansi_x3.4-1986',
854     'ascii',
855     'cp367',
856     'csascii',
857     'ibm367',
858     'iso-ir-6',
859     'iso646-us',
860     'iso_646.irv:1991',
861     'us',
862     'us-ascii',
863     'us-ascii'],
864     mime_name =>
865     'us-ascii',
866     perl_name =>
867     ['ascii',
868     'iso-646-us',
869     'us-ascii'],
870     utf8_encoding_scheme =>
871     '1',
872     'uri',
873     {'urn:x-suika-fam-cx:charset:ansi_x3.4-1968',
874     '1',
875     'urn:x-suika-fam-cx:charset:ansi_x3.4-1986',
876     '1',
877     'urn:x-suika-fam-cx:charset:ascii',
878     '1',
879     'urn:x-suika-fam-cx:charset:cp367',
880     '1',
881     'urn:x-suika-fam-cx:charset:csascii',
882     '1',
883     'urn:x-suika-fam-cx:charset:ibm367',
884     '1',
885     'urn:x-suika-fam-cx:charset:iso-ir-6',
886     '1',
887     'urn:x-suika-fam-cx:charset:iso646-us',
888     '1',
889     'urn:x-suika-fam-cx:charset:iso_646.irv:1991',
890     '1',
891     'urn:x-suika-fam-cx:charset:us',
892     '1',
893     'urn:x-suika-fam-cx:charset:us-ascii',
894     '1'},
895     xml_name =>
896     ['ansi_x3.4-1968',
897     'ansi_x3.4-1986',
898     'ascii',
899     'cp367',
900     'csascii',
901     'ibm367',
902     'iso-ir-6',
903     'iso646-us',
904     'iso_646.irv:1991',
905     'us',
906     'us-ascii',
907     'us-ascii']};
908     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl'} = {perl_name =>
909     ['ascii-ctrl'],
910     'uri',
911     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl',
912     '1'}};
913     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null'} = {perl_name =>
914     ['null'],
915     'uri',
916     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null',
917     '1'}};
918     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8'} = {ascii8 =>
919     '1',
920     bom_allowed =>
921     '1',
922     no_bom_variant =>
923     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
924     utf8_encoding_scheme =>
925     '1',
926     'uri',
927     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8',
928     '1'},
929     xml_name =>
930     ['utf-8']};
931     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279'} = {ascii8 =>
932     '1',
933     bom_allowed =>
934     '1',
935     no_bom_variant =>
936     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
937     utf8_encoding_scheme =>
938     '1',
939     'uri',
940     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279',
941     '1'}};
942     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8'} = {
943     ascii8 => 1,
944     is_block_safe =>
945     '1',
946     perl_name =>
947     ['utf-8'],
948     utf8_encoding_scheme =>
949     '1',
950     'uri',
951     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',
952     '1'}};
953     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-8'} = {
954     ascii8 => 1,
955     bom_allowed =>
956     '1',
957     no_bom_variant =>
958     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',
959     ietf_name =>
960     ['utf-8'],
961     mime_name =>
962     'utf-8',
963     utf8_encoding_scheme =>
964     '1',
965     'uri',
966     {'urn:x-suika-fam-cx:charset:utf-8',
967     '1'},
968     xml_name =>
969     ['utf-8']};
970     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8'} = {ascii8 =>
971     '1',
972     is_block_safe =>
973     '1',
974     perl_name =>
975     ['utf8'],
976     utf8_encoding_scheme =>
977     '1',
978     'uri',
979     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
980     '1'}};
981     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16'} = {
982     ascii16 => 1,
983     bom_allowed =>
984     '1',
985     bom_required =>
986     '1',
987     no_bom_variant =>
988     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
989     no_bom_variant16be =>
990     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
991     no_bom_variant16le =>
992     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
993     perl_name =>
994     ['utf-16'],
995     'uri',
996     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16',
997     '1'},
998     xml_name =>
999     ['utf-16']};
1000     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16'} = {
1001     ascii16 => 1,
1002     bom_allowed =>
1003     '1',
1004     no_bom_variant =>
1005     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1006     no_bom_variant16be =>
1007     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1008     no_bom_variant16le =>
1009     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1010     ietf_name =>
1011     ['utf-16'],
1012     mime_name =>
1013     'utf-16',
1014     'uri',
1015     {'urn:x-suika-fam-cx:charset:utf-16',
1016     '1'},
1017     xml_name =>
1018     ['utf-16']};
1019     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16be'} = {
1020     ascii16 => 1,
1021     ascii16be => 1,
1022     bom_allowed =>
1023     '1',
1024     no_bom_variant =>
1025     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1026     no_bom_variant16be =>
1027     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1028     ietf_name =>
1029     ['utf-16be'],
1030     mime_name =>
1031     'utf-16be',
1032     'uri',
1033     {'urn:x-suika-fam-cx:charset:utf-16be',
1034     '1'},
1035     xml_name =>
1036     ['utf-16be']};
1037     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16le'} = {
1038     ascii16 => 1,
1039     ascii16le => 1,
1040     bom_allowed =>
1041     '1',
1042     no_bom_variant =>
1043     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1044     no_bom_variant16le =>
1045     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1046     ietf_name =>
1047     ['utf-16le'],
1048     mime_name =>
1049     'utf-16le',
1050     'uri',
1051     {'urn:x-suika-fam-cx:charset:utf-16le',
1052     '1'},
1053     xml_name =>
1054     ['utf-16le']};
1055     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be'} = {
1056     ascii16 => 1,
1057     ascii16be => 1,
1058     is_block_safe =>
1059     '1',
1060     perl_name =>
1061     ['utf-16be'],
1062     'uri',
1063     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1064     '1'}};
1065     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le'} = {
1066     ascii16 => 1,
1067     ascii16le => 1,
1068     is_block_safe =>
1069     '1',
1070     perl_name =>
1071     ['utf-16le'],
1072     'uri',
1073     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1074     '1'}};
1075     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-2'} = {
1076     ascii16 => 1,
1077     bom_allowed =>
1078     '1',
1079     no_bom_variant =>
1080     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1081     no_bom_variant16be =>
1082     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',
1083     no_bom_variant16le =>
1084     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1085     ietf_name =>
1086     ['csunicode',
1087     'iso-10646-ucs-2'],
1088     mime_name =>
1089     'iso-10646-ucs-2',
1090     'uri',
1091     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2',
1092     '1',
1093     'urn:x-suika-fam-cx:charset:iso-10646-ucs-2',
1094     '1'},
1095     xml_name =>
1096     ['csunicode',
1097     'iso-10646-ucs-2',
1098     'iso-10646-ucs-2']};
1099     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be'} = {
1100     ascii16 => 1,
1101     ascii16be => 1,
1102     is_block_safe =>
1103     '1',
1104     perl_name =>
1105     ['ucs-2be'],
1106     'uri',
1107     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',
1108     '1'}};
1109     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le'} = {
1110     ascii16 => 1,
1111     ascii16le => 1,
1112     is_block_safe =>
1113     '1',
1114     perl_name =>
1115     ['ucs-2le'],
1116     'uri',
1117     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1118     '1'}};
1119     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-4'} = {
1120     ascii32 => 1,
1121     bom_allowed =>
1122     '1',
1123     no_bom_variant =>
1124     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1125     no_bom_variant32endian1234 =>
1126     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',
1127     no_bom_variant32endian4321 =>
1128     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1129     ietf_name =>
1130     ['csucs4',
1131     'iso-10646-ucs-4'],
1132     mime_name =>
1133     'iso-10646-ucs-4',
1134     'uri',
1135     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4',
1136     '1',
1137     'urn:x-suika-fam-cx:charset:iso-10646-ucs-4',
1138     '1'},
1139     xml_name =>
1140     ['csucs4',
1141     'iso-10646-ucs-4',
1142     'iso-10646-ucs-4']};
1143     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be'} = {
1144     ascii32 => 1,
1145     ascii32endian1234 => 1,
1146     is_block_safe =>
1147     '1',
1148     perl_name =>
1149     ['ucs-4be',
1150     'utf-32be'],
1151     'uri',
1152     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',
1153     '1'}};
1154     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le'} = {
1155     ascii32 => 1,
1156     ascii32endian4321 => 1,
1157     is_block_safe =>
1158     '1',
1159     perl_name =>
1160     ['ucs-4le',
1161     'utf-32le'],
1162     'uri',
1163     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1164     '1'}};
1165     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_8859-1:1987'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1'} = {ascii8 =>
1166     '1',
1167     is_block_safe =>
1168     '1',
1169     ietf_name =>
1170     ['cp819',
1171     'csisolatin1',
1172     'ibm819',
1173     'iso-8859-1',
1174     'iso-8859-1',
1175     'iso-ir-100',
1176     'iso_8859-1',
1177     'iso_8859-1:1987',
1178     'l1',
1179     'latin1'],
1180     mime_name =>
1181     'iso-8859-1',
1182     perl_name =>
1183     ['iso-8859-1',
1184     'latin1'],
1185     'uri',
1186     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1',
1187     '1',
1188     'urn:x-suika-fam-cx:charset:iso_8859-1:1987',
1189     '1'},
1190     xml_name =>
1191     ['cp819',
1192     'csisolatin1',
1193     'ibm819',
1194     'iso-8859-1',
1195     'iso-8859-1',
1196     'iso-8859-1',
1197     'iso-ir-100',
1198     'iso_8859-1',
1199     'iso_8859-1:1987',
1200     'l1',
1201     'latin1']};
1202     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2'} = {ascii8 =>
1203     '1',
1204     is_block_safe =>
1205     '1',
1206     'uri',
1207     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2',
1208     '1'},
1209     xml_name =>
1210     ['iso-8859-2']};
1211     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3'} = {ascii8 =>
1212     '1',
1213     is_block_safe =>
1214     '1',
1215     'uri',
1216     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3',
1217     '1'},
1218     xml_name =>
1219     ['iso-8859-3']};
1220     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4'} = {ascii8 =>
1221     '1',
1222     is_block_safe =>
1223     '1',
1224     'uri',
1225     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4',
1226     '1'},
1227     xml_name =>
1228     ['iso-8859-4']};
1229     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5'} = {ascii8 =>
1230     '1',
1231     is_block_safe =>
1232     '1',
1233     'uri',
1234     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5',
1235     '1'},
1236     xml_name =>
1237     ['iso-8859-5']};
1238     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6'} = {ascii8 =>
1239     '1',
1240     is_block_safe =>
1241     '1',
1242     'uri',
1243     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6',
1244     '1'},
1245     xml_name =>
1246     ['iso-8859-6']};
1247     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7'} = {ascii8 =>
1248     '1',
1249     is_block_safe =>
1250     '1',
1251     'uri',
1252     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7',
1253     '1'},
1254     xml_name =>
1255     ['iso-8859-7']};
1256     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8'} = {ascii8 =>
1257     '1',
1258     is_block_safe =>
1259     '1',
1260     'uri',
1261     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8',
1262     '1'},
1263     xml_name =>
1264     ['iso-8859-8']};
1265     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9'} = {ascii8 =>
1266     '1',
1267     is_block_safe =>
1268     '1',
1269     'uri',
1270     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9',
1271     '1'},
1272     xml_name =>
1273     ['iso-8859-9']};
1274     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10'} = {ascii8 =>
1275     '1',
1276     is_block_safe =>
1277     '1',
1278     'uri',
1279     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10',
1280     '1'},
1281     xml_name =>
1282     ['iso-8859-10']};
1283     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11'} = {ascii8 =>
1284     '1',
1285     is_block_safe =>
1286     '1',
1287     'uri',
1288     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11',
1289     '1'},
1290     xml_name =>
1291     ['iso-8859-11']};
1292     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13'} = {ascii8 =>
1293     '1',
1294     is_block_safe =>
1295     '1',
1296     'uri',
1297     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13',
1298     '1'},
1299     xml_name =>
1300     ['iso-8859-13']};
1301     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14'} = {ascii8 =>
1302     '1',
1303     is_block_safe =>
1304     '1',
1305     'uri',
1306     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14',
1307     '1'},
1308     xml_name =>
1309     ['iso-8859-14']};
1310     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15'} = {ascii8 =>
1311     '1',
1312     is_block_safe =>
1313     '1',
1314     'uri',
1315     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15',
1316     '1'},
1317     xml_name =>
1318     ['iso-8859-15']};
1319     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16'} = {ascii8 =>
1320     '1',
1321     is_block_safe =>
1322     '1',
1323     'uri',
1324     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16',
1325     '1'},
1326     xml_name =>
1327     ['iso-8859-16']};
1328     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp'} = {ascii8 =>
1329     '1',
1330     'uri',
1331     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp',
1332     '1'},
1333     xml_name =>
1334     ['iso-2022-jp']};
1335     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-2022-jp'} = {ascii8 =>
1336     '1',
1337     ietf_name =>
1338     ['csiso2022jp',
1339     'iso-2022-jp',
1340     'iso-2022-jp'],
1341     mime_name =>
1342     'iso-2022-jp',
1343     'uri',
1344     {'urn:x-suika-fam-cx:charset:iso-2022-jp',
1345     '1'},
1346     xml_name =>
1347     ['csiso2022jp',
1348     'iso-2022-jp',
1349     'iso-2022-jp']};
1350     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp'} = {ascii8 =>
1351     '1',
1352     perl_name =>
1353     ['iso-2022-jp'],
1354     'uri',
1355     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp',
1356     '1'}};
1357     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:shift_jis'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis'} = {ascii8 =>
1358     '1',
1359     is_block_safe =>
1360     '1',
1361     ietf_name =>
1362     ['csshiftjis',
1363     'ms_kanji',
1364     'shift_jis',
1365     'shift_jis'],
1366     mime_name =>
1367     'shift_jis',
1368     perl_name =>
1369     ['shift-jis-1997'],
1370     'uri',
1371     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis',
1372     '1',
1373     'urn:x-suika-fam-cx:charset:shift_jis',
1374     '1'},
1375     xml_name =>
1376     ['csshiftjis',
1377     'ms_kanji',
1378     'shift_jis',
1379     'shift_jis',
1380     'shift_jis']};
1381     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis'} = {ascii8 =>
1382     '1',
1383     is_block_safe =>
1384     '1',
1385     perl_name =>
1386     ['shiftjis',
1387     'sjis'],
1388     'uri',
1389     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis',
1390     '1'}};
1391     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:euc-jp'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese'} = {ascii8 =>
1392     '1',
1393     is_block_safe =>
1394     '1',
1395     ietf_name =>
1396     ['cseucpkdfmtjapanese',
1397     'euc-jp',
1398     'euc-jp',
1399     'extended_unix_code_packed_format_for_japanese'],
1400     mime_name =>
1401     'euc-jp',
1402     perl_name =>
1403     ['euc-jp-1997'],
1404     'uri',
1405     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp',
1406     '1',
1407     'urn:x-suika-fam-cx:charset:euc-jp',
1408     '1',
1409     'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese',
1410     '1'},
1411     xml_name =>
1412     ['cseucpkdfmtjapanese',
1413     'euc-jp',
1414     'euc-jp',
1415     'euc-jp',
1416     'extended_unix_code_packed_format_for_japanese']};
1417     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp'} = {ascii8 =>
1418     '1',
1419     is_block_safe =>
1420     '1',
1421     perl_name =>
1422     ['euc-jp',
1423     'ujis'],
1424     'uri',
1425     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp',
1426     '1'}};
1427    
1428     1;
1429     ## $Date:$

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24