/[suikacvs]/markup/html/whatpm/Whatpm/Charset/DecodeHandle.pm
Suika

Contents of /markup/html/whatpm/Whatpm/Charset/DecodeHandle.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download)
Thu Sep 11 12:09:38 2008 UTC (16 years, 10 months ago) by wakaba
Branch: MAIN
Changes since 1.8: +112 -29 lines
++ whatpm/Whatpm/Charset/ChangeLog	11 Sep 2008 12:09:15 -0000
	* UnicodeChecker.pm, DecodeHandle.pm: Try to reduce the
	number of string copies and method calls, first round.

2008-09-11  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 package Whatpm::Charset::DecodeHandle;
2     use strict;
3    
4 wakaba 1.9 ## NOTE: |Message::Charset::Info| uses this module without calling
5     ## the constructor.
6    
7 wakaba 1.1 my $XML_AUTO_CHARSET = q<http://suika.fam.cx/www/2006/03/xml-entity/>;
8     my $IANA_CHARSET = q<urn:x-suika-fam-cx:charset:>;
9     my $PERL_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.>;
10     my $XML_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.>;
11    
12     ## ->create_decode_handle ($charset_uri, $byte_stream, $onerror)
13     sub create_decode_handle ($$$;$) {
14     my $csdef = $Whatpm::Charset::CharsetDef->{$_[1]};
15     my $obj = {
16 wakaba 1.9 char_buffer => \(my $s = ''),
17 wakaba 1.1 character_queue => [],
18     filehandle => $_[2],
19     charset => $_[1],
20     byte_buffer => '',
21     onerror => $_[3] || sub {},
22     };
23     if ($csdef->{uri}->{$XML_AUTO_CHARSET} or
24     $obj->{charset} eq $XML_AUTO_CHARSET) {
25     my $b = ''; # UTF-8 w/o BOM
26     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
27 wakaba 1.2 $obj->{input_encoding} = 'UTF-8';
28 wakaba 1.1 if (read $obj->{filehandle}, $b, 256) {
29     no warnings "substr";
30     no warnings "uninitialized";
31     if (substr ($b, 0, 1) eq "<") {
32     if (substr ($b, 1, 1) eq "?") { # ASCII8
33     if ($b =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
34     encoding\s*=\s*["']([^"']*)/x) {
35 wakaba 1.2 $obj->{input_encoding} = $1;
36 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
37     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
38     if (not $csdef->{ascii8} or $csdef->{bom_required}) {
39     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
40     charset_uri => $uri,
41     charset_name => $obj->{input_encoding});
42     }
43     } else {
44     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
45 wakaba 1.2 $obj->{input_encoding} = 'UTF-8';
46 wakaba 1.1 }
47     if (defined $csdef->{no_bom_variant}) {
48     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant}};
49     }
50     } elsif (substr ($b, 1, 1) eq "\x00") {
51     if (substr ($b, 2, 2) eq "?\x00") { # ASCII16LE
52     my $c = $b; $c =~ tr/\x00//d;
53     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
54     encoding\s*=\s*["']([^"']*)/x) {
55 wakaba 1.2 $obj->{input_encoding} = $1;
56 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
57     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
58     if (not $csdef->{ascii16} or $csdef->{ascii16be} or
59     $csdef->{bom_required}) {
60     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
61     charset_uri => $uri,
62     charset_name => $obj->{input_encoding});
63     }
64     } else {
65     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
66 wakaba 1.2 $obj->{input_encoding} = 'UTF-8';
67 wakaba 1.1 }
68     if (defined $csdef->{no_bom_variant16le}) {
69     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};
70     }
71     } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
72     my $c = $b; $c =~ tr/\x00//d;
73     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
74     encoding\s*=\s*["']([^"']*)/x) {
75 wakaba 1.2 $obj->{input_encoding} = $1;
76 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
77     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
78     if (not $csdef->{ascii32} or
79     $csdef->{ascii32endian1234} or
80     $csdef->{ascii32endian2143} or
81     $csdef->{ascii32endian3412} or
82     $csdef->{bom_required}) {
83     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
84     charset_uri => $uri,
85     charset_name => $obj->{input_encoding});
86     }
87     } else {
88     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
89 wakaba 1.2 $obj->{input_encoding} = 'UTF-8';
90 wakaba 1.1 }
91     if (defined $csdef->{no_bom_variant32endian4321}) {
92     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};
93     }
94     }
95     }
96     } elsif (substr ($b, 0, 3) eq "\xEF\xBB\xBF") { # UTF8
97     $obj->{has_bom} = 1;
98     substr ($b, 0, 3) = '';
99     my $c = $b;
100     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
101     encoding\s*=\s*["']([^"']*)/x) {
102 wakaba 1.2 $obj->{input_encoding} = $1;
103 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
104     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
105     if (not $csdef->{utf8_encoding_scheme} or
106     not $csdef->{bom_allowed}) {
107     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
108     charset_uri => $uri,
109     charset_name => $obj->{input_encoding});
110     }
111     } else {
112     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
113 wakaba 1.2 $obj->{input_encoding} = 'UTF-8';
114 wakaba 1.1 }
115     if (defined $csdef->{no_bom_variant}) {
116     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant}};
117     }
118     } elsif (substr ($b, 0, 2) eq "\x00<") {
119     if (substr ($b, 2, 2) eq "\x00?") { # ASCII16BE
120     my $c = $b; $c =~ tr/\x00//d;
121     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
122     encoding\s*=\s*["']([^"']*)/x) {
123 wakaba 1.2 $obj->{input_encoding} = $1;
124 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
125     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
126     if (not $csdef->{ascii16} or $csdef->{ascii16le} or
127     $csdef->{bom_required}) {
128     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
129     charset_uri => $uri,
130     charset_name => $obj->{input_encoding});
131     }
132     } else {
133     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
134 wakaba 1.2 $obj->{input_encoding} = 'UTF-8';
135 wakaba 1.1 }
136     if (defined $csdef->{no_bom_variant16be}) {
137     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};
138     }
139     } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
140     my $c = $b; $c =~ tr/\x00//d;
141     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
142     encoding\s*=\s*["']([^"']*)/x) {
143 wakaba 1.2 $obj->{input_encoding} = $1;
144 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
145     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
146     if (not $csdef->{ascii32} or
147     $csdef->{ascii32endian1234} or
148     $csdef->{ascii32endian2143} or
149     $csdef->{ascii32endian4321} or
150     $csdef->{bom_required}) {
151     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
152     charset_uri => $uri,
153     charset_name => $obj->{input_encoding});
154     }
155     } else {
156     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
157 wakaba 1.2 $obj->{input_encoding} = 'UTF-8';
158 wakaba 1.1 }
159     if (defined $csdef->{no_bom_variant32endian3412}) {
160     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};
161     }
162     }
163     } elsif (substr ($b, 0, 2) eq "\xFE\xFF") {
164     if (substr ($b, 2, 2) eq "\x00<") { # ASCII16BE
165     $obj->{has_bom} = 1;
166     substr ($b, 0, 2) = '';
167     my $c = $b; $c =~ tr/\x00//d;
168     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
169     encoding\s*=\s*["']([^"']*)/x) {
170 wakaba 1.2 $obj->{input_encoding} = $1;
171 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
172     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
173     if (not $csdef->{ascii16} or
174     $csdef->{ascii16le} or
175     not $csdef->{bom_allowed}) {
176     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
177     charset_uri => $uri,
178     charset_name => $obj->{input_encoding});
179     }
180     } else {
181     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
182 wakaba 1.2 $obj->{input_encoding} = 'UTF-16';
183 wakaba 1.1 }
184     if (defined $csdef->{no_bom_variant16be}) {
185     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};
186     }
187     } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
188     $obj->{has_bom} = 1;
189     substr ($b, 0, 4) = '';
190     my $c = $b; $c =~ tr/\x00//d;
191     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
192     encoding\s*=\s*["']([^"']*)/x) {
193 wakaba 1.2 $obj->{input_encoding} = $1;
194 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
195     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
196     if (not $csdef->{ascii32} or
197     $csdef->{ascii32endian1234} or
198     $csdef->{ascii32endian2143} or
199     $csdef->{ascii32endian4321} or
200     not $csdef->{bom_allowed}) {
201     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
202     charset_uri => $uri,
203     charset_name => $obj->{input_encoding});
204     }
205     } else {
206     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
207 wakaba 1.2 $obj->{input_encoding} = 'UTF-16';
208 wakaba 1.1 $obj->{byte_buffer} .= "\x00\x00";
209     }
210     if (defined $csdef->{no_bom_variant32endian3412}) {
211     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};
212     }
213     } else {
214     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
215 wakaba 1.2 $obj->{input_encoding} = 'UTF-16';
216 wakaba 1.1 substr ($b, 0, 2) = '';
217     $obj->{has_bom} = 1;
218     }
219     } elsif (substr ($b, 0, 2) eq "\xFF\xFE") {
220     if (substr ($b, 2, 2) eq "<\x00") { # ASCII16LE
221     $obj->{has_bom} = 1;
222     substr ($b, 0, 2) = '';
223     my $c = $b; $c =~ tr/\x00//d;
224     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
225     encoding\s*=\s*["']([^"']*)/x) {
226 wakaba 1.2 $obj->{input_encoding} = $1;
227 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
228     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
229     if (not $csdef->{ascii16} or
230     $csdef->{ascii16be} or
231     not $csdef->{bom_allowed}) {
232     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
233     charset_uri => $uri,
234     charset_name => $obj->{input_encoding});
235     }
236     } else {
237     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
238 wakaba 1.2 $obj->{input_encoding} = 'UTF-16';
239 wakaba 1.1 }
240     if (defined $csdef->{no_bom_variant16le}) {
241     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};
242     }
243     } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
244     $obj->{has_bom} = 1;
245     substr ($b, 0, 4) = '';
246     my $c = $b; $c =~ tr/\x00//d;
247     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
248     encoding\s*=\s*["']([^"']*)/x) {
249 wakaba 1.2 $obj->{input_encoding} = $1;
250 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
251     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
252     if (not $csdef->{ascii32} or
253     $csdef->{ascii32endian1234} or
254     $csdef->{ascii32endian2143} or
255     $csdef->{ascii32endian3412} or
256     not $csdef->{bom_allowed}) {
257     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
258     charset_uri => $uri,
259     charset_name => $obj->{input_encoding});
260     }
261     } else {
262     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
263 wakaba 1.2 $obj->{input_encoding} = 'UTF-16';
264 wakaba 1.1 $obj->{byte_buffer} .= "\x00\x00";
265     }
266     if (defined $csdef->{no_bom_variant32endian4321}) {
267     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};
268     }
269     } else {
270     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
271 wakaba 1.2 $obj->{input_encoding} = 'UTF-16';
272 wakaba 1.1 substr ($b, 0, 2) = '';
273     $obj->{has_bom} = 1;
274     }
275     } elsif (substr ($b, 0, 2) eq "\x00\x00") {
276     if (substr ($b, 2, 2) eq "\x00<") { # ASCII32Endian1234
277     my $c = $b; $c =~ tr/\x00//d;
278     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
279     encoding\s*=\s*["']([^"']*)/x) {
280 wakaba 1.2 $obj->{input_encoding} = $1;
281 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
282     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
283     if (not $csdef->{ascii32} or
284     $csdef->{ascii32endian2143} or
285     $csdef->{ascii32endian3412} or
286     $csdef->{ascii32endian4321} or
287     $csdef->{bom_required}) {
288     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
289     charset_uri => $uri,
290     charset_name => $obj->{input_encoding});
291     }
292     } else {
293     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
294 wakaba 1.2 $obj->{input_encoding} = 'UTF-8';
295 wakaba 1.1 }
296     if (defined $csdef->{no_bom_variant32endian1234}) {
297     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};
298     }
299     } elsif (substr ($b, 2, 2) eq "<\x00") { # ASCII32Endian2143
300     my $c = $b; $c =~ tr/\x00//d;
301     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
302     encoding\s*=\s*["']([^"']*)/x) {
303 wakaba 1.2 $obj->{input_encoding} = $1;
304 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
305     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
306     if (not $csdef->{ascii32} or
307     $csdef->{ascii32endian1234} or
308     $csdef->{ascii32endian3412} or
309     $csdef->{ascii32endian4321} or
310     $csdef->{bom_required}) {
311     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
312     charset_uri => $uri,
313     charset_name => $obj->{input_encoding});
314     }
315     } else {
316     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
317 wakaba 1.2 $obj->{input_encoding} = 'UTF-8';
318 wakaba 1.1 }
319     if (defined $csdef->{no_bom_variant32endian2143}) {
320     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};
321     }
322     } elsif (substr ($b, 2, 2) eq "\xFE\xFF") { # ASCII32Endian1234
323     $obj->{has_bom} = 1;
324     substr ($b, 0, 4) = '';
325     my $c = $b; $c =~ tr/\x00//d;
326     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
327     encoding\s*=\s*["']([^"']*)/x) {
328 wakaba 1.2 $obj->{input_encoding} = $1;
329 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
330     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
331     if (not $csdef->{ascii32} or
332     $csdef->{ascii32endian2143} or
333     $csdef->{ascii32endian3412} or
334     $csdef->{ascii32endian4321} or
335     $csdef->{bom_required}) {
336     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
337     charset_uri => $uri,
338     charset_name => $obj->{input_encoding});
339     }
340     } else {
341     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
342 wakaba 1.2 $obj->{input_encoding} = 'UTF-8';
343 wakaba 1.1 $obj->{has_bom} = 0;
344     $obj->{byte_buffer} .= "\x00\x00\xFE\xFF";
345     }
346     if (defined $csdef->{no_bom_variant32endian1234}) {
347     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};
348     }
349     } elsif (substr ($b, 2, 2) eq "\xFF\xFE") { # ASCII32Endian2143
350     $obj->{has_bom} = 1;
351     substr ($b, 0, 4) = '';
352     my $c = $b; $c =~ tr/\x00//d;
353     if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
354     encoding\s*=\s*["']([^"']*)/x) {
355 wakaba 1.2 $obj->{input_encoding} = $1;
356 wakaba 1.1 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
357     $csdef = $Whatpm::Charset::CharsetDef->{$uri};
358     if (not $csdef->{ascii32} or
359     $csdef->{ascii32endian1234} or
360     $csdef->{ascii32endian3412} or
361     $csdef->{ascii32endian4321} or
362     $csdef->{bom_required}) {
363     $obj->{onerror}->(undef, 'charset-name-mismatch-error',
364     charset_uri => $uri,
365     charset_name => $obj->{input_encoding});
366     }
367     } else {
368     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
369 wakaba 1.2 $obj->{input_encoding} = 'UTF-8';
370 wakaba 1.1 $obj->{has_bom} = 0;
371     $obj->{byte_buffer} .= "\x00\x00\xFF\xFE";
372     }
373     if (defined $csdef->{no_bom_variant32endian2143}) {
374     $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};
375     }
376     }
377     # \x4C\x6F\xA7\x94 EBCDIC
378     } # buffer
379     $obj->{byte_buffer} .= $b;
380     } # read
381     } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-8'}) {
382     ## BOM is optional.
383     my $b = '';
384     if (read $obj->{filehandle}, $b, 3) {
385     if ($b eq "\xEF\xBB\xBF") {
386     $obj->{has_bom} = 1;
387     } else {
388     $obj->{byte_buffer} .= $b;
389     }
390     }
391     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; # UTF-8 w/o BOM
392     } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-16'}) {
393     ## BOM is mandated.
394     my $b = '';
395     if (read $obj->{filehandle}, $b, 2) {
396     if ($b eq "\xFE\xFF") {
397     $obj->{has_bom} = 1; # UTF-16BE w/o BOM
398     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
399     } elsif ($b eq "\xFF\xFE") {
400     $obj->{has_bom} = 1; # UTF-16LE w/o BOM
401     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
402     } else {
403     $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
404     $obj->{has_bom} = 0;
405     $obj->{byte_buffer} .= $b; # UTF-16BE w/o BOM
406     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
407     }
408     } else {
409     $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
410     $obj->{has_bom} = 0; # UTF-16BE w/o BOM
411     $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
412     }
413     }
414    
415     if ($csdef->{uri}->{$XML_CHARSET.'iso-2022-jp'}) {
416     $obj->{state_2440} = 'gl-jis-1997-swapped';
417     $obj->{state_2442} = 'gl-jis-1997';
418     $obj->{state} = 'state_2842';
419     require Encode::GLJIS1997Swapped;
420     require Encode::GLJIS1997;
421     if (Encode::find_encoding ($obj->{state_2440}) and
422     Encode::find_encoding ($obj->{state_2442})) {
423     return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP';
424     }
425     } elsif ($csdef->{uri}->{$IANA_CHARSET.'iso-2022-jp'}) {
426     $obj->{state_2440} = 'gl-jis-1978';
427     $obj->{state_2442} = 'gl-jis-1983';
428     $obj->{state} = 'state_2842';
429     require Encode::GLJIS1978;
430     require Encode::GLJIS1983;
431     if (Encode::find_encoding ($obj->{state_2440}) and
432     Encode::find_encoding ($obj->{state_2442})) {
433     return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP';
434     }
435     } elsif (defined $csdef->{perl_name}->[0]) {
436     if ($csdef->{uri}->{$XML_CHARSET.'euc-jp'} or
437     $csdef->{uri}->{$IANA_CHARSET.'euc-jp'}) {
438     $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
439     require Encode::EUCJP1997;
440     if (Encode::find_encoding ($obj->{perl_encoding_name})) {
441     return bless $obj, 'Whatpm::Charset::DecodeHandle::EUCJP';
442     }
443     } elsif ($csdef->{uri}->{$XML_CHARSET.'shift_jis'} or
444     $csdef->{uri}->{$IANA_CHARSET.'shift_jis'}) {
445     $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
446     require Encode::ShiftJIS1997;
447     if (Encode::find_encoding ($obj->{perl_encoding_name})) {
448     return bless $obj, 'Whatpm::Charset::DecodeHandle::ShiftJIS';
449     }
450     } elsif ($csdef->{is_block_safe}) {
451     $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
452     require Encode;
453     if (Encode::find_encoding ($obj->{perl_encoding_name})) {
454     return bless $obj, 'Whatpm::Charset::DecodeHandle::Encode';
455     }
456     }
457     }
458    
459     $obj->{onerror}->(undef, 'charset-not-supported-error',
460     charset_uri => $obj->{charset});
461     return undef;
462     } # create_decode_handle
463    
464     sub name_to_uri ($$$) {
465     my $domain = $_[1];
466     my $name = lc $_[2];
467    
468     if ($domain eq 'ietf') {
469     return $IANA_CHARSET . $name;
470     } elsif ($domain eq 'xml') {
471     if ({
472     'utf-8' => 1,
473     'utf-16' => 1,
474     'iso-10646-ucs-2' => 1,
475     'iso-10646-ucs-4' => 1,
476     'iso-8859-1' => 1,
477     'iso-8859-2' => 1,
478     'iso-8859-3' => 1,
479     'iso-8859-4' => 1,
480     'iso-8859-5' => 1,
481     'iso-8859-6' => 1,
482     'iso-8859-7' => 1,
483     'iso-8859-8' => 1,
484     'iso-8859-9' => 1,
485     'iso-8859-10' => 1,
486     'iso-8859-11' => 1,
487     'iso-8859-13' => 1,
488     'iso-8859-14' => 1,
489     'iso-8859-15' => 1,
490     'iso-8859-16' => 1,
491     'iso-2022-jp' => 1,
492     'shift_jis' => 1,
493     'euc-jp' => 1,
494     }->{$name}) {
495     return $XML_CHARSET . $name;
496     }
497    
498     my $uri = $IANA_CHARSET . $name;
499     return $uri if $Whatpm::Charset::CharsetDef->{$uri};
500    
501     return $XML_CHARSET . $name;
502     } else {
503     return undef;
504     }
505     } # name_to_uri
506    
507     sub uri_to_name ($$$) {
508     my (undef, $domain, $uri) = @_;
509 wakaba 1.2
510     if ($domain eq 'xml') {
511     my $v = $Whatpm::Charset::CharsetDef->{$uri}->{xml_name};
512     return $v if defined $v;
513    
514     if (substr ($uri, 0, length $XML_CHARSET) eq $XML_CHARSET) {
515     return substr ($uri, length $XML_CHARSET);
516     }
517    
518     $domain = 'ietf'; ## TODO: XML encoding name has smaller range
519     }
520    
521     if ($domain eq 'ietf') {
522     my $v = $Whatpm::Charset::CharsetDef->{$uri}->{iana_name};
523     return $v->[0] if defined $v;
524    
525     if (substr ($uri, 0, length $IANA_CHARSET) eq $IANA_CHARSET) {
526 wakaba 1.1 return substr ($uri, length $IANA_CHARSET);
527     }
528     }
529    
530     return undef;
531     } # uri_to_name
532    
533 wakaba 1.3 require IO::Handle;
534    
535     package Whatpm::Charset::DecodeHandle::ByteBuffer;
536    
537 wakaba 1.7 ## NOTE: Provides a byte buffer wrapper object.
538    
539 wakaba 1.3 sub new ($$) {
540     my $self = bless {
541     buffer => '',
542     }, shift;
543     $self->{filehandle} = shift;
544     return $self;
545     } # new
546    
547     sub read {
548     my $self = shift;
549     my $pos = length $self->{buffer};
550     my $r = $self->{filehandle}->read ($self->{buffer}, $_[1], $pos);
551     substr ($_[0], $_[2]) = substr ($self->{buffer}, $pos);
552 wakaba 1.8 ## NOTE: This would do different behavior from Perl's standard
553     ## |read| when $pos points beyond the end of the string.
554 wakaba 1.3 return $r;
555     } # read
556    
557     sub close { $_[0]->{filehandle}->close }
558    
559 wakaba 1.1 package Whatpm::Charset::DecodeHandle::Encode;
560    
561 wakaba 1.7 ## NOTE: Provides a Perl |Encode| module wrapper object.
562    
563 wakaba 1.1 sub charset ($) { $_[0]->{charset} }
564    
565 wakaba 1.3 sub close ($) { $_[0]->{filehandle}->close }
566 wakaba 1.1
567     sub getc ($) {
568 wakaba 1.9 my $c = '';
569     my $l = $_[0]->read ($c, 1);
570     if ($l) {
571     return $c;
572     } else {
573     return undef;
574     }
575     } # getc
576    
577     sub read ($$$;$) {
578 wakaba 1.1 my $self = $_[0];
579 wakaba 1.9 # $scalar = $_[1];
580     my $length = $_[2];
581     my $offset = $_[3] || 0;
582     my $count = 0;
583     my $eof;
584    
585     A: {
586     return $count if $length < 1;
587    
588     if (my $l = length ${$self->{char_buffer}}) {
589     if ($l >= $length) {
590     substr ($_[1], $offset) = substr (${$self->{char_buffer}}, 0, $length);
591     $count += $length;
592     substr (${$self->{char_buffer}}, 0, $length) = '';
593     $length = 0;
594     return $count;
595     } else {
596     substr ($_[1], $offset) = ${$self->{char_buffer}};
597     $count += $l;
598     $length -= $l;
599     ${$self->{char_buffer}} = '';
600     }
601     $offset = length $_[1];
602     }
603    
604     if ($eof) {
605     return $count;
606     }
607 wakaba 1.1
608     my $error;
609     if ($self->{continue}) {
610 wakaba 1.3 if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
611     length $self->{byte_buffer})) {
612 wakaba 1.1 #
613     } else {
614     $error = 1;
615     }
616     $self->{continue} = 0;
617     } elsif (512 > length $self->{byte_buffer}) {
618 wakaba 1.9 if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
619     length $self->{byte_buffer})) {
620     #
621     } else {
622     $eof = 1;
623     }
624 wakaba 1.1 }
625    
626     unless ($error) {
627 wakaba 1.4 if (not $self->{bom_checked}) {
628     if (defined $self->{bom_pattern}) {
629     if ($self->{byte_buffer} =~ s/^$self->{bom_pattern}//) {
630     $self->{has_bom} = 1;
631     }
632     }
633     $self->{bom_checked} = 1;
634     }
635    
636 wakaba 1.1 my $string = Encode::decode ($self->{perl_encoding_name},
637     $self->{byte_buffer},
638     Encode::FB_QUIET ());
639     if (length $string) {
640 wakaba 1.9 $self->{char_buffer} = \$string;
641 wakaba 1.1 if (length $self->{byte_buffer}) {
642     $self->{continue} = 1;
643     }
644     } else {
645     if (length $self->{byte_buffer}) {
646     $error = 1;
647     } else {
648 wakaba 1.9 ## NOTE: No further input
649     redo A;
650 wakaba 1.1 }
651     }
652     }
653    
654     if ($error) {
655 wakaba 1.9 my $r = substr $self->{byte_buffer}, 0, 1, '';
656 wakaba 1.6 my $fallback = $self->{fallback}->{$r};
657     if (defined $fallback) {
658 wakaba 1.7 ## NOTE: This is an HTML5 parse error.
659 wakaba 1.6 $self->{onerror}->($self, 'fallback-char-error', octets => \$r,
660     char => \$fallback,
661 wakaba 1.7 level => $self->{level}->{$self->{error_level}->{'fallback-char-error'}});
662 wakaba 1.9 ${$self->{char_buffer}} .= $fallback;
663 wakaba 1.7 } elsif (exists $self->{fallback}->{$r}) {
664     ## NOTE: This is an HTML5 parse error. In addition, the octet
665     ## is not assigned with a character.
666     $self->{onerror}->($self, 'fallback-unassigned-error', octets => \$r,
667     level => $self->{level}->{$self->{error_level}->{'fallback-unassigned-error'}});
668 wakaba 1.9 ${$self->{char_buffer}} .= $r;
669 wakaba 1.6 } else {
670 wakaba 1.7 $self->{onerror}->($self, 'illegal-octets-error', octets => \$r,
671     level => $self->{level}->{$self->{error_level}->{'illegal-octets-error'}});
672 wakaba 1.9 ${$self->{char_buffer}} .= $r;
673 wakaba 1.6 }
674 wakaba 1.1 }
675    
676 wakaba 1.9 redo A;
677     } # A
678 wakaba 1.1 } # getc
679    
680     sub has_bom ($) { $_[0]->{has_bom} }
681    
682 wakaba 1.2 sub input_encoding ($) {
683     my $v = $_[0]->{input_encoding};
684     return $v if defined $v;
685    
686     my $uri = $_[0]->{charset};
687     if (defined $uri) {
688     return Whatpm::Charset::DecodeHandle->uri_to_name (xml => $uri);
689     }
690    
691     return undef;
692     } # input_encoding
693 wakaba 1.1
694     sub onerror ($;$) {
695     if (@_ > 1) {
696     $_[0]->{onerror} = $_[1];
697     }
698    
699     return $_[0]->{onerror};
700     } # onerror
701    
702     sub ungetc ($$) {
703     unshift @{$_[0]->{character_queue}}, chr int ($_[1] or 0);
704     } # ungetc
705    
706     package Whatpm::Charset::DecodeHandle::EUCJP;
707     push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
708    
709     sub getc ($) {
710     my $self = $_[0];
711     return shift @{$self->{character_queue}} if @{$self->{character_queue}};
712    
713     my $error;
714     if ($self->{continue}) {
715 wakaba 1.3 if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
716     length $self->{byte_buffer})) {
717 wakaba 1.1 #
718     } else {
719     $error = 1;
720     }
721     $self->{continue} = 0;
722     } elsif (512 > length $self->{byte_buffer}) {
723 wakaba 1.3 $self->{filehandle}->read ($self->{byte_buffer}, 256,
724     length $self->{byte_buffer});
725 wakaba 1.1 }
726    
727     my $r;
728     unless ($error) {
729     my $string = Encode::decode ($self->{perl_encoding_name},
730     $self->{byte_buffer},
731     Encode::FB_QUIET ());
732     if (length $string) {
733     push @{$self->{character_queue}}, split //, $string;
734     $r = shift @{$self->{character_queue}};
735     if (length $self->{byte_buffer}) {
736     $self->{continue} = 1;
737     }
738     } else {
739     if (length $self->{byte_buffer}) {
740     $error = 1;
741     } else {
742     $r = undef;
743     }
744     }
745     }
746    
747     if ($error) {
748     $r = substr $self->{byte_buffer}, 0, 1, '';
749     my $etype = 'illegal-octets-error';
750     if ($r =~ /^[\xA1-\xFE]/) {
751     if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
752     $r .= $1;
753     $etype = 'unassigned-code-point-error';
754     }
755     } elsif ($r eq "\x8F") {
756     if ($self->{byte_buffer} =~ s/^([\xA1-\xFE][\xA1-\xFE]?)//) {
757     $r .= $1;
758     $etype = 'unassigned-code-point-error' if length $1 == 2;
759     }
760     } elsif ($r eq "\x8E") {
761     if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
762     $r .= $1;
763     $etype = 'unassigned-code-point-error';
764     }
765     } elsif ($r eq "\xA0" or $r eq "\xFF") {
766     $etype = 'unassigned-code-point-error';
767     }
768 wakaba 1.7 $self->{onerror}->($self, $etype, octets => \$r,
769     level => $self->{level}->{$self->{error_level}->{$etype}});
770 wakaba 1.1 }
771    
772     return $r;
773     } # getc
774    
775 wakaba 1.9 ## TODO: This is not good for performance. Should be replaced
776     ## by read-centric implementation.
777     sub read ($$$;$) {
778     #my ($self, $scalar, $length, $offset) = @_;
779     my $length = $_[2];
780     my $r = '';
781     while ($length > 0) {
782     my $c = $_[0]->getc;
783     last unless defined $c;
784     $r .= $c;
785     $length--;
786     }
787     substr ($_[1], $_[3]) = $r;
788     ## NOTE: This would do different thing from what Perl's |read| do
789     ## if $offset points beyond the end of the $scalar.
790     return length $r;
791     } # read
792    
793 wakaba 1.1 package Whatpm::Charset::DecodeHandle::ISO2022JP;
794     push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
795    
796     sub getc ($) {
797     my $self = $_[0];
798     return shift @{$self->{character_queue}} if @{$self->{character_queue}};
799    
800     my $r;
801     A: {
802     my $error;
803     if ($self->{continue}) {
804 wakaba 1.3 if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
805     length $self->{byte_buffer})) {
806 wakaba 1.1 #
807     } else {
808     $error = 1;
809     }
810     $self->{continue} = 0;
811     } elsif (512 > length $self->{byte_buffer}) {
812 wakaba 1.3 $self->{filehandle}->read ($self->{byte_buffer}, 256,
813     length $self->{byte_buffer});
814 wakaba 1.1 }
815    
816     unless ($error) {
817     if ($self->{byte_buffer} =~ s/^\x1B(\x24[\x40\x42]|\x28[\x42\x4A])//) {
818     $self->{state} = {
819     "\x24\x40" => 'state_2440',
820     "\x24\x42" => 'state_2442',
821     "\x28\x42" => 'state_2842',
822     "\x28\x4A" => 'state_284A',
823     }->{$1};
824     redo A;
825     } elsif ($self->{state} eq 'state_2842') { # IRV
826     if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {
827     push @{$self->{character_queue}}, split //, $1;
828     $r = shift @{$self->{character_queue}};
829     } else {
830     if (length $self->{byte_buffer}) {
831     $error = 1;
832     } else {
833     $r = undef;
834     }
835     }
836     } elsif ($self->{state} eq 'state_284A') { # 0201
837     if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {
838     my $v = $1;
839     $v =~ tr/\x5C\x7E/\xA5\x{203E}/;
840     push @{$self->{character_queue}}, split //, $v;
841     $r = shift @{$self->{character_queue}};
842     } else {
843     if (length $self->{byte_buffer}) {
844     $error = 1;
845     } else {
846     $r = undef;
847     $self->{onerror}->($self, 'invalid-state-error',
848 wakaba 1.7 state => $self->{state},
849     level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});
850 wakaba 1.1 }
851     }
852     } elsif ($self->{state} eq 'state_2442') { # 1983
853     my $v = Encode::decode ($self->{state_2442},
854     $self->{byte_buffer},
855     Encode::FB_QUIET ());
856     if (length $v) {
857     push @{$self->{character_queue}}, split //, $v;
858     $r = shift @{$self->{character_queue}};
859     } else {
860     if (length $self->{byte_buffer}) {
861     $error = 1;
862     } else {
863     $r = undef;
864     $self->{onerror}->($self, 'invalid-state-error',
865 wakaba 1.7 state => $self->{state},
866     level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});
867 wakaba 1.1 }
868     }
869     } elsif ($self->{state} eq 'state_2440') { # 1978
870     my $v = Encode::decode ($self->{state_2440},
871     $self->{byte_buffer},
872     Encode::FB_QUIET ());
873     if (length $v) {
874     push @{$self->{character_queue}}, split //, $v;
875     $r = shift @{$self->{character_queue}};
876     } else {
877     if (length $self->{byte_buffer}) {
878     $error = 1;
879     } else {
880     $r = undef;
881     $self->{onerror}->($self, 'invalid-state-error',
882 wakaba 1.7 state => $self->{state},
883     level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});
884 wakaba 1.1 }
885     }
886     } else {
887     $error = 1;
888     }
889     }
890    
891     if ($error) {
892     $r = substr $self->{byte_buffer}, 0, 1, '';
893     my $etype = 'illegal-octets-error';
894     if (($self->{state} eq 'state_2442' or
895     $self->{state} eq 'state_2440') and
896     $r =~ /^[\x21-\x7E]/ and
897     $self->{byte_buffer} =~ s/^([\x21-\x7E])//) {
898     $r .= $1;
899     $etype = 'unassigned-code-point-error';
900     } elsif ($r eq "\x1B" and
901     $self->{byte_buffer} =~ s/^\(H//) { # Old 0201
902     $r .= "(H";
903     $self->{state} = 'state_284A';
904     }
905 wakaba 1.7 $self->{onerror}->($self, $etype, octets => \$r,
906     level => $self->{level}->{$self->{error_level}->{$etype}});
907 wakaba 1.1 }
908     } # A
909    
910     return $r;
911     } # getc
912    
913 wakaba 1.9 ## TODO: This is not good for performance. Should be replaced
914     ## by read-centric implementation.
915     sub read ($$$;$) {
916     #my ($self, $scalar, $length, $offset) = @_;
917     my $length = $_[2];
918     my $r = '';
919     while ($length > 0) {
920     my $c = $_[0]->getc;
921     last unless defined $c;
922     $r .= $c;
923     $length--;
924     }
925     substr ($_[1], $_[3]) = $r;
926     ## NOTE: This would do different thing from what Perl's |read| do
927     ## if $offset points beyond the end of the $scalar.
928     return length $r;
929     } # read
930    
931 wakaba 1.1 package Whatpm::Charset::DecodeHandle::ShiftJIS;
932     push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
933    
934     sub getc ($) {
935     my $self = $_[0];
936     return shift @{$self->{character_queue}} if @{$self->{character_queue}};
937    
938     my $error;
939     if ($self->{continue}) {
940 wakaba 1.3 if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
941     length $self->{byte_buffer})) {
942 wakaba 1.1 #
943     } else {
944     $error = 1;
945     }
946     $self->{continue} = 0;
947     } elsif (512 > length $self->{byte_buffer}) {
948 wakaba 1.3 $self->{filehandle}->read ($self->{byte_buffer}, 256,
949     length $self->{byte_buffer});
950 wakaba 1.1 }
951    
952     my $r;
953     unless ($error) {
954     my $string = Encode::decode ($self->{perl_encoding_name},
955     $self->{byte_buffer},
956     Encode::FB_QUIET ());
957     if (length $string) {
958     push @{$self->{character_queue}}, split //, $string;
959     $r = shift @{$self->{character_queue}};
960     if (length $self->{byte_buffer}) {
961     $self->{continue} = 1;
962     }
963     } else {
964     if (length $self->{byte_buffer}) {
965     $error = 1;
966     } else {
967     $r = undef;
968     }
969     }
970     }
971    
972     if ($error) {
973     $r = substr $self->{byte_buffer}, 0, 1, '';
974     my $etype = 'illegal-octets-error';
975 wakaba 1.5 if ($r =~ /^[\x81-\x9F\xE0-\xFC]/) {
976 wakaba 1.1 if ($self->{byte_buffer} =~ s/(.)//s) {
977     $r .= $1; # not limited to \x40-\xFC - \x7F
978     $etype = 'unassigned-code-point-error';
979     }
980 wakaba 1.5 ## NOTE: Range [\xF0-\xFC] is unassigned and may be used as a single-byte
981     ## character or as the first-byte of a double-byte character according
982     ## to JIS X 0208:1997 Appendix 1. However, the current practice is
983     ## use the range as the first-byte of double-byte characters.
984     } elsif ($r =~ /^[\x80\xA0\xFD-\xFF]/) {
985 wakaba 1.1 $etype = 'unassigned-code-point-error';
986     }
987 wakaba 1.7 $self->{onerror}->($self, $etype, octets => \$r,
988     level => $self->{level}->{$self->{error_level}->{$etype}});
989 wakaba 1.1 }
990    
991     return $r;
992     } # getc
993    
994 wakaba 1.9 ## TODO: This is not good for performance. Should be replaced
995     ## by read-centric implementation.
996     sub read ($$$;$) {
997     #my ($self, $scalar, $length, $offset) = @_;
998     my $length = $_[2];
999     my $r = '';
1000     while ($length > 0) {
1001     my $c = $_[0]->getc;
1002     last unless defined $c;
1003     $r .= $c;
1004     $length--;
1005     }
1006     substr ($_[1], $_[3]) = $r;
1007     ## NOTE: This would do different thing from what Perl's |read| do
1008     ## if $offset points beyond the end of the $scalar.
1009     return length $r;
1010     } # read
1011    
1012 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us-ascii'} =
1013     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us'} =
1014     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso646-us'} =
1015     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:cp367'} =
1016     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ibm367'} =
1017     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1986'} =
1018     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1968'} =
1019     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-ir-6'} =
1020     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:csascii'} =
1021     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_646.irv:1991'} =
1022     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ascii'} = {ascii8 =>
1023     '1',
1024     is_block_safe =>
1025     '1',
1026     ietf_name =>
1027     ['ansi_x3.4-1968',
1028     'ansi_x3.4-1986',
1029     'ascii',
1030     'cp367',
1031     'csascii',
1032     'ibm367',
1033     'iso-ir-6',
1034     'iso646-us',
1035     'iso_646.irv:1991',
1036     'us',
1037     'us-ascii',
1038     'us-ascii'],
1039     mime_name =>
1040     'us-ascii',
1041     perl_name =>
1042     ['ascii',
1043     'iso-646-us',
1044     'us-ascii'],
1045     utf8_encoding_scheme =>
1046     '1',
1047     'uri',
1048     {'urn:x-suika-fam-cx:charset:ansi_x3.4-1968',
1049     '1',
1050     'urn:x-suika-fam-cx:charset:ansi_x3.4-1986',
1051     '1',
1052     'urn:x-suika-fam-cx:charset:ascii',
1053     '1',
1054     'urn:x-suika-fam-cx:charset:cp367',
1055     '1',
1056     'urn:x-suika-fam-cx:charset:csascii',
1057     '1',
1058     'urn:x-suika-fam-cx:charset:ibm367',
1059     '1',
1060     'urn:x-suika-fam-cx:charset:iso-ir-6',
1061     '1',
1062     'urn:x-suika-fam-cx:charset:iso646-us',
1063     '1',
1064     'urn:x-suika-fam-cx:charset:iso_646.irv:1991',
1065     '1',
1066     'urn:x-suika-fam-cx:charset:us',
1067     '1',
1068     'urn:x-suika-fam-cx:charset:us-ascii',
1069     '1'},
1070 wakaba 1.2 };
1071    
1072 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl'} = {perl_name =>
1073     ['ascii-ctrl'],
1074     'uri',
1075     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl',
1076     '1'}};
1077     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null'} = {perl_name =>
1078     ['null'],
1079     'uri',
1080     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null',
1081     '1'}};
1082     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8'} = {ascii8 =>
1083     '1',
1084     bom_allowed =>
1085     '1',
1086     no_bom_variant =>
1087     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
1088     utf8_encoding_scheme =>
1089     '1',
1090     'uri',
1091     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8',
1092     '1'},
1093 wakaba 1.2 xml_name => 'UTF-8',
1094     };
1095    
1096 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279'} = {ascii8 =>
1097     '1',
1098     bom_allowed =>
1099     '1',
1100     no_bom_variant =>
1101     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
1102     utf8_encoding_scheme =>
1103     '1',
1104     'uri',
1105     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279',
1106     '1'}};
1107     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8'} = {
1108     ascii8 => 1,
1109     is_block_safe =>
1110     '1',
1111     perl_name =>
1112     ['utf-8'],
1113     utf8_encoding_scheme =>
1114     '1',
1115     'uri',
1116     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',
1117     '1'}};
1118     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-8'} = {
1119     ascii8 => 1,
1120     bom_allowed =>
1121     '1',
1122     no_bom_variant =>
1123     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',
1124     ietf_name =>
1125     ['utf-8'],
1126     mime_name =>
1127     'utf-8',
1128     utf8_encoding_scheme =>
1129     '1',
1130     'uri',
1131     {'urn:x-suika-fam-cx:charset:utf-8',
1132     '1'},
1133 wakaba 1.2 };
1134    
1135 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8'} = {ascii8 =>
1136     '1',
1137     is_block_safe =>
1138     '1',
1139     perl_name =>
1140     ['utf8'],
1141     utf8_encoding_scheme =>
1142     '1',
1143     'uri',
1144     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
1145     '1'}};
1146     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16'} = {
1147     ascii16 => 1,
1148     bom_allowed =>
1149     '1',
1150     bom_required =>
1151     '1',
1152     no_bom_variant =>
1153     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1154     no_bom_variant16be =>
1155     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1156     no_bom_variant16le =>
1157     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1158     perl_name =>
1159     ['utf-16'],
1160     'uri',
1161     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16',
1162     '1'},
1163 wakaba 1.2 xml_name => 'UTF-16',
1164     };
1165    
1166 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16'} = {
1167     ascii16 => 1,
1168     bom_allowed =>
1169     '1',
1170     no_bom_variant =>
1171     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1172     no_bom_variant16be =>
1173     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1174     no_bom_variant16le =>
1175     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1176     ietf_name =>
1177     ['utf-16'],
1178     mime_name =>
1179     'utf-16',
1180     'uri',
1181     {'urn:x-suika-fam-cx:charset:utf-16',
1182     '1'},
1183 wakaba 1.2 };
1184    
1185 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16be'} = {
1186     ascii16 => 1,
1187     ascii16be => 1,
1188     bom_allowed =>
1189     '1',
1190     no_bom_variant =>
1191     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1192     no_bom_variant16be =>
1193     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1194     ietf_name =>
1195     ['utf-16be'],
1196     mime_name =>
1197     'utf-16be',
1198     'uri',
1199     {'urn:x-suika-fam-cx:charset:utf-16be',
1200     '1'},
1201 wakaba 1.2 };
1202    
1203 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16le'} = {
1204     ascii16 => 1,
1205     ascii16le => 1,
1206     bom_allowed =>
1207     '1',
1208     no_bom_variant =>
1209     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1210     no_bom_variant16le =>
1211     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1212     ietf_name =>
1213     ['utf-16le'],
1214     mime_name =>
1215     'utf-16le',
1216     'uri',
1217     {'urn:x-suika-fam-cx:charset:utf-16le',
1218     '1'},
1219 wakaba 1.2 };
1220    
1221 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be'} = {
1222     ascii16 => 1,
1223     ascii16be => 1,
1224     is_block_safe =>
1225     '1',
1226     perl_name =>
1227     ['utf-16be'],
1228     'uri',
1229     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1230     '1'}};
1231     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le'} = {
1232     ascii16 => 1,
1233     ascii16le => 1,
1234     is_block_safe =>
1235     '1',
1236     perl_name =>
1237     ['utf-16le'],
1238     'uri',
1239     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1240     '1'}};
1241     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-2'} = {
1242     ascii16 => 1,
1243     bom_allowed =>
1244     '1',
1245     no_bom_variant =>
1246     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1247     no_bom_variant16be =>
1248     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',
1249     no_bom_variant16le =>
1250     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1251     ietf_name =>
1252     ['csunicode',
1253     'iso-10646-ucs-2'],
1254     mime_name =>
1255     'iso-10646-ucs-2',
1256     'uri',
1257     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2',
1258     '1',
1259     'urn:x-suika-fam-cx:charset:iso-10646-ucs-2',
1260     '1'},
1261 wakaba 1.2 xml_name => 'ISO-10646-UCS-2',
1262     };
1263    
1264 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be'} = {
1265     ascii16 => 1,
1266     ascii16be => 1,
1267     is_block_safe =>
1268     '1',
1269     perl_name =>
1270     ['ucs-2be'],
1271     'uri',
1272     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',
1273     '1'}};
1274     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le'} = {
1275     ascii16 => 1,
1276     ascii16le => 1,
1277     is_block_safe =>
1278     '1',
1279     perl_name =>
1280     ['ucs-2le'],
1281     'uri',
1282     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1283     '1'}};
1284     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-4'} = {
1285     ascii32 => 1,
1286     bom_allowed =>
1287     '1',
1288     no_bom_variant =>
1289     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1290     no_bom_variant32endian1234 =>
1291     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',
1292     no_bom_variant32endian4321 =>
1293     'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1294     ietf_name =>
1295     ['csucs4',
1296     'iso-10646-ucs-4'],
1297     mime_name =>
1298     'iso-10646-ucs-4',
1299     'uri',
1300     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4',
1301     '1',
1302     'urn:x-suika-fam-cx:charset:iso-10646-ucs-4',
1303     '1'},
1304 wakaba 1.2 xml_name => 'ISO-10646-UCS-4',
1305     };
1306    
1307 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be'} = {
1308     ascii32 => 1,
1309     ascii32endian1234 => 1,
1310     is_block_safe =>
1311     '1',
1312     perl_name =>
1313     ['ucs-4be',
1314     'utf-32be'],
1315     'uri',
1316     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',
1317     '1'}};
1318     $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le'} = {
1319     ascii32 => 1,
1320     ascii32endian4321 => 1,
1321     is_block_safe =>
1322     '1',
1323     perl_name =>
1324     ['ucs-4le',
1325     'utf-32le'],
1326     'uri',
1327     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1328     '1'}};
1329     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_8859-1:1987'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1'} = {ascii8 =>
1330     '1',
1331     is_block_safe =>
1332     '1',
1333     ietf_name =>
1334     ['cp819',
1335     'csisolatin1',
1336     'ibm819',
1337     'iso-8859-1',
1338     'iso-8859-1',
1339     'iso-ir-100',
1340     'iso_8859-1',
1341     'iso_8859-1:1987',
1342     'l1',
1343     'latin1'],
1344     mime_name =>
1345     'iso-8859-1',
1346     perl_name =>
1347     ['iso-8859-1',
1348     'latin1'],
1349     'uri',
1350     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1',
1351     '1',
1352     'urn:x-suika-fam-cx:charset:iso_8859-1:1987',
1353     '1'},
1354 wakaba 1.2 xml_name => 'ISO-8859-1',
1355     };
1356    
1357 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2'} = {ascii8 =>
1358     '1',
1359     is_block_safe =>
1360     '1',
1361     'uri',
1362     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2',
1363     '1'},
1364 wakaba 1.2 xml_name => 'ISO-8859-2',
1365     };
1366    
1367 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3'} = {ascii8 =>
1368     '1',
1369     is_block_safe =>
1370     '1',
1371     'uri',
1372     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3',
1373     '1'},
1374 wakaba 1.2 xml_name => 'ISO-8859-3',
1375     };
1376    
1377 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4'} = {ascii8 =>
1378     '1',
1379     is_block_safe =>
1380     '1',
1381     'uri',
1382     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4',
1383     '1'},
1384 wakaba 1.2 xml_name => 'ISO-8859-4',
1385     };
1386    
1387 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5'} = {ascii8 =>
1388     '1',
1389     is_block_safe =>
1390     '1',
1391     'uri',
1392     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5',
1393     '1'},
1394 wakaba 1.2 xml_name => 'ISO-8859-5',
1395     };
1396    
1397 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6'} = {ascii8 =>
1398     '1',
1399     is_block_safe =>
1400     '1',
1401     'uri',
1402     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6',
1403     '1'},
1404 wakaba 1.2 xml_name => 'ISO-8859-6',
1405     };
1406    
1407 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7'} = {ascii8 =>
1408     '1',
1409     is_block_safe =>
1410     '1',
1411     'uri',
1412     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7',
1413     '1'},
1414 wakaba 1.2 xml_name => 'ISO-8859-7',
1415     };
1416    
1417 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8'} = {ascii8 =>
1418     '1',
1419     is_block_safe =>
1420     '1',
1421     'uri',
1422     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8',
1423     '1'},
1424 wakaba 1.2 xml_name => 'ISO-8859-8',
1425     };
1426    
1427 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9'} = {ascii8 =>
1428     '1',
1429     is_block_safe =>
1430     '1',
1431     'uri',
1432     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9',
1433     '1'},
1434 wakaba 1.2 xml_name => 'ISO-8859-9',
1435     };
1436    
1437 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10'} = {ascii8 =>
1438     '1',
1439     is_block_safe =>
1440     '1',
1441     'uri',
1442     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10',
1443     '1'},
1444 wakaba 1.2 xml_name => 'ISO-8859-10',
1445     };
1446    
1447 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11'} = {ascii8 =>
1448     '1',
1449     is_block_safe =>
1450     '1',
1451     'uri',
1452     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11',
1453     '1'},
1454 wakaba 1.2 xml_name => 'ISO-8859-11',
1455     };
1456    
1457 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13'} = {ascii8 =>
1458     '1',
1459     is_block_safe =>
1460     '1',
1461     'uri',
1462     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13',
1463     '1'},
1464 wakaba 1.2 xml_name => 'ISO-8859-13',
1465     };
1466    
1467 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14'} = {ascii8 =>
1468     '1',
1469     is_block_safe =>
1470     '1',
1471     'uri',
1472     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14',
1473     '1'},
1474 wakaba 1.2 xml_name => 'ISO-8859-14',
1475     };
1476    
1477 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15'} = {ascii8 =>
1478     '1',
1479     is_block_safe =>
1480     '1',
1481     'uri',
1482     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15',
1483     '1'},
1484 wakaba 1.2 xml_name => 'ISO-8859-15',
1485     };
1486    
1487 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16'} = {ascii8 =>
1488     '1',
1489     is_block_safe =>
1490     '1',
1491     'uri',
1492     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16',
1493     '1'},
1494 wakaba 1.2 xml_name => 'ISO-8859-16',
1495     };
1496    
1497 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp'} = {ascii8 =>
1498     '1',
1499     'uri',
1500     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp',
1501     '1'},
1502 wakaba 1.2 xml_name => 'ISO-2022-JP',
1503     };
1504    
1505 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-2022-jp'} = {ascii8 =>
1506     '1',
1507     ietf_name =>
1508     ['csiso2022jp',
1509     'iso-2022-jp',
1510     'iso-2022-jp'],
1511     mime_name =>
1512     'iso-2022-jp',
1513     'uri',
1514     {'urn:x-suika-fam-cx:charset:iso-2022-jp',
1515     '1'},
1516 wakaba 1.2 };
1517    
1518 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp'} = {ascii8 =>
1519     '1',
1520     perl_name =>
1521     ['iso-2022-jp'],
1522     'uri',
1523     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp',
1524     '1'}};
1525     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:shift_jis'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis'} = {ascii8 =>
1526     '1',
1527     is_block_safe =>
1528     '1',
1529     ietf_name =>
1530     ['csshiftjis',
1531     'ms_kanji',
1532     'shift_jis',
1533     'shift_jis'],
1534     mime_name =>
1535     'shift_jis',
1536     perl_name =>
1537     ['shift-jis-1997'],
1538     'uri',
1539     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis',
1540     '1',
1541     'urn:x-suika-fam-cx:charset:shift_jis',
1542     '1'},
1543 wakaba 1.2 xml_name => 'Shift_JIS',
1544     };
1545    
1546 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis'} = {ascii8 =>
1547     '1',
1548     is_block_safe =>
1549     '1',
1550     perl_name =>
1551     ['shiftjis',
1552     'sjis'],
1553     'uri',
1554     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis',
1555     '1'}};
1556     $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:euc-jp'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese'} = {ascii8 =>
1557     '1',
1558     is_block_safe =>
1559     '1',
1560     ietf_name =>
1561     ['cseucpkdfmtjapanese',
1562     'euc-jp',
1563     'euc-jp',
1564     'extended_unix_code_packed_format_for_japanese'],
1565     mime_name =>
1566     'euc-jp',
1567     perl_name =>
1568     ['euc-jp-1997'],
1569     'uri',
1570     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp',
1571     '1',
1572     'urn:x-suika-fam-cx:charset:euc-jp',
1573     '1',
1574     'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese',
1575     '1'},
1576 wakaba 1.2 xml_name => 'EUC-JP',
1577     };
1578    
1579 wakaba 1.1 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp'} = {ascii8 =>
1580     '1',
1581     is_block_safe =>
1582     '1',
1583     perl_name =>
1584     ['euc-jp',
1585     'ujis'],
1586     'uri',
1587     {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp',
1588     '1'}};
1589    
1590     1;
1591 wakaba 1.9 ## $Date: 2008/09/11 09:55:56 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24