/[suikacvs]/markup/html/whatpm/Whatpm/Charset/DecodeHandle.pm
Suika

Contents of /markup/html/whatpm/Whatpm/Charset/DecodeHandle.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.10 - (hide annotations) (download)
Fri Sep 12 03:31:40 2008 UTC (16 years, 10 months ago) by wakaba
Branch: MAIN
Changes since 1.9: +4 -1 lines
++ whatpm/Whatpm/ContentChecker/ChangeLog	12 Sep 2008 03:31:23 -0000
2008-09-12  Wakaba  <wakaba@suika.fam.cx>

	* HTML.pm: The WF2 |oninput| attribute was not added.
	The WF2 |onforminput| and |onformchange| attributes were
	not added, since they are not defined (although they ARE
	mentioned) in the normative part of the WF2 spec.
	Typo on the |oninvalid| attribute fixed.

1 wakaba 1.1 package Whatpm::Charset::DecodeHandle;
2     use strict;
3    
4 wakaba 1.9 ## NOTE: |Message::Charset::Info| uses this module without calling
5     ## the constructor.
6    
## Charset URIs and URI prefixes used as keys of |$Whatpm::Charset::CharsetDef|.
##
## $XML_AUTO_CHARSET: charset URI that makes |create_decode_handle| sniff the
## encoding from the first octets of the stream (BOM and XML declaration).
7 wakaba 1.1 my $XML_AUTO_CHARSET = q<http://suika.fam.cx/www/2006/03/xml-entity/>;
## $IANA_CHARSET: prefix prepended to a lowercased name of the |ietf| domain.
8     my $IANA_CHARSET = q<urn:x-suika-fam-cx:charset:>;
## $PERL_CHARSET: prefix of the definitions selected for the actual decoding
## (|utf-8|, |utf-16be| and |utf-16le| are appended to it in this file).
9     my $PERL_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.>;
## $XML_CHARSET: prefix prepended to a lowercased name of the |xml| domain.
10     my $XML_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.>;
11    
## ->create_decode_handle ($charset_uri, $byte_stream, $onerror)
##
## Creates a character-oriented decode handle on top of a byte stream.
##
##   $_[0]  Invocant (not used).
##   $_[1]  Charset URI; looked up in |$Whatpm::Charset::CharsetDef|.
##   $_[2]  Byte stream.  It is read with Perl's builtin |read| here and
##          with the |read| method by the returned handle.
##   $_[3]  Optional error callback, invoked as
##          |$onerror->($handle_or_undef, $error_type, %details)|.
##          Defaults to a no-op.
##
## Returns an object blessed into one of the
## |Whatpm::Charset::DecodeHandle::*| classes, or |undef| (after
## reporting |charset-not-supported-error|) if no decoder is available.
##
## If the charset is the XML auto-detection charset, up to 256 octets are
## read and the encoding is guessed from the BOM and/or the |encoding|
## pseudo-attribute of the XML declaration.  The octets read during
## sniffing (minus any BOM) are kept in |byte_buffer|.
sub create_decode_handle ($$$;$) {
  my $csdef = $Whatpm::Charset::CharsetDef->{$_[1]};
  my $obj = {
    char_buffer => \(my $s = ''),
    character_queue => [],
    filehandle => $_[2],
    charset => $_[1],
    byte_buffer => '',
    onerror => $_[3] || sub {},
  };

  if ($csdef->{uri}->{$XML_AUTO_CHARSET} or
      $obj->{charset} eq $XML_AUTO_CHARSET) {
    my $head = ''; # UTF-8 w/o BOM
    $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
    $obj->{input_encoding} = 'UTF-8';

    ## XML declaration with an |encoding| pseudo-attribute; $1 is the name.
    my $xml_decl = qr/^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
                      encoding\s*=\s*["']([^"']*)/x;

    ## If the text starts with an XML declaration naming an encoding,
    ## adopts that encoding and returns its charset URI; otherwise
    ## returns |undef| and leaves the current choice untouched.
    my $use_declared = sub {
      return undef unless $_[0] =~ $xml_decl;
      $obj->{input_encoding} = $1;
      my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
      $csdef = $Whatpm::Charset::CharsetDef->{$uri};
      return $uri;
    };

    ## Reports that the declared name contradicts the octet-level evidence.
    my $mismatch = sub {
      $obj->{onerror}->(undef, 'charset-name-mismatch-error',
                        charset_uri => $_[0],
                        charset_name => $obj->{input_encoding});
    };

    ## Selects a default decoder when there is no usable declaration.
    my $fallback = sub {
      $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.$_[0]};
      $obj->{input_encoding} = $_[1];
    };

    ## Switches to the named BOM-less variant of the current definition,
    ## if the definition names one.
    my $variant = sub {
      if (defined $csdef->{$_[0]}) {
        $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{$_[0]}};
      }
    };

    if (read $obj->{filehandle}, $head, 256) {
      no warnings "substr";
      no warnings "uninitialized";
      if (substr ($head, 0, 1) eq "<") {
        if (substr ($head, 1, 1) eq "?") { # ASCII8
          if (defined (my $uri = $use_declared->($head))) {
            if (not $csdef->{ascii8} or $csdef->{bom_required}) {
              $mismatch->($uri);
            }
          } else {
            $fallback->('utf-8', 'UTF-8');
          }
          $variant->('no_bom_variant');
        } elsif (substr ($head, 1, 1) eq "\x00") {
          if (substr ($head, 2, 2) eq "?\x00") { # ASCII16LE
            my $c = $head; $c =~ tr/\x00//d;
            if (defined (my $uri = $use_declared->($c))) {
              if (not $csdef->{ascii16} or $csdef->{ascii16be} or
                  $csdef->{bom_required}) {
                $mismatch->($uri);
              }
            } else {
              $fallback->('utf-8', 'UTF-8');
            }
            $variant->('no_bom_variant16le');
          } elsif (substr ($head, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
            my $c = $head; $c =~ tr/\x00//d;
            if (defined (my $uri = $use_declared->($c))) {
              if (not $csdef->{ascii32} or
                  $csdef->{ascii32endian1234} or
                  $csdef->{ascii32endian2143} or
                  $csdef->{ascii32endian3412} or
                  $csdef->{bom_required}) {
                $mismatch->($uri);
              }
            } else {
              $fallback->('utf-8', 'UTF-8');
            }
            $variant->('no_bom_variant32endian4321');
          }
        }
      } elsif (substr ($head, 0, 3) eq "\xEF\xBB\xBF") { # UTF8
        $obj->{has_bom} = 1;
        substr ($head, 0, 3) = '';
        if (defined (my $uri = $use_declared->($head))) {
          if (not $csdef->{utf8_encoding_scheme} or
              not $csdef->{bom_allowed}) {
            $mismatch->($uri);
          }
        } else {
          $fallback->('utf-8', 'UTF-8');
        }
        $variant->('no_bom_variant');
      } elsif (substr ($head, 0, 2) eq "\x00<") {
        if (substr ($head, 2, 2) eq "\x00?") { # ASCII16BE
          my $c = $head; $c =~ tr/\x00//d;
          if (defined (my $uri = $use_declared->($c))) {
            if (not $csdef->{ascii16} or $csdef->{ascii16le} or
                $csdef->{bom_required}) {
              $mismatch->($uri);
            }
          } else {
            $fallback->('utf-8', 'UTF-8');
          }
          $variant->('no_bom_variant16be');
        } elsif (substr ($head, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
          my $c = $head; $c =~ tr/\x00//d;
          if (defined (my $uri = $use_declared->($c))) {
            if (not $csdef->{ascii32} or
                $csdef->{ascii32endian1234} or
                $csdef->{ascii32endian2143} or
                $csdef->{ascii32endian4321} or
                $csdef->{bom_required}) {
              $mismatch->($uri);
            }
          } else {
            $fallback->('utf-8', 'UTF-8');
          }
          $variant->('no_bom_variant32endian3412');
        }
      } elsif (substr ($head, 0, 2) eq "\xFE\xFF") {
        if (substr ($head, 2, 2) eq "\x00<") { # ASCII16BE
          $obj->{has_bom} = 1;
          substr ($head, 0, 2) = '';
          my $c = $head; $c =~ tr/\x00//d;
          if (defined (my $uri = $use_declared->($c))) {
            if (not $csdef->{ascii16} or
                $csdef->{ascii16le} or
                not $csdef->{bom_allowed}) {
              $mismatch->($uri);
            }
          } else {
            $fallback->('utf-16be', 'UTF-16');
          }
          $variant->('no_bom_variant16be');
        } elsif (substr ($head, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
          $obj->{has_bom} = 1;
          substr ($head, 0, 4) = '';
          my $c = $head; $c =~ tr/\x00//d;
          if (defined (my $uri = $use_declared->($c))) {
            if (not $csdef->{ascii32} or
                $csdef->{ascii32endian1234} or
                $csdef->{ascii32endian2143} or
                $csdef->{ascii32endian4321} or
                not $csdef->{bom_allowed}) {
              $mismatch->($uri);
            }
          } else {
            ## Treated as UTF-16BE: FE FF was the BOM, so the two NUL
            ## octets removed with it are data and are put back.
            $fallback->('utf-16be', 'UTF-16');
            $obj->{byte_buffer} .= "\x00\x00";
          }
          $variant->('no_bom_variant32endian3412');
        } else {
          $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
          $obj->{input_encoding} = 'UTF-16';
          substr ($head, 0, 2) = '';
          $obj->{has_bom} = 1;
        }
      } elsif (substr ($head, 0, 2) eq "\xFF\xFE") {
        if (substr ($head, 2, 2) eq "<\x00") { # ASCII16LE
          $obj->{has_bom} = 1;
          substr ($head, 0, 2) = '';
          my $c = $head; $c =~ tr/\x00//d;
          if (defined (my $uri = $use_declared->($c))) {
            if (not $csdef->{ascii16} or
                $csdef->{ascii16be} or
                not $csdef->{bom_allowed}) {
              $mismatch->($uri);
            }
          } else {
            $fallback->('utf-16le', 'UTF-16');
          }
          $variant->('no_bom_variant16le');
        } elsif (substr ($head, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
          $obj->{has_bom} = 1;
          substr ($head, 0, 4) = '';
          my $c = $head; $c =~ tr/\x00//d;
          if (defined (my $uri = $use_declared->($c))) {
            if (not $csdef->{ascii32} or
                $csdef->{ascii32endian1234} or
                $csdef->{ascii32endian2143} or
                $csdef->{ascii32endian3412} or
                not $csdef->{bom_allowed}) {
              $mismatch->($uri);
            }
          } else {
            ## Treated as UTF-16LE: FF FE was the BOM, so the two NUL
            ## octets removed with it are data and are put back.
            $fallback->('utf-16le', 'UTF-16');
            $obj->{byte_buffer} .= "\x00\x00";
          }
          $variant->('no_bom_variant32endian4321');
        } else {
          $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
          $obj->{input_encoding} = 'UTF-16';
          substr ($head, 0, 2) = '';
          $obj->{has_bom} = 1;
        }
      } elsif (substr ($head, 0, 2) eq "\x00\x00") {
        if (substr ($head, 2, 2) eq "\x00<") { # ASCII32Endian1234
          my $c = $head; $c =~ tr/\x00//d;
          if (defined (my $uri = $use_declared->($c))) {
            if (not $csdef->{ascii32} or
                $csdef->{ascii32endian2143} or
                $csdef->{ascii32endian3412} or
                $csdef->{ascii32endian4321} or
                $csdef->{bom_required}) {
              $mismatch->($uri);
            }
          } else {
            $fallback->('utf-8', 'UTF-8');
          }
          $variant->('no_bom_variant32endian1234');
        } elsif (substr ($head, 2, 2) eq "<\x00") { # ASCII32Endian2143
          my $c = $head; $c =~ tr/\x00//d;
          if (defined (my $uri = $use_declared->($c))) {
            if (not $csdef->{ascii32} or
                $csdef->{ascii32endian1234} or
                $csdef->{ascii32endian3412} or
                $csdef->{ascii32endian4321} or
                $csdef->{bom_required}) {
              $mismatch->($uri);
            }
          } else {
            $fallback->('utf-8', 'UTF-8');
          }
          $variant->('no_bom_variant32endian2143');
        } elsif (substr ($head, 2, 2) eq "\xFE\xFF") { # ASCII32Endian1234
          $obj->{has_bom} = 1;
          substr ($head, 0, 4) = '';
          my $c = $head; $c =~ tr/\x00//d;
          if (defined (my $uri = $use_declared->($c))) {
            ## A BOM has just been consumed, so the relevant question is
            ## whether the declared charset allows one (as in the other
            ## BOM branches), not whether it requires one.
            if (not $csdef->{ascii32} or
                $csdef->{ascii32endian2143} or
                $csdef->{ascii32endian3412} or
                $csdef->{ascii32endian4321} or
                not $csdef->{bom_allowed}) {
              $mismatch->($uri);
            }
          } else {
            ## No declaration: handled as BOM-less UTF-8, so the removed
            ## octets are restored as data.
            $fallback->('utf-8', 'UTF-8');
            $obj->{has_bom} = 0;
            $obj->{byte_buffer} .= "\x00\x00\xFE\xFF";
          }
          $variant->('no_bom_variant32endian1234');
        } elsif (substr ($head, 2, 2) eq "\xFF\xFE") { # ASCII32Endian2143
          $obj->{has_bom} = 1;
          substr ($head, 0, 4) = '';
          my $c = $head; $c =~ tr/\x00//d;
          if (defined (my $uri = $use_declared->($c))) {
            ## See the note in the previous branch on |bom_allowed|.
            if (not $csdef->{ascii32} or
                $csdef->{ascii32endian1234} or
                $csdef->{ascii32endian3412} or
                $csdef->{ascii32endian4321} or
                not $csdef->{bom_allowed}) {
              $mismatch->($uri);
            }
          } else {
            $fallback->('utf-8', 'UTF-8');
            $obj->{has_bom} = 0;
            $obj->{byte_buffer} .= "\x00\x00\xFF\xFE";
          }
          $variant->('no_bom_variant32endian2143');
        }
        # \x4C\x6F\xA7\x94 EBCDIC
      } # buffer
      $obj->{byte_buffer} .= $head;
    } # read
  } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-8'}) {
    ## BOM is optional.
    my $bom = '';
    if (read $obj->{filehandle}, $bom, 3) {
      if ($bom eq "\xEF\xBB\xBF") {
        $obj->{has_bom} = 1;
      } else {
        $obj->{byte_buffer} .= $bom;
      }
    }
    $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; # UTF-8 w/o BOM
  } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-16'}) {
    ## BOM is mandated.
    my $bom = '';
    if (read $obj->{filehandle}, $bom, 2) {
      if ($bom eq "\xFE\xFF") {
        $obj->{has_bom} = 1; # UTF-16BE w/o BOM
        $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
      } elsif ($bom eq "\xFF\xFE") {
        $obj->{has_bom} = 1; # UTF-16LE w/o BOM
        $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
      } else {
        $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
        $obj->{has_bom} = 0;
        $obj->{byte_buffer} .= $bom; # UTF-16BE w/o BOM
        $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
      }
    } else {
      $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
      $obj->{has_bom} = 0; # UTF-16BE w/o BOM
      $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
    }
  }

  ## Pick the handle class for the selected definition.  A class is used
  ## only if the Perl encodings it needs can actually be found.
  if ($csdef->{uri}->{$XML_CHARSET.'iso-2022-jp'}) {
    $obj->{state_2440} = 'gl-jis-1997-swapped';
    $obj->{state_2442} = 'gl-jis-1997';
    $obj->{state} = 'state_2842';
    require Encode::GLJIS1997Swapped;
    require Encode::GLJIS1997;
    if (Encode::find_encoding ($obj->{state_2440}) and
        Encode::find_encoding ($obj->{state_2442})) {
      return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP';
    }
  } elsif ($csdef->{uri}->{$IANA_CHARSET.'iso-2022-jp'}) {
    $obj->{state_2440} = 'gl-jis-1978';
    $obj->{state_2442} = 'gl-jis-1983';
    $obj->{state} = 'state_2842';
    require Encode::GLJIS1978;
    require Encode::GLJIS1983;
    if (Encode::find_encoding ($obj->{state_2440}) and
        Encode::find_encoding ($obj->{state_2442})) {
      return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP';
    }
  } elsif (defined $csdef->{perl_name}->[0]) {
    if ($csdef->{uri}->{$XML_CHARSET.'euc-jp'} or
        $csdef->{uri}->{$IANA_CHARSET.'euc-jp'}) {
      $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
      require Encode::EUCJP1997;
      if (Encode::find_encoding ($obj->{perl_encoding_name})) {
        return bless $obj, 'Whatpm::Charset::DecodeHandle::EUCJP';
      }
    } elsif ($csdef->{uri}->{$XML_CHARSET.'shift_jis'} or
             $csdef->{uri}->{$IANA_CHARSET.'shift_jis'}) {
      $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
      require Encode::ShiftJIS1997;
      if (Encode::find_encoding ($obj->{perl_encoding_name})) {
        return bless $obj, 'Whatpm::Charset::DecodeHandle::ShiftJIS';
      }
    } elsif ($csdef->{is_block_safe}) {
      $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
      require Encode;
      if (Encode::find_encoding ($obj->{perl_encoding_name})) {
        return bless $obj, 'Whatpm::Charset::DecodeHandle::Encode';
      }
    }
  }

  $obj->{onerror}->(undef, 'charset-not-supported-error',
                    charset_uri => $obj->{charset});
  return undef;
} # create_decode_handle
463    
## ->name_to_uri ($domain, $name)
##
## Maps a charset name to a charset URI.  The name is lowercased first.
##
##   |ietf| domain: always the IANA prefix followed by the name.
##   |xml| domain:  the XML prefix followed by the name if the name is
##                  one of the names listed below or if no definition is
##                  registered under the IANA-prefixed URI; otherwise
##                  the IANA-prefixed URI.
##   other domains: |undef|.
sub name_to_uri ($$$) {
  my (undef, $domain, $label) = @_;
  $label = lc $label;

  return $IANA_CHARSET . $label if $domain eq 'ietf';
  return undef unless $domain eq 'xml';

  ## Names that always resolve to the XML-specific URI.
  my %xml_specific = map { $_ => 1 } qw(
    utf-8 utf-16 iso-10646-ucs-2 iso-10646-ucs-4
    iso-8859-1 iso-8859-2 iso-8859-3 iso-8859-4 iso-8859-5
    iso-8859-6 iso-8859-7 iso-8859-8 iso-8859-9 iso-8859-10
    iso-8859-11 iso-8859-13 iso-8859-14 iso-8859-15 iso-8859-16
    iso-2022-jp shift_jis euc-jp
  );
  return $XML_CHARSET . $label if $xml_specific{$label};

  ## Prefer a registered IANA definition, if there is one.
  my $iana_uri = $IANA_CHARSET . $label;
  if ($Whatpm::Charset::CharsetDef->{$iana_uri}) {
    return $iana_uri;
  }

  return $XML_CHARSET . $label;
} # name_to_uri
506    
## ->uri_to_name ($domain, $uri)
##
## Maps a charset URI back to a charset name in the given domain.
##
##   |xml| domain:  the |xml_name| of the definition if it has one;
##                  otherwise the part of the URI after the XML prefix;
##                  otherwise the result for the |ietf| domain.
##   |ietf| domain: the first |iana_name| of the definition if it has
##                  one; otherwise the part of the URI after the IANA
##                  prefix.
##
## Returns |undef| if no name can be determined.
sub uri_to_name ($$$) {
  my (undef, $domain, $uri) = @_;

  ## Fetch the definition once and never dereference through the table
  ## itself: |$table->{$uri}->{key}| would autovivify an empty (but
  ## true) entry for an unknown URI, which |name_to_uri| would then
  ## mistake for a registered charset.
  my $def = $Whatpm::Charset::CharsetDef->{$uri} || {};

  if ($domain eq 'xml') {
    my $v = $def->{xml_name};
    return $v if defined $v;

    if (substr ($uri, 0, length $XML_CHARSET) eq $XML_CHARSET) {
      return substr ($uri, length $XML_CHARSET);
    }

    $domain = 'ietf'; ## TODO: XML encoding name has smaller range
  }

  if ($domain eq 'ietf') {
    my $v = $def->{iana_name};
    return $v->[0] if defined $v;

    if (substr ($uri, 0, length $IANA_CHARSET) eq $IANA_CHARSET) {
      return substr ($uri, length $IANA_CHARSET);
    }
  }

  return undef;
} # uri_to_name
532    
533 wakaba 1.3 require IO::Handle;
534    
535     package Whatpm::Charset::DecodeHandle::ByteBuffer;
536    
537 wakaba 1.7 ## NOTE: Provides a byte buffer wrapper object.
538    
## ->new ($filehandle)
##
## Creates a wrapper with an empty internal buffer around the given
## filehandle object.
sub new ($$) {
  my ($class, $filehandle) = @_;
  return bless {
    buffer => '',
    filehandle => $filehandle,
  }, $class;
} # new
546    
## ->read ($scalar, $length, $offset)
##
## Reads up to $length octets from the wrapped filehandle.  Every octet
## read is appended to the internal |buffer|; the newly read octets are
## also stored into $scalar starting at $offset, replacing whatever
## followed that position.
##
## As with Perl's builtin |read|, an $offset greater than the length of
## $scalar pads $scalar with "\x00" up to $offset before the data is
## stored.
##
## Returns whatever the filehandle's |read| method returned.
sub read {
  my $self = shift;
  ## From here on: $_[0] is the destination, $_[1] the length and
  ## $_[2] the offset.  $_[0] is an alias of the caller's variable.
  my $pos = length $self->{buffer};
  my $r = $self->{filehandle}->read ($self->{buffer}, $_[1], $pos);

  my $offset = $_[2] || 0;
  $_[0] = '' unless defined $_[0];
  if ($offset > length $_[0]) {
    $_[0] .= "\x00" x ($offset - length $_[0]);
  }
  substr ($_[0], $offset) = substr ($self->{buffer}, $pos);
  return $r;
} # read
556    
## ->close
##
## Closes the wrapped filehandle and returns the result of its |close|.
sub close {
  my $self = shift;
  return $self->{filehandle}->close;
} # close
558    
559 wakaba 1.1 package Whatpm::Charset::DecodeHandle::Encode;
560    
561 wakaba 1.7 ## NOTE: Provides a Perl |Encode| module wrapper object.
562    
## ->charset
##
## Returns the |charset| value stored in the handle.
sub charset ($) {
  my $self = shift;
  return $self->{charset};
} # charset
564    
## ->close
##
## Closes the underlying filehandle and returns the result of its |close|.
sub close ($) {
  my $self = shift;
  return $self->{filehandle}->close;
} # close
566 wakaba 1.1
## ->getc
##
## Reads a single character through the handle's |read| method.
## Returns the character, or |undef| if |read| returned a false value.
sub getc ($) {
  my $char = '';
  return $_[0]->read ($char, 1) ? $char : undef;
} # getc
576    
## ->read ($scalar, $length, $offset)
##
## Reads up to $length characters and stores them into $scalar,
## starting at $offset (default 0); anything in $scalar after $offset
## is replaced.  Returns the number of characters stored, which is less
## than $length only when the input is exhausted.
##
## Characters pushed back with |ungetc| are delivered first, followed
## by the characters still in |char_buffer|; after that, octets are
## read from the filehandle and decoded with the Perl encoding named by
## |perl_encoding_name|.
##
## An octet that cannot be decoded is reported through |onerror| as
## |fallback-char-error|, |fallback-unassigned-error| or
## |illegal-octets-error| (depending on the |fallback| table) and is
## passed through as a character, or replaced by its fallback.
sub read ($$$;$) {
  my $self = $_[0];
  # $scalar = $_[1];
  my $length = $_[2];
  my $offset = $_[3] || 0;
  my $count = 0;
  my $eof;

  ## NOTE: It is incompatible with the standard Perl semantics
  ## if $offset is greater than the length of $scalar.

  return $count if $length < 1;

  ## |ungetc| stores characters in |character_queue|, while this method
  ## works on |char_buffer|.  Move any pushed-back characters to the
  ## front of the buffer so that they are not lost.  (The queue might
  ## not exist if the object was not made by the constructor.)
  if ($self->{character_queue} and @{$self->{character_queue}}) {
    ${$self->{char_buffer}}
        = join ('', splice @{$self->{character_queue}})
        . ${$self->{char_buffer}};
  }

  A: {
    return $count if $length < 1;

    ## Serve as much as possible from the decoded characters.
    if (my $l = length ${$self->{char_buffer}}) {
      if ($l >= $length) {
        substr ($_[1], $offset) = substr (${$self->{char_buffer}}, 0, $length);
        $count += $length;
        substr (${$self->{char_buffer}}, 0, $length) = '';
        $length = 0;
        return $count;
      } else {
        substr ($_[1], $offset) = ${$self->{char_buffer}};
        $count += $l;
        $length -= $l;
        ${$self->{char_buffer}} = '';
      }
      $offset = length $_[1];
    }

    if ($eof) {
      return $count;
    }

    ## Fetch more octets.  |continue| means that the previous decoding
    ## stopped in the middle of the byte buffer; if no more octets can
    ## be read then, the octets left over are in error.
    my $error;
    if ($self->{continue}) {
      if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
                                     length $self->{byte_buffer})) {
        #
      } else {
        $error = 1;
      }
      $self->{continue} = 0;
    } elsif (512 > length $self->{byte_buffer}) {
      if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
                                     length $self->{byte_buffer})) {
        #
      } else {
        $eof = 1;
      }
    }

    unless ($error) {
      ## Strip the BOM, once, if the handle declares a pattern for it.
      if (not $self->{bom_checked}) {
        if (defined $self->{bom_pattern}) {
          if ($self->{byte_buffer} =~ s/^$self->{bom_pattern}//) {
            $self->{has_bom} = 1;
          }
        }
        $self->{bom_checked} = 1;
      }

      ## With |FB_QUIET| the decoded part is removed from the byte
      ## buffer and decoding stops at the first undecodable octet.
      my $string = Encode::decode ($self->{perl_encoding_name},
                                   $self->{byte_buffer},
                                   Encode::FB_QUIET ());
      if (length $string) {
        $self->{char_buffer} = \$string;
        if (length $self->{byte_buffer}) {
          $self->{continue} = 1;
        }
      } else {
        if (length $self->{byte_buffer}) {
          $error = 1;
        } else {
          ## NOTE: No further input
          redo A;
        }
      }
    }

    if ($error) {
      ## Consume exactly one offending octet.
      my $r = substr $self->{byte_buffer}, 0, 1, '';
      my $fallback = $self->{fallback}->{$r};
      if (defined $fallback) {
        ## NOTE: This is an HTML5 parse error.
        $self->{onerror}->($self, 'fallback-char-error', octets => \$r,
                           char => \$fallback,
                           level => $self->{level}->{$self->{error_level}->{'fallback-char-error'}});
        ${$self->{char_buffer}} .= $fallback;
      } elsif (exists $self->{fallback}->{$r}) {
        ## NOTE: This is an HTML5 parse error.  In addition, the octet
        ## is not assigned with a character.
        $self->{onerror}->($self, 'fallback-unassigned-error', octets => \$r,
                           level => $self->{level}->{$self->{error_level}->{'fallback-unassigned-error'}});
        ${$self->{char_buffer}} .= $r;
      } else {
        $self->{onerror}->($self, 'illegal-octets-error', octets => \$r,
                           level => $self->{level}->{$self->{error_level}->{'illegal-octets-error'}});
        ${$self->{char_buffer}} .= $r;
      }
    }

    redo A;
  } # A
} # read
682    
## ->has_bom
##
## Returns the |has_bom| flag stored in the handle.
sub has_bom ($) {
  my $self = shift;
  return $self->{has_bom};
} # has_bom
684    
## ->input_encoding
##
## Returns the |input_encoding| stored in the handle if it is defined.
## Otherwise the name is derived from the handle's charset URI with
## |Whatpm::Charset::DecodeHandle->uri_to_name| in the |xml| domain;
## |undef| is returned if the handle has no charset URI either.
sub input_encoding ($) {
  my $self = shift;

  return $self->{input_encoding} if defined $self->{input_encoding};
  return undef unless defined $self->{charset};

  return Whatpm::Charset::DecodeHandle->uri_to_name (xml => $self->{charset});
} # input_encoding
696 wakaba 1.1
## ->onerror ([$new_handler])
##
## Accessor for the error callback.  If an argument is given (even
## |undef|) it replaces the stored callback.  Returns the callback that
## is stored after the optional update.
sub onerror ($;$) {
  my $self = shift;
  $self->{onerror} = shift if @_;
  return $self->{onerror};
} # onerror
704    
## ->ungetc ($code_point)
##
## Pushes the character with the given code point (truncated to an
## integer; a false value counts as 0) onto the front of
## |character_queue|.  Returns the new number of queued characters.
sub ungetc ($$) {
  my ($self, $code) = @_;
  my $char = chr int ($code or 0);
  unshift @{$self->{character_queue}}, $char;
} # ungetc
708    
709     package Whatpm::Charset::DecodeHandle::EUCJP;
710     push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
711    
## ->getc
##
## Returns the next character, or |undef| if the input is exhausted.
##
## Queued characters are returned first.  Otherwise more octets are
## read, the byte buffer is decoded with the Perl encoding named by
## |perl_encoding_name|, the result is queued and its first character
## is returned.
##
## If the byte buffer starts with octets that cannot be decoded, the
## offending octets are removed from the buffer, reported through
## |onerror| and returned as they are.  The error type is
## |unassigned-code-point-error| if the octets have the form of a
## complete character, or |illegal-octets-error| otherwise.
sub getc ($) {
  my ($dh) = @_;

  if (@{$dh->{character_queue}}) {
    return shift @{$dh->{character_queue}};
  }

  ## Fetch more octets.  |continue| means that the previous decoding
  ## stopped before the end of the byte buffer; if nothing more can be
  ## read then, the octets left over are in error.
  my $failed;
  if ($dh->{continue}) {
    $failed = 1
        unless $dh->{filehandle}->read ($dh->{byte_buffer}, 256,
                                        length $dh->{byte_buffer});
    $dh->{continue} = 0;
  } elsif (length ($dh->{byte_buffer}) < 512) {
    $dh->{filehandle}->read ($dh->{byte_buffer}, 256,
                             length $dh->{byte_buffer});
  }

  my $char;
  unless ($failed) {
    ## With |FB_QUIET| the decoded part is removed from the byte buffer.
    my $decoded = Encode::decode ($dh->{perl_encoding_name},
                                  $dh->{byte_buffer},
                                  Encode::FB_QUIET ());
    if (length $decoded) {
      push @{$dh->{character_queue}}, split //, $decoded;
      $char = shift @{$dh->{character_queue}};
      $dh->{continue} = 1 if length $dh->{byte_buffer};
    } elsif (length $dh->{byte_buffer}) {
      $failed = 1;
    } else {
      $char = undef;
    }
  }

  return $char unless $failed;

  ## Take the first octet and, where the lead octet calls for them,
  ## the trail octets that follow it.
  $char = substr $dh->{byte_buffer}, 0, 1, '';
  my $etype = 'illegal-octets-error';
  if ($char =~ /^[\xA1-\xFE]/) {
    if ($dh->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
      $char .= $1;
      $etype = 'unassigned-code-point-error';
    }
  } elsif ($char eq "\x8F") {
    if ($dh->{byte_buffer} =~ s/^([\xA1-\xFE][\xA1-\xFE]?)//) {
      $char .= $1;
      $etype = 'unassigned-code-point-error' if length $1 == 2;
    }
  } elsif ($char eq "\x8E") {
    if ($dh->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
      $char .= $1;
      $etype = 'unassigned-code-point-error';
    }
  } elsif ($char eq "\xA0" or $char eq "\xFF") {
    $etype = 'unassigned-code-point-error';
  }
  $dh->{onerror}->($dh, $etype, octets => \$char,
                   level => $dh->{level}->{$dh->{error_level}->{$etype}});

  return $char;
} # getc
777    
## ->read ($scalar, $length, $offset)
##
## Reads up to $length characters, one |getc| call at a time, and
## stores them into $scalar starting at $offset; anything in $scalar
## after $offset is replaced.  Returns the number of characters read.
##
## TODO: This is not good for performance.  Should be replaced
## by read-centric implementation.
sub read ($$$;$) {
  #my ($self, $scalar, $length, $offset) = @_;
  my $chars = '';
  for (my $left = $_[2]; $left > 0; $left--) {
    my $next = $_[0]->getc;
    last unless defined $next;
    $chars .= $next;
  }
  ## NOTE: This would do different thing from what Perl's |read| do
  ## if $offset points beyond the end of the $scalar.
  substr ($_[1], $_[3]) = $chars;
  return length $chars;
} # read
795    
796 wakaba 1.1 package Whatpm::Charset::DecodeHandle::ISO2022JP;
797     push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
798    
## Returns the next decoded character of the ISO-2022-JP input, or |undef|
## when the input is exhausted.
##
## Characters already decoded are served from |character_queue| first.
## Otherwise the byte buffer is refilled from |filehandle|, any designation
## escape sequence at the head of the buffer switches |state|, and a run of
## bytes is decoded according to the current state.  Bytes that cannot be
## decoded are returned as is (one octet, or two in a double-byte state)
## after the |onerror| handler has been invoked.
sub getc ($) {
  my $self = $_[0];
  if (@{$self->{character_queue}}) {
    return shift @{$self->{character_queue}};
  }

  ## Invokes the error handler with the level configured for |$etype|.
  my $report = sub {
    my ($etype, @detail) = @_;
    $self->{onerror}->($self, $etype, @detail,
        level => $self->{level}->{$self->{error_level}->{$etype}});
  };

  ## Escape sequence designator => decoder state.
  my %state_for = (
    "\x24\x40" => 'state_2440',
    "\x24\x42" => 'state_2442',
    "\x28\x42" => 'state_2842',
    "\x28\x4A" => 'state_284A',
  );

  my $char;
  DECODE: {
    my $failed;

    ## Refill the byte buffer.
    if ($self->{continue}) {
      $failed = 1
          unless $self->{filehandle}->read ($self->{byte_buffer}, 256,
                                            length $self->{byte_buffer});
      $self->{continue} = 0;
    } elsif (512 > length $self->{byte_buffer}) {
      $self->{filehandle}->read ($self->{byte_buffer}, 256,
                                 length $self->{byte_buffer});
    }

    if (not $failed) {
      if ($self->{byte_buffer} =~ s/^\x1B(\x24[\x40\x42]|\x28[\x42\x4A])//) {
        ## A designation: switch the state and start over.
        $self->{state} = $state_for{$1};
        redo DECODE;
      }

      my $state = $self->{state};
      if ($state eq 'state_2842' or $state eq 'state_284A') {
        ## Single-byte states: IRV (2842) and JIS X 0201 Roman (284A).
        if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {
          my $text = $1;
          $text =~ tr/\x5C\x7E/\xA5\x{203E}/ if $state eq 'state_284A';
          push @{$self->{character_queue}}, split //, $text;
          $char = shift @{$self->{character_queue}};
        } elsif (length $self->{byte_buffer}) {
          $failed = 1;
        } else {
          $char = undef;
          ## Reaching the end of the input outside the IRV state is
          ## reported as an error.
          $report->('invalid-state-error', state => $self->{state})
              if $state eq 'state_284A';
        }
      } elsif ($state eq 'state_2442' or $state eq 'state_2440') {
        ## Double-byte states: 1983 (2442) and 1978 (2440).  The name of
        ## the encoding to use is stored under the key named by the state.
        my $text = Encode::decode ($self->{$state},
                                   $self->{byte_buffer},
                                   Encode::FB_QUIET ());
        if (length $text) {
          push @{$self->{character_queue}}, split //, $text;
          $char = shift @{$self->{character_queue}};
        } elsif (length $self->{byte_buffer}) {
          $failed = 1;
        } else {
          $char = undef;
          $report->('invalid-state-error', state => $self->{state});
        }
      } else {
        $failed = 1;
      }
    }

    if ($failed) {
      $char = substr $self->{byte_buffer}, 0, 1, '';
      my $etype = 'illegal-octets-error';
      my $double_byte = ($self->{state} eq 'state_2442' or
                         $self->{state} eq 'state_2440');
      if ($double_byte and
          $char =~ /^[\x21-\x7E]/ and
          $self->{byte_buffer} =~ s/^([\x21-\x7E])//) {
        $char .= $1;
        $etype = 'unassigned-code-point-error';
      } elsif ($char eq "\x1B" and
               $self->{byte_buffer} =~ s/^\(H//) { # Old 0201
        $char .= "(H";
        $self->{state} = 'state_284A';
      }
      $report->($etype, octets => \$char);
    }
  } # DECODE

  return $char;
} # getc
915    
## Reads up to |$length| decoded characters from the handle into the
## caller's scalar, starting at |$offset| (default 0) within that scalar.
##
##   $count = $handle->read ($scalar, $length, $offset);
##
## The scalar is modified in place through |@_| aliasing: anything at or
## after |$offset| is replaced by the characters read.  Returns the length
## of the string that was read (0 at the end of the input).
##
## TODO: This is not good for performance.  Should be replaced
## by read-centric implementation.
sub read ($$$;$) {
  #my ($self, $scalar, $length, $offset) = @_;
  my $length = $_[2];
  my $offset = defined $_[3] ? $_[3] : 0;
  my $r = '';
  while ($length > 0) {
    my $c = $_[0]->getc;
    last unless defined $c;
    $r .= $c;
    $length--;
  }

  ## An lvalue |substr| whose offset lies beyond the end of the string is
  ## a fatal error.  Perl's built-in |read| pads the buffer with "\0" in
  ## that case, so do the same here before splicing the result in.
  $_[1] = '' unless defined $_[1];
  my $shortage = $offset - length ($_[1]);
  $_[1] .= "\x00" x $shortage if $shortage > 0;

  substr ($_[1], $offset) = $r;
  return length $r;
} # read
933    
934 wakaba 1.1 package Whatpm::Charset::DecodeHandle::ShiftJIS;
935     push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
936    
## Returns the next decoded character of the Shift_JIS input, or |undef|
## when the input is exhausted.
##
## Characters already decoded are served from |character_queue| first.
## Otherwise the byte buffer is refilled from |filehandle| and decoded with
## the encoding named by |perl_encoding_name|.  Octets that cannot be
## decoded are returned as is after the |onerror| handler has been invoked.
sub getc ($) {
  my $self = $_[0];
  if (@{$self->{character_queue}}) {
    return shift @{$self->{character_queue}};
  }

  ## Refill the byte buffer.  |continue| is set when the previous decoding
  ## left octets behind; if nothing more can be read in that case, the
  ## leftover octets are in error.
  my $failed;
  if ($self->{continue}) {
    $failed = 1
        unless $self->{filehandle}->read ($self->{byte_buffer}, 256,
                                          length $self->{byte_buffer});
    $self->{continue} = 0;
  } elsif (512 > length $self->{byte_buffer}) {
    $self->{filehandle}->read ($self->{byte_buffer}, 256,
                               length $self->{byte_buffer});
  }

  my $char;
  if (not $failed) {
    ## |FB_QUIET| makes |decode| stop at the first undecodable octet and
    ## leave the unprocessed part in the byte buffer.
    my $decoded = Encode::decode ($self->{perl_encoding_name},
                                  $self->{byte_buffer},
                                  Encode::FB_QUIET ());
    if (length $decoded) {
      push @{$self->{character_queue}}, split //, $decoded;
      $char = shift @{$self->{character_queue}};
      $self->{continue} = 1 if length $self->{byte_buffer};
    } elsif (length $self->{byte_buffer}) {
      $failed = 1;
    }
    ## Otherwise: nothing decoded and nothing left, |$char| stays |undef|.
  }

  if ($failed) {
    $char = substr $self->{byte_buffer}, 0, 1, '';
    my $etype = 'illegal-octets-error';
    if ($char =~ /^[\x81-\x9F\xE0-\xFC]/) {
      ## First byte of a double-byte character: take the next octet too.
      if ($self->{byte_buffer} =~ s/(.)//s) {
        $char .= $1; # not limited to \x40-\xFC - \x7F
        $etype = 'unassigned-code-point-error';
      }
      ## NOTE: Range [\xF0-\xFC] is unassigned and may be used as a
      ## single-byte character or as the first-byte of a double-byte
      ## character according to JIS X 0208:1997 Appendix 1.  However, the
      ## current practice is use the range as the first-byte of double-byte
      ## characters.
    } elsif ($char =~ /^[\x80\xA0\xFD-\xFF]/) {
      $etype = 'unassigned-code-point-error';
    }
    $self->{onerror}->($self, $etype, octets => \$char,
        level => $self->{level}->{$self->{error_level}->{$etype}});
  }

  return $char;
} # getc
996    
## Reads up to |$length| decoded characters from the handle into the
## caller's scalar, starting at |$offset| (default 0) within that scalar.
##
##   $count = $handle->read ($scalar, $length, $offset);
##
## The scalar is modified in place through |@_| aliasing: anything at or
## after |$offset| is replaced by the characters read.  Returns the length
## of the string that was read (0 at the end of the input).
##
## TODO: This is not good for performance.  Should be replaced
## by read-centric implementation.
sub read ($$$;$) {
  #my ($self, $scalar, $length, $offset) = @_;
  my $length = $_[2];
  my $offset = defined $_[3] ? $_[3] : 0;
  my $r = '';
  while ($length > 0) {
    my $c = $_[0]->getc;
    last unless defined $c;
    $r .= $c;
    $length--;
  }

  ## An lvalue |substr| whose offset lies beyond the end of the string is
  ## a fatal error.  Perl's built-in |read| pads the buffer with "\0" in
  ## that case, so do the same here before splicing the result in.
  $_[1] = '' unless defined $_[1];
  my $shortage = $offset - length ($_[1]);
  $_[1] .= "\x00" x $shortage if $shortage > 0;

  substr ($_[1], $offset) = $r;
  return length $r;
} # read
1014    
## US-ASCII.  Every registered alias URI maps to the same definition hash.
{
  my $iana = 'urn:x-suika-fam-cx:charset:';
  my @names = (
    'ansi_x3.4-1968', 'ansi_x3.4-1986', 'ascii', 'cp367', 'csascii',
    'ibm367', 'iso-ir-6', 'iso646-us', 'iso_646.irv:1991', 'us', 'us-ascii',
  );
  my $def = {
    ascii8 => '1',
    is_block_safe => '1',
    ## The final name is listed twice, exactly as in the original table.
    ietf_name => [@names, 'us-ascii'],
    mime_name => 'us-ascii',
    perl_name => ['ascii', 'iso-646-us', 'us-ascii'],
    utf8_encoding_scheme => '1',
    uri => {map { ($iana . $_ => '1') } @names},
  };
  $Whatpm::Charset::CharsetDef->{$iana . $_} = $def for @names;
}
1074    
## Perl-specific encodings |ascii-ctrl| and |null|: each is known only by
## its Perl name and its own URI.
for my $name (qw(ascii-ctrl null)) {
  my $uri = 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.'
      . $name;
  $Whatpm::Charset::CharsetDef->{$uri} = {
    perl_name => [$name],
    uri => {$uri => '1'},
  };
}
## UTF-8 family: the XML and RFC 2279 variants, the IANA |utf-8| charset
## and the two Perl encodings (|utf-8| and |utf8|).
{
  my $dis = 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/';
  my $iana = 'urn:x-suika-fam-cx:charset:';

  $Whatpm::Charset::CharsetDef->{$dis . 'XML.utf-8'} = {
    ascii8 => '1',
    bom_allowed => '1',
    no_bom_variant => $dis . 'Perl.utf8',
    utf8_encoding_scheme => '1',
    uri => {$dis . 'XML.utf-8' => '1'},
    xml_name => 'UTF-8',
  };

  $Whatpm::Charset::CharsetDef->{$dis . 'UTF-8.RFC2279'} = {
    ascii8 => '1',
    bom_allowed => '1',
    no_bom_variant => $dis . 'Perl.utf8',
    utf8_encoding_scheme => '1',
    uri => {$dis . 'UTF-8.RFC2279' => '1'},
  };

  $Whatpm::Charset::CharsetDef->{$dis . 'Perl.utf-8'} = {
    ascii8 => 1,
    is_block_safe => '1',
    perl_name => ['utf-8'],
    utf8_encoding_scheme => '1',
    uri => {$dis . 'Perl.utf-8' => '1'},
  };

  $Whatpm::Charset::CharsetDef->{$iana . 'utf-8'} = {
    ascii8 => 1,
    bom_allowed => '1',
    no_bom_variant => $dis . 'Perl.utf-8',
    ietf_name => ['utf-8'],
    mime_name => 'utf-8',
    utf8_encoding_scheme => '1',
    uri => {$iana . 'utf-8' => '1'},
  };

  $Whatpm::Charset::CharsetDef->{$dis . 'Perl.utf8'} = {
    ascii8 => '1',
    is_block_safe => '1',
    perl_name => ['utf8'],
    utf8_encoding_scheme => '1',
    uri => {$dis . 'Perl.utf8' => '1'},
  };
}
## UTF-16 family: the XML and IANA |utf-16| charsets, the IANA
## |utf-16be| / |utf-16le| charsets and the matching Perl encodings.
{
  my $dis = 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/';
  my $iana = 'urn:x-suika-fam-cx:charset:';
  my $perl_be = $dis . 'Perl.utf-16be';
  my $perl_le = $dis . 'Perl.utf-16le';

  $Whatpm::Charset::CharsetDef->{$dis . 'XML.utf-16'} = {
    ascii16 => 1,
    bom_allowed => '1',
    bom_required => '1',
    no_bom_variant => $perl_le,
    no_bom_variant16be => $perl_be,
    no_bom_variant16le => $perl_le,
    perl_name => ['utf-16'],
    uri => {$dis . 'XML.utf-16' => '1'},
    xml_name => 'UTF-16',
  };

  $Whatpm::Charset::CharsetDef->{$iana . 'utf-16'} = {
    ascii16 => 1,
    bom_allowed => '1',
    no_bom_variant => $perl_le,
    no_bom_variant16be => $perl_be,
    no_bom_variant16le => $perl_le,
    ietf_name => ['utf-16'],
    mime_name => 'utf-16',
    uri => {$iana . 'utf-16' => '1'},
  };

  $Whatpm::Charset::CharsetDef->{$iana . 'utf-16be'} = {
    ascii16 => 1,
    ascii16be => 1,
    bom_allowed => '1',
    no_bom_variant => $perl_be,
    no_bom_variant16be => $perl_be,
    ietf_name => ['utf-16be'],
    mime_name => 'utf-16be',
    uri => {$iana . 'utf-16be' => '1'},
  };

  $Whatpm::Charset::CharsetDef->{$iana . 'utf-16le'} = {
    ascii16 => 1,
    ascii16le => 1,
    bom_allowed => '1',
    no_bom_variant => $perl_le,
    no_bom_variant16le => $perl_le,
    ietf_name => ['utf-16le'],
    mime_name => 'utf-16le',
    uri => {$iana . 'utf-16le' => '1'},
  };

  $Whatpm::Charset::CharsetDef->{$perl_be} = {
    ascii16 => 1,
    ascii16be => 1,
    is_block_safe => '1',
    perl_name => ['utf-16be'],
    uri => {$perl_be => '1'},
  };

  $Whatpm::Charset::CharsetDef->{$perl_le} = {
    ascii16 => 1,
    ascii16le => 1,
    is_block_safe => '1',
    perl_name => ['utf-16le'],
    uri => {$perl_le => '1'},
  };
}
## ISO-10646-UCS-2 (the XML and IANA URIs share one definition) and the
## big- and little-endian Perl encodings.
{
  my $dis = 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/';
  my $xml_uri = $dis . 'XML.iso-10646-ucs-2';
  my $iana_uri = 'urn:x-suika-fam-cx:charset:iso-10646-ucs-2';

  my $ucs2 = {
    ascii16 => 1,
    bom_allowed => '1',
    no_bom_variant => $dis . 'Perl.ucs-2le',
    no_bom_variant16be => $dis . 'Perl.ucs-2be',
    no_bom_variant16le => $dis . 'Perl.ucs-2le',
    ietf_name => ['csunicode', 'iso-10646-ucs-2'],
    mime_name => 'iso-10646-ucs-2',
    uri => {$xml_uri => '1', $iana_uri => '1'},
    xml_name => 'ISO-10646-UCS-2',
  };
  $Whatpm::Charset::CharsetDef->{$iana_uri} = $ucs2;
  $Whatpm::Charset::CharsetDef->{$xml_uri} = $ucs2;

  for my $endian (qw(be le)) {
    my $uri = $dis . 'Perl.ucs-2' . $endian;
    $Whatpm::Charset::CharsetDef->{$uri} = {
      ascii16 => 1,
      'ascii16' . $endian => 1,
      is_block_safe => '1',
      perl_name => ['ucs-2' . $endian],
      uri => {$uri => '1'},
    };
  }
}
## ISO-10646-UCS-4 (the XML and IANA URIs share one definition) and the
## Perl |utf-32be| / |utf-32le| encodings.
{
  my $dis = 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/';
  my $xml_uri = $dis . 'XML.iso-10646-ucs-4';
  my $iana_uri = 'urn:x-suika-fam-cx:charset:iso-10646-ucs-4';
  my $perl_be = $dis . 'Perl.utf-32be';
  my $perl_le = $dis . 'Perl.utf-32le';

  my $ucs4 = {
    ascii32 => 1,
    bom_allowed => '1',
    no_bom_variant => $perl_le,
    no_bom_variant32endian1234 => $perl_be,
    no_bom_variant32endian4321 => $perl_le,
    ietf_name => ['csucs4', 'iso-10646-ucs-4'],
    mime_name => 'iso-10646-ucs-4',
    uri => {$xml_uri => '1', $iana_uri => '1'},
    xml_name => 'ISO-10646-UCS-4',
  };
  $Whatpm::Charset::CharsetDef->{$iana_uri} = $ucs4;
  $Whatpm::Charset::CharsetDef->{$xml_uri} = $ucs4;

  $Whatpm::Charset::CharsetDef->{$perl_be} = {
    ascii32 => 1,
    ascii32endian1234 => 1,
    is_block_safe => '1',
    perl_name => ['ucs-4be', 'utf-32be'],
    uri => {$perl_be => '1'},
  };

  $Whatpm::Charset::CharsetDef->{$perl_le} = {
    ascii32 => 1,
    ascii32endian4321 => 1,
    is_block_safe => '1',
    perl_name => ['ucs-4le', 'utf-32le'],
    uri => {$perl_le => '1'},
  };
}
## ISO-8859-1.  The XML URI and the IANA |iso_8859-1:1987| URI share one
## definition hash.
{
  my $xml_uri
      = 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1';
  my $iana_uri = 'urn:x-suika-fam-cx:charset:iso_8859-1:1987';

  my $latin1 = {
    ascii8 => '1',
    is_block_safe => '1',
    ## |iso-8859-1| is listed twice, exactly as in the original table.
    ietf_name => [
      'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-8859-1',
      'iso-ir-100', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1',
    ],
    mime_name => 'iso-8859-1',
    perl_name => ['iso-8859-1', 'latin1'],
    uri => {$xml_uri => '1', $iana_uri => '1'},
    xml_name => 'ISO-8859-1',
  };
  $Whatpm::Charset::CharsetDef->{$xml_uri} = $latin1;
  $Whatpm::Charset::CharsetDef->{$iana_uri} = $latin1;
}
1359    
## ISO-8859-2 to ISO-8859-16 (there is no entry for part 12).  The
## definitions differ only in the part number.
for my $part (2 .. 11, 13 .. 16) {
  my $uri
      = 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-'
      . $part;
  $Whatpm::Charset::CharsetDef->{$uri} = {
    ascii8 => '1',
    is_block_safe => '1',
    uri => {$uri => '1'},
    xml_name => 'ISO-8859-' . $part,
  };
}
1499    
## ISO-2022-JP: separate definitions for the XML name, the IANA charset
## and the Perl encoding.
{
  my $dis = 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/';
  my $iana_uri = 'urn:x-suika-fam-cx:charset:iso-2022-jp';

  $Whatpm::Charset::CharsetDef->{$dis . 'XML.iso-2022-jp'} = {
    ascii8 => '1',
    uri => {$dis . 'XML.iso-2022-jp' => '1'},
    xml_name => 'ISO-2022-JP',
  };

  $Whatpm::Charset::CharsetDef->{$iana_uri} = {
    ascii8 => '1',
    ## |iso-2022-jp| is listed twice, exactly as in the original table.
    ietf_name => ['csiso2022jp', 'iso-2022-jp', 'iso-2022-jp'],
    mime_name => 'iso-2022-jp',
    uri => {$iana_uri => '1'},
  };

  $Whatpm::Charset::CharsetDef->{$dis . 'Perl.iso-2022-jp'} = {
    ascii8 => '1',
    perl_name => ['iso-2022-jp'],
    uri => {$dis . 'Perl.iso-2022-jp' => '1'},
  };
}
## Shift_JIS (the IANA and XML URIs share one definition) and the Perl
## |shiftjis| encoding.
{
  my $dis = 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/';
  my $xml_uri = $dis . 'XML.shift_jis';
  my $iana_uri = 'urn:x-suika-fam-cx:charset:shift_jis';
  my $perl_uri = $dis . 'Perl.shiftjis';

  my $sjis = {
    ascii8 => '1',
    is_block_safe => '1',
    ## |shift_jis| is listed twice, exactly as in the original table.
    ietf_name => ['csshiftjis', 'ms_kanji', 'shift_jis', 'shift_jis'],
    mime_name => 'shift_jis',
    perl_name => ['shift-jis-1997'],
    uri => {$xml_uri => '1', $iana_uri => '1'},
    xml_name => 'Shift_JIS',
  };
  $Whatpm::Charset::CharsetDef->{$xml_uri} = $sjis;
  $Whatpm::Charset::CharsetDef->{$iana_uri} = $sjis;

  $Whatpm::Charset::CharsetDef->{$perl_uri} = {
    ascii8 => '1',
    is_block_safe => '1',
    perl_name => ['shiftjis', 'sjis'],
    uri => {$perl_uri => '1'},
  };
}
## EUC-JP (two IANA URIs and the XML URI share one definition) and the
## Perl |euc-jp| encoding.
{
  my $dis = 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/';
  my $iana = 'urn:x-suika-fam-cx:charset:';
  my @shared_uris = (
    $dis . 'XML.euc-jp',
    $iana . 'euc-jp',
    $iana . 'extended_unix_code_packed_format_for_japanese',
  );
  my $perl_uri = $dis . 'Perl.euc-jp';

  my $eucjp = {
    ascii8 => '1',
    is_block_safe => '1',
    ## |euc-jp| is listed twice, exactly as in the original table.
    ietf_name => [
      'cseucpkdfmtjapanese', 'euc-jp', 'euc-jp',
      'extended_unix_code_packed_format_for_japanese',
    ],
    mime_name => 'euc-jp',
    perl_name => ['euc-jp-1997'],
    uri => {map { ($_ => '1') } @shared_uris},
    xml_name => 'EUC-JP',
  };
  $Whatpm::Charset::CharsetDef->{$_} = $eucjp for @shared_uris;

  $Whatpm::Charset::CharsetDef->{$perl_uri} = {
    ascii8 => '1',
    is_block_safe => '1',
    perl_name => ['euc-jp', 'ujis'],
    uri => {$perl_uri => '1'},
  };
}
1592    
1593     1;
1594 wakaba 1.10 ## $Date: 2008/09/11 12:09:38 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24