/[suikacvs]/markup/html/whatpm/Whatpm/Charset/DecodeHandle.pm
Suika

Contents of /markup/html/whatpm/Whatpm/Charset/DecodeHandle.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.18 - (show annotations) (download)
Mon Sep 15 07:19:03 2008 UTC (16 years, 10 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.17: +10 -3 lines
++ whatpm/Whatpm/ChangeLog	15 Sep 2008 07:17:34 -0000
	* HTML.pm.src: Remove checking for control character, surrogate
	pair, or noncharacter code points and non-Unicode code
	points (they should be handled by Whatpm::Charset::UnicodeChecker).
	(parse_char_stream): Support for the |$get_wrapper| argument and
	character stream error handlers.

2008-09-15  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/Charset/ChangeLog	15 Sep 2008 07:18:45 -0000
	* DecodeHandle.pm (onerror): Return |undef| if no explicit value
	is set.

	* UnicodeChecker.pm: Support for HTML5 parse errors.
	(onerror): Return |undef| if no explicit value is set.

2008-09-15  Wakaba  <wakaba@suika.fam.cx>

1 package Whatpm::Charset::DecodeHandle;
2 use strict;
3
4 ## NOTE: |Message::Charset::Info| uses this module without calling
5 ## the constructor.
6 use Message::Charset::Info;
7
8 my $XML_AUTO_CHARSET = q<http://suika.fam.cx/www/2006/03/xml-entity/>;
9 my $IANA_CHARSET = q<urn:x-suika-fam-cx:charset:>;
10 my $PERL_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.>;
11 my $XML_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.>;
12
13 ## ->create_decode_handle ($charset_uri, $byte_stream, $onerror)
14 sub create_decode_handle ($$$;$) {
15 my $csdef = $Whatpm::Charset::CharsetDef->{$_[1]};
16 my $obj = {
17 category => 0,
18 char_buffer => \(my $s = ''),
19 char_buffer_pos => 0,
20 character_queue => [],
21 filehandle => $_[2],
22 charset => $_[1],
23 byte_buffer => '',
24 onerror => $_[3] || sub {},
25 #onerror_set
26 };
27 if ($csdef->{uri}->{$XML_AUTO_CHARSET} or
28 $obj->{charset} eq $XML_AUTO_CHARSET) {
29 my $b = ''; # UTF-8 w/o BOM
30 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
31 $obj->{input_encoding} = 'UTF-8';
32 if (read $obj->{filehandle}, $b, 256) {
33 no warnings "substr";
34 no warnings "uninitialized";
35 if (substr ($b, 0, 1) eq "<") {
36 if (substr ($b, 1, 1) eq "?") { # ASCII8
37 if ($b =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
38 encoding\s*=\s*["']([^"']*)/x) {
39 $obj->{input_encoding} = $1;
40 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
41 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
42 if (not $csdef->{ascii8} or $csdef->{bom_required}) {
43 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
44 charset_uri => $uri,
45 charset_name => $obj->{input_encoding});
46 }
47 } else {
48 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
49 $obj->{input_encoding} = 'UTF-8';
50 }
51 if (defined $csdef->{no_bom_variant}) {
52 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant}};
53 }
54 } elsif (substr ($b, 1, 1) eq "\x00") {
55 if (substr ($b, 2, 2) eq "?\x00") { # ASCII16LE
56 my $c = $b; $c =~ tr/\x00//d;
57 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
58 encoding\s*=\s*["']([^"']*)/x) {
59 $obj->{input_encoding} = $1;
60 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
61 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
62 if (not $csdef->{ascii16} or $csdef->{ascii16be} or
63 $csdef->{bom_required}) {
64 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
65 charset_uri => $uri,
66 charset_name => $obj->{input_encoding});
67 }
68 } else {
69 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
70 $obj->{input_encoding} = 'UTF-8';
71 }
72 if (defined $csdef->{no_bom_variant16le}) {
73 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};
74 }
75 } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
76 my $c = $b; $c =~ tr/\x00//d;
77 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
78 encoding\s*=\s*["']([^"']*)/x) {
79 $obj->{input_encoding} = $1;
80 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
81 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
82 if (not $csdef->{ascii32} or
83 $csdef->{ascii32endian1234} or
84 $csdef->{ascii32endian2143} or
85 $csdef->{ascii32endian3412} or
86 $csdef->{bom_required}) {
87 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
88 charset_uri => $uri,
89 charset_name => $obj->{input_encoding});
90 }
91 } else {
92 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
93 $obj->{input_encoding} = 'UTF-8';
94 }
95 if (defined $csdef->{no_bom_variant32endian4321}) {
96 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};
97 }
98 }
99 }
100 } elsif (substr ($b, 0, 3) eq "\xEF\xBB\xBF") { # UTF8
101 $obj->{has_bom} = 1;
102 substr ($b, 0, 3) = '';
103 my $c = $b;
104 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
105 encoding\s*=\s*["']([^"']*)/x) {
106 $obj->{input_encoding} = $1;
107 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
108 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
109 if (not $csdef->{utf8_encoding_scheme} or
110 not $csdef->{bom_allowed}) {
111 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
112 charset_uri => $uri,
113 charset_name => $obj->{input_encoding});
114 }
115 } else {
116 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
117 $obj->{input_encoding} = 'UTF-8';
118 }
119 if (defined $csdef->{no_bom_variant}) {
120 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant}};
121 }
122 } elsif (substr ($b, 0, 2) eq "\x00<") {
123 if (substr ($b, 2, 2) eq "\x00?") { # ASCII16BE
124 my $c = $b; $c =~ tr/\x00//d;
125 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
126 encoding\s*=\s*["']([^"']*)/x) {
127 $obj->{input_encoding} = $1;
128 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
129 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
130 if (not $csdef->{ascii16} or $csdef->{ascii16le} or
131 $csdef->{bom_required}) {
132 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
133 charset_uri => $uri,
134 charset_name => $obj->{input_encoding});
135 }
136 } else {
137 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
138 $obj->{input_encoding} = 'UTF-8';
139 }
140 if (defined $csdef->{no_bom_variant16be}) {
141 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};
142 }
143 } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
144 my $c = $b; $c =~ tr/\x00//d;
145 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
146 encoding\s*=\s*["']([^"']*)/x) {
147 $obj->{input_encoding} = $1;
148 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
149 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
150 if (not $csdef->{ascii32} or
151 $csdef->{ascii32endian1234} or
152 $csdef->{ascii32endian2143} or
153 $csdef->{ascii32endian4321} or
154 $csdef->{bom_required}) {
155 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
156 charset_uri => $uri,
157 charset_name => $obj->{input_encoding});
158 }
159 } else {
160 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
161 $obj->{input_encoding} = 'UTF-8';
162 }
163 if (defined $csdef->{no_bom_variant32endian3412}) {
164 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};
165 }
166 }
167 } elsif (substr ($b, 0, 2) eq "\xFE\xFF") {
168 if (substr ($b, 2, 2) eq "\x00<") { # ASCII16BE
169 $obj->{has_bom} = 1;
170 substr ($b, 0, 2) = '';
171 my $c = $b; $c =~ tr/\x00//d;
172 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
173 encoding\s*=\s*["']([^"']*)/x) {
174 $obj->{input_encoding} = $1;
175 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
176 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
177 if (not $csdef->{ascii16} or
178 $csdef->{ascii16le} or
179 not $csdef->{bom_allowed}) {
180 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
181 charset_uri => $uri,
182 charset_name => $obj->{input_encoding});
183 }
184 } else {
185 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
186 $obj->{input_encoding} = 'UTF-16';
187 }
188 if (defined $csdef->{no_bom_variant16be}) {
189 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};
190 }
191 } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
192 $obj->{has_bom} = 1;
193 substr ($b, 0, 4) = '';
194 my $c = $b; $c =~ tr/\x00//d;
195 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
196 encoding\s*=\s*["']([^"']*)/x) {
197 $obj->{input_encoding} = $1;
198 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
199 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
200 if (not $csdef->{ascii32} or
201 $csdef->{ascii32endian1234} or
202 $csdef->{ascii32endian2143} or
203 $csdef->{ascii32endian4321} or
204 not $csdef->{bom_allowed}) {
205 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
206 charset_uri => $uri,
207 charset_name => $obj->{input_encoding});
208 }
209 } else {
210 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
211 $obj->{input_encoding} = 'UTF-16';
212 $obj->{byte_buffer} .= "\x00\x00";
213 }
214 if (defined $csdef->{no_bom_variant32endian3412}) {
215 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};
216 }
217 } else {
218 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
219 $obj->{input_encoding} = 'UTF-16';
220 substr ($b, 0, 2) = '';
221 $obj->{has_bom} = 1;
222 }
223 } elsif (substr ($b, 0, 2) eq "\xFF\xFE") {
224 if (substr ($b, 2, 2) eq "<\x00") { # ASCII16LE
225 $obj->{has_bom} = 1;
226 substr ($b, 0, 2) = '';
227 my $c = $b; $c =~ tr/\x00//d;
228 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
229 encoding\s*=\s*["']([^"']*)/x) {
230 $obj->{input_encoding} = $1;
231 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
232 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
233 if (not $csdef->{ascii16} or
234 $csdef->{ascii16be} or
235 not $csdef->{bom_allowed}) {
236 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
237 charset_uri => $uri,
238 charset_name => $obj->{input_encoding});
239 }
240 } else {
241 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
242 $obj->{input_encoding} = 'UTF-16';
243 }
244 if (defined $csdef->{no_bom_variant16le}) {
245 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};
246 }
247 } elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
248 $obj->{has_bom} = 1;
249 substr ($b, 0, 4) = '';
250 my $c = $b; $c =~ tr/\x00//d;
251 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
252 encoding\s*=\s*["']([^"']*)/x) {
253 $obj->{input_encoding} = $1;
254 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
255 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
256 if (not $csdef->{ascii32} or
257 $csdef->{ascii32endian1234} or
258 $csdef->{ascii32endian2143} or
259 $csdef->{ascii32endian3412} or
260 not $csdef->{bom_allowed}) {
261 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
262 charset_uri => $uri,
263 charset_name => $obj->{input_encoding});
264 }
265 } else {
266 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
267 $obj->{input_encoding} = 'UTF-16';
268 $obj->{byte_buffer} .= "\x00\x00";
269 }
270 if (defined $csdef->{no_bom_variant32endian4321}) {
271 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};
272 }
273 } else {
274 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
275 $obj->{input_encoding} = 'UTF-16';
276 substr ($b, 0, 2) = '';
277 $obj->{has_bom} = 1;
278 }
279 } elsif (substr ($b, 0, 2) eq "\x00\x00") {
280 if (substr ($b, 2, 2) eq "\x00<") { # ASCII32Endian1234
281 my $c = $b; $c =~ tr/\x00//d;
282 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
283 encoding\s*=\s*["']([^"']*)/x) {
284 $obj->{input_encoding} = $1;
285 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
286 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
287 if (not $csdef->{ascii32} or
288 $csdef->{ascii32endian2143} or
289 $csdef->{ascii32endian3412} or
290 $csdef->{ascii32endian4321} or
291 $csdef->{bom_required}) {
292 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
293 charset_uri => $uri,
294 charset_name => $obj->{input_encoding});
295 }
296 } else {
297 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
298 $obj->{input_encoding} = 'UTF-8';
299 }
300 if (defined $csdef->{no_bom_variant32endian1234}) {
301 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};
302 }
303 } elsif (substr ($b, 2, 2) eq "<\x00") { # ASCII32Endian2143
304 my $c = $b; $c =~ tr/\x00//d;
305 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
306 encoding\s*=\s*["']([^"']*)/x) {
307 $obj->{input_encoding} = $1;
308 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
309 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
310 if (not $csdef->{ascii32} or
311 $csdef->{ascii32endian1234} or
312 $csdef->{ascii32endian3412} or
313 $csdef->{ascii32endian4321} or
314 $csdef->{bom_required}) {
315 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
316 charset_uri => $uri,
317 charset_name => $obj->{input_encoding});
318 }
319 } else {
320 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
321 $obj->{input_encoding} = 'UTF-8';
322 }
323 if (defined $csdef->{no_bom_variant32endian2143}) {
324 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};
325 }
326 } elsif (substr ($b, 2, 2) eq "\xFE\xFF") { # ASCII32Endian1234
327 $obj->{has_bom} = 1;
328 substr ($b, 0, 4) = '';
329 my $c = $b; $c =~ tr/\x00//d;
330 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
331 encoding\s*=\s*["']([^"']*)/x) {
332 $obj->{input_encoding} = $1;
333 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
334 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
335 if (not $csdef->{ascii32} or
336 $csdef->{ascii32endian2143} or
337 $csdef->{ascii32endian3412} or
338 $csdef->{ascii32endian4321} or
339 $csdef->{bom_required}) {
340 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
341 charset_uri => $uri,
342 charset_name => $obj->{input_encoding});
343 }
344 } else {
345 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
346 $obj->{input_encoding} = 'UTF-8';
347 $obj->{has_bom} = 0;
348 $obj->{byte_buffer} .= "\x00\x00\xFE\xFF";
349 }
350 if (defined $csdef->{no_bom_variant32endian1234}) {
351 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};
352 }
353 } elsif (substr ($b, 2, 2) eq "\xFF\xFE") { # ASCII32Endian2143
354 $obj->{has_bom} = 1;
355 substr ($b, 0, 4) = '';
356 my $c = $b; $c =~ tr/\x00//d;
357 if ($c =~ /^<\?xml\s+(?:version\s*=\s*["'][^"']*["']\s*)?
358 encoding\s*=\s*["']([^"']*)/x) {
359 $obj->{input_encoding} = $1;
360 my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
361 $csdef = $Whatpm::Charset::CharsetDef->{$uri};
362 if (not $csdef->{ascii32} or
363 $csdef->{ascii32endian1234} or
364 $csdef->{ascii32endian3412} or
365 $csdef->{ascii32endian4321} or
366 $csdef->{bom_required}) {
367 $obj->{onerror}->(undef, 'charset-name-mismatch-error',
368 charset_uri => $uri,
369 charset_name => $obj->{input_encoding});
370 }
371 } else {
372 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
373 $obj->{input_encoding} = 'UTF-8';
374 $obj->{has_bom} = 0;
375 $obj->{byte_buffer} .= "\x00\x00\xFF\xFE";
376 }
377 if (defined $csdef->{no_bom_variant32endian2143}) {
378 $csdef = $Whatpm::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};
379 }
380 }
381 # \x4C\x6F\xA7\x94 EBCDIC
382 } # buffer
383 $obj->{byte_buffer} .= $b;
384 } # read
385 } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-8'}) {
386 ## BOM is optional.
387 my $b = '';
388 if (read $obj->{filehandle}, $b, 3) {
389 if ($b eq "\xEF\xBB\xBF") {
390 $obj->{has_bom} = 1;
391 } else {
392 $obj->{byte_buffer} .= $b;
393 }
394 }
395 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; # UTF-8 w/o BOM
396 } elsif ($csdef->{uri}->{$XML_CHARSET.'utf-16'}) {
397 ## BOM is mandated.
398 my $b = '';
399 if (read $obj->{filehandle}, $b, 2) {
400 if ($b eq "\xFE\xFF") {
401 $obj->{has_bom} = 1; # UTF-16BE w/o BOM
402 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
403 } elsif ($b eq "\xFF\xFE") {
404 $obj->{has_bom} = 1; # UTF-16LE w/o BOM
405 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
406 } else {
407 $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
408 $obj->{has_bom} = 0;
409 $obj->{byte_buffer} .= $b; # UTF-16BE w/o BOM
410 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
411 }
412 } else {
413 $obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
414 $obj->{has_bom} = 0; # UTF-16BE w/o BOM
415 $csdef = $Whatpm::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
416 }
417 }
418
419 if ($csdef->{uri}->{$XML_CHARSET.'iso-2022-jp'}) {
420 $obj->{state_2440} = 'gl-jis-1997-swapped';
421 $obj->{state_2442} = 'gl-jis-1997';
422 $obj->{state} = 'state_2842';
423 require Encode::GLJIS1997Swapped;
424 require Encode::GLJIS1997;
425 if (Encode::find_encoding ($obj->{state_2440}) and
426 Encode::find_encoding ($obj->{state_2442})) {
427 return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP';
428 }
429 } elsif ($csdef->{uri}->{$IANA_CHARSET.'iso-2022-jp'}) {
430 $obj->{state_2440} = 'gl-jis-1978';
431 $obj->{state_2442} = 'gl-jis-1983';
432 $obj->{state} = 'state_2842';
433 require Encode::GLJIS1978;
434 require Encode::GLJIS1983;
435 if (Encode::find_encoding ($obj->{state_2440}) and
436 Encode::find_encoding ($obj->{state_2442})) {
437 return bless $obj, 'Whatpm::Charset::DecodeHandle::ISO2022JP';
438 }
439 } elsif (defined $csdef->{perl_name}->[0]) {
440 if ($csdef->{uri}->{$XML_CHARSET.'euc-jp'} or
441 $csdef->{uri}->{$IANA_CHARSET.'euc-jp'}) {
442 $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
443 require Encode::EUCJP1997;
444 if (Encode::find_encoding ($obj->{perl_encoding_name})) {
445 $obj->{category} |= Message::Charset::Info::CHARSET_CATEGORY_EUCJP;
446 return bless $obj, 'Whatpm::Charset::DecodeHandle::Encode';
447 }
448 } elsif ($csdef->{uri}->{$XML_CHARSET.'shift_jis'} or
449 $csdef->{uri}->{$IANA_CHARSET.'shift_jis'}) {
450 $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
451 require Encode::ShiftJIS1997;
452 if (Encode::find_encoding ($obj->{perl_encoding_name})) {
453 return bless $obj, 'Whatpm::Charset::DecodeHandle::Encode';
454 }
455 } elsif ($csdef->{is_block_safe}) {
456 $obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
457 require Encode;
458 if (Encode::find_encoding ($obj->{perl_encoding_name})) {
459 return bless $obj, 'Whatpm::Charset::DecodeHandle::Encode';
460 }
461 }
462 }
463
464 $obj->{onerror}->(undef, 'charset-not-supported-error',
465 charset_uri => $obj->{charset});
466 return undef;
467 } # create_decode_handle
468
469 sub name_to_uri ($$$) {
470 my $domain = $_[1];
471 my $name = lc $_[2];
472
473 if ($domain eq 'ietf') {
474 return $IANA_CHARSET . $name;
475 } elsif ($domain eq 'xml') {
476 if ({
477 'utf-8' => 1,
478 'utf-16' => 1,
479 'iso-10646-ucs-2' => 1,
480 'iso-10646-ucs-4' => 1,
481 'iso-8859-1' => 1,
482 'iso-8859-2' => 1,
483 'iso-8859-3' => 1,
484 'iso-8859-4' => 1,
485 'iso-8859-5' => 1,
486 'iso-8859-6' => 1,
487 'iso-8859-7' => 1,
488 'iso-8859-8' => 1,
489 'iso-8859-9' => 1,
490 'iso-8859-10' => 1,
491 'iso-8859-11' => 1,
492 'iso-8859-13' => 1,
493 'iso-8859-14' => 1,
494 'iso-8859-15' => 1,
495 'iso-8859-16' => 1,
496 'iso-2022-jp' => 1,
497 'shift_jis' => 1,
498 'euc-jp' => 1,
499 }->{$name}) {
500 return $XML_CHARSET . $name;
501 }
502
503 my $uri = $IANA_CHARSET . $name;
504 return $uri if $Whatpm::Charset::CharsetDef->{$uri};
505
506 return $XML_CHARSET . $name;
507 } else {
508 return undef;
509 }
510 } # name_to_uri
511
512 sub uri_to_name ($$$) {
513 my (undef, $domain, $uri) = @_;
514
515 if ($domain eq 'xml') {
516 my $v = $Whatpm::Charset::CharsetDef->{$uri}->{xml_name};
517 return $v if defined $v;
518
519 if (substr ($uri, 0, length $XML_CHARSET) eq $XML_CHARSET) {
520 return substr ($uri, length $XML_CHARSET);
521 }
522
523 $domain = 'ietf'; ## TODO: XML encoding name has smaller range
524 }
525
526 if ($domain eq 'ietf') {
527 my $v = $Whatpm::Charset::CharsetDef->{$uri}->{iana_name};
528 return $v->[0] if defined $v;
529
530 if (substr ($uri, 0, length $IANA_CHARSET) eq $IANA_CHARSET) {
531 return substr ($uri, length $IANA_CHARSET);
532 }
533 }
534
535 return undef;
536 } # uri_to_name
537
538 require IO::Handle;
539
540 package Whatpm::Charset::DecodeHandle::ByteBuffer;
541
542 ## NOTE: Provides a byte buffer wrapper object.
543
544 sub new ($$) {
545 my $self = bless {
546 buffer => '',
547 }, shift;
548 $self->{filehandle} = shift;
549 return $self;
550 } # new
551
552 sub read {
553 my $self = shift;
554 my $pos = length $self->{buffer};
555 my $r = $self->{filehandle}->read ($self->{buffer}, $_[1], $pos);
556 substr ($_[0], $_[2]) = substr ($self->{buffer}, $pos);
557 ## NOTE: This would do different behavior from Perl's standard
558 ## |read| when $pos points beyond the end of the string.
559 return $r;
560 } # read
561
562 sub close { $_[0]->{filehandle}->close }
563
564 package Whatpm::Charset::DecodeHandle::CharString;
565
566 ## NOTE: Same as Perl's standard |open $handle, '<', \$char_string|,
567 ## but supports |ungetc| and other extensions.
568
569 sub new ($$) {
570 my $self = bless {pos => 0}, shift;
571 $self->{string} = shift; # must be a scalar ref
572 return $self;
573 } # new
574
575 sub getc ($) {
576 my $self = shift;
577 if ($self->{pos} < length ${$self->{string}}) {
578 return substr ${$self->{string}}, $self->{pos}++, 1;
579 } else {
580 return undef;
581 }
582 } # getc
583
584 sub read ($$$$) {
585 #my ($self, $scalar, $length, $offset) = @_;
586 my $self = $_[0];
587 my $length = $_[2] || 0;
588 my $offset = $_[3];
589 ## NOTE: We don't support standard Perl semantics if $offset is
590 ## greater than the length of $scalar.
591 substr ($_[1], $offset) = substr (${$self->{string}}, $self->{pos}, $length);
592 my $count = (length $_[1]) - $offset;
593 $self->{pos} += $count;
594 return $count;
595 } # read
596
597 sub manakai_read_until ($$$;$) {
598 #my ($self, $scalar, $pattern, $offset) = @_;
599 my $self = $_[0];
600 pos (${$self->{string}}) = $self->{pos};
601 if (${$self->{string}} =~ /\G(?>$_[2])+/) {
602 substr ($_[1], $_[3]) = substr (${$self->{string}}, $-[0], $+[0] - $-[0]);
603 $self->{pos} += $+[0] - $-[0];
604 return $+[0] - $-[0];
605 } else {
606 return 0;
607 }
608 } # manakai_read_until
609
610 sub ungetc ($$) {
611 my $self = shift;
612 ## Ignore second parameter.
613 $self->{pos}-- if $self->{pos} > 0;
614 } # ungetc
615
616 sub close ($) { }
617
618 sub onerror ($;$) { }
619
620 package Whatpm::Charset::DecodeHandle::Encode;
621
622 ## NOTE: Provides a Perl |Encode| module wrapper object.
623
624 sub charset ($) { $_[0]->{charset} }
625
626 sub close ($) { $_[0]->{filehandle}->close }
627
628 sub getc ($) {
629 my $c = '';
630 my $l = $_[0]->read ($c, 1);
631 if ($l) {
632 return $c;
633 } else {
634 return undef;
635 }
636 } # getc
637
638 sub read ($$$;$) {
639 my $self = $_[0];
640 #my $scalar = $_[1];
641 my $length = $_[2];
642 my $offset = $_[3] || 0;
643 my $count = 0;
644 my $eof;
645 ## NOTE: It is incompatible with the standard Perl semantics
646 ## if $offset is greater than the length of $scalar.
647
648 A: {
649 return $count if $length < 1;
650
651 if (my $l = (length ${$self->{char_buffer}}) - $self->{char_buffer_pos}) {
652 if ($l >= $length) {
653 substr ($_[1], $offset)
654 = substr (${$self->{char_buffer}}, $self->{char_buffer_pos},
655 $length);
656 $count += $length;
657 $self->{char_buffer_pos} += $length;
658 $length = 0;
659 return $count;
660 } else {
661 substr ($_[1], $offset)
662 = substr (${$self->{char_buffer}}, $self->{char_buffer_pos});
663 $count += $l;
664 $length -= $l;
665 ${$self->{char_buffer}} = '';
666 $self->{char_buffer_pos} = 0;
667 }
668 $offset = length $_[1];
669 }
670
671 if ($eof) {
672 return $count;
673 }
674
675 my $error;
676 if ($self->{continue}) {
677 if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
678 length $self->{byte_buffer})) {
679 #
680 } else {
681 $error = 1;
682 }
683 $self->{continue} = 0;
684 } elsif (512 > length $self->{byte_buffer}) {
685 if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
686 length $self->{byte_buffer})) {
687 #
688 } else {
689 $eof = 1;
690 }
691 }
692
693 unless ($error) {
694 if (not $self->{bom_checked}) {
695 if (defined $self->{bom_pattern}) {
696 if ($self->{byte_buffer} =~ s/^$self->{bom_pattern}//) {
697 $self->{has_bom} = 1;
698 }
699 }
700 $self->{bom_checked} = 1;
701 }
702
703 my $string = Encode::decode ($self->{perl_encoding_name},
704 $self->{byte_buffer},
705 Encode::FB_QUIET ());
706 if (length $string) {
707 $self->{char_buffer} = \$string;
708 $self->{char_buffer_pos} = 0;
709 if (length $self->{byte_buffer}) {
710 $self->{continue} = 1;
711 }
712 } else {
713 if (length $self->{byte_buffer}) {
714 $error = 1;
715 } else {
716 ## NOTE: No further input.
717 redo A;
718 }
719 }
720 }
721
722 if ($error) {
723 my $r = substr $self->{byte_buffer}, 0, 1, '';
724 my $fallback;
725 my $etype = 'illegal-octets-error';
726 my %earg;
727 if ($self->{category}
728 & Message::Charset::Info::CHARSET_CATEGORY_SJIS) {
729 if ($r =~ /^[\x81-\x9F\xE0-\xFC]/) {
730 if ($self->{byte_buffer} =~ s/(.)//s) {
731 $r .= $1; # not limited to \x40-\xFC - \x7F
732 $etype = 'unassigned-code-point-error';
733 }
734 ## NOTE: Range [\xF0-\xFC] is unassigned and may be used as a
735 ## single-byte character or as the first-byte of a double-byte
736 ## character, according to JIS X 0208:1997 Appendix 1. However, the
737 ## current practice is using the range as first-bytes of double-byte
738 ## characters.
739 } elsif ($r =~ /^[\x80\xA0\xFD-\xFF]/) {
740 $etype = 'unassigned-code-point-error';
741 }
742 } elsif ($self->{category}
743 & Message::Charset::Info::CHARSET_CATEGORY_EUCJP) {
744 if ($r =~ /^[\xA1-\xFE]/) {
745 if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
746 $r .= $1;
747 $etype = 'unassigned-code-point-error';
748 }
749 } elsif ($r eq "\x8F") {
750 if ($self->{byte_buffer} =~ s/^([\xA1-\xFE][\xA1-\xFE]?)//) {
751 $r .= $1;
752 $etype = 'unassigned-code-point-error' if length $1 == 2;
753 }
754 } elsif ($r eq "\x8E") {
755 if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
756 $r .= $1;
757 $etype = 'unassigned-code-point-error';
758 }
759 } elsif ($r eq "\xA0" or $r eq "\xFF") {
760 $etype = 'unassigned-code-point-error';
761 }
762 } else {
763 $fallback = $self->{fallback}->{$r};
764 if (defined $fallback) {
765 ## NOTE: This is an HTML5 parse error.
766 $etype = 'fallback-char-error';
767 $earg{char} = \$fallback;
768 } elsif (exists $self->{fallback}->{$r}) {
769 ## NOTE: This is an HTML5 parse error. In addition, the octet
770 ## is not assigned with a character.
771 $etype = 'fallback-unassigned-error';
772 }
773 }
774
775 ## NOTE: Fixup line/column number by counting the number of
776 ## lines/columns in the string that is to be retuend by this
777 ## method call.
778 my $line_diff = 0;
779 my $col_diff = 0;
780 my $set_col;
781 for (my $i = 0; $i < $count; $i++) {
782 my $s = substr $_[1], $i - $count, 1;
783 if ($s eq "\x0D") {
784 $line_diff++;
785 $col_diff = 0;
786 $set_col = 1;
787 $i++ if substr ($_[1], $i - $count + 1, 1) eq "\x0A";
788 } elsif ($s eq "\x0A") {
789 $line_diff++;
790 $col_diff = 0;
791 $set_col = 1;
792 } else {
793 $col_diff++;
794 }
795 }
796 my $i = $self->{char_buffer_pos};
797 if ($count and substr (${$self->{char_buffer}}, -1, 1) eq "\x0D") {
798 if (substr (${$self->{char_buffer}}, $i, 1) eq "\x0A") {
799 $i++;
800 }
801 }
802 my $cb_length = length ${$self->{char_buffer}};
803 for (; $i < $cb_length; $i++) {
804 my $s = substr $_[1], $i, 1;
805 if ($s eq "\x0D") {
806 $line_diff++;
807 $col_diff = 0;
808 $set_col = 1;
809 $i++ if substr ($_[1], $i + 1, 1) eq "\x0A";
810 } elsif ($s eq "\x0A") {
811 $line_diff++;
812 $col_diff = 0;
813 $set_col = 1;
814 } else {
815 $col_diff++;
816 }
817 }
818 $self->{onerror}->($self, $etype, octets => \$r, %earg,
819 level => $self->{level}->{$self->{error_level}->{$etype}},
820 line_diff => $line_diff,
821 ($set_col ? (column => 1) : ()),
822 column_diff => $col_diff);
823 ## NOTE: Error handler may modify |octets| parameter, which
824 ## would be returned as part of the output. Note that what
825 ## is returned would affect what |manakai_read_until| returns.
826 ${$self->{char_buffer}} .= defined $fallback ? $fallback : $r;
827 }
828
829 redo A;
830 } # A
831 } # read
832
833 sub manakai_read_until ($$$;$) {
834 #my ($self, $scalar, $pattern, $offset) = @_;
835 my $self = $_[0];
836 my $s = '';
837 $self->read ($s, 255);
838 if ($s =~ /^(?>$_[2])+/) {
839 my $rem_length = (length $s) - $+[0];
840 if ($rem_length) {
841 if ($self->{char_buffer_pos} > $rem_length) {
842 $self->{char_buffer_pos} -= $rem_length;
843 } else {
844 substr (${$self->{char_buffer}}, 0, $self->{char_buffer_pos})
845 = substr ($s, $+[0]);
846 $self->{char_buffer_pos} = 0;
847 }
848 }
849 substr ($_[1], $_[3]) = substr ($s, $-[0], $+[0] - $-[0]);
850 return $+[0];
851 } elsif (length $s) {
852 if ($self->{char_buffer_pos} > length $s) {
853 $self->{char_buffer_pos} -= length $s;
854 } else {
855 substr (${$self->{char_buffer}}, 0, $self->{char_buffer_pos}) = $s;
856 $self->{char_buffer_pos} = 0;
857 }
858 }
859 return 0;
860 } # manakai_read_until
861
862 sub has_bom ($) { $_[0]->{has_bom} }
863
864 sub input_encoding ($) {
865 my $v = $_[0]->{input_encoding};
866 return $v if defined $v;
867
868 my $uri = $_[0]->{charset};
869 if (defined $uri) {
870 return Whatpm::Charset::DecodeHandle->uri_to_name (xml => $uri);
871 }
872
873 return undef;
874 } # input_encoding
875
876 sub onerror ($;$) {
877 if (@_ > 1) {
878 if ($_[1]) {
879 $_[0]->{onerror} = $_[1];
880 $_[0]->{onerror_set} = 1;
881 } else {
882 $_[0]->{onerror} = sub { };
883 delete $_[0]->{onerror_set};
884 }
885 }
886
887 return $_[0]->{onerror_set} ? $_[0]->{onerror} : undef;
888 } # onerror
889
890 sub ungetc ($$) {
891 unshift @{$_[0]->{character_queue}}, chr int ($_[1] or 0);
892 } # ungetc
893
894 package Whatpm::Charset::DecodeHandle::ISO2022JP;
895 push our @ISA, 'Whatpm::Charset::DecodeHandle::Encode';
896
897 sub getc ($) {
898 my $self = $_[0];
899 return shift @{$self->{character_queue}} if @{$self->{character_queue}};
900
901 my $r;
902 A: {
903 my $error;
904 if ($self->{continue}) {
905 if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
906 length $self->{byte_buffer})) {
907 #
908 } else {
909 $error = 1;
910 }
911 $self->{continue} = 0;
912 } elsif (512 > length $self->{byte_buffer}) {
913 $self->{filehandle}->read ($self->{byte_buffer}, 256,
914 length $self->{byte_buffer});
915 }
916
917 unless ($error) {
918 if ($self->{byte_buffer} =~ s/^\x1B(\x24[\x40\x42]|\x28[\x42\x4A])//) {
919 $self->{state} = {
920 "\x24\x40" => 'state_2440',
921 "\x24\x42" => 'state_2442',
922 "\x28\x42" => 'state_2842',
923 "\x28\x4A" => 'state_284A',
924 }->{$1};
925 redo A;
926 } elsif ($self->{state} eq 'state_2842') { # IRV
927 if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {
928 push @{$self->{character_queue}}, split //, $1;
929 $r = shift @{$self->{character_queue}};
930 } else {
931 if (length $self->{byte_buffer}) {
932 $error = 1;
933 } else {
934 $r = undef;
935 }
936 }
937 } elsif ($self->{state} eq 'state_284A') { # 0201
938 if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {
939 my $v = $1;
940 $v =~ tr/\x5C\x7E/\xA5\x{203E}/;
941 push @{$self->{character_queue}}, split //, $v;
942 $r = shift @{$self->{character_queue}};
943 } else {
944 if (length $self->{byte_buffer}) {
945 $error = 1;
946 } else {
947 $r = undef;
948 $self->{onerror}->($self, 'invalid-state-error',
949 state => $self->{state},
950 level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});
951 }
952 }
953 } elsif ($self->{state} eq 'state_2442') { # 1983
954 my $v = Encode::decode ($self->{state_2442},
955 $self->{byte_buffer},
956 Encode::FB_QUIET ());
957 if (length $v) {
958 push @{$self->{character_queue}}, split //, $v;
959 $r = shift @{$self->{character_queue}};
960 } else {
961 if (length $self->{byte_buffer}) {
962 $error = 1;
963 } else {
964 $r = undef;
965 $self->{onerror}->($self, 'invalid-state-error',
966 state => $self->{state},
967 level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});
968 }
969 }
970 } elsif ($self->{state} eq 'state_2440') { # 1978
971 my $v = Encode::decode ($self->{state_2440},
972 $self->{byte_buffer},
973 Encode::FB_QUIET ());
974 if (length $v) {
975 push @{$self->{character_queue}}, split //, $v;
976 $r = shift @{$self->{character_queue}};
977 } else {
978 if (length $self->{byte_buffer}) {
979 $error = 1;
980 } else {
981 $r = undef;
982 $self->{onerror}->($self, 'invalid-state-error',
983 state => $self->{state},
984 level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});
985 }
986 }
987 } else {
988 $error = 1;
989 }
990 }
991
992 if ($error) {
993 $r = substr $self->{byte_buffer}, 0, 1, '';
994 my $etype = 'illegal-octets-error';
995 if (($self->{state} eq 'state_2442' or
996 $self->{state} eq 'state_2440') and
997 $r =~ /^[\x21-\x7E]/ and
998 $self->{byte_buffer} =~ s/^([\x21-\x7E])//) {
999 $r .= $1;
1000 $etype = 'unassigned-code-point-error';
1001 } elsif ($r eq "\x1B" and
1002 $self->{byte_buffer} =~ s/^\(H//) { # Old 0201
1003 $r .= "(H";
1004 $self->{state} = 'state_284A';
1005 }
1006 $self->{onerror}->($self, $etype, octets => \$r,
1007 level => $self->{level}->{$self->{error_level}->{$etype}});
1008 }
1009 } # A
1010
1011 return $r;
1012 } # getc
1013
1014 ## TODO: This is not good for performance. Should be replaced
1015 ## by read-centric implementation.
1016 sub read ($$$;$) {
1017 #my ($self, $scalar, $length, $offset) = @_;
1018 my $length = $_[2];
1019 my $r = '';
1020 while ($length > 0) {
1021 my $c = $_[0]->getc;
1022 last unless defined $c;
1023 $r .= $c;
1024 $length--;
1025 }
1026 substr ($_[1], $_[3]) = $r;
1027 ## NOTE: This would do different thing from what Perl's |read| do
1028 ## if $offset points beyond the end of the $scalar.
1029 return length $r;
1030 } # read
1031
1032 sub manakai_read_until ($$$;$) {
1033 #my ($self, $scalar, $pattern, $offset) = @_;
1034 my $self = $_[0];
1035 my $c = $self->getc;
1036 if ($c =~ /^$_[2]/) {
1037 substr ($_[1], $_[3]) = $c;
1038 return 1;
1039 } elsif (defined $c) {
1040 $self->ungetc (ord $c);
1041 return 0;
1042 } else {
1043 return 0;
1044 }
1045 } # manakai_read_until
1046
1047 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us-ascii'} =
1048 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us'} =
1049 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso646-us'} =
1050 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:cp367'} =
1051 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ibm367'} =
1052 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1986'} =
1053 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1968'} =
1054 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-ir-6'} =
1055 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:csascii'} =
1056 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_646.irv:1991'} =
1057 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ascii'} = {ascii8 =>
1058 '1',
1059 is_block_safe =>
1060 '1',
1061 ietf_name =>
1062 ['ansi_x3.4-1968',
1063 'ansi_x3.4-1986',
1064 'ascii',
1065 'cp367',
1066 'csascii',
1067 'ibm367',
1068 'iso-ir-6',
1069 'iso646-us',
1070 'iso_646.irv:1991',
1071 'us',
1072 'us-ascii',
1073 'us-ascii'],
1074 mime_name =>
1075 'us-ascii',
1076 perl_name =>
1077 ['ascii',
1078 'iso-646-us',
1079 'us-ascii'],
1080 utf8_encoding_scheme =>
1081 '1',
1082 'uri',
1083 {'urn:x-suika-fam-cx:charset:ansi_x3.4-1968',
1084 '1',
1085 'urn:x-suika-fam-cx:charset:ansi_x3.4-1986',
1086 '1',
1087 'urn:x-suika-fam-cx:charset:ascii',
1088 '1',
1089 'urn:x-suika-fam-cx:charset:cp367',
1090 '1',
1091 'urn:x-suika-fam-cx:charset:csascii',
1092 '1',
1093 'urn:x-suika-fam-cx:charset:ibm367',
1094 '1',
1095 'urn:x-suika-fam-cx:charset:iso-ir-6',
1096 '1',
1097 'urn:x-suika-fam-cx:charset:iso646-us',
1098 '1',
1099 'urn:x-suika-fam-cx:charset:iso_646.irv:1991',
1100 '1',
1101 'urn:x-suika-fam-cx:charset:us',
1102 '1',
1103 'urn:x-suika-fam-cx:charset:us-ascii',
1104 '1'},
1105 };
1106
1107 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl'} = {perl_name =>
1108 ['ascii-ctrl'],
1109 'uri',
1110 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl',
1111 '1'}};
1112 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null'} = {perl_name =>
1113 ['null'],
1114 'uri',
1115 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null',
1116 '1'}};
1117 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8'} = {ascii8 =>
1118 '1',
1119 bom_allowed =>
1120 '1',
1121 no_bom_variant =>
1122 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
1123 utf8_encoding_scheme =>
1124 '1',
1125 'uri',
1126 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8',
1127 '1'},
1128 xml_name => 'UTF-8',
1129 };
1130
1131 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279'} = {ascii8 =>
1132 '1',
1133 bom_allowed =>
1134 '1',
1135 no_bom_variant =>
1136 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
1137 utf8_encoding_scheme =>
1138 '1',
1139 'uri',
1140 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279',
1141 '1'}};
1142 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8'} = {
1143 ascii8 => 1,
1144 is_block_safe =>
1145 '1',
1146 perl_name =>
1147 ['utf-8'],
1148 utf8_encoding_scheme =>
1149 '1',
1150 'uri',
1151 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',
1152 '1'}};
1153 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-8'} = {
1154 ascii8 => 1,
1155 bom_allowed =>
1156 '1',
1157 no_bom_variant =>
1158 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',
1159 ietf_name =>
1160 ['utf-8'],
1161 mime_name =>
1162 'utf-8',
1163 utf8_encoding_scheme =>
1164 '1',
1165 'uri',
1166 {'urn:x-suika-fam-cx:charset:utf-8',
1167 '1'},
1168 };
1169
1170 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8'} = {ascii8 =>
1171 '1',
1172 is_block_safe =>
1173 '1',
1174 perl_name =>
1175 ['utf8'],
1176 utf8_encoding_scheme =>
1177 '1',
1178 'uri',
1179 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
1180 '1'}};
1181 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16'} = {
1182 ascii16 => 1,
1183 bom_allowed =>
1184 '1',
1185 bom_required =>
1186 '1',
1187 no_bom_variant =>
1188 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1189 no_bom_variant16be =>
1190 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1191 no_bom_variant16le =>
1192 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1193 perl_name =>
1194 ['utf-16'],
1195 'uri',
1196 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16',
1197 '1'},
1198 xml_name => 'UTF-16',
1199 };
1200
1201 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16'} = {
1202 ascii16 => 1,
1203 bom_allowed =>
1204 '1',
1205 no_bom_variant =>
1206 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1207 no_bom_variant16be =>
1208 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1209 no_bom_variant16le =>
1210 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1211 ietf_name =>
1212 ['utf-16'],
1213 mime_name =>
1214 'utf-16',
1215 'uri',
1216 {'urn:x-suika-fam-cx:charset:utf-16',
1217 '1'},
1218 };
1219
1220 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16be'} = {
1221 ascii16 => 1,
1222 ascii16be => 1,
1223 bom_allowed =>
1224 '1',
1225 no_bom_variant =>
1226 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1227 no_bom_variant16be =>
1228 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1229 ietf_name =>
1230 ['utf-16be'],
1231 mime_name =>
1232 'utf-16be',
1233 'uri',
1234 {'urn:x-suika-fam-cx:charset:utf-16be',
1235 '1'},
1236 };
1237
1238 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16le'} = {
1239 ascii16 => 1,
1240 ascii16le => 1,
1241 bom_allowed =>
1242 '1',
1243 no_bom_variant =>
1244 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1245 no_bom_variant16le =>
1246 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1247 ietf_name =>
1248 ['utf-16le'],
1249 mime_name =>
1250 'utf-16le',
1251 'uri',
1252 {'urn:x-suika-fam-cx:charset:utf-16le',
1253 '1'},
1254 };
1255
1256 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be'} = {
1257 ascii16 => 1,
1258 ascii16be => 1,
1259 is_block_safe =>
1260 '1',
1261 perl_name =>
1262 ['utf-16be'],
1263 'uri',
1264 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1265 '1'}};
1266 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le'} = {
1267 ascii16 => 1,
1268 ascii16le => 1,
1269 is_block_safe =>
1270 '1',
1271 perl_name =>
1272 ['utf-16le'],
1273 'uri',
1274 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1275 '1'}};
1276 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-2'} = {
1277 ascii16 => 1,
1278 bom_allowed =>
1279 '1',
1280 no_bom_variant =>
1281 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1282 no_bom_variant16be =>
1283 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',
1284 no_bom_variant16le =>
1285 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1286 ietf_name =>
1287 ['csunicode',
1288 'iso-10646-ucs-2'],
1289 mime_name =>
1290 'iso-10646-ucs-2',
1291 'uri',
1292 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2',
1293 '1',
1294 'urn:x-suika-fam-cx:charset:iso-10646-ucs-2',
1295 '1'},
1296 xml_name => 'ISO-10646-UCS-2',
1297 };
1298
1299 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be'} = {
1300 ascii16 => 1,
1301 ascii16be => 1,
1302 is_block_safe =>
1303 '1',
1304 perl_name =>
1305 ['ucs-2be'],
1306 'uri',
1307 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',
1308 '1'}};
1309 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le'} = {
1310 ascii16 => 1,
1311 ascii16le => 1,
1312 is_block_safe =>
1313 '1',
1314 perl_name =>
1315 ['ucs-2le'],
1316 'uri',
1317 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1318 '1'}};
1319 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-4'} = {
1320 ascii32 => 1,
1321 bom_allowed =>
1322 '1',
1323 no_bom_variant =>
1324 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1325 no_bom_variant32endian1234 =>
1326 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',
1327 no_bom_variant32endian4321 =>
1328 'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1329 ietf_name =>
1330 ['csucs4',
1331 'iso-10646-ucs-4'],
1332 mime_name =>
1333 'iso-10646-ucs-4',
1334 'uri',
1335 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4',
1336 '1',
1337 'urn:x-suika-fam-cx:charset:iso-10646-ucs-4',
1338 '1'},
1339 xml_name => 'ISO-10646-UCS-4',
1340 };
1341
1342 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be'} = {
1343 ascii32 => 1,
1344 ascii32endian1234 => 1,
1345 is_block_safe =>
1346 '1',
1347 perl_name =>
1348 ['ucs-4be',
1349 'utf-32be'],
1350 'uri',
1351 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',
1352 '1'}};
1353 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le'} = {
1354 ascii32 => 1,
1355 ascii32endian4321 => 1,
1356 is_block_safe =>
1357 '1',
1358 perl_name =>
1359 ['ucs-4le',
1360 'utf-32le'],
1361 'uri',
1362 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1363 '1'}};
1364 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_8859-1:1987'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1'} = {ascii8 =>
1365 '1',
1366 is_block_safe =>
1367 '1',
1368 ietf_name =>
1369 ['cp819',
1370 'csisolatin1',
1371 'ibm819',
1372 'iso-8859-1',
1373 'iso-8859-1',
1374 'iso-ir-100',
1375 'iso_8859-1',
1376 'iso_8859-1:1987',
1377 'l1',
1378 'latin1'],
1379 mime_name =>
1380 'iso-8859-1',
1381 perl_name =>
1382 ['iso-8859-1',
1383 'latin1'],
1384 'uri',
1385 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1',
1386 '1',
1387 'urn:x-suika-fam-cx:charset:iso_8859-1:1987',
1388 '1'},
1389 xml_name => 'ISO-8859-1',
1390 };
1391
1392 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2'} = {ascii8 =>
1393 '1',
1394 is_block_safe =>
1395 '1',
1396 'uri',
1397 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2',
1398 '1'},
1399 xml_name => 'ISO-8859-2',
1400 };
1401
1402 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3'} = {ascii8 =>
1403 '1',
1404 is_block_safe =>
1405 '1',
1406 'uri',
1407 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3',
1408 '1'},
1409 xml_name => 'ISO-8859-3',
1410 };
1411
1412 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4'} = {ascii8 =>
1413 '1',
1414 is_block_safe =>
1415 '1',
1416 'uri',
1417 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4',
1418 '1'},
1419 xml_name => 'ISO-8859-4',
1420 };
1421
1422 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5'} = {ascii8 =>
1423 '1',
1424 is_block_safe =>
1425 '1',
1426 'uri',
1427 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5',
1428 '1'},
1429 xml_name => 'ISO-8859-5',
1430 };
1431
1432 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6'} = {ascii8 =>
1433 '1',
1434 is_block_safe =>
1435 '1',
1436 'uri',
1437 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6',
1438 '1'},
1439 xml_name => 'ISO-8859-6',
1440 };
1441
1442 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7'} = {ascii8 =>
1443 '1',
1444 is_block_safe =>
1445 '1',
1446 'uri',
1447 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7',
1448 '1'},
1449 xml_name => 'ISO-8859-7',
1450 };
1451
1452 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8'} = {ascii8 =>
1453 '1',
1454 is_block_safe =>
1455 '1',
1456 'uri',
1457 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8',
1458 '1'},
1459 xml_name => 'ISO-8859-8',
1460 };
1461
1462 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9'} = {ascii8 =>
1463 '1',
1464 is_block_safe =>
1465 '1',
1466 'uri',
1467 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9',
1468 '1'},
1469 xml_name => 'ISO-8859-9',
1470 };
1471
1472 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10'} = {ascii8 =>
1473 '1',
1474 is_block_safe =>
1475 '1',
1476 'uri',
1477 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10',
1478 '1'},
1479 xml_name => 'ISO-8859-10',
1480 };
1481
1482 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11'} = {ascii8 =>
1483 '1',
1484 is_block_safe =>
1485 '1',
1486 'uri',
1487 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11',
1488 '1'},
1489 xml_name => 'ISO-8859-11',
1490 };
1491
1492 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13'} = {ascii8 =>
1493 '1',
1494 is_block_safe =>
1495 '1',
1496 'uri',
1497 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13',
1498 '1'},
1499 xml_name => 'ISO-8859-13',
1500 };
1501
1502 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14'} = {ascii8 =>
1503 '1',
1504 is_block_safe =>
1505 '1',
1506 'uri',
1507 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14',
1508 '1'},
1509 xml_name => 'ISO-8859-14',
1510 };
1511
1512 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15'} = {ascii8 =>
1513 '1',
1514 is_block_safe =>
1515 '1',
1516 'uri',
1517 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15',
1518 '1'},
1519 xml_name => 'ISO-8859-15',
1520 };
1521
1522 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16'} = {ascii8 =>
1523 '1',
1524 is_block_safe =>
1525 '1',
1526 'uri',
1527 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16',
1528 '1'},
1529 xml_name => 'ISO-8859-16',
1530 };
1531
1532 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp'} = {ascii8 =>
1533 '1',
1534 'uri',
1535 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp',
1536 '1'},
1537 xml_name => 'ISO-2022-JP',
1538 };
1539
1540 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-2022-jp'} = {ascii8 =>
1541 '1',
1542 ietf_name =>
1543 ['csiso2022jp',
1544 'iso-2022-jp',
1545 'iso-2022-jp'],
1546 mime_name =>
1547 'iso-2022-jp',
1548 'uri',
1549 {'urn:x-suika-fam-cx:charset:iso-2022-jp',
1550 '1'},
1551 };
1552
1553 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp'} = {ascii8 =>
1554 '1',
1555 perl_name =>
1556 ['iso-2022-jp'],
1557 'uri',
1558 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp',
1559 '1'}};
1560 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:shift_jis'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis'} = {ascii8 =>
1561 '1',
1562 is_block_safe =>
1563 '1',
1564 ietf_name =>
1565 ['csshiftjis',
1566 'ms_kanji',
1567 'shift_jis',
1568 'shift_jis'],
1569 mime_name =>
1570 'shift_jis',
1571 perl_name =>
1572 ['shift-jis-1997'],
1573 'uri',
1574 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis',
1575 '1',
1576 'urn:x-suika-fam-cx:charset:shift_jis',
1577 '1'},
1578 xml_name => 'Shift_JIS',
1579 };
1580
1581 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis'} = {ascii8 =>
1582 '1',
1583 is_block_safe =>
1584 '1',
1585 perl_name =>
1586 ['shiftjis',
1587 'sjis'],
1588 'uri',
1589 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis',
1590 '1'}};
1591 $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:euc-jp'} = $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp'} = $Whatpm::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese'} = {ascii8 =>
1592 '1',
1593 is_block_safe =>
1594 '1',
1595 ietf_name =>
1596 ['cseucpkdfmtjapanese',
1597 'euc-jp',
1598 'euc-jp',
1599 'extended_unix_code_packed_format_for_japanese'],
1600 mime_name =>
1601 'euc-jp',
1602 perl_name =>
1603 ['euc-jp-1997'],
1604 'uri',
1605 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp',
1606 '1',
1607 'urn:x-suika-fam-cx:charset:euc-jp',
1608 '1',
1609 'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese',
1610 '1'},
1611 xml_name => 'EUC-JP',
1612 };
1613
1614 $Whatpm::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp'} = {ascii8 =>
1615 '1',
1616 is_block_safe =>
1617 '1',
1618 perl_name =>
1619 ['euc-jp',
1620 'ujis'],
1621 'uri',
1622 {'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp',
1623 '1'}};
1624
1625 1;
1626 ## $Date: 2008/09/14 11:57:41 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24