/[suikacvs]/markup/html/whatpm/t/Charset-DecodeHandler.t
Suika

Contents of /markup/html/whatpm/t/Charset-DecodeHandler.t

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download) (as text)
Sun Jul 15 16:51:14 2007 UTC (18 years ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +30 -26 lines
File MIME type: application/x-troff
++ whatpm/t/ChangeLog	15 Jul 2007 16:51:05 -0000
	* Charset-DecodeHandler.t: The letter case of expected charset
	names has been changed to match the module's implementation.

2007-07-15  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/Charset/ChangeLog	15 Jul 2007 16:49:53 -0000
	* DecodeHandle.pm (create_decode_handle): Set canonical
	or specified name to |input_encoding| attribute.
	(uri_to_name): Reimplemented.
	(input_encoding): Return charset name returned
	by |uri_to_name| if available.
	($CharsetDef): Property |xml_name| now contains
	only names defined in XML specifications.

2007-07-15  Wakaba  <wakaba@suika.fam.cx>

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3     use Test;
4     BEGIN { plan tests => 6185 }
5    
6     require Whatpm::Charset::DecodeHandle;
7    
8     my $XML_AUTO_CHARSET = q<http://suika.fam.cx/www/2006/03/xml-entity/>; # complete URI; used for the XML autodetection cases
9     my $IANA_CHARSET = q<urn:x-suika-fam-cx:charset:>; # URI prefix; a charset name is appended
10     my $PERL_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.>; # URI prefix; 'utf8' is appended below
11     my $XML_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.>; # URI prefix; an encoding name is appended
12    
13     ## |create_decode_handle|
        ## Each case is [label, charset URI, expect-success flag].  A handle is
        ## requested for an empty in-memory input and the result is checked.
14     for my $test (
15     ['perl.utf8', $PERL_CHARSET.'utf8', 1],
16     ['xml', $XML_AUTO_CHARSET, 1],
17     ['unknown', q<http://www.unknown.test/>, 0],
18     ['iana.euc-jp', $IANA_CHARSET.'euc-jp', 1],
19     ['xml.euc-jp', $XML_CHARSET.'euc-jp', 1],
20     ['iana.shift_jis', $IANA_CHARSET.'shift_jis', 1],
21     ['xml.shift_jis', $XML_CHARSET.'shift_jis', 1],
22     ['iana.iso-2022-jp', $IANA_CHARSET.'iso-2022-jp', 1],
23     ['xml.iso-2022-jp', $XML_CHARSET.'iso-2022-jp', 1],
24     ) {
25     open my $fh, '<', \'';
26     my $dh = Whatpm::Charset::DecodeHandle->create_decode_handle ($test->[1], $fh);
27    
28     if ($test->[2]) {
        ## Expected to succeed: the result is a ...::DecodeHandle::Encode object
        ## and its |onerror| accessor returns a CODE reference.
29     ok UNIVERSAL::isa ($dh, 'Whatpm::Charset::DecodeHandle::Encode') ? 1 : 0, 1,
30     'create_decode_handle ' . $test->[0] . ' object';
31     ok ref $dh->onerror eq 'CODE' ? 1 : 0, 1,
32     'create_decode_handle ' . $test->[0] . ' onerror';
33     } else {
        ## Expected to fail: the result must not be a ...::DecodeHandle::Encode object.
34     ok UNIVERSAL::isa ($dh, 'Whatpm::Charset::DecodeHandle::Encode') ? 1 : 0, 0,
35     'create_decode_handle ' . $test->[0] . ' object';
36    
        ## Repeat the call with a callback as third argument; the callback's
        ## second argument must be the error type.  This |ok| only runs if the
        ## callback is actually invoked.
37     Whatpm::Charset::DecodeHandle->create_decode_handle ($test->[1], $fh, sub {
38     ok $_[1], 'charset-not-supported-error',
39     'create_decode_handle ' . $test->[0] . ' error';
40     });
41     }
42     }
43    
44     ## |name_to_uri|
        ## Each case is [expected URI, charset name passed in].  The first loop
        ## looks names up in the |ietf| domain, the second in the |xml| domain.
        ## Differently cased spellings of "utf-8" are expected to give the same URI.
45     for (
46     [$IANA_CHARSET.'utf-8', 'utf-8'],
47     [$IANA_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
48     [$IANA_CHARSET.'utf-8', 'UTF-8'],
49     [$IANA_CHARSET.'utf-8', 'uTf-8'],
50     [$IANA_CHARSET.'utf-16be', 'utf-16be'],
51     ) {
52     my $iname = Whatpm::Charset::DecodeHandle->name_to_uri (ietf => $_->[1]);
53     ok $iname, $_->[0], 'ietf charset URI ' . $_->[1];
54     }
55    
56     for (
57     [$XML_CHARSET.'utf-8', 'utf-8'],
58     [$XML_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
59     [$XML_CHARSET.'utf-8', 'UTF-8'],
60     [$XML_CHARSET.'utf-8', 'uTf-8'],
        ## NOTE(review): the next case expects the $IANA_CHARSET prefix although
        ## the lookup is (xml => ...), unlike its neighbours; confirm against
        ## the module that this is intentional and not a copy-paste slip.
61     [$IANA_CHARSET.'utf-16be', 'utf-16be'],
62     ) {
63     my $iname = Whatpm::Charset::DecodeHandle->name_to_uri (xml => $_->[1]);
64     ok $iname, $_->[0], 'XML encoding URI ' . $_->[1];
65     }
66    
67     ## |uri_to_name|
        ## Each case is [URI passed in, expected name]; |undef| is expected for a
        ## URI that carries neither known prefix.  Note that the local variable
        ## called $uri actually holds the returned name.
68     for (
69     [$IANA_CHARSET.'utf-8', 'utf-8'],
70     [$IANA_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
71     [q<http://charset.example/>, undef],
72     ) {
73     my $uri = Whatpm::Charset::DecodeHandle->uri_to_name (ietf => $_->[0]);
74     ok $uri, $_->[1], 'URI -> IETF charset ' . $_->[0];
75     }
76    
77     for (
78 wakaba 1.2 [$XML_CHARSET.'utf-8', 'UTF-8'], # the xml domain is expected to answer in upper case here
79 wakaba 1.1 [$XML_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
80     [q<http://charset.example/>, undef],
81     ) {
82     my $uri = Whatpm::Charset::DecodeHandle->uri_to_name (xml => $_->[0]);
83     ok $uri, $_->[1], 'URI -> XML encoding ' . $_->[0];
84     }
85    
86     ## |getc|
        ## getc 1: "a", the UTF-8 octets of U+3042, a stray \x81, then "a".
        ## The |onerror| callback stores the reported octets in $error so that
        ## each read can be checked for "error raised" versus "no error".
87     {
88     my $byte = "a\xE3\x81\x82\x81a";
89     open my $fh, '<', \$byte;
90     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
91     ($PERL_CHARSET.'utf8', $fh);
92    
93     my $error;
94     $efh->onerror (sub {
95     my ($efh, $type, %opt) = @_;
96     $error = ${$opt{octets}}; # |octets| is passed as a reference to the octet string
97     });
98    
99     ok $efh->getc, "a", "getc 1 [1]";
100     ok $error, undef, "getc 1 [1] error";
101     ok $efh->getc, "\x{3042}", "getc 1 [2]";
102     ok $error, undef, "getc 1 [2] error";
103     ok $efh->getc, "\x81", "getc 1 [3]"; # the bad octet is expected back as a character
104     ok $error, "\x81", "getc 1 [3] error"; # ... and reported through |onerror|
105     undef $error; # reset so the following checks only see new errors
106     ok $efh->getc, "a", "getc 1 [4]";
107     ok $error, undef, "getc 1 [4] error";
108     ok $efh->getc, undef, "getc 1 [5]"; # end of input
109     ok $error, undef, "getc 1 [5] error";
110     }
111    
112     {
        ## getc 2: 256 "a" followed by 256 "b"; every read must succeed without
        ## an error, and the read after the last "b" must return undef.
        ## NOTE(review): the lengths are presumably chosen to cross an internal
        ## buffer boundary of the decode handle - confirm against the module.
113     my $byte = "a" x 256;
114     $byte .= "b" x 256;
115    
116     open my $fh, '<', \$byte;
117     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
118     ($PERL_CHARSET.'utf8', $fh);
119    
120     my $error;
121     $efh->onerror (sub {
122     my ($efh, $type, %opt) = @_;
123     $error = ${$opt{octets}};
124     });
125    
126     for my $i (0..255) {
127     ok $efh->getc, "a", "getc 2 [$i]";
128     ok $error, undef, "getc 2 [$i] error";
129     }
130    
131     for my $i (0..255) {
        ## The label interpolates to the literal text "255+<i>"; these reads are
        ## in fact characters 256+<i> (zero-based).  Labels are diagnostic only.
132     ok $efh->getc, "b", "getc 2 [255+$i]";
133     ok $error, undef, "getc 2 [255+$i] error";
134     }
135    
136     ok $efh->getc, undef, "getc 2 [-1]";
137     ok $error, undef, "getc 2 [-1] error";
138     }
139    
140     {
        ## getc 3: 255 "a", then the three UTF-8 octets of U+3042, then 256 "b".
        ## All reads must succeed without an error.
        ## NOTE(review): presumably places the multi-octet character across an
        ## internal buffer boundary - confirm against the module.
141     my $byte = "a" x 255;
142     $byte .= "\xE3\x81\x82";
143     $byte .= "b" x 256;
144    
145     open my $fh, '<', \$byte;
146     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
147     ($PERL_CHARSET.'utf8', $fh);
148    
149     my $error;
150     $efh->onerror (sub {
151     my ($efh, $type, %opt) = @_;
152     $error = ${$opt{octets}};
153     });
154    
155     for my $i (0..254) {
156     ok $efh->getc, "a", "getc 3 [$i]";
157     ok $error, undef, "getc 3 [$i] error";
158     }
159    
160     ok $efh->getc, "\x{3042}", "getc 3 [255]";
161     ok $error, undef, "getc 3 [255] error";
162    
163     for my $i (0..255) {
        ## The label interpolates to the literal text "255+<i>"; these reads are
        ## in fact characters 256+<i> (zero-based).  Labels are diagnostic only.
164     ok $efh->getc, "b", "getc 3 [255+$i]";
165     ok $error, undef, "getc 3 [255+$i] error";
166     }
167    
168     ok $efh->getc, undef, "getc 3 [-1]";
169     ok $error, undef, "getc 3 [-1] error";
170     }
171    
172     {
        ## getc 4: 255 "a" and then a lone \xE3 with nothing after it.  The lone
        ## octet is expected back as a character and reported through |onerror|.
173     my $byte = "a" x 255;
174     $byte .= "\xE3";
175    
176     open my $fh, '<', \$byte;
177     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
178     ($PERL_CHARSET.'utf8', $fh);
179    
180     my $error;
181     $efh->onerror (sub {
182     my ($efh, $type, %opt) = @_;
183     $error = ${$opt{octets}};
184     });
185    
186     for my $i (0..254) {
187     ok $efh->getc, "a", "getc 4 [$i]";
188     ok $error, undef, "getc 4 [$i] error";
189     }
190    
191     ok $efh->getc, "\xE3", "getc 4 [255]";
192     ok $error, "\xE3", "getc 4 [255] error";
193     undef $error; # reset so the end-of-input check only sees new errors
194    
195     ok $efh->getc, undef, "getc 4 [-1]";
196     ok $error, undef, "getc 4 [-1] error";
197     }
198    
199     ## |ungetc|
        ## Pushing back the code point that was just read must make the next
        ## |getc| return that same character again.
200     {
        ## The input is spelled as UTF-8 octets (E4 B8 80 = U+4E00,
        ## E4 B8 91 = U+4E11), like the |getc| cases: an in-memory file can
        ## only hold bytes, and |open| refuses a scalar that contains code
        ## points above 0xFF.
201     my $byte = "a\xE4\xB8\x80b\xE4\xB8\x91";
202    
203     open my $fh, '<', \$byte;
204     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
205     ($PERL_CHARSET.'utf8', $fh);
206    
207     ok $efh->getc, "a", "ungetc [1]";
208    
209     $efh->ungetc (ord "a"); # |ungetc| is given a code point, not a string
210     ok $efh->getc, "a", "ungetc [2]";
211    
212     ok $efh->getc, "\x{4E00}", "ungetc [3]";
213    
214     $efh->ungetc (ord "\x{4E00}");
215     ok $efh->getc, "\x{4E00}", "ungetc [4]";
216    
217     ok $efh->getc, "b", "ungetc [5]";
218    
219     ok $efh->getc, "\x{4E11}", "ungetc [6]";
220    
221     $efh->ungetc (ord "\x{4E11}"); # push back at the very end of the input
222     ok $efh->getc, "\x{4E11}", "ungetc [7]";
223     }
224    
225     ## UTF-8, UTF-16 and BOM
        ## Each case is [label, input octets, charset URI, expected |getc|
        ## results (ending with undef for end of input), expected |has_bom|].
        ## No case is expected to invoke the error callback.
226     for my $test (
227     ["UTF-8 BOM 1", qq<\xEF\xBB\xBFabc>, $XML_CHARSET.'utf-8',
228     ["a", "b", "c", undef], 1],
229     ["UTF-8 no BOM 1", qq<abc>, $XML_CHARSET.'utf-8',
230     ["a", "b", "c", undef], 0],
231     ["UTF-8 BOM 2", qq<\xEF\xBB\xBF\xEF\xBB\xBFabc>, $XML_CHARSET.'utf-8',
232     ["\x{FEFF}", "a", "b", "c", undef], 1], # only the first BOM is dropped; the second is read as U+FEFF
233     ["UTF-8 BOM 3", qq<\xEF\xBB\xBF>, $XML_CHARSET.'utf-8',
234     [undef], 1],
235     ["UTF-8 no BOM 2", qq<>, $XML_CHARSET.'utf-8',
236     [undef], 0],
237     ["UTF-8 no BOM 3", qq<ab>, $XML_CHARSET.'utf-8',
238     [qw/a b/, undef], 0],
239     ["UTF-8 no BOM 4", qq<a>, $XML_CHARSET.'utf-8',
240     [qw/a/, undef], 0],
241     ["UTF-16BE BOM 1", qq<\xFE\xFF\x4E\x00\x00a>, $XML_CHARSET.'utf-16',
242     ["\x{4E00}", "a", undef], 1],
243     ["UTF-16LE BOM 1", qq<\xFF\xFE\x00\x4Ea\x00>, $XML_CHARSET.'utf-16',
244     ["\x{4E00}", "a", undef], 1],
245     ["UTF-16BE BOM 2", qq<\xFE\xFF\x00a>, $XML_CHARSET.'utf-16',
246     ["a", undef], 1],
247     ["UTF-16LE BOM 2", qq<\xFF\xFEa\x00>, $XML_CHARSET.'utf-16',
248     ["a", undef], 1],
249     ["UTF-16BE BOM 3", qq<\xFE\xFF>, $XML_CHARSET.'utf-16',
250     [undef], 1],
251     ["UTF-16LE BOM 3", qq<\xFF\xFE>, $XML_CHARSET.'utf-16',
252     [undef], 1],
253     ) {
254     my $error;
255    
256     open my $fh, '<', \($test->[1]);
257     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
258     ($test->[2], $fh, sub { $error = 1 }); # any callback invocation sets the flag
259    
260     for my $i (0..$#{$test->[3]}) {
261     ok $efh->getc, $test->[3]->[$i], $test->[0] . " $i";
262     }
263     ok $error, undef, $test->[0] . " error";
264     ok $efh->has_bom ? 1 : 0, $test->[4], $test->[0] . " has_bom";
265     }
266    
267     {
        ## UTF-16 [1]-[6]: the octets FE FF (used as the big-endian BOM in the
        ## cases above) followed by a single octet "a", i.e. half a code unit.
        ## The callback stores the error type (its second argument) in $error.
268     my $byte = qq<\xFE\xFFa>;
269    
270     my $error;
271    
272     open my $fh, '<', \$byte;
273     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
274     ($XML_CHARSET.'utf-16', $fh, sub { $error = $_[1] });
275    
276     ok $error, undef, "UTF-16 [1]"; # creating the handle reports nothing
277     ok $efh->getc, "a", "UTF-16 [2]"; # the dangling octet is expected back as a character
278     ok $error, 'illegal-octets-error', "UTF-16 [3]";
279     undef $error;
280     ok $efh->getc, undef, "UTF-16 [4]";
281     ok $error, undef, "UTF-16 [5]";
282     ok $efh->has_bom ? 1 : 0, 1, "UTF-16 [6]";
283     }
284     {
        ## UTF-16 [7]-[12]: the octets FF FE (used as the little-endian BOM in
        ## the cases above) followed by a single octet "a", i.e. half a code
        ## unit.  The callback stores the error type in $error.
285     my $byte = qq<\xFF\xFEa>;
286    
287     my $error;
288    
289     open my $fh, '<', \$byte;
290     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
291     ($XML_CHARSET.'utf-16', $fh, sub { $error = $_[1] });
292    
293     ok $error, undef, "UTF-16 [7]"; # creating the handle reports nothing
294     ok $efh->getc, "a", "UTF-16 [8]"; # the dangling octet is expected back as a character
295     ok $error, 'illegal-octets-error', "UTF-16 [9]";
296     undef $error;
297     ok $efh->getc, undef, "UTF-16 [10]";
298     ok $error, undef, "UTF-16 [11]";
299     ok $efh->has_bom ? 1 : 0, 1, "UTF-16 [12]";
300     }
301    
302     {
        ## UTF-16 [13]-[18]: two octets that are not a BOM.  Creating the handle
        ## must already report 'no-bom-error'; the octets FD FF are then
        ## expected to be read as U+FDFF, i.e. in big-endian order.
303     my $byte = qq<\xFD\xFF>;
304    
305     my $error;
306    
307     open my $fh, '<', \$byte;
308     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
309     ($XML_CHARSET.'utf-16', $fh, sub { $error = $_[1] });
310    
311     ok $error, 'no-bom-error', "UTF-16 [13]";
312     undef $error; # reset so the reads below only see new errors
313    
314     ok $efh->getc, "\x{FDFF}", "UTF-16 [14]";
315     ok $error, undef, "UTF-16 [15]";
316     ok $efh->getc, undef, "UTF-16 [16]";
317     ok $error, undef, "UTF-16 [17]";
318     ok $efh->has_bom ? 1 : 0, 0, "UTF-16 [18]";
319     }
320    
321     {
        ## UTF-16 [19]-[24]: a single octet, so there is neither a BOM nor a
        ## complete code unit.  Expected: 'no-bom-error' on creation, then the
        ## octet returned as a character together with 'illegal-octets-error'.
322     my $byte = qq<\xFD>;
323    
324     my $error;
325    
326     open my $fh, '<', \$byte;
327     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
328     ($XML_CHARSET.'utf-16', $fh, sub { $error = $_[1] });
329    
330     ok $error, 'no-bom-error', "UTF-16 [19]";
331     undef $error;
332    
333     ok $efh->getc, "\xFD", "UTF-16 [20]";
334     ok $error, 'illegal-octets-error', "UTF-16 [21]";
335     undef $error;
336    
337     ok $efh->getc, undef, "UTF-16 [22]";
338     ok $error, undef, "UTF-16 [23]";
339     ok $efh->has_bom ? 1 : 0, 0, "UTF-16 [24]";
340     }
341    
342     {
        ## UTF-16 [25]-[28]: empty input.  Creating the handle must report
        ## 'no-bom-error'; the first read is already end of input.
343     my $byte = qq<>;
344    
345     my $error;
346    
347     open my $fh, '<', \$byte;
348     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
349     ($XML_CHARSET.'utf-16', $fh, sub { $error = $_[1] });
350    
351     ok $error, 'no-bom-error', "UTF-16 [25]";
352     undef $error;
353    
354     ok $efh->getc, undef, "UTF-16 [26]";
355     ok $error, undef, "UTF-16 [27]";
356     ok $efh->has_bom ? 1 : 0, 0, "UTF-16 [28]";
357     }
358    
359     sub check_charset ($$$) {
        ## Runs every case of the array reference $_[2] against the charset URI
        ## $_[1]; $_[0] is used as a prefix for the test labels.
        ## Keys read from each case:
        ##   id        - label suffix
        ##   in        - input octets
        ##   bom       - expected |has_bom| (a false or missing value is compared as 0)
        ##   name      - expected |input_encoding|
        ##   out       - expected error state right after the handle is created,
        ##               followed by (character, error state) pairs; an error
        ##               state is undef for "no error" or [error type]
        ##   eof_error - optional [error type] expected for the read at end of input
        ## NOTE: |out| is consumed with |shift|, so the caller's arrays end up empty.
360     my $test_name = $_[0];
361     my $charset_uri = $_[1];
362     for my $testdata (@{$_[2]}) {
363     my $byte = $testdata->{in};
364     my $error;
365     my $i = 0;
366    
367     open my $fh, '<', \$byte;
368     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
369     ($charset_uri, $fh, sub {
370     my (undef, $etype, %opt) = @_;
371     $error = [$etype, \%opt]; # remember the most recent error: [type, options]
372     });
373    
374     ok defined $efh ? 1 : 0, 1, "$test_name $testdata->{id} return";
375     next unless defined $efh; # without a handle the remaining checks of this case are skipped
376     ok $efh->has_bom ? 1 : 0, $testdata->{bom} || 0,
377     "$test_name $testdata->{id} BOM";
378     ok $efh->input_encoding, $testdata->{name}, "$test_name $testdata->{id} ie";
379    
380     while (@{$testdata->{out}}) {
381     if ($i != 0) { # round 0 has no character: it checks the state after creation
382     my $c = shift @{$testdata->{out}};
383     ok $efh->getc, $c, "$test_name $testdata->{id} $i";
384     }
385    
386     my $v = shift @{$testdata->{out}}; # expected error state for this round
387     if (defined $v) {
388     ok defined $error ? 1 : 0, 1, "$test_name $testdata->{id} $i error";
389     ok $error->[0], $v->[0], "$test_name $testdata->{id} $i error 0"; # only the error type is compared
390     } else {
391     ok defined $error ? 1 : 0, 0, "$test_name $testdata->{id} $i error";
392     }
393     undef $error; # reset so the next round only sees new errors
394     $i++;
395     }
396    
        ## After all expected characters, the next read must be end of input.
397     ok $efh->getc, undef, "$test_name $testdata->{id} EOF";
398     if ($testdata->{eof_error}) {
399     ok defined $error ? 1 : 0, 1, "$test_name $testdata->{id} EOF error";
400     ok $error->[0], $testdata->{eof_error}->[0],
401     "$test_name $testdata->{id} EOF error 0";
402     } else {
403     ok $error, undef, "$test_name $testdata->{id} EOF error";
404     }
405     } # testdata
406     } # check_charset
407    
408     ## XML Character Encoding Autodetection
        ## Cases passed to |check_charset| together with $XML_AUTO_CHARSET.
        ## |in| is the raw input.  |out| starts with the error expected right
        ## after the handle is created and then alternates expected character /
        ## expected error (undef = no error, [type] = that error type).
        ## |name| is the expected |input_encoding|, |bom| the expected |has_bom|.
409     {
410     my @testdata = (
411     {
412     id => q<l=0>,
413     in => q<>,
414     out => [undef],
415 wakaba 1.2 name => 'UTF-8', bom => 0,
416 wakaba 1.1 },
417     {
418     id => q<l=1>,
419     in => "a",
420     out => [undef, "a", undef],
421 wakaba 1.2 name => 'UTF-8', bom => 0,
422 wakaba 1.1 },
423     {
424     id => q<bom8.l=0>,
425     in => "\xEF\xBB\xBF",
426     out => [undef],
427 wakaba 1.2 name => 'UTF-8', bom => 1,
428 wakaba 1.1 },
429     {
430     id => q<bom8.l=1>,
431     in => "\xEF\xBB\xBFa",
432     out => [undef, "a", undef],
433 wakaba 1.2 name => 'UTF-8', bom => 1,
434 wakaba 1.1 },
435     {
436     id => q<bom8.zwnbsp>,
437     in => "\xEF\xBB\xBF\xEF\xBB\xBF",
438     out => [undef, "\x{FEFF}", undef], # the second BOM sequence is read as U+FEFF
439 wakaba 1.2 name => 'UTF-8', bom => 1,
440 wakaba 1.1 },
441     {
442     id => q<bom16be.l=0>,
443     in => "\xFE\xFF",
444     out => [undef],
445 wakaba 1.2 name => 'UTF-16', bom => 1,
446 wakaba 1.1 },
447     {
448     id => q<bom16le.l=0>,
449     in => "\xFF\xFE",
450     out => [undef],
451 wakaba 1.2 name => 'UTF-16', bom => 1,
452 wakaba 1.1 },
453     {
454     id => q<bom16be.l=1>,
455     in => "\xFE\xFFa",
456     out => [undef, "a", [q<illegal-octets-error>]], # one octet after the BOM: half a code unit
457 wakaba 1.2 name => 'UTF-16', bom => 1,
458 wakaba 1.1 },
459     {
460     id => q<bom16le.l=1>,
461     in => "\xFF\xFEa",
462     out => [undef, "a", [q<illegal-octets-error>]], # one octet after the BOM: half a code unit
463 wakaba 1.2 name => 'UTF-16', bom => 1,
464 wakaba 1.1 },
465     {
466     id => q<bom16be.l=2>,
467     in => "\xFE\xFF\x4E\x00",
468     out => [undef, "\x{4E00}", undef],
469 wakaba 1.2 name => 'UTF-16', bom => 1,
470 wakaba 1.1 },
471     {
472     id => q<bom16le.l=2>,
473     in => "\xFF\xFE\x00\x4E",
474     out => [undef, "\x{4E00}", undef],
475 wakaba 1.2 name => 'UTF-16', bom => 1,
476 wakaba 1.1 },
477     {
478     id => q<bom16be.l=2lt>,
479     in => "\xFE\xFF\x00<",
480     out => [undef, "<", undef],
481 wakaba 1.2 name => 'UTF-16', bom => 1,
482 wakaba 1.1 },
483     {
484     id => q<bom16le.l=2lt>,
485     in => "\xFF\xFE<\x00",
486     out => [undef, "<", undef],
487 wakaba 1.2 name => 'UTF-16', bom => 1,
488 wakaba 1.1 },
489     {
490     id => q<bom16be.zwnbsp>,
491     in => "\xFE\xFF\xFE\xFF",
492     out => [undef, "\x{FEFF}", undef],
493 wakaba 1.2 name => 'UTF-16', bom => 1,
494 wakaba 1.1 },
495     {
496     id => q<bom16le.zwnbsp>,
497     in => "\xFF\xFE\xFF\xFE",
498     out => [undef, "\x{FEFF}", undef],
499 wakaba 1.2 name => 'UTF-16', bom => 1,
500 wakaba 1.1 },
501     {
502     id => q<bom32e3412.l=0>,
503     in => "\xFE\xFF\x00\x00",
504     out => [undef, "\x00", undef], # expected to be read as UTF-16: BOM, then U+0000
505 wakaba 1.2 name => 'UTF-16', bom => 1,
506 wakaba 1.1 },
507     {
508     id => q<bom32e4321.l=0>,
509     in => "\xFF\xFE\x00\x00",
510     out => [undef, "\x00", undef], # expected to be read as UTF-16: BOM, then U+0000
511 wakaba 1.2 name => 'UTF-16', bom => 1,
512 wakaba 1.1 },
513     {
514     id => q<bom16be.l=4ltq>,
515     in => "\xFE\xFF\x00<\x00?",
516     out => [undef, "<", undef, "?", undef],
517 wakaba 1.2 name => 'UTF-16', bom => 1,
518 wakaba 1.1 },
519     {
520     id => q<bom16le.l=4ltq>,
521     in => "\xFF\xFE<\x00?\x00",
522     out => [undef, "<", undef, "?", undef],
523 wakaba 1.2 name => 'UTF-16', bom => 1,
524 wakaba 1.1 },
525     {
526     id => q<bom16be.decl.1>,
527     in => qq[\xFE\xFF\x00<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
528     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
529     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
530     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00"\x00?\x00>],
531     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
532     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
533     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
534     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
535     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
536     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
537     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
538     "6", undef, '"', undef, "?", undef, ">", undef],
539     name => 'utf-16', bom => 1, # lower case, as written in the declaration
540     },
541     {
542     id => q<bom16le.decl.1>,
543     in => qq[\xFF\xFE<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
544     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
545     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
546     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00"\x00?\x00>\x00],
547     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
548     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
549     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
550     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
551     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
552     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
553     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
554     "6", undef, '"', undef, "?", undef, ">", undef],
555     name => 'utf-16', bom => 1,
556     },
557     {
558     id => q<utf16be.decl.1>,
559     in => qq[\x00<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
560     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
561     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
562     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00b\x00e\x00"\x00?\x00>],
563     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
564     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
565     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
566     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
567     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
568     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
569     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
570     "6", undef, "b", undef, "e", undef, '"', undef,
571     "?", undef, ">", undef],
572     name => 'utf-16be', bom => 0,
573     },
574     {
575     id => q<utf16le.decl.1>,
576     in => qq[<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
577     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
578     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
579     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00l\x00e\x00"].
580     qq[\x00?\x00>\x00],
581     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
582     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
583     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
584     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
585     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
586     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
587     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
588     "6", undef, "l", undef, "e", undef, '"', undef, "?", undef,
589     ">", undef],
590     name => 'utf-16le', bom => 0,
591     },
592     {
593     id => q<16be.decl.1>,
594     in => qq[\x00<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
595     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
596     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
597     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00"\x00?\x00>],
598     out => [[q<charset-name-mismatch-error>], # no BOM in the input, yet the declaration says plain utf-16
599     "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
600     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
601     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
602     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
603     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
604     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
605     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
606     "6", undef, '"', undef, "?", undef, ">", undef],
607     name => 'utf-16', bom => 0,
608     },
609     {
610     id => q<16le.decl.1>,
611     in => qq[<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
612     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
613     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
614     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00"\x00?\x00>\x00],
615     out => [[q<charset-name-mismatch-error>], # no BOM in the input, yet the declaration says plain utf-16
616     "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
617     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
618     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
619     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
620     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
621     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
622     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
623     "6", undef, '"', undef, "?", undef, ">", undef],
624     name => 'utf-16', bom => 0,
625     },
626     {
627     id => q<8.decl.1>,
628     in => qq[<?xml version="1.0" encoding="utf-8"?>],
629     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
630     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
631     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
632     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
633     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
634     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
635     "u", undef, "t", undef, "f", undef, "-", undef, "8", undef,
636     '"', undef, "?", undef, ">", undef],
637     name => 'utf-8', bom => 0,
638     },
639     {
640     id => q<8.decl.2>,
641     in => qq[<?xml encoding="utf-8"?>],
642     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
643     " ", undef,
644     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
645     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
646     "u", undef, "t", undef, "f", undef, "-", undef, "8", undef,
647     '"', undef, "?", undef, ">", undef],
648     name => 'utf-8', bom => 0,
649     },
650     {
651     id => q<8.decl.3>,
652     in => qq[<?xml version="1.1" encoding="utf-8"?>],
653     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
654     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
655     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
656     "1", undef, ".", undef, "1", undef, '"', undef, " ", undef,
657     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
658     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
659     "u", undef, "t", undef, "f", undef, "-", undef, "8", undef,
660     '"', undef, "?", undef, ">", undef],
661     name => 'utf-8', bom => 0,
662     },
663     {
664     id => q<8.decl.4>,
665     in => qq[<?xml version="1.0"?>],
666     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
667     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
668     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
669     "1", undef, ".", undef, "0", undef, '"', undef,
670     "?", undef, ">", undef],
671 wakaba 1.2 name => 'UTF-8', bom => 0, # the input has no encoding declaration
672 wakaba 1.1 },
673     {
674     id => q<bom8.decl.1>,
675     in => qq[\xEF\xBB\xBF<?xml encoding="utf-8"?>],
676     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
677     " ", undef,
678     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
679     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
680     "u", undef, "t", undef, "f", undef, "-", undef, "8", undef,
681     '"', undef, "?", undef, ">", undef],
682     name => 'utf-8', bom => 1,
683     },
684     {
685     id => q<us-ascii.decl.1>,
686     in => qq[<?xml encoding="us-ascii"?>],
687     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
688     " ", undef,
689     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
690     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
691     "u", undef, "s", undef, "-", undef, "a", undef, "s", undef,
692     "c", undef, "i", undef, "i", undef,
693     '"', undef, "?", undef, ">", undef],
694     name => 'us-ascii', bom => 0,
695     },
696     {
697     id => q<us-ascii.decl.2>,
698     in => qq[<?xml encoding="US-ascii"?>],
699     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
700     " ", undef,
701     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
702     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
703     "U", undef, "S", undef, "-", undef, "a", undef, "s", undef,
704     "c", undef, "i", undef, "i", undef,
705     '"', undef, "?", undef, ">", undef],
706 wakaba 1.2 name => 'US-ascii', bom => 0, # same spelling as in the declaration
707 wakaba 1.1 },
708     {
709     id => q<us-ascii.decl.3>,
710     in => qq[<?xml encoding='us-ascii'?>],
711     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
712     " ", undef,
713     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
714     "i", undef, "n", undef, "g", undef, "=", undef, "'", undef,
715     "u", undef, "s", undef, "-", undef, "a", undef, "s", undef,
716     "c", undef, "i", undef, "i", undef,
717     "'", undef, "?", undef, ">", undef],
718     name => 'us-ascii', bom => 0,
719     },
720     );
721     check_charset ('XML', $XML_AUTO_CHARSET, \@testdata);
722     }
723    
724     ## EUC-JP
725     {
726     my @testdata = (
727     {
728     id => q<l=0>,
729     in => q<>,
730     out => [undef],
731     },
732     {
733     id => q<l=1.00>,
734     in => qq<\x00>,
735     out => [undef, "\x00", undef],
736     },
737     {
738     id => q<l=1.0d>,
739     in => qq<\x0D>,
740     out => [undef, "\x0D", undef],
741     },
742     {
743     id => q<l=1.0e>,
744     in => qq<\x0E>,
745     out => [undef, "\x0E", undef],
746     }, # Error??
747     {
748     id => q<l=1.0f>,
749     in => qq<\x0F>,
750     out => [undef, "\x0F", undef],
751     }, # Error??
752     {
753     id => q<l=1.1b>,
754     in => qq<\x1B>,
755     out => [undef, "\x1B", undef],
756     }, # Error??
757     {
758     id => q<l=1.a>,
759     in => q<a>,
760     out => [undef, "a", undef],
761     },
762     {
763     id => q<l=1.20>,
764     in => qq<\x20>,
765     out => [undef, "\x20", undef],
766     },
767     {
768     id => q<5C>,
769     in => qq<\x5C>,
770     out => [undef, "\x5C", undef],
771     },
772     {
773     id => q<l=1.7E>,
774     in => qq<\x7E>,
775     out => [undef, "\x7E", undef],
776     },
777     {
778     id => q<l=1.7F>,
779     in => qq<\x7F>,
780     out => [undef, "\x7F", undef],
781     },
782     {
783     id => q<l=1.80>,
784     in => qq<\x80>,
785     out => [undef, "\x80", undef],
786     },
787     {
788     id => q<l=1.8c>,
789     in => qq<\x8C>,
790     out => [undef, "\x8C", undef],
791     },
792     {
793     id => q<l=1.8e>,
794     in => qq<\x8E>,
795     out => [undef, "\x8E", [q<illegal-octets-error>]],
796     },
797     {
798     id => q<l=1.8f>,
799     in => qq<\x8F>,
800     out => [undef, "\x8F", [q<illegal-octets-error>]],
801     },
802     {
803     id => q<l=1.a0>,
804     in => qq<\xA0>,
805     out => [undef, "\xA0", [q<unassigned-code-point-error>]],
806     },
807     {
808     id => q<l=1.a1>,
809     in => qq<\xA1>,
810     out => [undef, "\xA1", [q<illegal-octets-error>]],
811     },
812     {
813     id => q<l=1.a2>,
814     in => qq<\xA2>,
815     out => [undef, "\xA2", [q<illegal-octets-error>]],
816     },
817     {
818     id => q<l=1.fd>,
819     in => qq<\xFD>,
820     out => [undef, "\xFD", [q<illegal-octets-error>]],
821     },
822     {
823     id => q<l=1.fe>,
824     in => qq<\xFE>,
825     out => [undef, "\xFE", [q<illegal-octets-error>]],
826     },
827     {
828     id => q<l=1.ff>,
829     in => qq<\xFF>,
830     out => [undef, "\xFF", [q<unassigned-code-point-error>]],
831     },
832     {
833     id => q<l=2.0000>,
834     in => qq<\x00\x00>,
835     out => [undef, "\x00", undef, "\x00", undef],
836     },
837     {
838     id => q<l=2.0D0A>,
839     in => qq<\x0D\x0A>,
840     out => [undef, "\x0D", undef, "\x0A", undef],
841     },
842     {
843     id => q<l=2.1B28>,
844     in => qq<\x1B\x28>,
845     out => [undef, "\x1B", undef, "\x28", undef],
846     },# Error??
847     {
848     id => q<l=2.2020>,
849     in => qq<\x20\x20>,
850     out => [undef, "\x20", undef, "\x20", undef],
851     },
852     {
853     id => q<l=2.ab>,
854     in => qq<ab>,
855     out => [undef, "a", undef, "b", undef],
856     },
857     {
858     id => q<l=2.a0a1>,
859     in => qq<\xA0\xA1>,
860     out => [undef, "\xA0", [q<unassigned-code-point-error>],
861     "\xA1", [q<illegal-octets-error>]],
862     },
863     {
864     id => q<l=2.a1a1>,
865     in => qq<\xA1\xA1>,
866     out => [undef, "\x{3000}", undef],
867     },
868     {
869     id => q<l=2.a1a2>,
870     in => qq<\xA1\xA2>,
871     out => [undef, "\x{3001}", undef],
872     },
873     {
874     id => q<l=2.a1a4>,
875     in => qq<\xA1\xA4>,
876     out => [undef, "\x{FF0C}", undef], # FULLWIDTH COMMA
877     },
878     {
879     id => q<a1a6>,
880     in => qq<\xA1\xA6>,
881     out => [undef, "\x{30FB}", undef], # KATAKANA MIDDLE DOT
882     },
883     {
884     id => q<a1a7>,
885     in => qq<\xA1\xA7>,
886     out => [undef, "\x{FF1A}", undef], # FULLWIDTH COLON
887     },
888     {
889     id => q<a1b1>,
890     in => qq<\xA1\xB1>,
891     out => [undef, "\x{203E}", undef], # OVERLINE
892     },
893     {
894     id => q<a1bd>,
895     in => qq<\xA1\xBD>,
896     out => [undef, "\x{2014}", undef], # EM DASH
897     },
898     {
899     id => q<a1c0>,
900     in => qq<\xA1\xC0>,
901     out => [undef, "\x{FF3C}", undef], # FULLWIDTH REVERSE SOLIDUS
902     },
903     {
904     id => q<a1c1>,
905     in => qq<\xA1\xC1>,
906     out => [undef, "\x{301C}", undef], # WAVE DASH
907     },
908     {
909     id => q<a1c2>,
910     in => qq<\xA1\xC2>,
911     out => [undef, "\x{2016}", undef], # DOUBLE VERTICAL LINE
912     },
913     {
914     id => q<a1c4>,
915     in => qq<\xA1\xC4>,
916     out => [undef, "\x{2026}", undef], # HORIZONTAL ELLIPSIS
917     },
918     {
919     id => q<a1dd>,
920     in => qq<\xA1\xDD>,
921     out => [undef, "\x{2212}", undef], # MINUS SIGN
922     },
923     {
924     id => q<a1ef>,
925     in => qq<\xA1\xEF>,
926     out => [undef, "\x{00A5}", undef], # YEN SIGN
927     },
928     {
929     id => q<a1f1>,
930     in => qq<\xA1\xF1>,
931     out => [undef, "\x{00A2}", undef], # CENT SIGN
932     },
933     {
934     id => q<a1f2>,
935     in => qq<\xA1\xF2>,
936     out => [undef, "\x{00A3}", undef], # POUND SIGN
937     },
938     { # id mirrors the input octets A1 FF (it used to repeat the a1f2 label of the previous case)
939     id => q<a1ff>,
940     in => qq<\xA1\xFF>,
941     out => [undef, "\xA1", [q<illegal-octets-error>],
942     "\xFF", [q<unassigned-code-point-error>]], # each octet is expected back separately with its own error
943     },
944     {
945     id => q<a2ae>,
946     in => qq<\xA2\xAE>,
947     out => [undef, "\x{3013}", undef], # GETA MARK
948     },
949     {
950     id => q<a2af>,
951     in => qq<\xA2\xAF>,
952     out => [undef, "\xA2\xAF", [q<unassigned-code-point-error>]],
953     },
954     {
955     id => q<a2ba>,
956     in => qq<\xA2\xBA>,
957     out => [undef, "\x{2208}", undef], # ELEMENT OF
958     },
959     {
960     id => q<a2fe>,
961     in => qq<\xA2\xFE>,
962     out => [undef, "\x{25EF}", undef], # LARGE CIRCLE
963     },
964     {
965     id => q<adce>,
966     in => qq<\xAD\xCE>,
967     out => [undef, "\xAD\xCE", [q<unassigned-code-point-error>]],
968     },
969     {
970     id => q<b0a6>,
971     in => qq<\xB0\xA6>,
972     out => [undef, "\x{611B}", undef], # han
973     },
974     {
975     id => q<f4a6>,
976     in => qq<\xF4\xA6>,
977     out => [undef, "\x{7199}", undef], # han
978     },
979     {
980     id => q<8ea1>,
981     in => qq<\x8E\xA1>,
982     out => [undef, "\x{FF61}", undef],
983     },
984     {
985     id => q<8efe>,
986     in => qq<\x8E\xFE>,
987     out => [undef, "\x8E\xFE", [q<unassigned-code-point-error>]],
988     },
989     {
990     id => q<8ffe>,
991     in => qq<\x8F\xFE>,
992     out => [undef, "\x8F\xFE", [q<illegal-octets-error>]],
993     },
994     {
995     id => q<l=2.a1a2a3>,
996     in => qq<\xA1\xA2\xA3>,
997     out => [undef, "\x{3001}", undef,
998     "\xA3", [q<illegal-octets-error>]],
999     },
1000     {
1001     id => q<8ea1a1>,
1002     in => qq<\x8E\xA1\xA1>,
1003     out => [undef, "\x{FF61}", undef,
1004     "\xA1", [q<illegal-octets-error>]],
1005     },
1006     {
1007     id => q<8fa1a1>,
1008     in => qq<\x8F\xA1\xA1>,
1009     out => [undef, "\x8F\xA1\xA1", [q<unassigned-code-point-error>]],
1010     },
1011     {
1012     id => q<8fa2af>,
1013     in => qq<\x8F\xA2\xAF>,
1014     out => [undef, "\x{02D8}", undef],
1015     },
1016     {
1017     id => q<8fa2b7>,
1018     in => qq<\x8F\xA2\xB7>,
1019     out => [undef, "\x{FF5E}", undef], # FULLWIDTH TILDE
1020     },
1021     {
1022     id => q<a1a2a1a3>,
1023     in => qq<\xA1\xA2\xA1\xA3>,
1024     out => [undef, "\x{3001}", undef, "\x{3002}", undef],
1025     },
1026     { # id mirrors the four input octets 8F A2 AF AF (it used to repeat the 8fa2af label of the three-octet case)
1027     id => q<8fa2afaf>,
1028     in => qq<\x8F\xA2\xAF\xAF>,
1029     out => [undef, "\x{02D8}", undef,
1030     "\xAF", [q<illegal-octets-error>]], # the trailing extra octet is expected back as an error
1031     },
1032     {
1033     id => q<8fa2afafa1>,
1034     in => qq<\x8F\xA2\xAF\xAF\xA1>,
1035     out => [undef, "\x{02D8}", undef,
1036     "\xAF\xA1", [q<unassigned-code-point-error>]],
1037     },
1038     );
1039 wakaba 1.2 check_charset ('XML-EUC-JP', $XML_CHARSET.'euc-jp',
1040     [map {$_->{name} = 'EUC-JP'; $_} @testdata]);
1041 wakaba 1.1 }
1042    
1043     ## Shift_JIS
1044     {
1045     my @testdata = (
1046     {
1047     id => q<l=0>,
1048     in => q<>,
1049     out => [undef],
1050     },
1051     {
1052     id => q<l=1.00>,
1053     in => qq<\x00>,
1054     out => [undef, "\x00", undef],
1055     },
1056     {
1057     id => q<l=1.0d>,
1058     in => qq<\x0D>,
1059     out => [undef, "\x0D", undef],
1060     },
1061     {
1062     id => q<l=1.0e>,
1063     in => qq<\x0E>,
1064     out => [undef, "\x0E", undef],
1065     }, # Error??
1066     {
1067     id => q<l=1.0f>,
1068     in => qq<\x0F>,
1069     out => [undef, "\x0F", undef],
1070     }, # Error??
1071     {
1072     id => q<l=1.1b>,
1073     in => qq<\x1B>,
1074     out => [undef, "\x1B", undef],
1075     }, # Error??
1076     {
1077     id => q<l=1.a>,
1078     in => q<a>,
1079     out => [undef, "a", undef],
1080     },
1081     {
1082     id => q<l=1.20>,
1083     in => qq<\x20>,
1084     out => [undef, "\x20", undef],
1085     },
1086     {
1087     id => q<l=1.5C>,
1088     in => qq<\x5C>,
1089     out => [undef, "\xA5", undef], # YEN SIGN
1090     },
1091     {
1092     id => q<l=1.7E>,
1093     in => qq<\x7E>,
1094     out => [undef, "\x{203E}", undef], # OVERLINE
1095     },
1096     {
1097     id => q<l=1.7F>,
1098     in => qq<\x7F>,
1099     out => [undef, "\x7F", undef],
1100     },
1101     {
1102     id => q<l=1.80>,
1103     in => qq<\x80>,
1104     out => [undef, "\x80", [q<unassigned-code-point-error>]],
1105     },
1106     {
1107     id => q<l=1.8c>,
1108     in => qq<\x8C>,
1109     out => [undef, "\x8C", [q<illegal-octets-error>]],
1110     },
1111     {
1112     id => q<l=1.8e>,
1113     in => qq<\x8E>,
1114     out => [undef, "\x8E", [q<illegal-octets-error>]],
1115     },
1116     {
1117     id => q<l=1.8f>,
1118     in => qq<\x8F>,
1119     out => [undef, "\x8F", [q<illegal-octets-error>]],
1120     },
1121     {
1122     id => q<l=1.a0>,
1123     in => qq<\xA0>,
1124     out => [undef, "\xA0", [q<unassigned-code-point-error>]],
1125     },
1126     {
1127     id => q<l=1.a1>,
1128     in => qq<\xA1>,
1129     out => [undef, "\x{FF61}", undef],
1130     },
1131     {
1132     id => q<l=1.a2>,
1133     in => qq<\xA2>,
1134     out => [undef, "\x{FF62}", undef],
1135     },
1136     {
1137     id => q<l=1.df>,
1138     in => qq<\xdf>,
1139     out => [undef, "\x{FF9F}", undef],
1140     },
1141     {
1142     id => q<l=1.e0>,
1143     in => qq<\xe0>,
1144     out => [undef, "\xE0", [q<illegal-octets-error>]],
1145     },
1146     {
1147     id => q<l=1.ef>,
1148     in => qq<\xEF>,
1149     out => [undef, "\xEF", [q<illegal-octets-error>]],
1150     },
1151     {
1152     id => q<F0>,
1153     in => qq<\xF0>,
1154     out => [undef, "\xF0", [q<unassigned-code-point-error>]],
1155     },
1156     {
1157     id => q<l=1.fc>,
1158     in => qq<\xFC>,
1159     out => [undef, "\xFC", [q<unassigned-code-point-error>]],
1160     },
1161     {
1162     id => q<l=1.fd>,
1163     in => qq<\xFD>,
1164     out => [undef, "\xFD", [q<unassigned-code-point-error>]],
1165     },
1166     {
1167     id => q<l=1.fe>,
1168     in => qq<\xFE>,
1169     out => [undef, "\xFE", [q<unassigned-code-point-error>]],
1170     },
1171     {
1172     id => q<l=1.ff>,
1173     in => qq<\xFF>,
1174     out => [undef, "\xFF", [q<unassigned-code-point-error>]],
1175     },
1176     {
1177     id => q<l=2.0000>,
1178     in => qq<\x00\x00>,
1179     out => [undef, "\x00", undef, "\x00", undef],
1180     },
1181     {
1182     id => q<l=2.0D0A>,
1183     in => qq<\x0D\x0A>,
1184     out => [undef, "\x0D", undef, "\x0A", undef],
1185     },
1186     {
1187     id => q<l=2.1B28>,
1188     in => qq<\x1B\x28>,
1189     out => [undef, "\x1B", undef, "\x28", undef],
1190     },# Error??
1191     {
1192     id => q<l=2.2020>,
1193     in => qq<\x20\x20>,
1194     out => [undef, "\x20", undef, "\x20", undef],
1195     },
1196     {
1197     id => q<l=2.ab>,
1198     in => qq<ab>,
1199     out => [undef, "a", undef, "b", undef],
1200     },
1201     {
1202     id => q<8040>,
1203     in => qq<\x80\x40>,
1204     out => [undef, "\x80", [q<unassigned-code-point-error>],
1205     "\x40", undef],
1206     },
1207     {
1208     id => q<8100>,
1209     in => qq<\x81\x00>,
1210     out => [undef, "\x81\x00", [q<unassigned-code-point-error>]],
1211     },
1212     {
1213     id => q<8101>,
1214     in => qq<\x81\x01>,
1215     out => [undef, "\x81\x01", [q<unassigned-code-point-error>]],
1216     },
1217     {
1218     id => q<813F>,
1219     in => qq<\x81\x3F>,
1220     out => [undef, "\x81\x3F", [q<unassigned-code-point-error>]],
1221     },
1222     {
1223     id => q<8140>,
1224     in => qq<\x81\x40>,
1225     out => [undef, "\x{3000}", undef],
1226     },
1227     {
1228     id => q<8141>,
1229     in => qq<\x81\x41>,
1230     out => [undef, "\x{3001}", undef],
1231     },
1232     {
1233     id => q<8143>,
1234     in => qq<\x81\x43>,
1235     out => [undef, "\x{FF0C}", undef], # FULLWIDTH COMMA
1236     },
1237     {
1238     id => q<8150>,
1239     in => qq<\x81\x50>,
1240     out => [undef, "\x{FFE3}", undef], # FULLWIDTH MACRON
1241     },
1242     {
1243     id => q<815C>,
1244     in => qq<\x81\x5C>,
1245     out => [undef, "\x{2014}", undef], # EM DASH
1246     },
1247     {
1248     id => q<815F>,
1249     in => qq<\x81\x5F>,
1250     out => [undef, "\x{005C}", undef], # REVERSE SOLIDUS
1251     },
1252     {
1253     id => q<8160>,
1254     in => qq<\x81\x60>,
1255     out => [undef, "\x{301C}", undef], # WAVE DASH
1256     },
1257     {
1258     id => q<8161>,
1259     in => qq<\x81\x61>,
1260     out => [undef, "\x{2016}", undef], # DOUBLE VERTICAL LINE
1261     },
1262     {
1263     id => q<8163>,
1264     in => qq<\x81\x63>,
1265     out => [undef, "\x{2026}", undef], # HORIZONTAL ELLIPSIS
1266     },
1267     {
1268     id => q<817C>,
1269     in => qq<\x81\x7C>,
1270     out => [undef, "\x{2212}", undef], # MINUS SIGN
1271     },
1272     {
1273     id => q<817F>,
1274     in => qq<\x81\x7F>,
1275     out => [undef, "\x81\x7F", [q<unassigned-code-point-error>]],
1276     },
1277     {
1278     id => q<818F>,
1279     in => qq<\x81\x8F>,
1280     out => [undef, "\x{FFE5}", undef], # FULLWIDTH YEN SIGN
1281     },
1282     {
1283     id => q<8191>,
1284     in => qq<\x81\x91>,
1285     out => [undef, "\x{00A2}", undef], # CENT SIGN
1286     },
1287     {
1288     id => q<8192>,
1289     in => qq<\x81\x92>,
1290     out => [undef, "\x{00A3}", undef], # POUND SIGN
1291     },
1292     {
1293     id => q<81AC>,
1294     in => qq<\x81\xAC>,
1295     out => [undef, "\x{3013}", undef], # GETA MARK
1296     },
1297     {
1298     id => q<81AD>,
1299     in => qq<\x81\xAD>,
1300     out => [undef, "\x81\xAD", [q<unassigned-code-point-error>]],
1301     },
1302     {
1303     id => q<81B8>,
1304     in => qq<\x81\xB8>,
1305     out => [undef, "\x{2208}", undef], # ELEMENT OF
1306     },
1307     {
1308     id => q<81CA>,
1309     in => qq<\x81\xCA>,
1310     out => [undef, "\x{00AC}", undef], # NOT SIGN
1311     },
1312     {
1313     id => q<81FC>,
1314     in => qq<\x81\xFC>,
1315     out => [undef, "\x{25EF}", undef], # LARGE CIRCLE
1316     },
1317     {
1318     id => q<81FD>,
1319     in => qq<\x81\xFD>,
1320     out => [undef, "\x81\xFD", [q<unassigned-code-point-error>]],
1321     },
1322     {
1323     id => q<81FE>,
1324     in => qq<\x81\xFE>,
1325     out => [undef, "\x81\xFE", [q<unassigned-code-point-error>]],
1326     },
1327     {
1328     id => q<81FF>,
1329     in => qq<\x81\xFF>,
1330     out => [undef, "\x81\xFF", [q<unassigned-code-point-error>]],
1331     },
1332     {
1333     id => q<DDDE>,
1334     in => qq<\xDD\xDE>,
1335     out => [undef, "\x{FF9D}", undef, "\x{FF9E}", undef],
1336     },
1337     {
1338     id => q<e040>,
1339     in => qq<\xE0\x40>,
1340     out => [undef, "\x{6F3E}", undef],
1341     },
1342     {
1343     id => q<eaa4>,
1344     in => qq<\xEA\xA4>,
1345     out => [undef, "\x{7199}", undef],
1346     },
1347     {
1348     id => q<eaa5>,
1349     in => qq<\xEA\xA5>,
1350     out => [undef, "\xEA\xA5", [q<unassigned-code-point-error>]],
1351     },
1352     {
1353     id => q<eb40>,
1354     in => qq<\xEB\x40>,
1355     out => [undef, "\xEB\x40", [q<unassigned-code-point-error>]],
1356     },
1357     {
1358     id => q<ed40>,
1359     in => qq<\xED\x40>,
1360     out => [undef, "\xED\x40", [q<unassigned-code-point-error>]],
1361     },
1362     {
1363     id => q<effc>,
1364     in => qq<\xEF\xFC>,
1365     out => [undef, "\xEF\xFC", [q<unassigned-code-point-error>]],
1366     },
1367     {
1368     id => q<f040>,
1369     in => qq<\xF0\x40>,
1370     out => [undef, "\xF0", [q<unassigned-code-point-error>],
1371     "\x40", undef],
1372     },
1373     {
1374     id => q<f140>,
1375     in => qq<\xF1\x40>,
1376     out => [undef, "\xF1", [q<unassigned-code-point-error>],
1377     "\x40", undef],
1378     },
1379     {
1380     id => q<fb40>,
1381     in => qq<\xFB\x40>,
1382     out => [undef, "\xFB", [q<unassigned-code-point-error>],
1383     "\x40", undef],
1384     },
1385     {
1386     id => q<fc40>,
1387     in => qq<\xFc\x40>,
1388     out => [undef, "\xFC", [q<unassigned-code-point-error>],
1389     "\x40", undef],
1390     },
1391     {
1392     id => q<fd40>,
1393     in => qq<\xFD\x40>,
1394     out => [undef, "\xFD", [q<unassigned-code-point-error>],
1395     "\x40", undef],
1396     },
1397     {
1398     id => q<fE40>,
1399     in => qq<\xFE\x40>,
1400     out => [undef, "\xFE", [q<unassigned-code-point-error>],
1401     "\x40", undef],
1402     },
1403     {
1404     id => q<ff40>,
1405     in => qq<\xFF\x40>,
1406     out => [undef, "\xFF", [q<unassigned-code-point-error>],
1407     "\x40", undef],
1408     },
1409     {
1410     id => q<81408142>,
1411     in => qq<\x81\x40\x81\x42>,
1412     out => [undef, "\x{3000}", undef, "\x{3002}", undef],
1413     },
1414     );
1415    
1416 wakaba 1.2 $_->{name} = 'Shift_JIS' for @testdata; # set the name field on every case (presumably the expected charset name; check_charset is defined elsewhere)
1417     check_charset ('XML-Shift_JIS', $XML_CHARSET.'shift_jis', [@testdata]); # hand over a fresh array of the same case hashes
1418 wakaba 1.1 }
1419    
1420     ## ISO-2022-JP
1421     {
1422    
1423     my @testdata = (
1424     {
1425     id => q<l=0>,
1426     in => q<>,
1427     out1 => [undef],
1428     out2 => [undef],
1429     },
1430     {
1431     id => q<l=1.00>,
1432     in => qq<\x00>,
1433     out1 => [undef, "\x00", undef],
1434     out2 => [undef, "\x00", undef],
1435     },
1436     {
1437     id => q<l=1.0d>,
1438     in => qq<\x0D>,
1439     out1 => [undef, "\x0D", undef],
1440     out2 => [undef, "\x0D", undef],
1441     }, # Error?
1442     {
1443     id => q<0A>,
1444     in => qq<\x0A>,
1445     out1 => [undef, "\x0A", undef],
1446     out2 => [undef, "\x0A", undef],
1447     }, # Error?
1448     {
1449     id => q<l=1.0e>,
1450     in => qq<\x0E>,
1451     out1 => [undef, "\x0E", [q<illegal-octets-error>]],
1452     out2 => [undef, "\x0E", [q<illegal-octets-error>]],
1453     },
1454     {
1455     id => q<l=1.0f>,
1456     in => qq<\x0F>,
1457     out1 => [undef, "\x0F", [q<illegal-octets-error>]],
1458     out2 => [undef, "\x0F", [q<illegal-octets-error>]],
1459     },
1460     {
1461     id => q<l=1.1b>,
1462     in => qq<\x1B>,
1463     out1 => [undef, "\x1B", [q<illegal-octets-error>]],
1464     out2 => [undef, "\x1B", [q<illegal-octets-error>]],
1465     },
1466     {
1467     id => q<l=1.a>,
1468     in => q<a>,
1469     out1 => [undef, "a", undef],
1470     out2 => [undef, "a", undef],
1471     },
1472     {
1473     id => q<l=1.20>,
1474     in => qq<\x20>,
1475     out1 => [undef, "\x20", undef],
1476     out2 => [undef, "\x20", undef],
1477     },
1478     {
1479     id => q<l=1.5C>,
1480     in => qq<\x5C>,
1481     out1 => [undef, "\x5C", undef],
1482     out2 => [undef, "\x5C", undef],
1483     },
1484     {
1485     id => q<l=1.7E>,
1486     in => qq<\x7E>,
1487     out1 => [undef, "\x7E", undef],
1488     out2 => [undef, "\x7E", undef],
1489     },
1490     {
1491     id => q<l=1.7F>,
1492     in => qq<\x7F>,
1493     out1 => [undef, "\x7F", undef],
1494     out2 => [undef, "\x7F", undef],
1495     },
1496     {
1497     id => q<l=1.80>,
1498     in => qq<\x80>,
1499     out1 => [undef, "\x80", [q<illegal-octets-error>]],
1500     out2 => [undef, "\x80", [q<illegal-octets-error>]],
1501     },
1502     {
1503     id => q<l=1.8c>,
1504     in => qq<\x8C>,
1505     out1 => [undef, "\x8C", [q<illegal-octets-error>]],
1506     out2 => [undef, "\x8C", [q<illegal-octets-error>]],
1507     },
1508     {
1509     id => q<l=1.8e>,
1510     in => qq<\x8E>,
1511     out1 => [undef, "\x8E", [q<illegal-octets-error>]],
1512     out2 => [undef, "\x8E", [q<illegal-octets-error>]],
1513     },
1514     {
1515     id => q<l=1.8f>,
1516     in => qq<\x8F>,
1517     out1 => [undef, "\x8F", [q<illegal-octets-error>]],
1518     out2 => [undef, "\x8F", [q<illegal-octets-error>]],
1519     },
1520     {
1521     id => q<l=1.a0>,
1522     in => qq<\xA0>,
1523     out1 => [undef, "\xA0", [q<illegal-octets-error>]],
1524     out2 => [undef, "\xA0", [q<illegal-octets-error>]],
1525     },
1526     {
1527     id => q<l=1.a1>,
1528     in => qq<\xA1>,
1529     out1 => [undef, "\xA1", [q<illegal-octets-error>]],
1530     out2 => [undef, "\xA1", [q<illegal-octets-error>]],
1531     },
1532     {
1533     id => q<l=1.a2>,
1534     in => qq<\xA2>,
1535     out1 => [undef, "\xA2", [q<illegal-octets-error>]],
1536     out2 => [undef, "\xA2", [q<illegal-octets-error>]],
1537     },
1538     {
1539     id => q<l=1.df>,
1540     in => qq<\xdf>,
1541     out1 => [undef, "\xDF", [q<illegal-octets-error>]],
1542     out2 => [undef, "\xDF", [q<illegal-octets-error>]],
1543     },
1544     {
1545     id => q<l=1.e0>,
1546     in => qq<\xe0>,
1547     out1 => [undef, "\xE0", [q<illegal-octets-error>]],
1548     out2 => [undef, "\xE0", [q<illegal-octets-error>]],
1549     },
1550     {
1551     id => q<l=1.ef>,
1552     in => qq<\xEF>,
1553     out1 => [undef, "\xEF", [q<illegal-octets-error>]],
1554     out2 => [undef, "\xEF", [q<illegal-octets-error>]],
1555     },
1556     {
1557     id => q<F0>,
1558     in => qq<\xF0>,
1559     out1 => [undef, "\xF0", [q<illegal-octets-error>]],
1560     out2 => [undef, "\xF0", [q<illegal-octets-error>]],
1561     },
1562     {
1563     id => q<l=1.fc>,
1564     in => qq<\xFC>,
1565     out1 => [undef, "\xFC", [q<illegal-octets-error>]],
1566     out2 => [undef, "\xFC", [q<illegal-octets-error>]],
1567     },
1568     {
1569     id => q<l=1.fd>,
1570     in => qq<\xFD>,
1571     out1 => [undef, "\xFD", [q<illegal-octets-error>]],
1572     out2 => [undef, "\xFD", [q<illegal-octets-error>]],
1573     },
1574     {
1575     id => q<l=1.fe>,
1576     in => qq<\xFE>,
1577     out1 => [undef, "\xFE", [q<illegal-octets-error>]],
1578     out2 => [undef, "\xFE", [q<illegal-octets-error>]],
1579     },
1580     {
1581     id => q<l=1.ff>,
1582     in => qq<\xFF>,
1583     out1 => [undef, "\xFF", [q<illegal-octets-error>]],
1584     out2 => [undef, "\xFF", [q<illegal-octets-error>]],
1585     },
1586     {
1587     id => q<l=2.0000>,
1588     in => qq<\x00\x00>,
1589     out1 => [undef, "\x00", undef, "\x00", undef],
1590     out2 => [undef, "\x00", undef, "\x00", undef],
1591     },
1592     {
1593     id => q<l=2.0D0A>,
1594     in => qq<\x0D\x0A>,
1595     out1 => [undef, "\x0D", undef, "\x0A", undef],
1596     out2 => [undef, "\x0D", undef, "\x0A", undef],
1597     },
1598     {
1599     id => q<l=2.1B1B>,
1600     in => qq<\x1B\x1B>,
1601     out1 => [undef, "\x1B", [q<illegal-octets-error>],
1602     "\x1B", [q<illegal-octets-error>]],
1603     out2 => [undef, "\x1B", [q<illegal-octets-error>],
1604     "\x1B", [q<illegal-octets-error>]],
1605     },
1606     {
1607     id => q<l=2.1B20>,
1608     in => qq<\x1B\x20>,
1609     out1 => [undef, "\x1B", [q<illegal-octets-error>], "\x20", undef],
1610     out2 => [undef, "\x1B", [q<illegal-octets-error>], "\x20", undef],
1611     },
1612     {
1613     id => q<l=2.1B24>,
1614     in => qq<\x1B\x24>,
1615     out1 => [undef, "\x1B", [q<illegal-octets-error>], "\x24", undef],
1616     out2 => [undef, "\x1B", [q<illegal-octets-error>], "\x24", undef],
1617     },
1618     {
1619     id => q<l=2.1B28>,
1620     in => qq<\x1B\x28>,
1621     out1 => [undef, "\x1B", [q<illegal-octets-error>], "\x28", undef],
1622     out2 => [undef, "\x1B", [q<illegal-octets-error>], "\x28", undef],
1623     },
1624     {
1625     id => q<l=2.2020>,
1626     in => qq<\x20\x20>,
1627     out1 => [undef, "\x20", undef, "\x20", undef],
1628     out2 => [undef, "\x20", undef, "\x20", undef],
1629     },
1630     {
1631     id => q<l=2.ab>,
1632     in => qq<ab>,
1633     out1 => [undef, "a", undef, "b", undef],
1634     out2 => [undef, "a", undef, "b", undef],
1635     },
1636     {
1637     id => q<8040>,
1638     in => qq<\x80\x40>,
1639     out1 => [undef, "\x80", [q<illegal-octets-error>],
1640     "\x40", undef],
1641     out2 => [undef, "\x80", [q<illegal-octets-error>],
1642     "\x40", undef],
1643     },
1644     {
1645     id => q<1B2440>,
1646     in => qq<\x1B\x24\x40>,
1647     out1 => [undef],
1648     out2 => [undef],
1649     eof_error => [q<invalid-state-error>],
1650     },
1651     {
1652     id => q<1B2442>,
1653     in => qq<\x1B\x24\x42>,
1654     out1 => [undef],
1655     out2 => [undef],
1656     eof_error => [q<invalid-state-error>],
1657     },
1658     {
1659     id => q<1B2840>,
1660     in => qq<\x1B\x28\x40>,
1661     out1 => [undef, "\x1B", [q<illegal-octets-error>], "(", undef,
1662     "\x40", undef],
1663     out2 => [undef, "\x1B", [q<illegal-octets-error>], "(", undef,
1664     "\x40", undef],
1665     },
1666     {
1667     id => q<1B2842>,
1668     in => qq<\x1B\x28\x42>,
1669     out1 => [undef],
1670     out2 => [undef],
1671     },
1672     {
1673     id => q<1B284A>,
1674     in => qq<\x1B\x28\x4A>,
1675     out1 => [undef],
1676     out2 => [undef],
1677     eof_error => [q<invalid-state-error>],
1678     },
1679     {
1680     id => q<1B$B1B(B>,
1681     in => qq<\x1B\x24\x42\x1B\x28\x42>,
1682     out1 => [undef],
1683     out2 => [undef],
1684     },
1685     {
1686     id => q<1B(B1B(B>,
1687     in => qq<\x1B\x28\x42\x1B\x28\x42>,
1688     out1 => [undef],
1689     out2 => [undef],
1690     },
1691     {
1692     id => q<1B(Ba1B(B>,
1693     in => qq<\x1B\x28\x42a\x1B\x28\x42>,
1694     out1 => [undef, "a", undef],
1695     out2 => [undef, "a", undef],
1696     },
1697     {
1698     id => q<1B(Ba1B(B1B(B>,
1699     in => qq<\x1B\x28\x42a\x1B\x28\x42\x1B\x28\x42>,
1700     out1 => [undef, "a", undef],
1701     out2 => [undef, "a", undef],
1702     },
1703     {
1704     id => q<1B$42!!1B2842>,
1705     in => qq<\x1B\x24\x42!!\x1B\x28\x42>,
1706     out1 => [undef, "\x{3000}", undef],
1707     out2 => [undef, "\x{3000}", undef],
1708     },
1709     {
1710     id => q<1B$4221211B284A>,
1711     in => qq<\x1B\x24\x42!!\x1B\x28\x4A>,
1712     out1 => [undef, "\x{3000}", undef],
1713     out2 => [undef, "\x{3000}", undef],
1714     eof_error => [q<invalid-state-error>],
1715     },
1716     {
1717     id => q<1B$4021211B2842>,
1718     in => qq<\x1B\x24\x40!!\x1B\x28\x42>,
1719     out1 => [undef, "\x{3000}", undef],
1720     out2 => [undef, "\x{3000}", undef],
1721     },
1722     {
1723     id => q<1B$402121211B2842>,
1724     in => qq<\x1B\x24\x40!!!\x1B\x28\x42>,
1725     out1 => [undef, "\x{3000}", undef, "!", [q<illegal-octets-error>]],
1726     out2 => [undef, "\x{3000}", undef, "!", [q<illegal-octets-error>]],
1727     },
1728     {
1729     id => q<1B$4021211B2442!!1B2842>,
1730     in => qq<\x1B\x24\x40!!\x1B\x24\x42!!\x1B\x28\x42>,
1731     out1 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1732     out2 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1733     },
1734     {
1735     id => q<1B$4021211B2440!!1B2842>,
1736     in => qq<\x1B\x24\x40!!\x1B\x24\x40!!\x1B\x28\x42>,
1737     out1 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1738     out2 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1739     },
1740     {
1741     id => q<1B$@!"1B(B\~|>,
1742     in => qq<\x1B\x24\x40!"\x1B(B\\~|>,
1743     out1 => [undef, "\x{3001}", undef, "\x5C", undef,
1744     "\x7E", undef, "|", undef],
1745     out2 => [undef, "\x{3001}", undef, "\x5C", undef,
1746     "\x7E", undef, "|", undef],
1747     },
1748     {
1749     id => q<1B$B!"1B(J\~|1B(B>,
1750     in => qq<\x1B\x24\x42!"\x1B(J\\~|\x1B(B>,
1751     out1 => [undef, "\x{3001}", undef, "\xA5", undef,
1752     "\x{203E}", undef, "|", undef],
1753     out2 => [undef, "\x{3001}", undef, "\xA5", undef,
1754     "\x{203E}", undef, "|", undef],
1755     },
1756     {
1757     id => q<78compat.3022(16-02)>,
1758     in => qq<\x1B\$\@\x30\x22\x1B\$B\x30\x22\x1B(B>,
1759     out1 => [undef, "\x{555E}", undef, "\x{5516}", undef],
1760     out2 => [undef, "\x{5516}", undef, "\x{5516}", undef],
1761     },
1762     {
1763     id => q<unassigned.2239>,
1764     in => qq<\x1B\$\@\x22\x39\x1B\$B\x22\x39\x1B(B>,
1765     out1 => [undef, "\x22\x39", [q<unassigned-code-point-error>],
1766     "\x22\x39", [q<unassigned-code-point-error>]],
1767     out2 => [undef, "\x22\x39", [q<unassigned-code-point-error>],
1768     "\x22\x39", [q<unassigned-code-point-error>]],
1769     },
1770     {
1771     id => q<83add.223A>,
1772     in => qq<\x1B\$\@\x22\x3A\x1B\$B\x22\x3A\x1B(B>,
1773     out1 => [undef, "\x22\x3A", [q<unassigned-code-point-error>],
1774     "\x{2208}", undef],
1775     out2 => [undef, "\x{2208}", undef, "\x{2208}", undef],
1776     },
1777     {
1778     id => q<83add.2840>,
1779     in => qq<\x1B\$\@\x28\x40\x1B\$B\x28\x40\x1B(B>,
1780     out1 => [undef, "\x28\x40", [q<unassigned-code-point-error>],
1781     "\x{2542}", undef],
1782     out2 => [undef, "\x{2542}", undef, "\x{2542}", undef],
1783     },
1784     {
1785     id => q<83add.7421>,
1786     in => qq<\x1B\$\@\x74\x21\x1B\$B\x74\x21\x1B(B>,
1787     out1 => [undef, "\x74\x21", [q<unassigned-code-point-error>],
1788     "\x{582F}", undef],
1789     out2 => [undef, "\x{5C2D}", undef, "\x{582F}", undef],
1790     },
1791     {
1792     id => q<83swap.3033>,
1793     in => qq<\x1B\$\@\x30\x33\x1B\$B\x30\x33\x1B(B>,
1794     out1 => [undef, "\x{9C3A}", undef, "\x{9BF5}", undef],
1795     out2 => [undef, "\x{9C3A}", undef, "\x{9BF5}", undef],
1796     },
1797     {
1798     id => q<83swap.724D>,
1799     in => qq<\x1B\$\@\x72\x4D\x1B\$B\x72\x4D\x1B(B>,
1800     out1 => [undef, "\x{9BF5}", undef, "\x{9C3A}", undef],
1801     out2 => [undef, "\x{9BF5}", undef, "\x{9C3A}", undef],
1802     },
1803     {
1804     id => q<90add.7425>,
1805     in => qq<\x1B\$\@\x74\x25\x1B\$B\x74\x25\x1B(B>,
1806     out1 => [undef, "\x74\x25", [q<unassigned-code-point-error>],
1807     "\x74\x25", [q<unassigned-code-point-error>]],
1808     out2 => [undef, "\x{51DC}", undef, "\x{51DC}", undef],
1809     },
1810     {
1811     id => q<90add.7426>,
1812     in => qq<\x1B\$\@\x74\x26\x1B\$B\x74\x26\x1B(B>,
1813     out1 => [undef, "\x74\x26", [q<unassigned-code-point-error>],
1814     "\x74\x26", [q<unassigned-code-point-error>]],
1815     out2 => [undef, "\x{7199}", undef, "\x{7199}", undef],
1816     },
1817     );
1818    
1819     check_charset ('IETF-ISO-2022-JP', $IANA_CHARSET.'iso-2022-jp', # run 1: expectations taken from out1
1820 wakaba 1.2 [map { +{%$_, out => $_->{out1}, name => 'iso-2022-jp'} } # shallow copy per case, so the shared fixtures are never modified
1821     @testdata]);
1822 wakaba 1.1 check_charset ('XML-ISO-2022-JP', $XML_CHARSET.'iso-2022-jp', # run 2: same inputs, expectations taken from out2
1823 wakaba 1.2 [map { +{%$_, out => $_->{out2}, name => 'ISO-2022-JP'} } # built from the pristine cases, not from state left by run 1
1824     @testdata]);
1825 wakaba 1.1 }

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24