/[suikacvs]/markup/html/whatpm/t/Charset-DecodeHandler.t
Suika

Contents of /markup/html/whatpm/t/Charset-DecodeHandler.t

Parent Directory | Revision Log


Revision 1.1 - (hide annotations) (download) (as text)
Sun Jul 15 12:41:51 2007 UTC (18 years ago) by wakaba
Branch: MAIN
File MIME type: application/x-troff
++ whatpm/t/ChangeLog	15 Jul 2007 12:41:07 -0000
2007-07-15  Wakaba  <wakaba@suika.fam.cx>

	* Charset-DecodeHandler.t: New test script.

++ whatpm/Whatpm/ChangeLog	15 Jul 2007 08:22:21 -0000
	* Charset/: New directory.

2007-07-15  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/Charset/ChangeLog	15 Jul 2007 08:22:53 -0000
2007-07-15  Wakaba  <wakaba@suika.fam.cx>

	* DecodeHandle.pm: New Perl module (created
	from manakai's |Encode.dis|).

2007-07-15  Wakaba  <wakaba@suika.fam.cx>

	* ChangeLog: New file.

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3     use Test;
4     BEGIN { plan tests => 6185 }
5    
6     require Whatpm::Charset::DecodeHandle;
7    
8     my $XML_AUTO_CHARSET = q<http://suika.fam.cx/www/2006/03/xml-entity/>;
9     my $IANA_CHARSET = q<urn:x-suika-fam-cx:charset:>;
10     my $PERL_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.>;
11     my $XML_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.>;
12    
13     ## |create_decode_handle|
14     for my $test (
15     ['perl.utf8', $PERL_CHARSET.'utf8', 1],
16     ['xml', $XML_AUTO_CHARSET, 1],
17     ['unknown', q<http://www.unknown.test/>, 0],
18     ['iana.euc-jp', $IANA_CHARSET.'euc-jp', 1],
19     ['xml.euc-jp', $XML_CHARSET.'euc-jp', 1],
20     ['iana.shift_jis', $IANA_CHARSET.'shift_jis', 1],
21     ['xml.shift_jis', $XML_CHARSET.'shift_jis', 1],
22     ['iana.iso-2022-jp', $IANA_CHARSET.'iso-2022-jp', 1],
23     ['xml.iso-2022-jp', $XML_CHARSET.'iso-2022-jp', 1],
24     ) {
25     open my $fh, '<', \'';
26     my $dh = Whatpm::Charset::DecodeHandle->create_decode_handle ($test->[1], $fh);
27    
28     if ($test->[2]) {
29     ok UNIVERSAL::isa ($dh, 'Whatpm::Charset::DecodeHandle::Encode') ? 1 : 0, 1,
30     'create_decode_handle ' . $test->[0] . ' object';
31     ok ref $dh->onerror eq 'CODE' ? 1 : 0, 1,
32     'create_decode_handle ' . $test->[0] . ' onerror';
33     } else {
34     ok UNIVERSAL::isa ($dh, 'Whatpm::Charset::DecodeHandle::Encode') ? 1 : 0, 0,
35     'create_decode_handle ' . $test->[0] . ' object';
36    
37     Whatpm::Charset::DecodeHandle->create_decode_handle ($test->[1], $fh, sub {
38     ok $_[1], 'charset-not-supported-error',
39     'create_decode_handle ' . $test->[0] . ' error';
40     });
41     }
42     }
43    
44     ## |name_to_uri|
45     for (
46     [$IANA_CHARSET.'utf-8', 'utf-8'],
47     [$IANA_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
48     [$IANA_CHARSET.'utf-8', 'UTF-8'],
49     [$IANA_CHARSET.'utf-8', 'uTf-8'],
50     [$IANA_CHARSET.'utf-16be', 'utf-16be'],
51     ) {
52     my $iname = Whatpm::Charset::DecodeHandle->name_to_uri (ietf => $_->[1]);
53     ok $iname, $_->[0], 'ietf charset URI ' . $_->[1];
54     }
55    
56     for (
57     [$XML_CHARSET.'utf-8', 'utf-8'],
58     [$XML_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
59     [$XML_CHARSET.'utf-8', 'UTF-8'],
60     [$XML_CHARSET.'utf-8', 'uTf-8'],
61     [$IANA_CHARSET.'utf-16be', 'utf-16be'],
62     ) {
63     my $iname = Whatpm::Charset::DecodeHandle->name_to_uri (xml => $_->[1]);
64     ok $iname, $_->[0], 'XML encoding URI ' . $_->[1];
65     }
66    
67     ## |uri_to_name|
68     for (
69     [$IANA_CHARSET.'utf-8', 'utf-8'],
70     [$IANA_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
71     [q<http://charset.example/>, undef],
72     ) {
73     my $uri = Whatpm::Charset::DecodeHandle->uri_to_name (ietf => $_->[0]);
74     ok $uri, $_->[1], 'URI -> IETF charset ' . $_->[0];
75     }
76    
77     for (
78     [$XML_CHARSET.'utf-8', 'utf-8'],
79     [$XML_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
80     [q<http://charset.example/>, undef],
81     ) {
82     my $uri = Whatpm::Charset::DecodeHandle->uri_to_name (xml => $_->[0]);
83     ok $uri, $_->[1], 'URI -> XML encoding ' . $_->[0];
84     }
85    
86     ## |getc|
87     {
88     my $byte = "a\xE3\x81\x82\x81a";
89     open my $fh, '<', \$byte;
90     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
91     ($PERL_CHARSET.'utf8', $fh);
92    
93     my $error;
94     $efh->onerror (sub {
95     my ($efh, $type, %opt) = @_;
96     $error = ${$opt{octets}};
97     });
98    
99     ok $efh->getc, "a", "getc 1 [1]";
100     ok $error, undef, "getc 1 [1] error";
101     ok $efh->getc, "\x{3042}", "getc 1 [2]";
102     ok $error, undef, "getc 1 [2] error";
103     ok $efh->getc, "\x81", "getc 1 [3]";
104     ok $error, "\x81", "getc 1 [3] error";
105     undef $error;
106     ok $efh->getc, "a", "getc 1 [4]";
107     ok $error, undef, "getc 1 [4] error";
108     ok $efh->getc, undef, "getc 1 [5]";
109     ok $error, undef, "getc 1 [5] error";
110     }
111    
112     {
113     my $byte = "a" x 256;
114     $byte .= "b" x 256;
115    
116     open my $fh, '<', \$byte;
117     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
118     ($PERL_CHARSET.'utf8', $fh);
119    
120     my $error;
121     $efh->onerror (sub {
122     my ($efh, $type, %opt) = @_;
123     $error = ${$opt{octets}};
124     });
125    
126     for my $i (0..255) {
127     ok $efh->getc, "a", "getc 2 [$i]";
128     ok $error, undef, "getc 2 [$i] error";
129     }
130    
131     for my $i (0..255) {
132     ok $efh->getc, "b", "getc 2 [255+$i]";
133     ok $error, undef, "getc 2 [255+$i] error";
134     }
135    
136     ok $efh->getc, undef, "getc 2 [-1]";
137     ok $error, undef, "getc 2 [-1] error";
138     }
139    
140     {
141     my $byte = "a" x 255;
142     $byte .= "\xE3\x81\x82";
143     $byte .= "b" x 256;
144    
145     open my $fh, '<', \$byte;
146     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
147     ($PERL_CHARSET.'utf8', $fh);
148    
149     my $error;
150     $efh->onerror (sub {
151     my ($efh, $type, %opt) = @_;
152     $error = ${$opt{octets}};
153     });
154    
155     for my $i (0..254) {
156     ok $efh->getc, "a", "getc 3 [$i]";
157     ok $error, undef, "getc 3 [$i] error";
158     }
159    
160     ok $efh->getc, "\x{3042}", "getc 3 [255]";
161     ok $error, undef, "getc 3 [255] error";
162    
163     for my $i (0..255) {
164     ok $efh->getc, "b", "getc 3 [255+$i]";
165     ok $error, undef, "getc 3 [255+$i] error";
166     }
167    
168     ok $efh->getc, undef, "getc 3 [-1]";
169     ok $error, undef, "getc 3 [-1] error";
170     }
171    
172     {
173     my $byte = "a" x 255;
174     $byte .= "\xE3";
175    
176     open my $fh, '<', \$byte;
177     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
178     ($PERL_CHARSET.'utf8', $fh);
179    
180     my $error;
181     $efh->onerror (sub {
182     my ($efh, $type, %opt) = @_;
183     $error = ${$opt{octets}};
184     });
185    
186     for my $i (0..254) {
187     ok $efh->getc, "a", "getc 4 [$i]";
188     ok $error, undef, "getc 4 [$i] error";
189     }
190    
191     ok $efh->getc, "\xE3", "getc 4 [255]";
192     ok $error, "\xE3", "getc 4 [255] error";
193     undef $error;
194    
195     ok $efh->getc, undef, "getc 4 [-1]";
196     ok $error, undef, "getc 4 [-1] error";
197     }
198    
199     ## |ungetc|
200     {
201     my $byte = "a\x{4E00}b\x{4E11}";
202    
203     open my $fh, '<', \$byte;
204     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
205     ($PERL_CHARSET.'utf8', $fh);
206    
207     ok $efh->getc, "a", "ungetc [1]";
208    
209     $efh->ungetc (ord "a");
210     ok $efh->getc, "a", "ungetc [2]";
211    
212     ok $efh->getc, "\x{4E00}", "ungetc [3]";
213    
214     $efh->ungetc (ord "\x{4E00}");
215     ok $efh->getc, "\x{4E00}", "ungetc [4]";
216    
217     ok $efh->getc, "b", "ungetc [5]";
218    
219     ok $efh->getc, "\x{4E11}", "ungetc [6]";
220    
221     $efh->ungetc (ord "\x{4E11}");
222     ok $efh->getc, "\x{4E11}", "ungetc [7]";
223     }
224    
225     ## UTF-8, UTF-16 and BOM
226     for my $test (
227     ["UTF-8 BOM 1", qq<\xEF\xBB\xBFabc>, $XML_CHARSET.'utf-8',
228     ["a", "b", "c", undef], 1],
229     ["UTF-8 no BOM 1", qq<abc>, $XML_CHARSET.'utf-8',
230     ["a", "b", "c", undef], 0],
231     ["UTF-8 BOM 2", qq<\xEF\xBB\xBF\xEF\xBB\xBFabc>, $XML_CHARSET.'utf-8',
232     ["\x{FEFF}", "a", "b", "c", undef], 1],
233     ["UTF-8 BOM 3", qq<\xEF\xBB\xBF>, $XML_CHARSET.'utf-8',
234     [undef], 1],
235     ["UTF-8 no BOM 2", qq<>, $XML_CHARSET.'utf-8',
236     [undef], 0],
237     ["UTF-8 no BOM 3", qq<ab>, $XML_CHARSET.'utf-8',
238     [qw/a b/, undef], 0],
239     ["UTF-8 no BOM 4", qq<a>, $XML_CHARSET.'utf-8',
240     [qw/a/, undef], 0],
241     ["UTF-16BE BOM 1", qq<\xFE\xFF\x4E\x00\x00a>, $XML_CHARSET.'utf-16',
242     ["\x{4E00}", "a", undef], 1],
243     ["UTF-16LE BOM 1", qq<\xFF\xFE\x00\x4Ea\x00>, $XML_CHARSET.'utf-16',
244     ["\x{4E00}", "a", undef], 1],
245     ["UTF-16BE BOM 2", qq<\xFE\xFF\x00a>, $XML_CHARSET.'utf-16',
246     ["a", undef], 1],
247     ["UTF-16LE BOM 2", qq<\xFF\xFEa\x00>, $XML_CHARSET.'utf-16',
248     ["a", undef], 1],
249     ["UTF-16BE BOM 3", qq<\xFE\xFF>, $XML_CHARSET.'utf-16',
250     [undef], 1],
251     ["UTF-16LE BOM 3", qq<\xFF\xFE>, $XML_CHARSET.'utf-16',
252     [undef], 1],
253     ) {
254     my $error;
255    
256     open my $fh, '<', \($test->[1]);
257     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
258     ($test->[2], $fh, sub { $error = 1 });
259    
260     for my $i (0..$#{$test->[3]}) {
261     ok $efh->getc, $test->[3]->[$i], $test->[0] . " $i";
262     }
263     ok $error, undef, $test->[0] . " error";
264     ok $efh->has_bom ? 1 : 0, $test->[4], $test->[0] . " has_bom";
265     }
266    
267     {
268     my $byte = qq<\xFE\xFFa>;
269    
270     my $error;
271    
272     open my $fh, '<', \$byte;
273     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
274     ($XML_CHARSET.'utf-16', $fh, sub { $error = $_[1] });
275    
276     ok $error, undef, "UTF-16 [1]";
277     ok $efh->getc, "a", "UTF-16 [2]";
278     ok $error, 'illegal-octets-error', "UTF-16 [3]";
279     undef $error;
280     ok $efh->getc, undef, "UTF-16 [4]";
281     ok $error, undef, "UTF-16 [5]";
282     ok $efh->has_bom ? 1 : 0, 1, "UTF-16 [6]";
283     }
284     {
285     my $byte = qq<\xFF\xFEa>;
286    
287     my $error;
288    
289     open my $fh, '<', \$byte;
290     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
291     ($XML_CHARSET.'utf-16', $fh, sub { $error = $_[1] });
292    
293     ok $error, undef, "UTF-16 [7]";
294     ok $efh->getc, "a", "UTF-16 [8]";
295     ok $error, 'illegal-octets-error', "UTF-16 [9]";
296     undef $error;
297     ok $efh->getc, undef, "UTF-16 [10]";
298     ok $error, undef, "UTF-16 [11]";
299     ok $efh->has_bom ? 1 : 0, 1, "UTF-16 [12]";
300     }
301    
302     {
303     my $byte = qq<\xFD\xFF>;
304    
305     my $error;
306    
307     open my $fh, '<', \$byte;
308     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
309     ($XML_CHARSET.'utf-16', $fh, sub { $error = $_[1] });
310    
311     ok $error, 'no-bom-error', "UTF-16 [13]";
312     undef $error;
313    
314     ok $efh->getc, "\x{FDFF}", "UTF-16 [14]";
315     ok $error, undef, "UTF-16 [15]";
316     ok $efh->getc, undef, "UTF-16 [16]";
317     ok $error, undef, "UTF-16 [17]";
318     ok $efh->has_bom ? 1 : 0, 0, "UTF-16 [18]";
319     }
320    
321     {
322     my $byte = qq<\xFD>;
323    
324     my $error;
325    
326     open my $fh, '<', \$byte;
327     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
328     ($XML_CHARSET.'utf-16', $fh, sub { $error = $_[1] });
329    
330     ok $error, 'no-bom-error', "UTF-16 [19]";
331     undef $error;
332    
333     ok $efh->getc, "\xFD", "UTF-16 [20]";
334     ok $error, 'illegal-octets-error', "UTF-16 [21]";
335     undef $error;
336    
337     ok $efh->getc, undef, "UTF-16 [22]";
338     ok $error, undef, "UTF-16 [23]";
339     ok $efh->has_bom ? 1 : 0, 0, "UTF-16 [24]";
340     }
341    
342     {
343     my $byte = qq<>;
344    
345     my $error;
346    
347     open my $fh, '<', \$byte;
348     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
349     ($XML_CHARSET.'utf-16', $fh, sub { $error = $_[1] });
350    
351     ok $error, 'no-bom-error', "UTF-16 [25]";
352     undef $error;
353    
354     ok $efh->getc, undef, "UTF-16 [26]";
355     ok $error, undef, "UTF-16 [27]";
356     ok $efh->has_bom ? 1 : 0, 0, "UTF-16 [28]";
357     }
358    
359     sub check_charset ($$$) {
360     my $test_name = $_[0];
361     my $charset_uri = $_[1];
362     for my $testdata (@{$_[2]}) {
363     my $byte = $testdata->{in};
364     my $error;
365     my $i = 0;
366    
367     open my $fh, '<', \$byte;
368     my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
369     ($charset_uri, $fh, sub {
370     my (undef, $etype, %opt) = @_;
371     $error = [$etype, \%opt];
372     });
373    
374     ok defined $efh ? 1 : 0, 1, "$test_name $testdata->{id} return";
375     next unless defined $efh;
376     ok $efh->has_bom ? 1 : 0, $testdata->{bom} || 0,
377     "$test_name $testdata->{id} BOM";
378     ok $efh->input_encoding, $testdata->{name}, "$test_name $testdata->{id} ie";
379    
380     while (@{$testdata->{out}}) {
381     if ($i != 0) {
382     my $c = shift @{$testdata->{out}};
383     ok $efh->getc, $c, "$test_name $testdata->{id} $i";
384     }
385    
386     my $v = shift @{$testdata->{out}};
387     if (defined $v) {
388     ok defined $error ? 1 : 0, 1, "$test_name $testdata->{id} $i error";
389     ok $error->[0], $v->[0], "$test_name $testdata->{id} $i error 0";
390     } else {
391     ok defined $error ? 1 : 0, 0, "$test_name $testdata->{id} $i error";
392     }
393     undef $error;
394     $i++;
395     }
396    
397     ok $efh->getc, undef, "$test_name $testdata->{id} EOF";
398     if ($testdata->{eof_error}) {
399     ok defined $error ? 1 : 0, 1, "$test_name $testdata->{id} EOF error";
400     ok $error->[0], $testdata->{eof_error}->[0],
401     "$test_name $testdata->{id} EOF error 0";
402     } else {
403     ok $error, undef, "$test_name $testdata->{id} EOF error";
404     }
405     } # testdata
406     } # check_charset
407    
408     ## XML Character Encoding Autodetection
409     {
410     my @testdata = (
411     {
412     id => q<l=0>,
413     in => q<>,
414     out => [undef],
415     name => 'utf-8', bom => 0,
416     },
417     {
418     id => q<l=1>,
419     in => "a",
420     out => [undef, "a", undef],
421     name => 'utf-8', bom => 0,
422     },
423     {
424     id => q<bom8.l=0>,
425     in => "\xEF\xBB\xBF",
426     out => [undef],
427     name => 'utf-8', bom => 1,
428     },
429     {
430     id => q<bom8.l=1>,
431     in => "\xEF\xBB\xBFa",
432     out => [undef, "a", undef],
433     name => 'utf-8', bom => 1,
434     },
435     {
436     id => q<bom8.zwnbsp>,
437     in => "\xEF\xBB\xBF\xEF\xBB\xBF",
438     out => [undef, "\x{FEFF}", undef],
439     name => 'utf-8', bom => 1,
440     },
441     {
442     id => q<bom16be.l=0>,
443     in => "\xFE\xFF",
444     out => [undef],
445     name => 'utf-16', bom => 1,
446     },
447     {
448     id => q<bom16le.l=0>,
449     in => "\xFF\xFE",
450     out => [undef],
451     name => 'utf-16', bom => 1,
452     },
453     {
454     id => q<bom16be.l=1>,
455     in => "\xFE\xFFa",
456     out => [undef, "a", [q<illegal-octets-error>]],
457     name => 'utf-16', bom => 1,
458     },
459     {
460     id => q<bom16le.l=1>,
461     in => "\xFF\xFEa",
462     out => [undef, "a", [q<illegal-octets-error>]],
463     name => 'utf-16', bom => 1,
464     },
465     {
466     id => q<bom16be.l=2>,
467     in => "\xFE\xFF\x4E\x00",
468     out => [undef, "\x{4E00}", undef],
469     name => 'utf-16', bom => 1,
470     },
471     {
472     id => q<bom16le.l=2>,
473     in => "\xFF\xFE\x00\x4E",
474     out => [undef, "\x{4E00}", undef],
475     name => 'utf-16', bom => 1,
476     },
477     {
478     id => q<bom16be.l=2lt>,
479     in => "\xFE\xFF\x00<",
480     out => [undef, "<", undef],
481     name => 'utf-16', bom => 1,
482     },
483     {
484     id => q<bom16le.l=2lt>,
485     in => "\xFF\xFE<\x00",
486     out => [undef, "<", undef],
487     name => 'utf-16', bom => 1,
488     },
489     {
490     id => q<bom16be.zwnbsp>,
491     in => "\xFE\xFF\xFE\xFF",
492     out => [undef, "\x{FEFF}", undef],
493     name => 'utf-16', bom => 1,
494     },
495     {
496     id => q<bom16le.zwnbsp>,
497     in => "\xFF\xFE\xFF\xFE",
498     out => [undef, "\x{FEFF}", undef],
499     name => 'utf-16', bom => 1,
500     },
501     {
502     id => q<bom32e3412.l=0>,
503     in => "\xFE\xFF\x00\x00",
504     out => [undef, "\x00", undef],
505     name => 'utf-16', bom => 1,
506     },
507     {
508     id => q<bom32e4321.l=0>,
509     in => "\xFF\xFE\x00\x00",
510     out => [undef, "\x00", undef],
511     name => 'utf-16', bom => 1,
512     },
513     {
514     id => q<bom16be.l=4ltq>,
515     in => "\xFE\xFF\x00<\x00?",
516     out => [undef, "<", undef, "?", undef],
517     name => 'utf-16', bom => 1,
518     },
519     {
520     id => q<bom16le.l=4ltq>,
521     in => "\xFF\xFE<\x00?\x00",
522     out => [undef, "<", undef, "?", undef],
523     name => 'utf-16', bom => 1,
524     },
525     {
526     id => q<bom16be.decl.1>,
527     in => qq[\xFE\xFF\x00<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
528     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
529     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
530     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00"\x00?\x00>],
531     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
532     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
533     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
534     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
535     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
536     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
537     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
538     "6", undef, '"', undef, "?", undef, ">", undef],
539     name => 'utf-16', bom => 1,
540     },
541     {
542     id => q<bom16le.decl.1>,
543     in => qq[\xFF\xFE<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
544     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
545     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
546     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00"\x00?\x00>\x00],
547     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
548     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
549     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
550     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
551     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
552     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
553     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
554     "6", undef, '"', undef, "?", undef, ">", undef],
555     name => 'utf-16', bom => 1,
556     },
557     {
558     id => q<utf16be.decl.1>,
559     in => qq[\x00<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
560     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
561     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
562     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00b\x00e\x00"\x00?\x00>],
563     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
564     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
565     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
566     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
567     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
568     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
569     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
570     "6", undef, "b", undef, "e", undef, '"', undef,
571     "?", undef, ">", undef],
572     name => 'utf-16be', bom => 0,
573     },
574     {
575     id => q<utf16le.decl.1>,
576     in => qq[<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
577     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
578     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
579     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00l\x00e\x00"].
580     qq[\x00?\x00>\x00],
581     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
582     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
583     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
584     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
585     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
586     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
587     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
588     "6", undef, "l", undef, "e", undef, '"', undef, "?", undef,
589     ">", undef],
590     name => 'utf-16le', bom => 0,
591     },
592     {
593     id => q<16be.decl.1>,
594     in => qq[\x00<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
595     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
596     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
597     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00"\x00?\x00>],
598     out => [[q<charset-name-mismatch-error>],
599     "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
600     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
601     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
602     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
603     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
604     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
605     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
606     "6", undef, '"', undef, "?", undef, ">", undef],
607     name => 'utf-16', bom => 0,
608     },
609     {
610     id => q<16le.decl.1>,
611     in => qq[<\x00?\x00x\x00m\x00l\x00 \x00v\x00e\x00r].
612     qq[\x00s\x00i\x00o\x00n\x00=\x00"\x001\x00.\x000\x00"].
613     qq[\x00 \x00e\x00n\x00c\x00o\x00d\x00i\x00n\x00g\x00=].
614     qq[\x00"\x00u\x00t\x00f\x00-\x001\x006\x00"\x00?\x00>\x00],
615     out => [[q<charset-name-mismatch-error>],
616     "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
617     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
618     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
619     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
620     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
621     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
622     "u", undef, "t", undef, "f", undef, "-", undef, "1", undef,
623     "6", undef, '"', undef, "?", undef, ">", undef],
624     name => 'utf-16', bom => 0,
625     },
626     {
627     id => q<8.decl.1>,
628     in => qq[<?xml version="1.0" encoding="utf-8"?>],
629     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
630     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
631     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
632     "1", undef, ".", undef, "0", undef, '"', undef, " ", undef,
633     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
634     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
635     "u", undef, "t", undef, "f", undef, "-", undef, "8", undef,
636     '"', undef, "?", undef, ">", undef],
637     name => 'utf-8', bom => 0,
638     },
639     {
640     id => q<8.decl.2>,
641     in => qq[<?xml encoding="utf-8"?>],
642     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
643     " ", undef,
644     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
645     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
646     "u", undef, "t", undef, "f", undef, "-", undef, "8", undef,
647     '"', undef, "?", undef, ">", undef],
648     name => 'utf-8', bom => 0,
649     },
650     {
651     id => q<8.decl.3>,
652     in => qq[<?xml version="1.1" encoding="utf-8"?>],
653     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
654     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
655     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
656     "1", undef, ".", undef, "1", undef, '"', undef, " ", undef,
657     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
658     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
659     "u", undef, "t", undef, "f", undef, "-", undef, "8", undef,
660     '"', undef, "?", undef, ">", undef],
661     name => 'utf-8', bom => 0,
662     },
663     {
664     id => q<8.decl.4>,
665     in => qq[<?xml version="1.0"?>],
666     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
667     " ", undef, "v", undef, "e", undef, "r", undef, "s", undef,
668     "i", undef, "o", undef, "n", undef, "=", undef, '"', undef,
669     "1", undef, ".", undef, "0", undef, '"', undef,
670     "?", undef, ">", undef],
671     name => 'utf-8', bom => 0,
672     },
673     {
674     id => q<bom8.decl.1>,
675     in => qq[\xEF\xBB\xBF<?xml encoding="utf-8"?>],
676     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
677     " ", undef,
678     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
679     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
680     "u", undef, "t", undef, "f", undef, "-", undef, "8", undef,
681     '"', undef, "?", undef, ">", undef],
682     name => 'utf-8', bom => 1,
683     },
684     {
685     id => q<us-ascii.decl.1>,
686     in => qq[<?xml encoding="us-ascii"?>],
687     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
688     " ", undef,
689     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
690     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
691     "u", undef, "s", undef, "-", undef, "a", undef, "s", undef,
692     "c", undef, "i", undef, "i", undef,
693     '"', undef, "?", undef, ">", undef],
694     name => 'us-ascii', bom => 0,
695     },
696     {
697     id => q<us-ascii.decl.2>,
698     in => qq[<?xml encoding="US-ascii"?>],
699     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
700     " ", undef,
701     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
702     "i", undef, "n", undef, "g", undef, "=", undef, '"', undef,
703     "U", undef, "S", undef, "-", undef, "a", undef, "s", undef,
704     "c", undef, "i", undef, "i", undef,
705     '"', undef, "?", undef, ">", undef],
706     name => 'us-ascii', bom => 0,
707     },
708     {
709     id => q<us-ascii.decl.3>,
710     in => qq[<?xml encoding='us-ascii'?>],
711     out => [undef, "<", undef, "?", undef, "x", undef, "m", undef, "l", undef,
712     " ", undef,
713     "e", undef, "n", undef, "c", undef, "o", undef, "d", undef,
714     "i", undef, "n", undef, "g", undef, "=", undef, "'", undef,
715     "u", undef, "s", undef, "-", undef, "a", undef, "s", undef,
716     "c", undef, "i", undef, "i", undef,
717     "'", undef, "?", undef, ">", undef],
718     name => 'us-ascii', bom => 0,
719     },
720     );
721     check_charset ('XML', $XML_AUTO_CHARSET, \@testdata);
722     }
723    
724     ## EUC-JP
725     {
726     my @testdata = (
727     {
728     id => q<l=0>,
729     in => q<>,
730     out => [undef],
731     },
732     {
733     id => q<l=1.00>,
734     in => qq<\x00>,
735     out => [undef, "\x00", undef],
736     },
737     {
738     id => q<l=1.0d>,
739     in => qq<\x0D>,
740     out => [undef, "\x0D", undef],
741     },
742     {
743     id => q<l=1.0e>,
744     in => qq<\x0E>,
745     out => [undef, "\x0E", undef],
746     }, # Error??
747     {
748     id => q<l=1.0f>,
749     in => qq<\x0F>,
750     out => [undef, "\x0F", undef],
751     }, # Error??
752     {
753     id => q<l=1.1b>,
754     in => qq<\x1B>,
755     out => [undef, "\x1B", undef],
756     }, # Error??
757     {
758     id => q<l=1.a>,
759     in => q<a>,
760     out => [undef, "a", undef],
761     },
762     {
763     id => q<l=1.20>,
764     in => qq<\x20>,
765     out => [undef, "\x20", undef],
766     },
767     {
768     id => q<5C>,
769     in => qq<\x5C>,
770     out => [undef, "\x5C", undef],
771     },
772     {
773     id => q<l=1.7E>,
774     in => qq<\x7E>,
775     out => [undef, "\x7E", undef],
776     },
777     {
778     id => q<l=1.7F>,
779     in => qq<\x7F>,
780     out => [undef, "\x7F", undef],
781     },
782     {
783     id => q<l=1.80>,
784     in => qq<\x80>,
785     out => [undef, "\x80", undef],
786     },
787     {
788     id => q<l=1.8c>,
789     in => qq<\x8C>,
790     out => [undef, "\x8C", undef],
791     },
792     {
793     id => q<l=1.8e>,
794     in => qq<\x8E>,
795     out => [undef, "\x8E", [q<illegal-octets-error>]],
796     },
797     {
798     id => q<l=1.8f>,
799     in => qq<\x8F>,
800     out => [undef, "\x8F", [q<illegal-octets-error>]],
801     },
802     {
803     id => q<l=1.a0>,
804     in => qq<\xA0>,
805     out => [undef, "\xA0", [q<unassigned-code-point-error>]],
806     },
807     {
808     id => q<l=1.a1>,
809     in => qq<\xA1>,
810     out => [undef, "\xA1", [q<illegal-octets-error>]],
811     },
812     {
813     id => q<l=1.a2>,
814     in => qq<\xA2>,
815     out => [undef, "\xA2", [q<illegal-octets-error>]],
816     },
817     {
818     id => q<l=1.fd>,
819     in => qq<\xFD>,
820     out => [undef, "\xFD", [q<illegal-octets-error>]],
821     },
822     {
823     id => q<l=1.fe>,
824     in => qq<\xFE>,
825     out => [undef, "\xFE", [q<illegal-octets-error>]],
826     },
827     {
828     id => q<l=1.ff>,
829     in => qq<\xFF>,
830     out => [undef, "\xFF", [q<unassigned-code-point-error>]],
831     },
832     {
833     id => q<l=2.0000>,
834     in => qq<\x00\x00>,
835     out => [undef, "\x00", undef, "\x00", undef],
836     },
837     {
838     id => q<l=2.0D0A>,
839     in => qq<\x0D\x0A>,
840     out => [undef, "\x0D", undef, "\x0A", undef],
841     },
842     {
843     id => q<l=2.1B28>,
844     in => qq<\x1B\x28>,
845     out => [undef, "\x1B", undef, "\x28", undef],
846     },# Error??
847     {
848     id => q<l=2.2020>,
849     in => qq<\x20\x20>,
850     out => [undef, "\x20", undef, "\x20", undef],
851     },
852     {
853     id => q<l=2.ab>,
854     in => qq<ab>,
855     out => [undef, "a", undef, "b", undef],
856     },
857     {
858     id => q<l=2.a0a1>,
859     in => qq<\xA0\xA1>,
860     out => [undef, "\xA0", [q<unassigned-code-point-error>],
861     "\xA1", [q<illegal-octets-error>]],
862     },
863     {
864     id => q<l=2.a1a1>,
865     in => qq<\xA1\xA1>,
866     out => [undef, "\x{3000}", undef],
867     },
868     {
869     id => q<l=2.a1a2>,
870     in => qq<\xA1\xA2>,
871     out => [undef, "\x{3001}", undef],
872     },
873     {
874     id => q<l=2.a1a4>,
875     in => qq<\xA1\xA4>,
876     out => [undef, "\x{FF0C}", undef], # FULLWIDTH COMMA
877     },
878     {
879     id => q<a1a6>,
880     in => qq<\xA1\xA6>,
881     out => [undef, "\x{30FB}", undef], # KATAKANA MIDDLE DOT
882     },
883     {
884     id => q<a1a7>,
885     in => qq<\xA1\xA7>,
886     out => [undef, "\x{FF1A}", undef], # FULLWIDTH COLON
887     },
888     {
889     id => q<a1b1>,
890     in => qq<\xA1\xB1>,
891     out => [undef, "\x{203E}", undef], # OVERLINE
892     },
893     {
894     id => q<a1bd>,
895     in => qq<\xA1\xBD>,
896     out => [undef, "\x{2014}", undef], # EM DASH
897     },
898     {
899     id => q<a1c0>,
900     in => qq<\xA1\xC0>,
901     out => [undef, "\x{FF3C}", undef], # FULLWIDTH REVERSE SOLIDUS
902     },
903     {
904     id => q<a1c1>,
905     in => qq<\xA1\xC1>,
906     out => [undef, "\x{301C}", undef], # WAVE DASH
907     },
908     {
909     id => q<a1c2>,
910     in => qq<\xA1\xC2>,
911     out => [undef, "\x{2016}", undef], # DOUBLE VERTICAL LINE
912     },
913     {
914     id => q<a1c4>,
915     in => qq<\xA1\xC4>,
916     out => [undef, "\x{2026}", undef], # HORIZONTAL ELLIPSIS
917     },
918     {
919     id => q<a1dd>,
920     in => qq<\xA1\xDD>,
921     out => [undef, "\x{2212}", undef], # MINUS SIGN
922     },
923     {
924     id => q<a1ef>,
925     in => qq<\xA1\xEF>,
926     out => [undef, "\x{00A5}", undef], # YEN SIGN
927     },
928     {
929     id => q<a1f1>,
930     in => qq<\xA1\xF1>,
931     out => [undef, "\x{00A2}", undef], # CENT SIGN
932     },
933     {
934     id => q<a1f2>,
935     in => qq<\xA1\xF2>,
936     out => [undef, "\x{00A3}", undef], # POUND SIGN
937     },
938     {
939     id => q<a1f2>,
940     in => qq<\xA1\xFF>,
941     out => [undef, "\xA1", [q<illegal-octets-error>],
942     "\xFF", [q<unassigned-code-point-error>]],
943     },
944     {
945     id => q<a2ae>,
946     in => qq<\xA2\xAE>,
947     out => [undef, "\x{3013}", undef], # GETA MARK
948     },
949     {
950     id => q<a2af>,
951     in => qq<\xA2\xAF>,
952     out => [undef, "\xA2\xAF", [q<unassigned-code-point-error>]],
953     },
954     {
955     id => q<a2ba>,
956     in => qq<\xA2\xBA>,
957     out => [undef, "\x{2208}", undef], # ELEMENT OF
958     },
959     {
960     id => q<a2fe>,
961     in => qq<\xA2\xFE>,
962     out => [undef, "\x{25EF}", undef], # LARGE CIRCLE
963     },
964     {
965     id => q<adce>,
966     in => qq<\xAD\xCE>,
967     out => [undef, "\xAD\xCE", [q<unassigned-code-point-error>]],
968     },
969     {
970     id => q<b0a6>,
971     in => qq<\xB0\xA6>,
972     out => [undef, "\x{611B}", undef], # han
973     },
974     {
975     id => q<f4a6>,
976     in => qq<\xF4\xA6>,
977     out => [undef, "\x{7199}", undef], # han
978     },
979     {
980     id => q<8ea1>,
981     in => qq<\x8E\xA1>,
982     out => [undef, "\x{FF61}", undef],
983     },
984     {
985     id => q<8efe>,
986     in => qq<\x8E\xFE>,
987     out => [undef, "\x8E\xFE", [q<unassigned-code-point-error>]],
988     },
989     {
990     id => q<8ffe>,
991     in => qq<\x8F\xFE>,
992     out => [undef, "\x8F\xFE", [q<illegal-octets-error>]],
993     },
994     {
995     id => q<l=2.a1a2a3>,
996     in => qq<\xA1\xA2\xA3>,
997     out => [undef, "\x{3001}", undef,
998     "\xA3", [q<illegal-octets-error>]],
999     },
1000     {
1001     id => q<8ea1a1>,
1002     in => qq<\x8E\xA1\xA1>,
1003     out => [undef, "\x{FF61}", undef,
1004     "\xA1", [q<illegal-octets-error>]],
1005     },
1006     {
1007     id => q<8fa1a1>,
1008     in => qq<\x8F\xA1\xA1>,
1009     out => [undef, "\x8F\xA1\xA1", [q<unassigned-code-point-error>]],
1010     },
1011     {
1012     id => q<8fa2af>,
1013     in => qq<\x8F\xA2\xAF>,
1014     out => [undef, "\x{02D8}", undef],
1015     },
1016     {
1017     id => q<8fa2b7>,
1018     in => qq<\x8F\xA2\xB7>,
1019     out => [undef, "\x{FF5E}", undef], # FULLWIDTH TILDE
1020     },
1021     {
1022     id => q<a1a2a1a3>,
1023     in => qq<\xA1\xA2\xA1\xA3>,
1024     out => [undef, "\x{3001}", undef, "\x{3002}", undef],
1025     },
1026     {
1027     id => q<8fa2af>,
1028     in => qq<\x8F\xA2\xAF\xAF>,
1029     out => [undef, "\x{02D8}", undef,
1030     "\xAF", [q<illegal-octets-error>]],
1031     },
1032     {
1033     id => q<8fa2afafa1>,
1034     in => qq<\x8F\xA2\xAF\xAF\xA1>,
1035     out => [undef, "\x{02D8}", undef,
1036     "\xAF\xA1", [q<unassigned-code-point-error>]],
1037     },
1038     );
1039     check_charset ('XML-EUC-JP', $XML_CHARSET.'euc-jp', \@testdata);
1040     }
1041    
1042     ## Shift_JIS
1043     {
         # Shift_JIS decoder cases.  The bare block scopes this @testdata so
         # that it stays separate from the same-named arrays declared by the
         # other charset sections.
         # Each entry is a hash with:
         #   id  - label for the case (it usually mirrors the input octets),
         #   in  - the input octet string,
         #   out - the expected result list handed to check_charset.
         # NOTE(review): |out| looks like [undef, chunk, status, chunk, status,
         # ...] where status is undef or an array ref naming an error type;
         # confirm the exact meaning against check_charset, which is defined
         # outside this section.
1044     my @testdata = (
1045     {
1046     id => q<l=0>,
1047     in => q<>,
1048     out => [undef],
1049     },
1050     {
1051     id => q<l=1.00>,
1052     in => qq<\x00>,
1053     out => [undef, "\x00", undef],
1054     },
1055     {
1056     id => q<l=1.0d>,
1057     in => qq<\x0D>,
1058     out => [undef, "\x0D", undef],
1059     },
1060     {
1061     id => q<l=1.0e>,
1062     in => qq<\x0E>,
1063     out => [undef, "\x0E", undef],
1064     }, # Error??
1065     {
1066     id => q<l=1.0f>,
1067     in => qq<\x0F>,
1068     out => [undef, "\x0F", undef],
1069     }, # Error??
1070     {
1071     id => q<l=1.1b>,
1072     in => qq<\x1B>,
1073     out => [undef, "\x1B", undef],
1074     }, # Error??
1075     {
1076     id => q<l=1.a>,
1077     in => q<a>,
1078     out => [undef, "a", undef],
1079     },
1080     {
1081     id => q<l=1.20>,
1082     in => qq<\x20>,
1083     out => [undef, "\x20", undef],
1084     },
1085     {
1086     id => q<l=1.5C>,
1087     in => qq<\x5C>,
1088     out => [undef, "\xA5", undef], # YEN SIGN
1089     },
1090     {
1091     id => q<l=1.7E>,
1092     in => qq<\x7E>,
1093     out => [undef, "\x{203E}", undef], # OVERLINE
1094     },
1095     {
1096     id => q<l=1.7F>,
1097     in => qq<\x7F>,
1098     out => [undef, "\x7F", undef],
1099     },
1100     {
1101     id => q<l=1.80>,
1102     in => qq<\x80>,
1103     out => [undef, "\x80", [q<unassigned-code-point-error>]],
1104     },
1105     {
1106     id => q<l=1.8c>,
1107     in => qq<\x8C>,
1108     out => [undef, "\x8C", [q<illegal-octets-error>]],
1109     },
1110     {
1111     id => q<l=1.8e>,
1112     in => qq<\x8E>,
1113     out => [undef, "\x8E", [q<illegal-octets-error>]],
1114     },
1115     {
1116     id => q<l=1.8f>,
1117     in => qq<\x8F>,
1118     out => [undef, "\x8F", [q<illegal-octets-error>]],
1119     },
1120     {
1121     id => q<l=1.a0>,
1122     in => qq<\xA0>,
1123     out => [undef, "\xA0", [q<unassigned-code-point-error>]],
1124     },
1125     {
1126     id => q<l=1.a1>,
1127     in => qq<\xA1>,
1128     out => [undef, "\x{FF61}", undef], # HALFWIDTH IDEOGRAPHIC FULL STOP
1129     },
1130     {
1131     id => q<l=1.a2>,
1132     in => qq<\xA2>,
1133     out => [undef, "\x{FF62}", undef], # HALFWIDTH LEFT CORNER BRACKET
1134     },
1135     {
1136     id => q<l=1.df>,
1137     in => qq<\xdf>,
1138     out => [undef, "\x{FF9F}", undef], # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
1139     },
1140     {
1141     id => q<l=1.e0>,
1142     in => qq<\xe0>,
1143     out => [undef, "\xE0", [q<illegal-octets-error>]],
1144     },
1145     {
1146     id => q<l=1.ef>,
1147     in => qq<\xEF>,
1148     out => [undef, "\xEF", [q<illegal-octets-error>]],
1149     },
1150     {
1151     id => q<F0>,
1152     in => qq<\xF0>,
1153     out => [undef, "\xF0", [q<unassigned-code-point-error>]],
1154     },
1155     {
1156     id => q<l=1.fc>,
1157     in => qq<\xFC>,
1158     out => [undef, "\xFC", [q<unassigned-code-point-error>]],
1159     },
1160     {
1161     id => q<l=1.fd>,
1162     in => qq<\xFD>,
1163     out => [undef, "\xFD", [q<unassigned-code-point-error>]],
1164     },
1165     {
1166     id => q<l=1.fe>,
1167     in => qq<\xFE>,
1168     out => [undef, "\xFE", [q<unassigned-code-point-error>]],
1169     },
1170     {
1171     id => q<l=1.ff>,
1172     in => qq<\xFF>,
1173     out => [undef, "\xFF", [q<unassigned-code-point-error>]],
1174     },
1175     {
1176     id => q<l=2.0000>,
1177     in => qq<\x00\x00>,
1178     out => [undef, "\x00", undef, "\x00", undef],
1179     },
1180     {
1181     id => q<l=2.0D0A>,
1182     in => qq<\x0D\x0A>,
1183     out => [undef, "\x0D", undef, "\x0A", undef],
1184     },
1185     {
1186     id => q<l=2.1B28>,
1187     in => qq<\x1B\x28>,
1188     out => [undef, "\x1B", undef, "\x28", undef],
1189     },# Error??
1190     {
1191     id => q<l=2.2020>,
1192     in => qq<\x20\x20>,
1193     out => [undef, "\x20", undef, "\x20", undef],
1194     },
1195     {
1196     id => q<l=2.ab>,
1197     in => qq<ab>,
1198     out => [undef, "a", undef, "b", undef],
1199     },
1200     {
1201     id => q<8040>,
1202     in => qq<\x80\x40>,
1203     out => [undef, "\x80", [q<unassigned-code-point-error>],
1204     "\x40", undef],
1205     },
1206     {
1207     id => q<8100>,
1208     in => qq<\x81\x00>,
1209     out => [undef, "\x81\x00", [q<unassigned-code-point-error>]],
1210     },
1211     {
1212     id => q<8101>,
1213     in => qq<\x81\x01>,
1214     out => [undef, "\x81\x01", [q<unassigned-code-point-error>]],
1215     },
1216     {
1217     id => q<813F>,
1218     in => qq<\x81\x3F>,
1219     out => [undef, "\x81\x3F", [q<unassigned-code-point-error>]],
1220     },
1221     {
1222     id => q<8140>,
1223     in => qq<\x81\x40>,
1224     out => [undef, "\x{3000}", undef], # IDEOGRAPHIC SPACE
1225     },
1226     {
1227     id => q<8141>,
1228     in => qq<\x81\x41>,
1229     out => [undef, "\x{3001}", undef], # IDEOGRAPHIC COMMA
1230     },
1231     {
1232     id => q<8143>,
1233     in => qq<\x81\x43>,
1234     out => [undef, "\x{FF0C}", undef], # FULLWIDTH COMMA
1235     },
1236     {
1237     id => q<8150>,
1238     in => qq<\x81\x50>,
1239     out => [undef, "\x{FFE3}", undef], # FULLWIDTH MACRON
1240     },
1241     {
1242     id => q<815C>,
1243     in => qq<\x81\x5C>,
1244     out => [undef, "\x{2014}", undef], # EM DASH
1245     },
1246     {
1247     id => q<815F>,
1248     in => qq<\x81\x5F>,
1249     out => [undef, "\x{005C}", undef], # REVERSE SOLIDUS
1250     },
1251     {
1252     id => q<8160>,
1253     in => qq<\x81\x60>,
1254     out => [undef, "\x{301C}", undef], # WAVE DASH
1255     },
1256     {
1257     id => q<8161>,
1258     in => qq<\x81\x61>,
1259     out => [undef, "\x{2016}", undef], # DOUBLE VERTICAL LINE
1260     },
1261     {
1262     id => q<8163>,
1263     in => qq<\x81\x63>,
1264     out => [undef, "\x{2026}", undef], # HORIZONTAL ELLIPSIS
1265     },
1266     {
1267     id => q<817C>,
1268     in => qq<\x81\x7C>,
1269     out => [undef, "\x{2212}", undef], # MINUS SIGN
1270     },
1271     {
1272     id => q<817F>,
1273     in => qq<\x81\x7F>,
1274     out => [undef, "\x81\x7F", [q<unassigned-code-point-error>]],
1275     },
1276     {
1277     id => q<818F>,
1278     in => qq<\x81\x8F>,
1279     out => [undef, "\x{FFE5}", undef], # FULLWIDTH YEN SIGN
1280     },
1281     {
1282     id => q<8191>,
1283     in => qq<\x81\x91>,
1284     out => [undef, "\x{00A2}", undef], # CENT SIGN
1285     },
1286     {
1287     id => q<8192>,
1288     in => qq<\x81\x92>,
1289     out => [undef, "\x{00A3}", undef], # POUND SIGN
1290     },
1291     {
1292     id => q<81AC>,
1293     in => qq<\x81\xAC>,
1294     out => [undef, "\x{3013}", undef], # GETA MARK
1295     },
1296     {
1297     id => q<81AD>,
1298     in => qq<\x81\xAD>,
1299     out => [undef, "\x81\xAD", [q<unassigned-code-point-error>]],
1300     },
1301     {
1302     id => q<81B8>,
1303     in => qq<\x81\xB8>,
1304     out => [undef, "\x{2208}", undef], # ELEMENT OF
1305     },
1306     {
1307     id => q<81CA>,
1308     in => qq<\x81\xCA>,
1309     out => [undef, "\x{00AC}", undef], # NOT SIGN
1310     },
1311     {
1312     id => q<81FC>,
1313     in => qq<\x81\xFC>,
1314     out => [undef, "\x{25EF}", undef], # LARGE CIRCLE
1315     },
1316     {
1317     id => q<81FD>,
1318     in => qq<\x81\xFD>,
1319     out => [undef, "\x81\xFD", [q<unassigned-code-point-error>]],
1320     },
1321     {
1322     id => q<81FE>,
1323     in => qq<\x81\xFE>,
1324     out => [undef, "\x81\xFE", [q<unassigned-code-point-error>]],
1325     },
1326     {
1327     id => q<81FF>,
1328     in => qq<\x81\xFF>,
1329     out => [undef, "\x81\xFF", [q<unassigned-code-point-error>]],
1330     },
1331     {
1332     id => q<DDDE>,
1333     in => qq<\xDD\xDE>,
1334     out => [undef, "\x{FF9D}", undef, "\x{FF9E}", undef], # HALFWIDTH KATAKANA LETTER N, VOICED SOUND MARK
1335     },
1336     {
1337     id => q<e040>,
1338     in => qq<\xE0\x40>,
1339     out => [undef, "\x{6F3E}", undef],
1340     },
1341     {
1342     id => q<eaa4>,
1343     in => qq<\xEA\xA4>,
1344     out => [undef, "\x{7199}", undef],
1345     },
1346     {
1347     id => q<eaa5>,
1348     in => qq<\xEA\xA5>,
1349     out => [undef, "\xEA\xA5", [q<unassigned-code-point-error>]],
1350     },
1351     {
1352     id => q<eb40>,
1353     in => qq<\xEB\x40>,
1354     out => [undef, "\xEB\x40", [q<unassigned-code-point-error>]],
1355     },
1356     {
1357     id => q<ed40>,
1358     in => qq<\xED\x40>,
1359     out => [undef, "\xED\x40", [q<unassigned-code-point-error>]],
1360     },
1361     {
1362     id => q<effc>,
1363     in => qq<\xEF\xFC>,
1364     out => [undef, "\xEF\xFC", [q<unassigned-code-point-error>]],
1365     },
1366     {
1367     id => q<f040>,
1368     in => qq<\xF0\x40>,
1369     out => [undef, "\xF0", [q<unassigned-code-point-error>],
1370     "\x40", undef],
1371     },
1372     {
1373     id => q<f140>,
1374     in => qq<\xF1\x40>,
1375     out => [undef, "\xF1", [q<unassigned-code-point-error>],
1376     "\x40", undef],
1377     },
1378     {
1379     id => q<fb40>,
1380     in => qq<\xFB\x40>,
1381     out => [undef, "\xFB", [q<unassigned-code-point-error>],
1382     "\x40", undef],
1383     },
1384     {
1385     id => q<fc40>,
1386     in => qq<\xFc\x40>,
1387     out => [undef, "\xFC", [q<unassigned-code-point-error>],
1388     "\x40", undef],
1389     },
1390     {
1391     id => q<fd40>,
1392     in => qq<\xFD\x40>,
1393     out => [undef, "\xFD", [q<unassigned-code-point-error>],
1394     "\x40", undef],
1395     },
1396     {
1397     id => q<fE40>,
1398     in => qq<\xFE\x40>,
1399     out => [undef, "\xFE", [q<unassigned-code-point-error>],
1400     "\x40", undef],
1401     },
1402     {
1403     id => q<ff40>,
1404     in => qq<\xFF\x40>,
1405     out => [undef, "\xFF", [q<unassigned-code-point-error>],
1406     "\x40", undef],
1407     },
1408     {
1409     id => q<81408142>,
1410     in => qq<\x81\x40\x81\x42>,
1411     out => [undef, "\x{3000}", undef, "\x{3002}", undef], # IDEOGRAPHIC SPACE, IDEOGRAPHIC FULL STOP
1412     },
1413     );
1414    
         # Run every case above through check_charset, using the charset URI
         # built from $XML_CHARSET and labelling the run 'XML-Shift_JIS'.
1415     check_charset ('XML-Shift_JIS', $XML_CHARSET.'shift_jis', \@testdata);
1416     }
1417    
1418     ## ISO-2022-JP
1419     {
1420    
         # ISO-2022-JP decoder cases, shared by two charset URIs.  The bare
         # block keeps this @testdata lexical local to the section.
         # Each entry is a hash with:
         #   id        - label for the case,
         #   in        - the input octet string,
         #   out1      - expected result list used with the $IANA_CHARSET URI,
         #   out2      - expected result list used with the $XML_CHARSET URI,
         #   eof_error - optional; present on the cases whose input ends right
         #               after ESC $ @, ESC $ B or ESC ( J.
         #               NOTE(review): presumably the error expected at end of
         #               input - confirm against check_charset.
         # |out1| and |out2| differ only in the 78compat, 83add and 90add cases.
         # Escape sequences used in |in| (designations per RFC 1468):
         #   ESC ( B = \x1B\x28\x42  ASCII
         #   ESC ( J = \x1B\x28\x4A  JIS X 0201 Roman
         #   ESC $ @ = \x1B\x24\x40  JIS C 6226-1978
         #   ESC $ B = \x1B\x24\x42  JIS X 0208-1983
1421     my @testdata = (
1422     {
1423     id => q<l=0>,
1424     in => q<>,
1425     out1 => [undef],
1426     out2 => [undef],
1427     },
1428     {
1429     id => q<l=1.00>,
1430     in => qq<\x00>,
1431     out1 => [undef, "\x00", undef],
1432     out2 => [undef, "\x00", undef],
1433     },
1434     {
1435     id => q<l=1.0d>,
1436     in => qq<\x0D>,
1437     out1 => [undef, "\x0D", undef],
1438     out2 => [undef, "\x0D", undef],
1439     }, # Error?
1440     {
1441     id => q<0A>,
1442     in => qq<\x0A>,
1443     out1 => [undef, "\x0A", undef],
1444     out2 => [undef, "\x0A", undef],
1445     }, # Error?
1446     {
1447     id => q<l=1.0e>,
1448     in => qq<\x0E>,
1449     out1 => [undef, "\x0E", [q<illegal-octets-error>]],
1450     out2 => [undef, "\x0E", [q<illegal-octets-error>]],
1451     },
1452     {
1453     id => q<l=1.0f>,
1454     in => qq<\x0F>,
1455     out1 => [undef, "\x0F", [q<illegal-octets-error>]],
1456     out2 => [undef, "\x0F", [q<illegal-octets-error>]],
1457     },
1458     {
1459     id => q<l=1.1b>,
1460     in => qq<\x1B>,
1461     out1 => [undef, "\x1B", [q<illegal-octets-error>]],
1462     out2 => [undef, "\x1B", [q<illegal-octets-error>]],
1463     },
1464     {
1465     id => q<l=1.a>,
1466     in => q<a>,
1467     out1 => [undef, "a", undef],
1468     out2 => [undef, "a", undef],
1469     },
1470     {
1471     id => q<l=1.20>,
1472     in => qq<\x20>,
1473     out1 => [undef, "\x20", undef],
1474     out2 => [undef, "\x20", undef],
1475     },
1476     {
1477     id => q<l=1.5C>,
1478     in => qq<\x5C>,
1479     out1 => [undef, "\x5C", undef],
1480     out2 => [undef, "\x5C", undef],
1481     },
1482     {
1483     id => q<l=1.7E>,
1484     in => qq<\x7E>,
1485     out1 => [undef, "\x7E", undef],
1486     out2 => [undef, "\x7E", undef],
1487     },
1488     {
1489     id => q<l=1.7F>,
1490     in => qq<\x7F>,
1491     out1 => [undef, "\x7F", undef],
1492     out2 => [undef, "\x7F", undef],
1493     },
1494     {
1495     id => q<l=1.80>,
1496     in => qq<\x80>,
1497     out1 => [undef, "\x80", [q<illegal-octets-error>]],
1498     out2 => [undef, "\x80", [q<illegal-octets-error>]],
1499     },
1500     {
1501     id => q<l=1.8c>,
1502     in => qq<\x8C>,
1503     out1 => [undef, "\x8C", [q<illegal-octets-error>]],
1504     out2 => [undef, "\x8C", [q<illegal-octets-error>]],
1505     },
1506     {
1507     id => q<l=1.8e>,
1508     in => qq<\x8E>,
1509     out1 => [undef, "\x8E", [q<illegal-octets-error>]],
1510     out2 => [undef, "\x8E", [q<illegal-octets-error>]],
1511     },
1512     {
1513     id => q<l=1.8f>,
1514     in => qq<\x8F>,
1515     out1 => [undef, "\x8F", [q<illegal-octets-error>]],
1516     out2 => [undef, "\x8F", [q<illegal-octets-error>]],
1517     },
1518     {
1519     id => q<l=1.a0>,
1520     in => qq<\xA0>,
1521     out1 => [undef, "\xA0", [q<illegal-octets-error>]],
1522     out2 => [undef, "\xA0", [q<illegal-octets-error>]],
1523     },
1524     {
1525     id => q<l=1.a1>,
1526     in => qq<\xA1>,
1527     out1 => [undef, "\xA1", [q<illegal-octets-error>]],
1528     out2 => [undef, "\xA1", [q<illegal-octets-error>]],
1529     },
1530     {
1531     id => q<l=1.a2>,
1532     in => qq<\xA2>,
1533     out1 => [undef, "\xA2", [q<illegal-octets-error>]],
1534     out2 => [undef, "\xA2", [q<illegal-octets-error>]],
1535     },
1536     {
1537     id => q<l=1.df>,
1538     in => qq<\xdf>,
1539     out1 => [undef, "\xDF", [q<illegal-octets-error>]],
1540     out2 => [undef, "\xDF", [q<illegal-octets-error>]],
1541     },
1542     {
1543     id => q<l=1.e0>,
1544     in => qq<\xe0>,
1545     out1 => [undef, "\xE0", [q<illegal-octets-error>]],
1546     out2 => [undef, "\xE0", [q<illegal-octets-error>]],
1547     },
1548     {
1549     id => q<l=1.ef>,
1550     in => qq<\xEF>,
1551     out1 => [undef, "\xEF", [q<illegal-octets-error>]],
1552     out2 => [undef, "\xEF", [q<illegal-octets-error>]],
1553     },
1554     {
1555     id => q<F0>,
1556     in => qq<\xF0>,
1557     out1 => [undef, "\xF0", [q<illegal-octets-error>]],
1558     out2 => [undef, "\xF0", [q<illegal-octets-error>]],
1559     },
1560     {
1561     id => q<l=1.fc>,
1562     in => qq<\xFC>,
1563     out1 => [undef, "\xFC", [q<illegal-octets-error>]],
1564     out2 => [undef, "\xFC", [q<illegal-octets-error>]],
1565     },
1566     {
1567     id => q<l=1.fd>,
1568     in => qq<\xFD>,
1569     out1 => [undef, "\xFD", [q<illegal-octets-error>]],
1570     out2 => [undef, "\xFD", [q<illegal-octets-error>]],
1571     },
1572     {
1573     id => q<l=1.fe>,
1574     in => qq<\xFE>,
1575     out1 => [undef, "\xFE", [q<illegal-octets-error>]],
1576     out2 => [undef, "\xFE", [q<illegal-octets-error>]],
1577     },
1578     {
1579     id => q<l=1.ff>,
1580     in => qq<\xFF>,
1581     out1 => [undef, "\xFF", [q<illegal-octets-error>]],
1582     out2 => [undef, "\xFF", [q<illegal-octets-error>]],
1583     },
1584     {
1585     id => q<l=2.0000>,
1586     in => qq<\x00\x00>,
1587     out1 => [undef, "\x00", undef, "\x00", undef],
1588     out2 => [undef, "\x00", undef, "\x00", undef],
1589     },
1590     {
1591     id => q<l=2.0D0A>,
1592     in => qq<\x0D\x0A>,
1593     out1 => [undef, "\x0D", undef, "\x0A", undef],
1594     out2 => [undef, "\x0D", undef, "\x0A", undef],
1595     },
1596     {
1597     id => q<l=2.1B1B>,
1598     in => qq<\x1B\x1B>,
1599     out1 => [undef, "\x1B", [q<illegal-octets-error>],
1600     "\x1B", [q<illegal-octets-error>]],
1601     out2 => [undef, "\x1B", [q<illegal-octets-error>],
1602     "\x1B", [q<illegal-octets-error>]],
1603     },
1604     {
1605     id => q<l=2.1B20>,
1606     in => qq<\x1B\x20>,
1607     out1 => [undef, "\x1B", [q<illegal-octets-error>], "\x20", undef],
1608     out2 => [undef, "\x1B", [q<illegal-octets-error>], "\x20", undef],
1609     },
1610     {
1611     id => q<l=2.1B24>,
1612     in => qq<\x1B\x24>,
1613     out1 => [undef, "\x1B", [q<illegal-octets-error>], "\x24", undef],
1614     out2 => [undef, "\x1B", [q<illegal-octets-error>], "\x24", undef],
1615     },
1616     {
1617     id => q<l=2.1B28>,
1618     in => qq<\x1B\x28>,
1619     out1 => [undef, "\x1B", [q<illegal-octets-error>], "\x28", undef],
1620     out2 => [undef, "\x1B", [q<illegal-octets-error>], "\x28", undef],
1621     },
1622     {
1623     id => q<l=2.2020>,
1624     in => qq<\x20\x20>,
1625     out1 => [undef, "\x20", undef, "\x20", undef],
1626     out2 => [undef, "\x20", undef, "\x20", undef],
1627     },
1628     {
1629     id => q<l=2.ab>,
1630     in => qq<ab>,
1631     out1 => [undef, "a", undef, "b", undef],
1632     out2 => [undef, "a", undef, "b", undef],
1633     },
1634     {
1635     id => q<8040>,
1636     in => qq<\x80\x40>,
1637     out1 => [undef, "\x80", [q<illegal-octets-error>],
1638     "\x40", undef],
1639     out2 => [undef, "\x80", [q<illegal-octets-error>],
1640     "\x40", undef],
1641     },
1642     {
1643     id => q<1B2440>,
1644     in => qq<\x1B\x24\x40>,
1645     out1 => [undef],
1646     out2 => [undef],
1647     eof_error => [q<invalid-state-error>],
1648     },
1649     {
1650     id => q<1B2442>,
1651     in => qq<\x1B\x24\x42>,
1652     out1 => [undef],
1653     out2 => [undef],
1654     eof_error => [q<invalid-state-error>],
1655     },
1656     {
1657     id => q<1B2840>,
1658     in => qq<\x1B\x28\x40>,
1659     out1 => [undef, "\x1B", [q<illegal-octets-error>], "(", undef,
1660     "\x40", undef],
1661     out2 => [undef, "\x1B", [q<illegal-octets-error>], "(", undef,
1662     "\x40", undef],
1663     },
1664     {
1665     id => q<1B2842>,
1666     in => qq<\x1B\x28\x42>,
1667     out1 => [undef],
1668     out2 => [undef],
1669     },
1670     {
1671     id => q<1B284A>,
1672     in => qq<\x1B\x28\x4A>,
1673     out1 => [undef],
1674     out2 => [undef],
1675     eof_error => [q<invalid-state-error>],
1676     },
1677     {
1678     id => q<1B$B1B(B>,
1679     in => qq<\x1B\x24\x42\x1B\x28\x42>,
1680     out1 => [undef],
1681     out2 => [undef],
1682     },
1683     {
1684     id => q<1B(B1B(B>,
1685     in => qq<\x1B\x28\x42\x1B\x28\x42>,
1686     out1 => [undef],
1687     out2 => [undef],
1688     },
1689     {
1690     id => q<1B(Ba1B(B>,
1691     in => qq<\x1B\x28\x42a\x1B\x28\x42>,
1692     out1 => [undef, "a", undef],
1693     out2 => [undef, "a", undef],
1694     },
1695     {
1696     id => q<1B(Ba1B(B1B(B>,
1697     in => qq<\x1B\x28\x42a\x1B\x28\x42\x1B\x28\x42>,
1698     out1 => [undef, "a", undef],
1699     out2 => [undef, "a", undef],
1700     },
1701     {
1702     id => q<1B$42!!1B2842>,
1703     in => qq<\x1B\x24\x42!!\x1B\x28\x42>,
1704     out1 => [undef, "\x{3000}", undef],
1705     out2 => [undef, "\x{3000}", undef],
1706     },
1707     {
1708     id => q<1B$4221211B284A>,
1709     in => qq<\x1B\x24\x42!!\x1B\x28\x4A>,
1710     out1 => [undef, "\x{3000}", undef],
1711     out2 => [undef, "\x{3000}", undef],
1712     eof_error => [q<invalid-state-error>],
1713     },
1714     {
1715     id => q<1B$4021211B2842>,
1716     in => qq<\x1B\x24\x40!!\x1B\x28\x42>,
1717     out1 => [undef, "\x{3000}", undef],
1718     out2 => [undef, "\x{3000}", undef],
1719     },
1720     {
1721     id => q<1B$402121211B2842>,
1722     in => qq<\x1B\x24\x40!!!\x1B\x28\x42>,
1723     out1 => [undef, "\x{3000}", undef, "!", [q<illegal-octets-error>]],
1724     out2 => [undef, "\x{3000}", undef, "!", [q<illegal-octets-error>]],
1725     },
1726     {
1727     id => q<1B$4021211B2442!!1B2842>,
1728     in => qq<\x1B\x24\x40!!\x1B\x24\x42!!\x1B\x28\x42>,
1729     out1 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1730     out2 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1731     },
1732     {
1733     id => q<1B$4021211B2440!!1B2842>,
1734     in => qq<\x1B\x24\x40!!\x1B\x24\x40!!\x1B\x28\x42>,
1735     out1 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1736     out2 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1737     },
1738     {
1739     id => q<1B$@!"1B(B\~|>,
1740     in => qq<\x1B\x24\x40!"\x1B(B\\~|>,
1741     out1 => [undef, "\x{3001}", undef, "\x5C", undef,
1742     "\x7E", undef, "|", undef],
1743     out2 => [undef, "\x{3001}", undef, "\x5C", undef,
1744     "\x7E", undef, "|", undef],
1745     },
1746     {
1747     id => q<1B$B!"1B(J\~|1B(B>,
1748     in => qq<\x1B\x24\x42!"\x1B(J\\~|\x1B(B>,
1749     out1 => [undef, "\x{3001}", undef, "\xA5", undef,
1750     "\x{203E}", undef, "|", undef],
1751     out2 => [undef, "\x{3001}", undef, "\xA5", undef,
1752     "\x{203E}", undef, "|", undef],
1753     },
1754     {
1755     id => q<78compat.3022(16-02)>,
1756     in => qq<\x1B\$\@\x30\x22\x1B\$B\x30\x22\x1B(B>,
1757     out1 => [undef, "\x{555E}", undef, "\x{5516}", undef],
1758     out2 => [undef, "\x{5516}", undef, "\x{5516}", undef],
1759     },
1760     {
1761     id => q<unassigned.2239>,
1762     in => qq<\x1B\$\@\x22\x39\x1B\$B\x22\x39\x1B(B>,
1763     out1 => [undef, "\x22\x39", [q<unassigned-code-point-error>],
1764     "\x22\x39", [q<unassigned-code-point-error>]],
1765     out2 => [undef, "\x22\x39", [q<unassigned-code-point-error>],
1766     "\x22\x39", [q<unassigned-code-point-error>]],
1767     },
1768     {
1769     id => q<83add.223A>,
1770     in => qq<\x1B\$\@\x22\x3A\x1B\$B\x22\x3A\x1B(B>,
1771     out1 => [undef, "\x22\x3A", [q<unassigned-code-point-error>],
1772     "\x{2208}", undef],
1773     out2 => [undef, "\x{2208}", undef, "\x{2208}", undef],
1774     },
1775     {
1776     id => q<83add.2840>,
1777     in => qq<\x1B\$\@\x28\x40\x1B\$B\x28\x40\x1B(B>,
1778     out1 => [undef, "\x28\x40", [q<unassigned-code-point-error>],
1779     "\x{2542}", undef],
1780     out2 => [undef, "\x{2542}", undef, "\x{2542}", undef],
1781     },
1782     {
1783     id => q<83add.7421>,
1784     in => qq<\x1B\$\@\x74\x21\x1B\$B\x74\x21\x1B(B>,
1785     out1 => [undef, "\x74\x21", [q<unassigned-code-point-error>],
1786     "\x{582F}", undef],
1787     out2 => [undef, "\x{5C2D}", undef, "\x{582F}", undef],
1788     },
1789     {
1790     id => q<83swap.3033>,
1791     in => qq<\x1B\$\@\x30\x33\x1B\$B\x30\x33\x1B(B>,
1792     out1 => [undef, "\x{9C3A}", undef, "\x{9BF5}", undef],
1793     out2 => [undef, "\x{9C3A}", undef, "\x{9BF5}", undef],
1794     },
1795     {
1796     id => q<83swap.724D>,
1797     in => qq<\x1B\$\@\x72\x4D\x1B\$B\x72\x4D\x1B(B>,
1798     out1 => [undef, "\x{9BF5}", undef, "\x{9C3A}", undef],
1799     out2 => [undef, "\x{9BF5}", undef, "\x{9C3A}", undef],
1800     },
1801     {
1802     id => q<90add.7425>,
1803     in => qq<\x1B\$\@\x74\x25\x1B\$B\x74\x25\x1B(B>,
1804     out1 => [undef, "\x74\x25", [q<unassigned-code-point-error>],
1805     "\x74\x25", [q<unassigned-code-point-error>]],
1806     out2 => [undef, "\x{51DC}", undef, "\x{51DC}", undef],
1807     },
1808     {
1809     id => q<90add.7426>,
1810     in => qq<\x1B\$\@\x74\x26\x1B\$B\x74\x26\x1B(B>,
1811     out1 => [undef, "\x74\x26", [q<unassigned-code-point-error>],
1812     "\x74\x26", [q<unassigned-code-point-error>]],
1813     out2 => [undef, "\x{7199}", undef, "\x{7199}", undef],
1814     },
1815     );
1816    
         # Run the table twice, once per charset URI.  Each map block stores
         # |out1| (respectively |out2|) into the entry's |out| key and yields
         # the same hash ref, so the hashes in @testdata are modified in place
         # and the second call overwrites the |out| set for the first.
1817     check_charset ('IETF-ISO-2022-JP', $IANA_CHARSET.'iso-2022-jp',
1818     [map {$_->{out} = $_->{out1}; $_} @testdata]);
1819     check_charset ('XML-ISO-2022-JP', $XML_CHARSET.'iso-2022-jp',
1820     [map {$_->{out} = $_->{out2}; $_} @testdata]);
1821     }

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24