/[suikacvs]/markup/html/whatpm/t/Charset-DecodeHandler.t
Suika

Contents of /markup/html/whatpm/t/Charset-DecodeHandler.t

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (show annotations) (download) (as text)
Sun Jul 15 16:51:14 2007 UTC (18 years ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +30 -26 lines
File MIME type: application/x-troff
++ whatpm/t/ChangeLog	15 Jul 2007 16:51:05 -0000
	* Charset-DecodeHandler.t: Cases for charset names have
	been changed to match the module's implementation.

2007-07-15  Wakaba  <wakaba@suika.fam.cx>

++ whatpm/Whatpm/Charset/ChangeLog	15 Jul 2007 16:49:53 -0000
	* DecodeHandle.pm (create_decode_handle): Set canonical
	or specified name to |input_encoding| attribute.
	(uri_to_name): Reimplemented.
	(input_encoding): Return charset name returned
	by |uri_to_name| if available.
	($CharsetDef): Property |xml_name| now contains
	only names defined in XML specifications.

2007-07-15  Wakaba  <wakaba@suika.fam.cx>

1 #!/usr/bin/perl
2 use strict;
3 use Test;
4 BEGIN { plan tests => 6185 }
5
6 require Whatpm::Charset::DecodeHandle;
7
8 my $XML_AUTO_CHARSET = q<http://suika.fam.cx/www/2006/03/xml-entity/>;
9 my $IANA_CHARSET = q<urn:x-suika-fam-cx:charset:>;
10 my $PERL_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.>;
11 my $XML_CHARSET = q<http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.>;
12
## |create_decode_handle|
##
## Each case is [label, charset URI, supported?].  For a supported URI the
## returned value has to be a |Whatpm::Charset::DecodeHandle::Encode| whose
## |onerror| is a code reference.  For an unsupported URI no such object
## may be returned, and the handler passed as the third argument is
## expected to receive |charset-not-supported-error| as its second
## argument.
for my $case (
  ['perl.utf8', $PERL_CHARSET.'utf8', 1],
  ['xml', $XML_AUTO_CHARSET, 1],
  ['unknown', q<http://www.unknown.test/>, 0],
  ['iana.euc-jp', $IANA_CHARSET.'euc-jp', 1],
  ['xml.euc-jp', $XML_CHARSET.'euc-jp', 1],
  ['iana.shift_jis', $IANA_CHARSET.'shift_jis', 1],
  ['xml.shift_jis', $XML_CHARSET.'shift_jis', 1],
  ['iana.iso-2022-jp', $IANA_CHARSET.'iso-2022-jp', 1],
  ['xml.iso-2022-jp', $XML_CHARSET.'iso-2022-jp', 1],
) {
  my ($label, $charset_uri, $supported) = @$case;
  my $prefix = 'create_decode_handle ' . $label;

  open my $fh, '<', \'';
  my $dh = Whatpm::Charset::DecodeHandle->create_decode_handle
      ($charset_uri, $fh);
  my $is_decoder
      = UNIVERSAL::isa ($dh, 'Whatpm::Charset::DecodeHandle::Encode') ? 1 : 0;

  unless ($supported) {
    ok $is_decoder, 0, $prefix . ' object';

    ## Second call: made only to observe the reported error type.
    Whatpm::Charset::DecodeHandle->create_decode_handle
        ($charset_uri, $fh, sub {
          ok $_[1], 'charset-not-supported-error', $prefix . ' error';
        });
    next;
  }

  ok $is_decoder, 1, $prefix . ' object';
  ok ref $dh->onerror eq 'CODE' ? 1 : 0, 1, $prefix . ' onerror';
}
43
## |name_to_uri|
##
## Each case is [expected URI, charset name].  The first table is resolved
## in the |ietf| domain, the second in the |xml| domain.
for my $case (
  [$IANA_CHARSET.'utf-8', 'utf-8'],
  [$IANA_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
  [$IANA_CHARSET.'utf-8', 'UTF-8'],
  [$IANA_CHARSET.'utf-8', 'uTf-8'],
  [$IANA_CHARSET.'utf-16be', 'utf-16be'],
) {
  my ($expected_uri, $name) = @$case;
  my $got_uri = Whatpm::Charset::DecodeHandle->name_to_uri (ietf => $name);
  ok $got_uri, $expected_uri, 'ietf charset URI ' . $name;
}

## NOTE(review): the last |xml| case expects an IANA-prefixed URI, unlike
## its siblings -- presumably intentional; confirm against the module.
for my $case (
  [$XML_CHARSET.'utf-8', 'utf-8'],
  [$XML_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
  [$XML_CHARSET.'utf-8', 'UTF-8'],
  [$XML_CHARSET.'utf-8', 'uTf-8'],
  [$IANA_CHARSET.'utf-16be', 'utf-16be'],
) {
  my ($expected_uri, $name) = @$case;
  my $got_uri = Whatpm::Charset::DecodeHandle->name_to_uri (xml => $name);
  ok $got_uri, $expected_uri, 'XML encoding URI ' . $name;
}
66
## |uri_to_name|
##
## Each case is [charset URI, expected name].  |undef| is expected for a
## URI that carries none of the known prefixes.
for my $case (
  [$IANA_CHARSET.'utf-8', 'utf-8'],
  [$IANA_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
  [q<http://charset.example/>, undef],
) {
  my ($charset_uri, $expected_name) = @$case;
  my $got_name
      = Whatpm::Charset::DecodeHandle->uri_to_name (ietf => $charset_uri);
  ok $got_name, $expected_name, 'URI -> IETF charset ' . $charset_uri;
}

for my $case (
  [$XML_CHARSET.'utf-8', 'UTF-8'],
  [$XML_CHARSET.'x-no-such-charset', 'x-no-such-charset'],
  [q<http://charset.example/>, undef],
) {
  my ($charset_uri, $expected_name) = @$case;
  my $got_name
      = Whatpm::Charset::DecodeHandle->uri_to_name (xml => $charset_uri);
  ok $got_name, $expected_name, 'URI -> XML encoding ' . $charset_uri;
}
85
## |getc|
{
  ## "a", U+3042 encoded as UTF-8, a stray octet 0x81, then "a".
  my $octets = "a\xE3\x81\x82\x81a";
  open my $in, '<', \$octets;
  my $dh = Whatpm::Charset::DecodeHandle->create_decode_handle
      ($PERL_CHARSET.'utf8', $in);

  ## Remembers the octets reported with the most recent error.
  my $bad_octets;
  $dh->onerror (sub {
    my (undef, undef, %info) = @_;
    $bad_octets = ${$info{octets}};
  });

  ## Each step is [label tag, expected |getc| result, expected error
  ## octets]; the final step is EOF.
  for my $step (
    ['[1]', "a", undef],
    ['[2]', "\x{3042}", undef],
    ['[3]', "\x81", "\x81"],
    ['[4]', "a", undef],
    ['[5]', undef, undef],
  ) {
    my ($tag, $char, $reported) = @$step;
    ok $dh->getc, $char, "getc 1 $tag";
    ok $bad_octets, $reported, "getc 1 $tag error";
    ## Clear a reported error so that it cannot leak into the next step.
    undef $bad_octets if defined $reported;
  }
}
111
{
  ## 256 "a"s followed by 256 "b"s, all ASCII.
  ## NOTE(review): presumably sized to cross an internal read boundary of
  ## the decoder -- confirm against the module.
  my $octets = ("a" x 256) . ("b" x 256);

  open my $in, '<', \$octets;
  my $dh = Whatpm::Charset::DecodeHandle->create_decode_handle
      ($PERL_CHARSET.'utf8', $in);

  ## Remembers the octets reported with the most recent error.
  my $bad_octets;
  $dh->onerror (sub {
    my (undef, undef, %info) = @_;
    $bad_octets = ${$info{octets}};
  });

  ## Every test name carries the real zero-based offset of the character
  ## being read (the "b" run used to be labelled one position too low).
  my $offset = 0;
  for my $char (("a") x 256, ("b") x 256) {
    ok $dh->getc, $char, "getc 2 [$offset]";
    ok $bad_octets, undef, "getc 2 [$offset] error";
    $offset++;
  }

  ## EOF
  ok $dh->getc, undef, "getc 2 [-1]";
  ok $bad_octets, undef, "getc 2 [-1] error";
}
139
{
  ## 255 "a"s, then U+3042 as three UTF-8 octets, then 256 "b"s.
  ## NOTE(review): presumably places the multi-octet character across an
  ## internal read boundary of the decoder -- confirm against the module.
  my $octets = ("a" x 255) . "\xE3\x81\x82" . ("b" x 256);

  open my $in, '<', \$octets;
  my $dh = Whatpm::Charset::DecodeHandle->create_decode_handle
      ($PERL_CHARSET.'utf8', $in);

  ## Remembers the octets reported with the most recent error.
  my $bad_octets;
  $dh->onerror (sub {
    my (undef, undef, %info) = @_;
    $bad_octets = ${$info{octets}};
  });

  ## Every test name carries the real zero-based offset of the character
  ## being read (the "b" run used to be labelled one position too low).
  my $offset = 0;
  for my $char (("a") x 255, "\x{3042}", ("b") x 256) {
    ok $dh->getc, $char, "getc 3 [$offset]";
    ok $bad_octets, undef, "getc 3 [$offset] error";
    $offset++;
  }

  ## EOF
  ok $dh->getc, undef, "getc 3 [-1]";
  ok $bad_octets, undef, "getc 3 [-1] error";
}
171
{
  ## 255 "a"s followed by 0xE3, the first octet of a UTF-8 sequence that
  ## is cut off by the end of the input.
  my $octets = ("a" x 255) . "\xE3";

  open my $in, '<', \$octets;
  my $dh = Whatpm::Charset::DecodeHandle->create_decode_handle
      ($PERL_CHARSET.'utf8', $in);

  ## Remembers the octets reported with the most recent error.
  my $bad_octets;
  $dh->onerror (sub {
    my (undef, undef, %info) = @_;
    $bad_octets = ${$info{octets}};
  });

  ## Each step is [label tag, expected |getc| result, expected error
  ## octets]: the "a" run, the truncated sequence, then EOF.
  for my $step (
    (map { ["[$_]", "a", undef] } 0..254),
    ['[255]', "\xE3", "\xE3"],
    ['[-1]', undef, undef],
  ) {
    my ($tag, $char, $reported) = @$step;
    ok $dh->getc, $char, "getc 4 $tag";
    ok $bad_octets, $reported, "getc 4 $tag error";
    ## Clear a reported error so that it cannot leak into the next step.
    undef $bad_octets if defined $reported;
  }
}
198
## |ungetc|
{
  ## UTF-8 octets for "a", U+4E00, "b", U+4E11.
  ##
  ## The input is spelled out as octets on purpose.  An in-memory
  ## filehandle models a file of bytes; mapping a character string that
  ## holds code points above 0xFF into one relies on Perl's internal
  ## string representation, and perl may refuse to open such a handle.
  my $byte = "a\xE4\xB8\x80b\xE4\xB8\x91";

  open my $fh, '<', \$byte;
  my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
      ($PERL_CHARSET.'utf8', $fh);

  ## A character pushed back with |ungetc| (given as a code point) is
  ## expected to be the next one returned by |getc|.
  ok $efh->getc, "a", "ungetc [1]";

  $efh->ungetc (ord "a");
  ok $efh->getc, "a", "ungetc [2]";

  ok $efh->getc, "\x{4E00}", "ungetc [3]";

  $efh->ungetc (ord "\x{4E00}");
  ok $efh->getc, "\x{4E00}", "ungetc [4]";

  ok $efh->getc, "b", "ungetc [5]";

  ok $efh->getc, "\x{4E11}", "ungetc [6]";

  $efh->ungetc (ord "\x{4E11}");
  ok $efh->getc, "\x{4E11}", "ungetc [7]";
}
224
## UTF-8, UTF-16 and BOM
##
## Each case is [label, input octets, charset URI, expected |getc| results
## (the last one is |undef| for EOF), expected |has_bom| as 0 or 1].
## No case is expected to invoke the error handler.
for my $case (
  ["UTF-8 BOM 1", qq<\xEF\xBB\xBFabc>, $XML_CHARSET.'utf-8',
   ["a", "b", "c", undef], 1],
  ["UTF-8 no BOM 1", qq<abc>, $XML_CHARSET.'utf-8',
   ["a", "b", "c", undef], 0],
  ["UTF-8 BOM 2", qq<\xEF\xBB\xBF\xEF\xBB\xBFabc>, $XML_CHARSET.'utf-8',
   ["\x{FEFF}", "a", "b", "c", undef], 1],
  ["UTF-8 BOM 3", qq<\xEF\xBB\xBF>, $XML_CHARSET.'utf-8',
   [undef], 1],
  ["UTF-8 no BOM 2", qq<>, $XML_CHARSET.'utf-8',
   [undef], 0],
  ["UTF-8 no BOM 3", qq<ab>, $XML_CHARSET.'utf-8',
   [qw/a b/, undef], 0],
  ["UTF-8 no BOM 4", qq<a>, $XML_CHARSET.'utf-8',
   [qw/a/, undef], 0],
  ["UTF-16BE BOM 1", qq<\xFE\xFF\x4E\x00\x00a>, $XML_CHARSET.'utf-16',
   ["\x{4E00}", "a", undef], 1],
  ["UTF-16LE BOM 1", qq<\xFF\xFE\x00\x4Ea\x00>, $XML_CHARSET.'utf-16',
   ["\x{4E00}", "a", undef], 1],
  ["UTF-16BE BOM 2", qq<\xFE\xFF\x00a>, $XML_CHARSET.'utf-16',
   ["a", undef], 1],
  ["UTF-16LE BOM 2", qq<\xFF\xFEa\x00>, $XML_CHARSET.'utf-16',
   ["a", undef], 1],
  ["UTF-16BE BOM 3", qq<\xFE\xFF>, $XML_CHARSET.'utf-16',
   [undef], 1],
  ["UTF-16LE BOM 3", qq<\xFF\xFE>, $XML_CHARSET.'utf-16',
   [undef], 1],
) {
  my ($label, $octets, $charset_uri, $expected, $has_bom) = @$case;
  my $error;

  open my $fh, '<', \$octets;
  my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
      ($charset_uri, $fh, sub { $error = 1 });

  for my $i (0 .. $#$expected) {
    ok $efh->getc, $expected->[$i], "$label $i";
  }
  ok $error, undef, "$label error";
  ok $efh->has_bom ? 1 : 0, $has_bom, "$label has_bom";
}
266
## UTF-16 input that is truncated and/or lacks a BOM.
{
  ## Running number used in the test names ("UTF-16 [1]", "UTF-16 [2]", ...).
  my $n = 0;

  ## Each case is [input octets, error type expected right after the
  ## handle is created, [[expected |getc| result, expected error type],
  ## ...], expected |has_bom| as 0 or 1].  The last read of every case
  ## is EOF.
  for my $case (
    [qq<\xFE\xFFa>, undef,
     [["a", 'illegal-octets-error'], [undef, undef]], 1],
    [qq<\xFF\xFEa>, undef,
     [["a", 'illegal-octets-error'], [undef, undef]], 1],
    [qq<\xFD\xFF>, 'no-bom-error',
     [["\x{FDFF}", undef], [undef, undef]], 0],
    [qq<\xFD>, 'no-bom-error',
     [["\xFD", 'illegal-octets-error'], [undef, undef]], 0],
    [qq<>, 'no-bom-error',
     [[undef, undef]], 0],
  ) {
    my ($octets, $open_error, $reads, $has_bom) = @$case;
    my $error;

    open my $fh, '<', \$octets;
    my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
        ($XML_CHARSET.'utf-16', $fh, sub { $error = $_[1] });

    ok $error, $open_error, 'UTF-16 [' . ++$n . ']';
    ## An expected error is cleared once it has been checked.
    undef $error if defined $open_error;

    for my $read (@$reads) {
      my ($char, $read_error) = @$read;
      ok $efh->getc, $char, 'UTF-16 [' . ++$n . ']';
      ok $error, $read_error, 'UTF-16 [' . ++$n . ']';
      undef $error if defined $read_error;
    }

    ok $efh->has_bom ? 1 : 0, $has_bom, 'UTF-16 [' . ++$n . ']';
  }
}
358
## check_charset ($test_name, $charset_uri, \@testdata)
##
## Feeds every record of |@testdata| to a decode handle created for
## |$charset_uri| and compares what happens with the record's fields:
##
##   id        - label used in the test names
##   in        - input octets
##   out       - [error, char, error, char, error, ...]: the error
##               expected right after the handle is created, then pairs
##               of an expected |getc| result and the error expected
##               after that read.  An error is either |undef| (none) or
##               an array reference whose first item is the error type.
##   name      - expected |input_encoding|
##   bom       - true if |has_bom| is expected to be true
##   eof_error - optional; in the same form as an |out| error, expected
##               on the read that returns EOF
##
## NOTE: the |out| arrays are consumed (left empty) by the check.
sub check_charset ($$$) {
  my ($test_name, $charset_uri, $cases) = @_;

  for my $testdata (@$cases) {
    my $label = "$test_name $testdata->{id}";
    my $octets = $testdata->{in};
    my $error;

    open my $fh, '<', \$octets;
    my $efh = Whatpm::Charset::DecodeHandle->create_decode_handle
        ($charset_uri, $fh, sub {
          my (undef, $etype, %opt) = @_;
          $error = [$etype, \%opt];
        });

    ok defined $efh ? 1 : 0, 1, "$label return";
    defined $efh or next;
    ok $efh->has_bom ? 1 : 0, $testdata->{bom} || 0, "$label BOM";
    ok $efh->input_encoding, $testdata->{name}, "$label ie";

    my $expected = $testdata->{out};
    for (my $i = 0; @$expected; $i++) {
      ## Round 0 only checks for an error raised while the handle was
      ## created; every later round reads one character first.
      if ($i) {
        my $char = shift @$expected;
        ok $efh->getc, $char, "$label $i";
      }

      my $want_error = shift @$expected;
      if (defined $want_error) {
        ok defined $error ? 1 : 0, 1, "$label $i error";
        ok $error->[0], $want_error->[0], "$label $i error 0";
      } else {
        ok defined $error ? 1 : 0, 0, "$label $i error";
      }
      undef $error;
    }

    ok $efh->getc, undef, "$label EOF";
    if (my $eof_error = $testdata->{eof_error}) {
      ok defined $error ? 1 : 0, 1, "$label EOF error";
      ok $error->[0], $eof_error->[0], "$label EOF error 0";
    } else {
      ok $error, undef, "$label EOF error";
    }
  } # testdata
} # check_charset
407
## XML Character Encoding Autodetection
{
  ## Builds the |out| list of an input that decodes to |$text|: the
  ## expectation |$first| for the moment the handle is created, then
  ## every character of |$text|, each followed by |undef| ("no error").
  my $reads = sub {
    my ($first, $text) = @_;
    return [$first, map { ($_, undef) } split //, $text];
  };

  ## Spell an ASCII string as UTF-16 octets (no BOM), big and little
  ## endian respectively.
  my $utf16be = sub { join '', map { "\x00$_" } split //, $_[0] };
  my $utf16le = sub { join '', map { "$_\x00" } split //, $_[0] };

  my $BOM8 = "\xEF\xBB\xBF";
  my $BOM16BE = "\xFE\xFF";
  my $BOM16LE = "\xFF\xFE";
  my $MISMATCH = q<charset-name-mismatch-error>;

  ## XML declarations used as input text.
  my $decl_utf16 = q[<?xml version="1.0" encoding="utf-16"?>];
  my $decl_utf16be = q[<?xml version="1.0" encoding="utf-16be"?>];
  my $decl_utf16le = q[<?xml version="1.0" encoding="utf-16le"?>];
  my $decl_utf8 = q[<?xml version="1.0" encoding="utf-8"?>];
  my $decl_utf8_nover = q[<?xml encoding="utf-8"?>];
  my $decl_utf8_v11 = q[<?xml version="1.1" encoding="utf-8"?>];
  my $decl_noenc = q[<?xml version="1.0"?>];
  my $decl_ascii = q[<?xml encoding="us-ascii"?>];
  my $decl_ascii_uc = q[<?xml encoding="US-ascii"?>];
  my $decl_ascii_sq = q[<?xml encoding='us-ascii'?>];

  my @testdata = (
    ## No XML declaration.
    {id => q<l=0>, in => q<>,
     out => [undef], name => 'UTF-8', bom => 0},
    {id => q<l=1>, in => "a",
     out => [undef, "a", undef], name => 'UTF-8', bom => 0},
    {id => q<bom8.l=0>, in => "\xEF\xBB\xBF",
     out => [undef], name => 'UTF-8', bom => 1},
    {id => q<bom8.l=1>, in => "\xEF\xBB\xBFa",
     out => [undef, "a", undef], name => 'UTF-8', bom => 1},
    {id => q<bom8.zwnbsp>, in => "\xEF\xBB\xBF\xEF\xBB\xBF",
     out => [undef, "\x{FEFF}", undef], name => 'UTF-8', bom => 1},
    {id => q<bom16be.l=0>, in => "\xFE\xFF",
     out => [undef], name => 'UTF-16', bom => 1},
    {id => q<bom16le.l=0>, in => "\xFF\xFE",
     out => [undef], name => 'UTF-16', bom => 1},
    {id => q<bom16be.l=1>, in => "\xFE\xFFa",
     out => [undef, "a", [q<illegal-octets-error>]],
     name => 'UTF-16', bom => 1},
    {id => q<bom16le.l=1>, in => "\xFF\xFEa",
     out => [undef, "a", [q<illegal-octets-error>]],
     name => 'UTF-16', bom => 1},
    {id => q<bom16be.l=2>, in => "\xFE\xFF\x4E\x00",
     out => [undef, "\x{4E00}", undef], name => 'UTF-16', bom => 1},
    {id => q<bom16le.l=2>, in => "\xFF\xFE\x00\x4E",
     out => [undef, "\x{4E00}", undef], name => 'UTF-16', bom => 1},
    {id => q<bom16be.l=2lt>, in => "\xFE\xFF\x00<",
     out => [undef, "<", undef], name => 'UTF-16', bom => 1},
    {id => q<bom16le.l=2lt>, in => "\xFF\xFE<\x00",
     out => [undef, "<", undef], name => 'UTF-16', bom => 1},
    {id => q<bom16be.zwnbsp>, in => "\xFE\xFF\xFE\xFF",
     out => [undef, "\x{FEFF}", undef], name => 'UTF-16', bom => 1},
    {id => q<bom16le.zwnbsp>, in => "\xFF\xFE\xFF\xFE",
     out => [undef, "\x{FEFF}", undef], name => 'UTF-16', bom => 1},
    {id => q<bom32e3412.l=0>, in => "\xFE\xFF\x00\x00",
     out => [undef, "\x00", undef], name => 'UTF-16', bom => 1},
    {id => q<bom32e4321.l=0>, in => "\xFF\xFE\x00\x00",
     out => [undef, "\x00", undef], name => 'UTF-16', bom => 1},
    {id => q<bom16be.l=4ltq>, in => "\xFE\xFF\x00<\x00?",
     out => [undef, "<", undef, "?", undef], name => 'UTF-16', bom => 1},
    {id => q<bom16le.l=4ltq>, in => "\xFF\xFE<\x00?\x00",
     out => [undef, "<", undef, "?", undef], name => 'UTF-16', bom => 1},

    ## UTF-16 input with an XML declaration.
    {id => q<bom16be.decl.1>, in => $BOM16BE . $utf16be->($decl_utf16),
     out => $reads->(undef, $decl_utf16), name => 'utf-16', bom => 1},
    {id => q<bom16le.decl.1>, in => $BOM16LE . $utf16le->($decl_utf16),
     out => $reads->(undef, $decl_utf16), name => 'utf-16', bom => 1},
    {id => q<utf16be.decl.1>, in => $utf16be->($decl_utf16be),
     out => $reads->(undef, $decl_utf16be), name => 'utf-16be', bom => 0},
    {id => q<utf16le.decl.1>, in => $utf16le->($decl_utf16le),
     out => $reads->(undef, $decl_utf16le), name => 'utf-16le', bom => 0},
    {id => q<16be.decl.1>, in => $utf16be->($decl_utf16),
     out => $reads->([$MISMATCH], $decl_utf16), name => 'utf-16', bom => 0},
    {id => q<16le.decl.1>, in => $utf16le->($decl_utf16),
     out => $reads->([$MISMATCH], $decl_utf16), name => 'utf-16', bom => 0},

    ## ASCII-compatible input with an XML declaration.
    {id => q<8.decl.1>, in => $decl_utf8,
     out => $reads->(undef, $decl_utf8), name => 'utf-8', bom => 0},
    {id => q<8.decl.2>, in => $decl_utf8_nover,
     out => $reads->(undef, $decl_utf8_nover), name => 'utf-8', bom => 0},
    {id => q<8.decl.3>, in => $decl_utf8_v11,
     out => $reads->(undef, $decl_utf8_v11), name => 'utf-8', bom => 0},
    {id => q<8.decl.4>, in => $decl_noenc,
     out => $reads->(undef, $decl_noenc), name => 'UTF-8', bom => 0},
    {id => q<bom8.decl.1>, in => $BOM8 . $decl_utf8_nover,
     out => $reads->(undef, $decl_utf8_nover), name => 'utf-8', bom => 1},
    {id => q<us-ascii.decl.1>, in => $decl_ascii,
     out => $reads->(undef, $decl_ascii), name => 'us-ascii', bom => 0},
    {id => q<us-ascii.decl.2>, in => $decl_ascii_uc,
     out => $reads->(undef, $decl_ascii_uc), name => 'US-ascii', bom => 0},
    {id => q<us-ascii.decl.3>, in => $decl_ascii_sq,
     out => $reads->(undef, $decl_ascii_sq), name => 'us-ascii', bom => 0},
  );
  check_charset ('XML', $XML_AUTO_CHARSET, \@testdata);
}
723
## EUC-JP
##
## Every record gives the input octets (|in|) and the expected |out| list
## in the form |check_charset| consumes: the error expected when the
## handle is created, then pairs of an expected |getc| result and the
## error expected after that read.  Record ids are unique, so that a
## failing test name identifies exactly one input.
{
  my @testdata = (
    {id => q<l=0>, in => q<>, out => [undef]},

    ## Single octets
    {id => q<l=1.00>, in => qq<\x00>, out => [undef, "\x00", undef]},
    {id => q<l=1.0d>, in => qq<\x0D>, out => [undef, "\x0D", undef]},
    {id => q<l=1.0e>, in => qq<\x0E>, out => [undef, "\x0E", undef]}, # Error??
    {id => q<l=1.0f>, in => qq<\x0F>, out => [undef, "\x0F", undef]}, # Error??
    {id => q<l=1.1b>, in => qq<\x1B>, out => [undef, "\x1B", undef]}, # Error??
    {id => q<l=1.a>, in => q<a>, out => [undef, "a", undef]},
    {id => q<l=1.20>, in => qq<\x20>, out => [undef, "\x20", undef]},
    {id => q<5C>, in => qq<\x5C>, out => [undef, "\x5C", undef]},
    {id => q<l=1.7E>, in => qq<\x7E>, out => [undef, "\x7E", undef]},
    {id => q<l=1.7F>, in => qq<\x7F>, out => [undef, "\x7F", undef]},
    {id => q<l=1.80>, in => qq<\x80>, out => [undef, "\x80", undef]},
    {id => q<l=1.8c>, in => qq<\x8C>, out => [undef, "\x8C", undef]},
    {id => q<l=1.8e>, in => qq<\x8E>,
     out => [undef, "\x8E", [q<illegal-octets-error>]]},
    {id => q<l=1.8f>, in => qq<\x8F>,
     out => [undef, "\x8F", [q<illegal-octets-error>]]},
    {id => q<l=1.a0>, in => qq<\xA0>,
     out => [undef, "\xA0", [q<unassigned-code-point-error>]]},
    {id => q<l=1.a1>, in => qq<\xA1>,
     out => [undef, "\xA1", [q<illegal-octets-error>]]},
    {id => q<l=1.a2>, in => qq<\xA2>,
     out => [undef, "\xA2", [q<illegal-octets-error>]]},
    {id => q<l=1.fd>, in => qq<\xFD>,
     out => [undef, "\xFD", [q<illegal-octets-error>]]},
    {id => q<l=1.fe>, in => qq<\xFE>,
     out => [undef, "\xFE", [q<illegal-octets-error>]]},
    {id => q<l=1.ff>, in => qq<\xFF>,
     out => [undef, "\xFF", [q<unassigned-code-point-error>]]},

    ## Two octets
    {id => q<l=2.0000>, in => qq<\x00\x00>,
     out => [undef, "\x00", undef, "\x00", undef]},
    {id => q<l=2.0D0A>, in => qq<\x0D\x0A>,
     out => [undef, "\x0D", undef, "\x0A", undef]},
    {id => q<l=2.1B28>, in => qq<\x1B\x28>,
     out => [undef, "\x1B", undef, "\x28", undef]}, # Error??
    {id => q<l=2.2020>, in => qq<\x20\x20>,
     out => [undef, "\x20", undef, "\x20", undef]},
    {id => q<l=2.ab>, in => qq<ab>,
     out => [undef, "a", undef, "b", undef]},
    {id => q<l=2.a0a1>, in => qq<\xA0\xA1>,
     out => [undef, "\xA0", [q<unassigned-code-point-error>],
             "\xA1", [q<illegal-octets-error>]]},
    {id => q<l=2.a1a1>, in => qq<\xA1\xA1>,
     out => [undef, "\x{3000}", undef]},
    {id => q<l=2.a1a2>, in => qq<\xA1\xA2>,
     out => [undef, "\x{3001}", undef]},
    {id => q<l=2.a1a4>, in => qq<\xA1\xA4>,
     out => [undef, "\x{FF0C}", undef]}, # FULLWIDTH COMMA
    {id => q<a1a6>, in => qq<\xA1\xA6>,
     out => [undef, "\x{30FB}", undef]}, # KATAKANA MIDDLE DOT
    {id => q<a1a7>, in => qq<\xA1\xA7>,
     out => [undef, "\x{FF1A}", undef]}, # FULLWIDTH COLON
    {id => q<a1b1>, in => qq<\xA1\xB1>,
     out => [undef, "\x{203E}", undef]}, # OVERLINE
    {id => q<a1bd>, in => qq<\xA1\xBD>,
     out => [undef, "\x{2014}", undef]}, # EM DASH
    {id => q<a1c0>, in => qq<\xA1\xC0>,
     out => [undef, "\x{FF3C}", undef]}, # FULLWIDTH REVERSE SOLIDUS
    {id => q<a1c1>, in => qq<\xA1\xC1>,
     out => [undef, "\x{301C}", undef]}, # WAVE DASH
    {id => q<a1c2>, in => qq<\xA1\xC2>,
     out => [undef, "\x{2016}", undef]}, # DOUBLE VERTICAL LINE
    {id => q<a1c4>, in => qq<\xA1\xC4>,
     out => [undef, "\x{2026}", undef]}, # HORIZONTAL ELLIPSIS
    {id => q<a1dd>, in => qq<\xA1\xDD>,
     out => [undef, "\x{2212}", undef]}, # MINUS SIGN
    {id => q<a1ef>, in => qq<\xA1\xEF>,
     out => [undef, "\x{00A5}", undef]}, # YEN SIGN
    {id => q<a1f1>, in => qq<\xA1\xF1>,
     out => [undef, "\x{00A2}", undef]}, # CENT SIGN
    {id => q<a1f2>, in => qq<\xA1\xF2>,
     out => [undef, "\x{00A3}", undef]}, # POUND SIGN
    {id => q<a1ff>, in => qq<\xA1\xFF>,
     out => [undef, "\xA1", [q<illegal-octets-error>],
             "\xFF", [q<unassigned-code-point-error>]]},
    {id => q<a2ae>, in => qq<\xA2\xAE>,
     out => [undef, "\x{3013}", undef]}, # GETA MARK
    {id => q<a2af>, in => qq<\xA2\xAF>,
     out => [undef, "\xA2\xAF", [q<unassigned-code-point-error>]]},
    {id => q<a2ba>, in => qq<\xA2\xBA>,
     out => [undef, "\x{2208}", undef]}, # ELEMENT OF
    {id => q<a2fe>, in => qq<\xA2\xFE>,
     out => [undef, "\x{25EF}", undef]}, # LARGE CIRCLE
    {id => q<adce>, in => qq<\xAD\xCE>,
     out => [undef, "\xAD\xCE", [q<unassigned-code-point-error>]]},
    {id => q<b0a6>, in => qq<\xB0\xA6>,
     out => [undef, "\x{611B}", undef]}, # han
    {id => q<f4a6>, in => qq<\xF4\xA6>,
     out => [undef, "\x{7199}", undef]}, # han
    {id => q<8ea1>, in => qq<\x8E\xA1>,
     out => [undef, "\x{FF61}", undef]},
    {id => q<8efe>, in => qq<\x8E\xFE>,
     out => [undef, "\x8E\xFE", [q<unassigned-code-point-error>]]},
    {id => q<8ffe>, in => qq<\x8F\xFE>,
     out => [undef, "\x8F\xFE", [q<illegal-octets-error>]]},

    ## Three or more octets
    {id => q<l=2.a1a2a3>, in => qq<\xA1\xA2\xA3>,
     out => [undef, "\x{3001}", undef,
             "\xA3", [q<illegal-octets-error>]]},
    {id => q<8ea1a1>, in => qq<\x8E\xA1\xA1>,
     out => [undef, "\x{FF61}", undef,
             "\xA1", [q<illegal-octets-error>]]},
    {id => q<8fa1a1>, in => qq<\x8F\xA1\xA1>,
     out => [undef, "\x8F\xA1\xA1", [q<unassigned-code-point-error>]]},
    {id => q<8fa2af>, in => qq<\x8F\xA2\xAF>,
     out => [undef, "\x{02D8}", undef]},
    {id => q<8fa2b7>, in => qq<\x8F\xA2\xB7>,
     out => [undef, "\x{FF5E}", undef]}, # FULLWIDTH TILDE
    {id => q<a1a2a1a3>, in => qq<\xA1\xA2\xA1\xA3>,
     out => [undef, "\x{3001}", undef, "\x{3002}", undef]},
    {id => q<8fa2afaf>, in => qq<\x8F\xA2\xAF\xAF>,
     out => [undef, "\x{02D8}", undef,
             "\xAF", [q<illegal-octets-error>]]},
    {id => q<8fa2afafa1>, in => qq<\x8F\xA2\xAF\xAF\xA1>,
     out => [undef, "\x{02D8}", undef,
             "\xAF\xA1", [q<unassigned-code-point-error>]]},
  );

  ## Every record shares the same expected |input_encoding|.
  check_charset ('XML-EUC-JP', $XML_CHARSET.'euc-jp',
                 [map {$_->{name} = 'EUC-JP'; $_} @testdata]);
}
1042
1043 ## Shift_JIS
1044 {
1045 my @testdata = ( # Shift_JIS decoding vectors: each entry has an id, the input octets (in) and the expected result list (out)
1046 { # NOTE(review): out looks like [initial item, then (character, error-list or undef) pairs] -- confirm in check_charset
1047 id => q<l=0>,
1048 in => q<>,
1049 out => [undef],
1050 },
1051 {
1052 id => q<l=1.00>,
1053 in => qq<\x00>,
1054 out => [undef, "\x00", undef],
1055 },
1056 {
1057 id => q<l=1.0d>,
1058 in => qq<\x0D>,
1059 out => [undef, "\x0D", undef],
1060 },
1061 {
1062 id => q<l=1.0e>,
1063 in => qq<\x0E>,
1064 out => [undef, "\x0E", undef],
1065 }, # XXX: original note "Error??" -- unresolved: should 0x0E be reported as an error?
1066 {
1067 id => q<l=1.0f>,
1068 in => qq<\x0F>,
1069 out => [undef, "\x0F", undef],
1070 }, # XXX: original note "Error??" -- unresolved: should 0x0F be reported as an error?
1071 {
1072 id => q<l=1.1b>,
1073 in => qq<\x1B>,
1074 out => [undef, "\x1B", undef],
1075 }, # XXX: original note "Error??" -- unresolved: should 0x1B be reported as an error?
1076 {
1077 id => q<l=1.a>,
1078 in => q<a>,
1079 out => [undef, "a", undef],
1080 },
1081 {
1082 id => q<l=1.20>,
1083 in => qq<\x20>,
1084 out => [undef, "\x20", undef],
1085 },
1086 {
1087 id => q<l=1.5C>,
1088 in => qq<\x5C>,
1089 out => [undef, "\xA5", undef], # YEN SIGN
1090 },
1091 {
1092 id => q<l=1.7E>,
1093 in => qq<\x7E>,
1094 out => [undef, "\x{203E}", undef], # OVERLINE
1095 },
1096 {
1097 id => q<l=1.7F>,
1098 in => qq<\x7F>,
1099 out => [undef, "\x7F", undef],
1100 },
1101 {
1102 id => q<l=1.80>,
1103 in => qq<\x80>,
1104 out => [undef, "\x80", [q<unassigned-code-point-error>]],
1105 },
1106 {
1107 id => q<l=1.8c>,
1108 in => qq<\x8C>,
1109 out => [undef, "\x8C", [q<illegal-octets-error>]],
1110 },
1111 {
1112 id => q<l=1.8e>,
1113 in => qq<\x8E>,
1114 out => [undef, "\x8E", [q<illegal-octets-error>]],
1115 },
1116 {
1117 id => q<l=1.8f>,
1118 in => qq<\x8F>,
1119 out => [undef, "\x8F", [q<illegal-octets-error>]],
1120 },
1121 {
1122 id => q<l=1.a0>,
1123 in => qq<\xA0>,
1124 out => [undef, "\xA0", [q<unassigned-code-point-error>]],
1125 },
1126 {
1127 id => q<l=1.a1>,
1128 in => qq<\xA1>,
1129 out => [undef, "\x{FF61}", undef], # HALFWIDTH IDEOGRAPHIC FULL STOP
1130 },
1131 {
1132 id => q<l=1.a2>,
1133 in => qq<\xA2>,
1134 out => [undef, "\x{FF62}", undef], # HALFWIDTH LEFT CORNER BRACKET
1135 },
1136 {
1137 id => q<l=1.df>,
1138 in => qq<\xdf>,
1139 out => [undef, "\x{FF9F}", undef], # HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
1140 },
1141 {
1142 id => q<l=1.e0>,
1143 in => qq<\xe0>,
1144 out => [undef, "\xE0", [q<illegal-octets-error>]],
1145 },
1146 {
1147 id => q<l=1.ef>,
1148 in => qq<\xEF>,
1149 out => [undef, "\xEF", [q<illegal-octets-error>]],
1150 },
1151 {
1152 id => q<F0>,
1153 in => qq<\xF0>,
1154 out => [undef, "\xF0", [q<unassigned-code-point-error>]],
1155 },
1156 {
1157 id => q<l=1.fc>,
1158 in => qq<\xFC>,
1159 out => [undef, "\xFC", [q<unassigned-code-point-error>]],
1160 },
1161 {
1162 id => q<l=1.fd>,
1163 in => qq<\xFD>,
1164 out => [undef, "\xFD", [q<unassigned-code-point-error>]],
1165 },
1166 {
1167 id => q<l=1.fe>,
1168 in => qq<\xFE>,
1169 out => [undef, "\xFE", [q<unassigned-code-point-error>]],
1170 },
1171 {
1172 id => q<l=1.ff>,
1173 in => qq<\xFF>,
1174 out => [undef, "\xFF", [q<unassigned-code-point-error>]],
1175 },
1176 {
1177 id => q<l=2.0000>,
1178 in => qq<\x00\x00>,
1179 out => [undef, "\x00", undef, "\x00", undef],
1180 },
1181 {
1182 id => q<l=2.0D0A>,
1183 in => qq<\x0D\x0A>,
1184 out => [undef, "\x0D", undef, "\x0A", undef],
1185 },
1186 {
1187 id => q<l=2.1B28>,
1188 in => qq<\x1B\x28>,
1189 out => [undef, "\x1B", undef, "\x28", undef],
1190 },# XXX: original note "Error??" -- unresolved: should 0x1B 0x28 be reported as an error?
1191 {
1192 id => q<l=2.2020>,
1193 in => qq<\x20\x20>,
1194 out => [undef, "\x20", undef, "\x20", undef],
1195 },
1196 {
1197 id => q<l=2.ab>,
1198 in => qq<ab>,
1199 out => [undef, "a", undef, "b", undef],
1200 },
1201 {
1202 id => q<8040>,
1203 in => qq<\x80\x40>,
1204 out => [undef, "\x80", [q<unassigned-code-point-error>],
1205 "\x40", undef],
1206 },
1207 {
1208 id => q<8100>,
1209 in => qq<\x81\x00>,
1210 out => [undef, "\x81\x00", [q<unassigned-code-point-error>]],
1211 },
1212 {
1213 id => q<8101>,
1214 in => qq<\x81\x01>,
1215 out => [undef, "\x81\x01", [q<unassigned-code-point-error>]],
1216 },
1217 {
1218 id => q<813F>,
1219 in => qq<\x81\x3F>,
1220 out => [undef, "\x81\x3F", [q<unassigned-code-point-error>]],
1221 },
1222 {
1223 id => q<8140>,
1224 in => qq<\x81\x40>,
1225 out => [undef, "\x{3000}", undef], # IDEOGRAPHIC SPACE
1226 },
1227 {
1228 id => q<8141>,
1229 in => qq<\x81\x41>,
1230 out => [undef, "\x{3001}", undef], # IDEOGRAPHIC COMMA
1231 },
1232 {
1233 id => q<8143>,
1234 in => qq<\x81\x43>,
1235 out => [undef, "\x{FF0C}", undef], # FULLWIDTH COMMA
1236 },
1237 {
1238 id => q<8150>,
1239 in => qq<\x81\x50>,
1240 out => [undef, "\x{FFE3}", undef], # FULLWIDTH MACRON
1241 },
1242 {
1243 id => q<815C>,
1244 in => qq<\x81\x5C>,
1245 out => [undef, "\x{2014}", undef], # EM DASH
1246 },
1247 {
1248 id => q<815F>,
1249 in => qq<\x81\x5F>,
1250 out => [undef, "\x{005C}", undef], # REVERSE SOLIDUS
1251 },
1252 {
1253 id => q<8160>,
1254 in => qq<\x81\x60>,
1255 out => [undef, "\x{301C}", undef], # WAVE DASH
1256 },
1257 {
1258 id => q<8161>,
1259 in => qq<\x81\x61>,
1260 out => [undef, "\x{2016}", undef], # DOUBLE VERTICAL LINE
1261 },
1262 {
1263 id => q<8163>,
1264 in => qq<\x81\x63>,
1265 out => [undef, "\x{2026}", undef], # HORIZONTAL ELLIPSIS
1266 },
1267 {
1268 id => q<817C>,
1269 in => qq<\x81\x7C>,
1270 out => [undef, "\x{2212}", undef], # MINUS SIGN
1271 },
1272 {
1273 id => q<817F>,
1274 in => qq<\x81\x7F>,
1275 out => [undef, "\x81\x7F", [q<unassigned-code-point-error>]],
1276 },
1277 {
1278 id => q<818F>,
1279 in => qq<\x81\x8F>,
1280 out => [undef, "\x{FFE5}", undef], # FULLWIDTH YEN SIGN
1281 },
1282 {
1283 id => q<8191>,
1284 in => qq<\x81\x91>,
1285 out => [undef, "\x{00A2}", undef], # CENT SIGN
1286 },
1287 {
1288 id => q<8192>,
1289 in => qq<\x81\x92>,
1290 out => [undef, "\x{00A3}", undef], # POUND SIGN
1291 },
1292 {
1293 id => q<81AC>,
1294 in => qq<\x81\xAC>,
1295 out => [undef, "\x{3013}", undef], # GETA MARK
1296 },
1297 {
1298 id => q<81AD>,
1299 in => qq<\x81\xAD>,
1300 out => [undef, "\x81\xAD", [q<unassigned-code-point-error>]],
1301 },
1302 {
1303 id => q<81B8>,
1304 in => qq<\x81\xB8>,
1305 out => [undef, "\x{2208}", undef], # ELEMENT OF
1306 },
1307 {
1308 id => q<81CA>,
1309 in => qq<\x81\xCA>,
1310 out => [undef, "\x{00AC}", undef], # NOT SIGN
1311 },
1312 {
1313 id => q<81FC>,
1314 in => qq<\x81\xFC>,
1315 out => [undef, "\x{25EF}", undef], # LARGE CIRCLE
1316 },
1317 {
1318 id => q<81FD>,
1319 in => qq<\x81\xFD>,
1320 out => [undef, "\x81\xFD", [q<unassigned-code-point-error>]],
1321 },
1322 {
1323 id => q<81FE>,
1324 in => qq<\x81\xFE>,
1325 out => [undef, "\x81\xFE", [q<unassigned-code-point-error>]],
1326 },
1327 {
1328 id => q<81FF>,
1329 in => qq<\x81\xFF>,
1330 out => [undef, "\x81\xFF", [q<unassigned-code-point-error>]],
1331 },
1332 {
1333 id => q<DDDE>,
1334 in => qq<\xDD\xDE>,
1335 out => [undef, "\x{FF9D}", undef, "\x{FF9E}", undef], # HALFWIDTH KATAKANA LETTER N, then HALFWIDTH KATAKANA VOICED SOUND MARK
1336 },
1337 {
1338 id => q<e040>,
1339 in => qq<\xE0\x40>,
1340 out => [undef, "\x{6F3E}", undef], # han
1341 },
1342 {
1343 id => q<eaa4>,
1344 in => qq<\xEA\xA4>,
1345 out => [undef, "\x{7199}", undef], # han
1346 },
1347 {
1348 id => q<eaa5>,
1349 in => qq<\xEA\xA5>,
1350 out => [undef, "\xEA\xA5", [q<unassigned-code-point-error>]],
1351 },
1352 {
1353 id => q<eb40>,
1354 in => qq<\xEB\x40>,
1355 out => [undef, "\xEB\x40", [q<unassigned-code-point-error>]],
1356 },
1357 {
1358 id => q<ed40>,
1359 in => qq<\xED\x40>,
1360 out => [undef, "\xED\x40", [q<unassigned-code-point-error>]],
1361 },
1362 {
1363 id => q<effc>,
1364 in => qq<\xEF\xFC>,
1365 out => [undef, "\xEF\xFC", [q<unassigned-code-point-error>]],
1366 },
1367 {
1368 id => q<f040>,
1369 in => qq<\xF0\x40>,
1370 out => [undef, "\xF0", [q<unassigned-code-point-error>],
1371 "\x40", undef],
1372 },
1373 {
1374 id => q<f140>,
1375 in => qq<\xF1\x40>,
1376 out => [undef, "\xF1", [q<unassigned-code-point-error>],
1377 "\x40", undef],
1378 },
1379 {
1380 id => q<fb40>,
1381 in => qq<\xFB\x40>,
1382 out => [undef, "\xFB", [q<unassigned-code-point-error>],
1383 "\x40", undef],
1384 },
1385 {
1386 id => q<fc40>,
1387 in => qq<\xFc\x40>,
1388 out => [undef, "\xFC", [q<unassigned-code-point-error>],
1389 "\x40", undef],
1390 },
1391 {
1392 id => q<fd40>,
1393 in => qq<\xFD\x40>,
1394 out => [undef, "\xFD", [q<unassigned-code-point-error>],
1395 "\x40", undef],
1396 },
1397 {
1398 id => q<fE40>,
1399 in => qq<\xFE\x40>,
1400 out => [undef, "\xFE", [q<unassigned-code-point-error>],
1401 "\x40", undef],
1402 },
1403 {
1404 id => q<ff40>,
1405 in => qq<\xFF\x40>,
1406 out => [undef, "\xFF", [q<unassigned-code-point-error>],
1407 "\x40", undef],
1408 },
1409 {
1410 id => q<81408142>,
1411 in => qq<\x81\x40\x81\x42>,
1412 out => [undef, "\x{3000}", undef, "\x{3002}", undef], # IDEOGRAPHIC SPACE, then IDEOGRAPHIC FULL STOP
1413 },
1414 );
1415
1416 $_->{name} = 'Shift_JIS' for @testdata; # every vector gets name => 'Shift_JIS' before the run
1417 check_charset ('XML-Shift_JIS', $XML_CHARSET.'shift_jis', [@testdata]); # same hash refs, same order
1418 }
1419
1420 ## ISO-2022-JP
1421 {
1422
1423 my @testdata = ( # ISO-2022-JP decoding vectors; out1 is checked for the IANA charset, out2 for the XML charset (see the check_charset calls below)
1424 { # NOTE(review): eof_error, where present, presumably lists the error expected at end of input -- confirm in check_charset
1425 id => q<l=0>,
1426 in => q<>,
1427 out1 => [undef],
1428 out2 => [undef],
1429 },
1430 {
1431 id => q<l=1.00>,
1432 in => qq<\x00>,
1433 out1 => [undef, "\x00", undef],
1434 out2 => [undef, "\x00", undef],
1435 },
1436 {
1437 id => q<l=1.0d>,
1438 in => qq<\x0D>,
1439 out1 => [undef, "\x0D", undef],
1440 out2 => [undef, "\x0D", undef],
1441 }, # XXX: original note "Error?" -- unresolved: should a lone 0x0D be reported as an error?
1442 {
1443 id => q<0A>,
1444 in => qq<\x0A>,
1445 out1 => [undef, "\x0A", undef],
1446 out2 => [undef, "\x0A", undef],
1447 }, # XXX: original note "Error?" -- unresolved: should a lone 0x0A be reported as an error?
1448 {
1449 id => q<l=1.0e>,
1450 in => qq<\x0E>,
1451 out1 => [undef, "\x0E", [q<illegal-octets-error>]],
1452 out2 => [undef, "\x0E", [q<illegal-octets-error>]],
1453 },
1454 {
1455 id => q<l=1.0f>,
1456 in => qq<\x0F>,
1457 out1 => [undef, "\x0F", [q<illegal-octets-error>]],
1458 out2 => [undef, "\x0F", [q<illegal-octets-error>]],
1459 },
1460 {
1461 id => q<l=1.1b>,
1462 in => qq<\x1B>,
1463 out1 => [undef, "\x1B", [q<illegal-octets-error>]],
1464 out2 => [undef, "\x1B", [q<illegal-octets-error>]],
1465 },
1466 {
1467 id => q<l=1.a>,
1468 in => q<a>,
1469 out1 => [undef, "a", undef],
1470 out2 => [undef, "a", undef],
1471 },
1472 {
1473 id => q<l=1.20>,
1474 in => qq<\x20>,
1475 out1 => [undef, "\x20", undef],
1476 out2 => [undef, "\x20", undef],
1477 },
1478 {
1479 id => q<l=1.5C>,
1480 in => qq<\x5C>,
1481 out1 => [undef, "\x5C", undef],
1482 out2 => [undef, "\x5C", undef],
1483 },
1484 {
1485 id => q<l=1.7E>,
1486 in => qq<\x7E>,
1487 out1 => [undef, "\x7E", undef],
1488 out2 => [undef, "\x7E", undef],
1489 },
1490 {
1491 id => q<l=1.7F>,
1492 in => qq<\x7F>,
1493 out1 => [undef, "\x7F", undef],
1494 out2 => [undef, "\x7F", undef],
1495 },
1496 {
1497 id => q<l=1.80>,
1498 in => qq<\x80>,
1499 out1 => [undef, "\x80", [q<illegal-octets-error>]],
1500 out2 => [undef, "\x80", [q<illegal-octets-error>]],
1501 },
1502 {
1503 id => q<l=1.8c>,
1504 in => qq<\x8C>,
1505 out1 => [undef, "\x8C", [q<illegal-octets-error>]],
1506 out2 => [undef, "\x8C", [q<illegal-octets-error>]],
1507 },
1508 {
1509 id => q<l=1.8e>,
1510 in => qq<\x8E>,
1511 out1 => [undef, "\x8E", [q<illegal-octets-error>]],
1512 out2 => [undef, "\x8E", [q<illegal-octets-error>]],
1513 },
1514 {
1515 id => q<l=1.8f>,
1516 in => qq<\x8F>,
1517 out1 => [undef, "\x8F", [q<illegal-octets-error>]],
1518 out2 => [undef, "\x8F", [q<illegal-octets-error>]],
1519 },
1520 {
1521 id => q<l=1.a0>,
1522 in => qq<\xA0>,
1523 out1 => [undef, "\xA0", [q<illegal-octets-error>]],
1524 out2 => [undef, "\xA0", [q<illegal-octets-error>]],
1525 },
1526 {
1527 id => q<l=1.a1>,
1528 in => qq<\xA1>,
1529 out1 => [undef, "\xA1", [q<illegal-octets-error>]],
1530 out2 => [undef, "\xA1", [q<illegal-octets-error>]],
1531 },
1532 {
1533 id => q<l=1.a2>,
1534 in => qq<\xA2>,
1535 out1 => [undef, "\xA2", [q<illegal-octets-error>]],
1536 out2 => [undef, "\xA2", [q<illegal-octets-error>]],
1537 },
1538 {
1539 id => q<l=1.df>,
1540 in => qq<\xdf>,
1541 out1 => [undef, "\xDF", [q<illegal-octets-error>]],
1542 out2 => [undef, "\xDF", [q<illegal-octets-error>]],
1543 },
1544 {
1545 id => q<l=1.e0>,
1546 in => qq<\xe0>,
1547 out1 => [undef, "\xE0", [q<illegal-octets-error>]],
1548 out2 => [undef, "\xE0", [q<illegal-octets-error>]],
1549 },
1550 {
1551 id => q<l=1.ef>,
1552 in => qq<\xEF>,
1553 out1 => [undef, "\xEF", [q<illegal-octets-error>]],
1554 out2 => [undef, "\xEF", [q<illegal-octets-error>]],
1555 },
1556 {
1557 id => q<F0>,
1558 in => qq<\xF0>,
1559 out1 => [undef, "\xF0", [q<illegal-octets-error>]],
1560 out2 => [undef, "\xF0", [q<illegal-octets-error>]],
1561 },
1562 {
1563 id => q<l=1.fc>,
1564 in => qq<\xFC>,
1565 out1 => [undef, "\xFC", [q<illegal-octets-error>]],
1566 out2 => [undef, "\xFC", [q<illegal-octets-error>]],
1567 },
1568 {
1569 id => q<l=1.fd>,
1570 in => qq<\xFD>,
1571 out1 => [undef, "\xFD", [q<illegal-octets-error>]],
1572 out2 => [undef, "\xFD", [q<illegal-octets-error>]],
1573 },
1574 {
1575 id => q<l=1.fe>,
1576 in => qq<\xFE>,
1577 out1 => [undef, "\xFE", [q<illegal-octets-error>]],
1578 out2 => [undef, "\xFE", [q<illegal-octets-error>]],
1579 },
1580 {
1581 id => q<l=1.ff>,
1582 in => qq<\xFF>,
1583 out1 => [undef, "\xFF", [q<illegal-octets-error>]],
1584 out2 => [undef, "\xFF", [q<illegal-octets-error>]],
1585 },
1586 {
1587 id => q<l=2.0000>,
1588 in => qq<\x00\x00>,
1589 out1 => [undef, "\x00", undef, "\x00", undef],
1590 out2 => [undef, "\x00", undef, "\x00", undef],
1591 },
1592 {
1593 id => q<l=2.0D0A>,
1594 in => qq<\x0D\x0A>,
1595 out1 => [undef, "\x0D", undef, "\x0A", undef],
1596 out2 => [undef, "\x0D", undef, "\x0A", undef],
1597 },
1598 {
1599 id => q<l=2.1B1B>,
1600 in => qq<\x1B\x1B>,
1601 out1 => [undef, "\x1B", [q<illegal-octets-error>],
1602 "\x1B", [q<illegal-octets-error>]],
1603 out2 => [undef, "\x1B", [q<illegal-octets-error>],
1604 "\x1B", [q<illegal-octets-error>]],
1605 },
1606 {
1607 id => q<l=2.1B20>,
1608 in => qq<\x1B\x20>,
1609 out1 => [undef, "\x1B", [q<illegal-octets-error>], "\x20", undef],
1610 out2 => [undef, "\x1B", [q<illegal-octets-error>], "\x20", undef],
1611 },
1612 {
1613 id => q<l=2.1B24>,
1614 in => qq<\x1B\x24>,
1615 out1 => [undef, "\x1B", [q<illegal-octets-error>], "\x24", undef],
1616 out2 => [undef, "\x1B", [q<illegal-octets-error>], "\x24", undef],
1617 },
1618 {
1619 id => q<l=2.1B28>,
1620 in => qq<\x1B\x28>,
1621 out1 => [undef, "\x1B", [q<illegal-octets-error>], "\x28", undef],
1622 out2 => [undef, "\x1B", [q<illegal-octets-error>], "\x28", undef],
1623 },
1624 {
1625 id => q<l=2.2020>,
1626 in => qq<\x20\x20>,
1627 out1 => [undef, "\x20", undef, "\x20", undef],
1628 out2 => [undef, "\x20", undef, "\x20", undef],
1629 },
1630 {
1631 id => q<l=2.ab>,
1632 in => qq<ab>,
1633 out1 => [undef, "a", undef, "b", undef],
1634 out2 => [undef, "a", undef, "b", undef],
1635 },
1636 {
1637 id => q<8040>,
1638 in => qq<\x80\x40>,
1639 out1 => [undef, "\x80", [q<illegal-octets-error>],
1640 "\x40", undef],
1641 out2 => [undef, "\x80", [q<illegal-octets-error>],
1642 "\x40", undef],
1643 },
1644 {
1645 id => q<1B2440>,
1646 in => qq<\x1B\x24\x40>, # ESC $ @ (designates JIS C 6226-1978 per RFC 1468)
1647 out1 => [undef],
1648 out2 => [undef],
1649 eof_error => [q<invalid-state-error>],
1650 },
1651 {
1652 id => q<1B2442>,
1653 in => qq<\x1B\x24\x42>, # ESC $ B (designates JIS X 0208-1983 per RFC 1468)
1654 out1 => [undef],
1655 out2 => [undef],
1656 eof_error => [q<invalid-state-error>],
1657 },
1658 {
1659 id => q<1B2840>,
1660 in => qq<\x1B\x28\x40>, # ESC ( @ -- not one of the RFC 1468 escape sequences
1661 out1 => [undef, "\x1B", [q<illegal-octets-error>], "(", undef,
1662 "\x40", undef],
1663 out2 => [undef, "\x1B", [q<illegal-octets-error>], "(", undef,
1664 "\x40", undef],
1665 },
1666 {
1667 id => q<1B2842>,
1668 in => qq<\x1B\x28\x42>, # ESC ( B (designates ASCII per RFC 1468)
1669 out1 => [undef],
1670 out2 => [undef],
1671 },
1672 {
1673 id => q<1B284A>,
1674 in => qq<\x1B\x28\x4A>, # ESC ( J (designates JIS X 0201 Roman per RFC 1468)
1675 out1 => [undef],
1676 out2 => [undef],
1677 eof_error => [q<invalid-state-error>],
1678 },
1679 {
1680 id => q<1B$B1B(B>,
1681 in => qq<\x1B\x24\x42\x1B\x28\x42>,
1682 out1 => [undef],
1683 out2 => [undef],
1684 },
1685 {
1686 id => q<1B(B1B(B>,
1687 in => qq<\x1B\x28\x42\x1B\x28\x42>,
1688 out1 => [undef],
1689 out2 => [undef],
1690 },
1691 {
1692 id => q<1B(Ba1B(B>,
1693 in => qq<\x1B\x28\x42a\x1B\x28\x42>,
1694 out1 => [undef, "a", undef],
1695 out2 => [undef, "a", undef],
1696 },
1697 {
1698 id => q<1B(Ba1B(B1B(B>,
1699 in => qq<\x1B\x28\x42a\x1B\x28\x42\x1B\x28\x42>,
1700 out1 => [undef, "a", undef],
1701 out2 => [undef, "a", undef],
1702 },
1703 {
1704 id => q<1B$42!!1B2842>,
1705 in => qq<\x1B\x24\x42!!\x1B\x28\x42>,
1706 out1 => [undef, "\x{3000}", undef], # IDEOGRAPHIC SPACE
1707 out2 => [undef, "\x{3000}", undef],
1708 },
1709 {
1710 id => q<1B$4221211B284A>,
1711 in => qq<\x1B\x24\x42!!\x1B\x28\x4A>,
1712 out1 => [undef, "\x{3000}", undef],
1713 out2 => [undef, "\x{3000}", undef],
1714 eof_error => [q<invalid-state-error>],
1715 },
1716 {
1717 id => q<1B$4021211B2842>,
1718 in => qq<\x1B\x24\x40!!\x1B\x28\x42>,
1719 out1 => [undef, "\x{3000}", undef],
1720 out2 => [undef, "\x{3000}", undef],
1721 },
1722 {
1723 id => q<1B$402121211B2842>,
1724 in => qq<\x1B\x24\x40!!!\x1B\x28\x42>,
1725 out1 => [undef, "\x{3000}", undef, "!", [q<illegal-octets-error>]],
1726 out2 => [undef, "\x{3000}", undef, "!", [q<illegal-octets-error>]],
1727 },
1728 {
1729 id => q<1B$4021211B2442!!1B2842>,
1730 in => qq<\x1B\x24\x40!!\x1B\x24\x42!!\x1B\x28\x42>,
1731 out1 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1732 out2 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1733 },
1734 {
1735 id => q<1B$4021211B2440!!1B2842>,
1736 in => qq<\x1B\x24\x40!!\x1B\x24\x40!!\x1B\x28\x42>,
1737 out1 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1738 out2 => [undef, "\x{3000}", undef, "\x{3000}", undef],
1739 },
1740 {
1741 id => q<1B$@!"1B(B\~|>,
1742 in => qq<\x1B\x24\x40!"\x1B(B\\~|>,
1743 out1 => [undef, "\x{3001}", undef, "\x5C", undef,
1744 "\x7E", undef, "|", undef],
1745 out2 => [undef, "\x{3001}", undef, "\x5C", undef,
1746 "\x7E", undef, "|", undef],
1747 },
1748 {
1749 id => q<1B$B!"1B(J\~|1B(B>,
1750 in => qq<\x1B\x24\x42!"\x1B(J\\~|\x1B(B>,
1751 out1 => [undef, "\x{3001}", undef, "\xA5", undef, # after ESC ( J: 0x5C is expected as YEN SIGN
1752 "\x{203E}", undef, "|", undef], # and 0x7E as OVERLINE
1753 out2 => [undef, "\x{3001}", undef, "\xA5", undef,
1754 "\x{203E}", undef, "|", undef],
1755 },
1756 {
1757 id => q<78compat.3022(16-02)>,
1758 in => qq<\x1B\$\@\x30\x22\x1B\$B\x30\x22\x1B(B>, # same two octets after ESC $ @ and after ESC $ B
1759 out1 => [undef, "\x{555E}", undef, "\x{5516}", undef],
1760 out2 => [undef, "\x{5516}", undef, "\x{5516}", undef],
1761 },
1762 {
1763 id => q<unassigned.2239>,
1764 in => qq<\x1B\$\@\x22\x39\x1B\$B\x22\x39\x1B(B>,
1765 out1 => [undef, "\x22\x39", [q<unassigned-code-point-error>],
1766 "\x22\x39", [q<unassigned-code-point-error>]],
1767 out2 => [undef, "\x22\x39", [q<unassigned-code-point-error>],
1768 "\x22\x39", [q<unassigned-code-point-error>]],
1769 },
1770 {
1771 id => q<83add.223A>,
1772 in => qq<\x1B\$\@\x22\x3A\x1B\$B\x22\x3A\x1B(B>,
1773 out1 => [undef, "\x22\x3A", [q<unassigned-code-point-error>],
1774 "\x{2208}", undef], # ELEMENT OF
1775 out2 => [undef, "\x{2208}", undef, "\x{2208}", undef],
1776 },
1777 {
1778 id => q<83add.2840>,
1779 in => qq<\x1B\$\@\x28\x40\x1B\$B\x28\x40\x1B(B>,
1780 out1 => [undef, "\x28\x40", [q<unassigned-code-point-error>],
1781 "\x{2542}", undef],
1782 out2 => [undef, "\x{2542}", undef, "\x{2542}", undef],
1783 },
1784 {
1785 id => q<83add.7421>,
1786 in => qq<\x1B\$\@\x74\x21\x1B\$B\x74\x21\x1B(B>,
1787 out1 => [undef, "\x74\x21", [q<unassigned-code-point-error>],
1788 "\x{582F}", undef],
1789 out2 => [undef, "\x{5C2D}", undef, "\x{582F}", undef], # NOTE(review): the first character differs from the second here (U+5C2D vs U+582F) -- confirm intended
1790 },
1791 {
1792 id => q<83swap.3033>,
1793 in => qq<\x1B\$\@\x30\x33\x1B\$B\x30\x33\x1B(B>,
1794 out1 => [undef, "\x{9C3A}", undef, "\x{9BF5}", undef],
1795 out2 => [undef, "\x{9C3A}", undef, "\x{9BF5}", undef],
1796 },
1797 {
1798 id => q<83swap.724D>,
1799 in => qq<\x1B\$\@\x72\x4D\x1B\$B\x72\x4D\x1B(B>,
1800 out1 => [undef, "\x{9BF5}", undef, "\x{9C3A}", undef],
1801 out2 => [undef, "\x{9BF5}", undef, "\x{9C3A}", undef],
1802 },
1803 {
1804 id => q<90add.7425>,
1805 in => qq<\x1B\$\@\x74\x25\x1B\$B\x74\x25\x1B(B>,
1806 out1 => [undef, "\x74\x25", [q<unassigned-code-point-error>],
1807 "\x74\x25", [q<unassigned-code-point-error>]],
1808 out2 => [undef, "\x{51DC}", undef, "\x{51DC}", undef],
1809 },
1810 {
1811 id => q<90add.7426>,
1812 in => qq<\x1B\$\@\x74\x26\x1B\$B\x74\x26\x1B(B>,
1813 out1 => [undef, "\x74\x26", [q<unassigned-code-point-error>],
1814 "\x74\x26", [q<unassigned-code-point-error>]],
1815 out2 => [undef, "\x{7199}", undef, "\x{7199}", undef],
1816 },
1817 );
1818
1819 check_charset ('IETF-ISO-2022-JP', $IANA_CHARSET.'iso-2022-jp', # IANA charset run: expectations come from out1
1820 [map { +{%$_, out => $_->{out1}, name => 'iso-2022-jp'} } # fresh shallow copy of each vector,
1821 @testdata]); # so the shared @testdata entries are never modified
1822 check_charset ('XML-ISO-2022-JP', $XML_CHARSET.'iso-2022-jp', # XML charset run: expectations come from out2
1823 [map { +{%$_, out => $_->{out2}, name => 'ISO-2022-JP'} } # independent copies again;
1824 @testdata]); # no out/name value is carried over from the first run
1825 }

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24