/[pub]/test/oldencodeutils/lib/Encode/ISO2022/JIS.pm
Suika

Contents of /test/oldencodeutils/lib/Encode/ISO2022/JIS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (show annotations) (download)
Mon Sep 16 06:34:35 2002 UTC (23 years, 4 months ago) by wakaba
Branch: MAIN
2002-09-16  Wakaba <w@suika.fam.cx>

	* EightBit.pm: New module.
	* JIS.pm: New module.
	* SevenBit.pm: Don't invoke G1 to GR when encoding.

=head1 NAME

C<Encode::ISO2022::JIS> --- Encode and decode of ISO/IEC 2022
based encodings defined by JIS (Japanese Industrial Standards),
other than RFC 1468 coded representation, C<ISO-2022-JP-3>
coded representations and C<EUC-JISX0213> coded representations

=head1 ENCODINGS

=over 4

=cut

require 5.7.3;
use strict;

package Encode::ISO2022::JIS;

## Base class shared by every JIS encoding package declared later in
## this file.  It supplies the encode/decode entry points and the
## default ISO/IEC 2022 configuration object that subclasses refine by
## overriding __2022__common (and, for some 7-bit codes, __2022_encode).

use vars qw($VERSION);
$VERSION=do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
use base qw(Encode::Encoding);
require Encode::ISO2022;

## encode ($obj, $str [, $chk])
##   Converts $str with Encode::ISO2022::internal_to_iso2022, using the
##   configuration object returned by $obj->__2022_encode, and returns
##   the result.  When $chk is true the caller's own string argument is
##   emptied (through @_ aliasing) before the conversion runs; the
##   conversion itself works on the copy taken above.
sub encode ($$;$) {
    my ($obj, $str, $chk) = @_;
    if ($chk) {
        $_[1] = '';
    }
    ## Called with a leading "&" so that any prototype declared on the
    ## target function is bypassed.
    my $converted
        = &Encode::ISO2022::internal_to_iso2022 ($str, $obj->__2022_encode);
    return $converted;
}

## decode ($obj, $str [, $chk])
##   Counterpart of encode: converts $str with
##   Encode::ISO2022::iso2022_to_internal, using the configuration object
##   returned by $obj->__2022_decode.  A true $chk empties the caller's
##   string argument in the same way as encode does.
sub decode ($$;$) {
    my ($obj, $str, $chk) = @_;
    if ($chk) {
        $_[1] = '';
    }
    return &Encode::ISO2022::iso2022_to_internal ($str, $obj->__2022_decode);
}

## __2022__common ($obj)
##   Creates a fresh Encode::ISO2022 object and fills in the settings
##   common to all encodings in this file.  Returns that object.
##   NOTE(review): the meaning of each option key is defined by
##   Encode::ISO2022, which is not visible here; -1 presumably disables
##   designation of the given set type -- confirm against that module.
sub __2022__common ($) {
    my $C = Encode::ISO2022->new_object;

    ## Same value for the default entry of every set type.
    for my $set_type (qw(C0 C1 G94 G94n G96 G96n)) {
        $C->{option}{designate_to}{$set_type}{default} = -1;
    }
    $C->{option}{designate_to}{G94}{B} = -1;

    $C->{G1} = $Encode::ISO2022::CHARSET{G94}{"\x7E"};    ## empty

    $C->{option}{reset}{Gdesignation} = 0;
    $C->{option}{reset}{Ginvoke}      = 0;

    $C->{option}{undef_char} = [
        "\x22\x2E",    ## GETA MARK
        { type => 'G94n', charset => 'B', revision => '@' },
    ];

    return $C;
}

## __2022_encode ($obj)
##   Configuration used by encode; by default simply the common one.
sub __2022_encode ($) {
    my ($self) = @_;
    my $C = $self->__2022__common;
    return $C;
}

## __2022_decode ($obj)
##   Configuration used by decode; by default simply the common one.
sub __2022_decode ($) {
    my ($self) = @_;
    my $C = $self->__2022__common;
    return $C;
}

package Encode::ISO2022::EightBit::JISX0201Latin7;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(
    qw(
        jisx0201-1997-latin-7bit
        JIS_C6220-1969-ro iso-ir-14 jp ISO646-JP csISO14JISC6220ro
    )
);

=item jisx0201-1997-latin-7bit

JIS X 0201:1997 6.1 7-bit code for Latin.
(Alias: C<JIS_C6220-1969-ro> (RFC 1345), C<iso-ir-14> (RFC 1345),
C<jp> (RFC 1345), C<ISO646-JP> (RFC 1345), C<csISO14JISC6220ro> (IANA))

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects a 7-bit code
##   with the JIS X 0201 Latin set in G0 and "\x3F" in that set as the
##   substitute for undefined characters.  Returns the configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 7;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{J};    ## JIS X 0201:1997 Latin set
    $C->{option}{undef_char} = [ "\x3F", { type => 'G94', charset => 'J' } ];
    return $C;
}

package Encode::ISO2022::EightBit::JISX0201Katakana7;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(
    qw(
        jisx0201-1997-katakana-7bit
        JIS_C6220-1969-jp JIS_C6220-1969
        iso-ir-13 katakana x0201-7 csISO13JISC6220jp
    )
);

=item jisx0201-1997-katakana-7bit

JIS X 0201:1997 6.2 7-bit code for Katakana
(Alias: C<JIS_C6220-1969-jp> (RFC 1345), C<JIS_C6220-1969> (RFC 1345),
C<iso-ir-13> (RFC 1345), C<katakana> (RFC 1345), C<x0201-7> (RFC 1345),
C<csISO13JISC6220jp> (IANA))

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects a 7-bit code
##   with the JIS X 0201 Katakana set in G0 and "\x25" in that set as the
##   substitute for undefined characters.  Returns the configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 7;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{I};    ## JIS X 0201:1997 Katakana set
    $C->{option}{undef_char} = [ "\x25", { type => 'G94', charset => 'I' } ];
    return $C;
}

package Encode::ISO2022::EightBit::JISX0201LatinKatakana7;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(
    qw(
        jisx0201-1997-latin-katakana-7bit
        JIS_X0201 X0201 csHalfWidthKatakana
    )
);

=item jisx0201-1997-latin-katakana-7bit

JIS X 0201:1997 6.3 7-bit code for Latin and Katakana
(Alias: C<JIS_X0201> (RFC 1345), C<X0201> (RFC 1345),
C<csHalfWidthKatakana> (IANA))

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects a 7-bit code
##   with the Latin set in G0 and the Katakana set in G1.  Returns the
##   configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 7;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{J};    ## JIS X 0201:1997 Latin set
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94}{I};    ## JIS X 0201:1997 Katakana set
    $C->{option}{Ginvoke_to_left} = [ 1, 1, 1, 1 ];
    $C->{option}{reset}{Ginvoke}  = 1;
    ## JIS X 0201:1997 does not specify this limitation.
    $C->{option}{undef_char} = [ "\x3F", { type => 'G94', charset => 'J' } ];
    return $C;
}

## __2022_encode ($obj)
##   Same as the common configuration but with GR explicitly undefined.
##   NOTE(review): presumably so that nothing is invoked into GR when
##   producing 7-bit output -- confirm against Encode::ISO2022.
sub __2022_encode ($) {
    my ($self) = @_;
    my $C = $self->__2022__common;
    $C->{GR} = undef;
    return $C;
}

package Encode::ISO2022::EightBit::JISX0201LatinKatakana8;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';

## The canonical name is the documented "latin-katakana" one.  The
## misspelled "latin-latin" name that used to be the only registered
## name is kept as an alias so that existing callers keep working.
__PACKAGE__->Define(
    qw(
        jisx0201-1997-latin-katakana-8bit
        jisx0201-1997-latin-latin-8bit
    )
);

=item jisx0201-1997-latin-katakana-8bit

JIS X 0201:1997 7.4 8-bit code for Latin and Katakana.
(Alias: C<jisx0201-1997-latin-latin-8bit>, a misspelling kept for
backward compatibility)

=cut

## NOTE(review): the clause number "7.4" above is carried over unchanged
## from the earlier text, which cited JIS X 0208 by mistake; confirm it
## against JIS X 0201:1997.

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects an 8-bit code
##   with the Latin set in G0 and the Katakana set in G1, and "\x3F" in
##   the Latin set as the substitute for undefined characters.  Returns
##   the configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 8;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{J};    ## JIS X 0201:1997 Latin set
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94}{I};    ## JIS X 0201:1997 Katakana set
    $C->{option}{undef_char} = [ "\x3F", { type => 'G94', charset => 'J' } ];
    return $C;
}

package Encode::ISO2022::EightBit::JISX0208Kanji7;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0208-1997-kanji-7bit));

=item jisx0208-1997-kanji-7bit

JIS X 0208:1997 7.1.1 7-bit code for Kanji

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects a 7-bit code
##   with JIS X 0208:1997 in G0.  Returns the configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 7;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94n}{'B@'};    ## JIS X 0208:1997
    return $C;
}

package Encode::ISO2022::EightBit::JISX0208Kanji8;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0208-1997-kanji-8bit));

=item jisx0208-1997-kanji-8bit

JIS X 0208:1997 7.1.2 8-bit code for Kanji

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects an 8-bit code
##   with JIS X 0208:1997 in G0, the "\x7E" (empty) G94 entry in G1, and
##   the C1invoke_to_right option switched on.  Returns the
##   configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 8;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94n}{'B@'};    ## JIS X 0208:1997
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94}{"\x7E"};   ## empty
    $C->{option}{C1invoke_to_right} = 1;
    return $C;
}

package Encode::ISO2022::EightBit::JISX0208IRVKanji7;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0208-1997-irv-kanji-7bit));

=item jisx0208-1997-irv-kanji-7bit

JIS X 0208:1997 7.2.1 7-bit code for IRV and Kanji

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects a 7-bit code
##   with ISO/IEC 646 IRV in G0 and JIS X 0208:1997 in G1.  Returns the
##   configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 7;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{B};        ## ISO/IEC 646 IRV
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94n}{'B@'};    ## JIS X 0208:1997
    $C->{option}{Ginvoke_to_left} = [ 1, 1, 1, 1 ];
    $C->{option}{reset}{Ginvoke}  = 1;
    ## JIS X 0208:1997 does not specify this limitation.
    return $C;
}

## __2022_encode ($obj)
##   Same as the common configuration but with GR explicitly undefined.
##   NOTE(review): presumably so that nothing is invoked into GR when
##   producing 7-bit output -- confirm against Encode::ISO2022.
sub __2022_encode ($) {
    my ($self) = @_;
    my $C = $self->__2022__common;
    $C->{GR} = undef;
    return $C;
}

package Encode::ISO2022::EightBit::JISX0208IRVKanji8;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0208-1997-irv-kanji-8bit));

=item jisx0208-1997-irv-kanji-8bit

JIS X 0208:1997 7.2.2 8-bit code for IRV and Kanji.
(A subset of EUC-japan)

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects an 8-bit code
##   with ISO/IEC 646 IRV in G0 and JIS X 0208:1997 in G1.  Returns the
##   configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 8;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{B};        ## ISO/IEC 646 IRV
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94n}{'B@'};    ## JIS X 0208:1997
    return $C;
}

package Encode::ISO2022::EightBit::JISX0208LatinKanji7;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0208-1997-latin-kanji-7bit));

=item jisx0208-1997-latin-kanji-7bit

JIS X 0208:1997 7.3.1 7-bit code for Latin and Kanji

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects a 7-bit code
##   with the JIS X 0201 Latin set in G0 and JIS X 0208:1997 in G1.
##   Returns the configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 7;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{J};        ## JIS X 0201:1997 Latin set
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94n}{'B@'};    ## JIS X 0208:1997
    $C->{option}{Ginvoke_to_left} = [ 1, 1, 1, 1 ];
    $C->{option}{reset}{Ginvoke}  = 1;
    ## JIS X 0208:1997 does not specify this limitation.
    return $C;
}

## __2022_encode ($obj)
##   Same as the common configuration but with GR explicitly undefined.
##   NOTE(review): presumably so that nothing is invoked into GR when
##   producing 7-bit output -- confirm against Encode::ISO2022.
sub __2022_encode ($) {
    my ($self) = @_;
    my $C = $self->__2022__common;
    $C->{GR} = undef;
    return $C;
}

package Encode::ISO2022::EightBit::JISX0208LatinKanji8;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0208-1997-latin-kanji-8bit));

=item jisx0208-1997-latin-kanji-8bit

JIS X 0208:1997 7.3.2 8-bit code for Latin and Kanji

=cut

## The clause cited above used to read 7.2.2, which is the IRV and Kanji
## 8-bit code; the Latin and Kanji codes are 7.3.x (the 7-bit one is
## documented in this file as 7.3.1).

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects an 8-bit code
##   with the JIS X 0201 Latin set in G0 and JIS X 0208:1997 in G1.
##   Returns the configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 8;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{J};        ## JIS X 0201:1997 Latin set
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94n}{'B@'};    ## JIS X 0208:1997
    return $C;
}

package Encode::ISO2022::EightBit::JISX0213Kanji7;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0213-2000-kanji-7bit));

=item jisx0213-2000-kanji-7bit

JIS X 0213:2000 7.1.1 7-bit code for Kanji

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects a 7-bit code
##   with JIS X 0213 plane 1 in G0 and plane 2 in G1, and "\x22\x2E" in
##   plane 1 as the substitute for undefined characters.  Returns the
##   configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 7;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94n}{O};    ## plane 1
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94n}{P};    ## plane 2
    $C->{option}{Ginvoke_to_left} = [ 1, 1, 1, 1 ];
    $C->{option}{reset}{Ginvoke}  = 1;
    ## JIS X 0213:2000 does not specify this limitation.
    $C->{option}{undef_char}
        = [ "\x22\x2E", { type => 'G94n', charset => 'O' } ];
    return $C;
}

## __2022_encode ($obj)
##   Same as the common configuration but with GR explicitly undefined.
##   NOTE(review): presumably so that nothing is invoked into GR when
##   producing 7-bit output -- confirm against Encode::ISO2022.
sub __2022_encode ($) {
    my ($self) = @_;
    my $C = $self->__2022__common;
    $C->{GR} = undef;
    return $C;
}

package Encode::ISO2022::EightBit::JISX0213Kanji8;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0213-2000-kanji-8bit));

=item jisx0213-2000-kanji-8bit

JIS X 0213:2000 7.1.2 8-bit code for Kanji

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects an 8-bit code
##   with JIS X 0213 plane 1 in G0 and plane 2 in G1, and "\x22\x2E" in
##   plane 1 as the substitute for undefined characters.  Returns the
##   configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    ## This is the 8-bit code.  The value used to be 7, copied from the
    ## 7-bit variant; every other 8-bit package in this file sets 8.
    $C->{bit} = 8;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94n}{O};    ## plane 1
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94n}{P};    ## plane 2
    $C->{option}{undef_char}
        = [ "\x22\x2E", { type => 'G94n', charset => 'O' } ];
    return $C;
}

package Encode::ISO2022::EightBit::JISX0213IRVKanji7;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0213-2000-irv-kanji-7bit));

=item jisx0213-2000-irv-kanji-7bit

JIS X 0213:2000 7.2.1 7-bit code for IRV and Kanji

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects a 7-bit code
##   with ISO/IEC 646 IRV in G0, JIS X 0213 plane 1 in G1 and plane 2 in
##   G3, with the single-shift option enabled for index 3.  Returns the
##   configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 7;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{B};     ## ISO/IEC 646 IRV
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94n}{O};    ## plane 1
    $C->{G3}  = $Encode::ISO2022::CHARSET{G94n}{P};    ## plane 2
    $C->{option}{Ginvoke_by_single_shift}[3] = 1;
    $C->{option}{Ginvoke_to_left} = [ 1, 1, 1, 1 ];
    $C->{option}{reset}{Ginvoke}  = 1;
    ## JIS X 0213:2000 does not specify this limitation.
    $C->{option}{undef_char}
        = [ "\x22\x2E", { type => 'G94n', charset => 'O' } ];
    return $C;
}

## __2022_encode ($obj)
##   Same as the common configuration but with GR explicitly undefined.
##   NOTE(review): presumably so that nothing is invoked into GR when
##   producing 7-bit output -- confirm against Encode::ISO2022.
sub __2022_encode ($) {
    my ($self) = @_;
    my $C = $self->__2022__common;
    $C->{GR} = undef;
    return $C;
}

package Encode::ISO2022::EightBit::JISX0213IRVKanji8;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0213-2000-irv-kanji-8bit));

=item jisx0213-2000-irv-kanji-8bit

JIS X 0213:2000 7.2.2 8-bit code for IRV and Kanji.
(A subset of EUC-JISX0213)

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects an 8-bit code
##   with ISO/IEC 646 IRV in G0, JIS X 0213 plane 1 in G1 and plane 2 in
##   G3, with the single-shift option enabled for index 3 and
##   Ginvoke_to_left set for index 0 only.  Returns the configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 8;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{B};     ## ISO/IEC 646 IRV
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94n}{O};    ## plane 1
    $C->{G3}  = $Encode::ISO2022::CHARSET{G94n}{P};    ## plane 2
    $C->{option}{Ginvoke_by_single_shift}[3] = 1;
    $C->{option}{Ginvoke_to_left} = [ 1, 0, 0, 0 ];
    $C->{option}{undef_char}
        = [ "\x22\x2E", { type => 'G94n', charset => 'O' } ];
    return $C;
}

package Encode::ISO2022::EightBit::JISX0213LatinKanji7;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0213-2000-latin-kanji-7bit));

=item jisx0213-2000-latin-kanji-7bit

JIS X 0213:2000 7.3.1 7-bit code for Latin and Kanji

=cut

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects a 7-bit code
##   with the JIS X 0201 Latin set in G0, JIS X 0213 plane 1 in G1 and
##   plane 2 in G3, with the single-shift option enabled for index 3.
##   Returns the configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 7;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{J};     ## JIS X 0201:1997 Latin set
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94n}{O};    ## plane 1
    $C->{G3}  = $Encode::ISO2022::CHARSET{G94n}{P};    ## plane 2
    $C->{option}{Ginvoke_by_single_shift}[3] = 1;
    $C->{option}{Ginvoke_to_left} = [ 1, 1, 1, 1 ];
    $C->{option}{reset}{Ginvoke}  = 1;
    ## JIS X 0213:2000 does not specify this limitation.
    $C->{option}{undef_char}
        = [ "\x22\x2E", { type => 'G94n', charset => 'O' } ];
    return $C;
}

## __2022_encode ($obj)
##   Same as the common configuration but with GR explicitly undefined.
##   NOTE(review): presumably so that nothing is invoked into GR when
##   producing 7-bit output -- confirm against Encode::ISO2022.
sub __2022_encode ($) {
    my ($self) = @_;
    my $C = $self->__2022__common;
    $C->{GR} = undef;
    return $C;
}

package Encode::ISO2022::EightBit::JISX0213LatinKanji8;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define(qw(jisx0213-2000-latin-kanji-8bit));

=item jisx0213-2000-latin-kanji-8bit

JIS X 0213:2000 7.3.2 8-bit code for Latin and Kanji

=cut

## The clause cited above used to read 7.2.2, which is the IRV and Kanji
## 8-bit code; the Latin and Kanji codes are 7.3.x (the 7-bit one is
## documented in this file as 7.3.1).

## __2022__common ($obj)
##   Starts from the inherited configuration, then selects an 8-bit code
##   with the JIS X 0201 Latin set in G0, JIS X 0213 plane 1 in G1 and
##   plane 2 in G3, with the single-shift option enabled for index 3 and
##   Ginvoke_to_left set for index 0 only.  Returns the configuration.
sub __2022__common ($) {
    my ($self) = @_;
    my $C = $self->SUPER::__2022__common;
    $C->{bit} = 8;
    $C->{G0}  = $Encode::ISO2022::CHARSET{G94}{J};     ## JIS X 0201:1997 Latin set
    $C->{G1}  = $Encode::ISO2022::CHARSET{G94n}{O};    ## plane 1
    $C->{G3}  = $Encode::ISO2022::CHARSET{G94n}{P};    ## plane 2
    $C->{option}{Ginvoke_by_single_shift}[3] = 1;
    $C->{option}{Ginvoke_to_left} = [ 1, 0, 0, 0 ];
    $C->{option}{undef_char}
        = [ "\x22\x2E", { type => 'G94n', charset => 'O' } ];
    return $C;
}

1;
__END__

=back

Note that although other JISes such as JIS X 0212 and JIS X 9010
define ISO/IEC 2022-conforming coded character sets,
these standards do not define a complete coding system (they only define
sets to be used in an ISO/IEC 2022 environment), so this module
does not include those coded character sets.  (IETF RFC 1345
and IANAREG give charset names to coded character sets
consisting of such standards, but those are defined by RFC 1345,
not by JIS.  Such coded character sets should be implemented
in Encode::ISO2022::RFC1345.)

=head1 LICENSE

Copyright 2002 wakaba <w@suika.fam.cx>

This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.

=cut

# $Date: 2002/09/15 05:08:13 $
### JIS.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24