/[pub]/test/oldencodeutils/lib/Encode/ISO2022/JIS.pm
Suika

Contents of /test/oldencodeutils/lib/Encode/ISO2022/JIS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (show annotations) (download)
Fri Sep 20 14:01:45 2002 UTC (23 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.2: +3 -3 lines
2002-09-20  Wakaba <w@suika.fam.cx>

	* ISO2022.pm:
	- (iso2022_to_internal): New function.
	- (_iso2022_to_internal): Renamed from iso2022_to_internal.
	- (iso2022_to_internal): Experimental support of DOCS.
	- (internal_to_iso2022): Output in UCS coding systems
	if the character is unable to be encoded in ISO/IEC 2022
	coded character sets.
	- (_i2o): New procedure.
	- ($C->{option}->{designate_to}->{coding_system}): New option
	property object.
	- ($C->{coding_system}): New property.
	- (%CODING_SYSTEM): New hash.  (Alias to Encode::Charset's one.)
	* Charset.pm (make_initial_coding_system): Set 'reset_state'
	property with 1 value to coding systems of DOCS with 02/14 I byte.

1 =head1 NAME
2
3 C<Encode::ISO2022::JIS> --- Encoding and decoding of ISO/IEC 2022
4 based encodings defined by JIS (Japanese Industrial Standards),
5 other than the RFC 1468 coded representation, the C<ISO-2022-JP-3>
6 coded representations and the C<EUC-JISX0213> coded representations
7
8 =head1 ENCODINGS
9
10 =over 4
11
12 =cut
13
14 require 5.7.3;
15 use strict;
package Encode::ISO2022::JIS;
use vars qw($VERSION);
## Derived from the RCS Revision keyword: first number, a dot, then
## every following number zero-padded to two digits.
$VERSION = do {
  my @r = (q$Revision: 1.2 $ =~ /\d+/g);
  sprintf "%d." . "%02d" x $#r, @r;
};
use base qw(Encode::Encoding);
require Encode::ISO2022;

## encode ($obj, $str [, $chk])
##   Passes $str and the object returned by $obj->__2022_encode to
##   Encode::ISO2022::internal_to_iso2022 and returns its (scalar)
##   result.  When $chk is true, the caller's string argument is
##   replaced by the empty string before the conversion is done.
sub encode ($$;$) {
  my ($self, $text, $check) = @_;
  if ($check) {
    $_[1] = '';  ## written through the @_ alias, i.e. to the caller's variable
  }
  my $encoded = &Encode::ISO2022::internal_to_iso2022
                  ($text, $self->__2022_encode);
  return $encoded;
}

## decode ($obj, $str [, $chk])
##   Counterpart of encode: passes $str and the object returned by
##   $obj->__2022_decode to Encode::ISO2022::iso2022_to_internal and
##   returns whatever that function returns.  When $chk is true, the
##   caller's string argument is replaced by the empty string.
sub decode ($$;$) {
  my ($self, $octets, $check) = @_;
  if ($check) {
    $_[1] = '';  ## written through the @_ alias, i.e. to the caller's variable
  }
  return &Encode::ISO2022::iso2022_to_internal
           ($octets, $self->__2022_decode);
}

## __2022__common ($obj)
##   Creates a fresh object with Encode::ISO2022->new_object and fills
##   in the settings shared by all encodings of this module.  The
##   subclasses call this via SUPER and then override what differs.
##   NOTE(review): the exact meaning of each option is defined by
##   Encode::ISO2022, which is not visible from this file.
sub __2022__common ($) {
  my $conf = Encode::ISO2022->new_object;
  ## Same default (-1) for every kind of coded character set...
  for my $set_type (qw/C0 C1 G94 G94n G96 G96n/) {
    $conf->{option}{designate_to}{$set_type}{default} = -1;
  }
  ## ...and explicitly for the G94 set with final byte B as well.
  $conf->{option}{designate_to}{G94}{B} = -1;
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94}->{"\x7E"};  ## empty
  $conf->{option}{reset}{Gdesignation} = 0;
  $conf->{option}{reset}{Ginvoke} = 0;
  $conf->{option}{undef_char} = [
    "\x22\x2E",  ## GETA MARK
    {type => 'G94n', charset => 'B', revision => '@'},
  ];
  return $conf;
}

## __2022_encode ($obj)
##   Object handed to the encoder; here simply the common settings.
sub __2022_encode ($) {
  my ($self) = @_;
  my $conf = $self->__2022__common;
  return $conf;
}

## __2022_decode ($obj)
##   Object handed to the decoder; here simply the common settings.
sub __2022_decode ($) {
  my ($self) = @_;
  my $conf = $self->__2022__common;
  return $conf;
}
59
package Encode::ISO2022::JIS::JISX0201Latin7;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
## The first name is passed first to Define; the rest follow as aliases.
__PACKAGE__->Define (qw/
  jisx0201-1997-latin-7bit
  JIS_C6220-1969-ro iso-ir-14 jp ISO646-JP csISO14JISC6220ro
/);

=item jisx0201-1997-latin-7bit

JIS X 0201:1997 6.1 7-bit code for Latin.
(Alias: C<JIS_C6220-1969-ro> (RFC 1345), C<iso-ir-14> (RFC 1345),
C<jp> (RFC 1345), C<ISO646-JP> (RFC 1345), C<csISO14JISC6220ro> (IANA))

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 7,
##   puts the JIS X 0201 Latin set in G0 and replaces the
##   undefined-character substitute with "\x3F" of that same set.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 7;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{J};  ## JIS X 0201:1997 Latin set
  $conf->{option}{undef_char} = ["\x3F", {type => 'G94', charset => 'J'}];
  return $conf;
}
81
package Encode::ISO2022::JIS::JISX0201Katakana7;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
## The first name is passed first to Define; the rest follow as aliases.
__PACKAGE__->Define (qw/
  jisx0201-1997-katakana-7bit
  JIS_C6220-1969-jp JIS_C6220-1969 iso-ir-13 katakana x0201-7
  csISO13JISC6220jp
/);

=item jisx0201-1997-katakana-7bit

JIS X 0201:1997 6.2 7-bit code for Katakana
(Alias: JIS_C6220-1969-jp (RFC 1345), JIS_C6220-1969 (RFC 1345),
iso-ir-13 (RFC 1345), katakana (RFC 1345), x0201-7 (RFC 1345),
csISO13JISC6220jp (IANA))

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 7,
##   puts the JIS X 0201 Katakana set in G0 and replaces the
##   undefined-character substitute with "\x25" of that same set.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 7;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{I};  ## JIS X 0201:1997 Katakana set
  $conf->{option}{undef_char} = ["\x25", {type => 'G94', charset => 'I'}];
  return $conf;
}
104
package Encode::ISO2022::JIS::JISX0201LatinKatakana7;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
## The first name is passed first to Define; the rest follow as aliases.
__PACKAGE__->Define (qw/
  jisx0201-1997-latin-katakana-7bit
  JIS_X0201 X0201 csHalfWidthKatakana
/);

=item jisx0201-1997-latin-katakana-7bit

JIS X 0201:1997 6.3 7-bit code for Latin and Katakana
(Alias: JIS_X0201 (RFC 1345), X0201 (RFC 1345), csHalfWidthKatakana (IANA))

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 7
##   with the JIS X 0201 Latin set in G0 and the Katakana set in G1.
##   NOTE(review): Ginvoke_to_left and reset.Ginvoke are interpreted
##   by Encode::ISO2022; their precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 7;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{J};  ## JIS X 0201:1997 Latin set
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94}->{I};  ## JIS X 0201:1997 Katakana set
  $conf->{option}{Ginvoke_to_left} = [1, 1, 1, 1];
  $conf->{option}{reset}{Ginvoke} = 1;
    ## JIS X 0201:1997 does not specify this limitation.
  $conf->{option}{undef_char} = ["\x3F", {type => 'G94', charset => 'J'}];
  return $conf;
}

## __2022_encode ($obj)
##   Encoder-side settings: the common ones with GR set to undef.
sub __2022_encode ($) {
  my ($self) = @_;
  my $conf = $self->__2022__common;
  $conf->{GR} = undef;
  return $conf;
}
134
package Encode::ISO2022::JIS::JISX0201LatinKatakana8;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
## The documented name comes first.  The original code registered only
## the misspelled "latin-latin" name; it is kept as a trailing alias so
## that existing callers using it continue to find this encoding.
__PACKAGE__->Define (qw/
  jisx0201-1997-latin-katakana-8bit
  jisx0201-1997-latin-latin-8bit
/);

=item jisx0201-1997-latin-katakana-8bit

JIS X 0201:1997 7.4 8-bit code for Latin and Katakana.
(Alias: C<jisx0201-1997-latin-latin-8bit>, a historical misspelling
kept for compatibility)

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 8
##   with the JIS X 0201 Latin set in G0 and the Katakana set in G1,
##   and uses "\x3F" of the Latin set as undefined-character substitute.
##   NOTE(review): C1invoke_to_right is interpreted by Encode::ISO2022;
##   its precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 8;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{J};  ## JIS X 0201:1997 Latin set
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94}->{I};  ## JIS X 0201:1997 Katakana set
  $conf->{option}->{undef_char} = ["\x3F", {type => 'G94', charset => 'J'}];
  $conf->{option}->{C1invoke_to_right} = 1;
  return $conf;
}
155
package Encode::ISO2022::JIS::JISX0208Kanji7;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx0208-1997-kanji-7bit');

=item jisx0208-1997-kanji-7bit

JIS X 0208:1997 7.1.1 7-bit code for Kanji

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 7
##   and puts the JIS X 0208:1997 set in G0.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 7;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94n}->{'B@'};  ## JIS X 0208:1997
  return $conf;
}
173
package Encode::ISO2022::JIS::JISX0208Kanji8;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx0208-1997-kanji-8bit');

=item jisx0208-1997-kanji-8bit

JIS X 0208:1997 7.1.2 8-bit code for Kanji

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 8
##   with the JIS X 0208:1997 set in G0 and the empty set in G1.
##   NOTE(review): C1invoke_to_right is interpreted by Encode::ISO2022;
##   its precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 8;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94n}->{'B@'};  ## JIS X 0208:1997
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94}->{"\x7E"};  ## empty
  $conf->{option}{C1invoke_to_right} = 1;
  return $conf;
}
193
package Encode::ISO2022::JIS::JISX0208IRVKanji7;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx0208-1997-irv-kanji-7bit');

=item jisx0208-1997-irv-kanji-7bit

JIS X 0208:1997 7.2.1 7-bit code for IRV and Kanji

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 7
##   with ISO/IEC 646 IRV in G0 and the JIS X 0208:1997 set in G1.
##   NOTE(review): Ginvoke_to_left and reset.Ginvoke are interpreted
##   by Encode::ISO2022; their precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 7;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{B};  ## ISO/IEC 646 IRV
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{'B@'};  ## JIS X 0208:1997
  $conf->{option}{Ginvoke_to_left} = [1, 1, 1, 1];
  $conf->{option}{reset}{Ginvoke} = 1;
    ## JIS X 0208:1997 does not specify this limitation.
  return $conf;
}

## __2022_encode ($obj)
##   Encoder-side settings: the common ones with GR set to undef.
sub __2022_encode ($) {
  my ($self) = @_;
  my $conf = $self->__2022__common;
  $conf->{GR} = undef;
  return $conf;
}
220
package Encode::ISO2022::JIS::JISX0208IRVKanji8;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx0208-1997-irv-kanji-8bit');

=item jisx0208-1997-irv-kanji-8bit

JIS X 0208:1997 7.2.2 8-bit code for IRV and Kanji.
(A subset of EUC-japan)

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 8
##   with ISO/IEC 646 IRV in G0 and the JIS X 0208:1997 set in G1.
##   NOTE(review): C1invoke_to_right is interpreted by Encode::ISO2022;
##   its precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 8;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{B};  ## ISO/IEC 646 IRV
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{'B@'};  ## JIS X 0208:1997
  $conf->{option}{C1invoke_to_right} = 1;
  return $conf;
}
241
package Encode::ISO2022::JIS::JISX0208LatinKanji7;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx0208-1997-latin-kanji-7bit');

=item jisx0208-1997-latin-kanji-7bit

JIS X 0208:1997 7.3.1 7-bit code for Latin and Kanji

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 7
##   with the JIS X 0201 Latin set in G0 and JIS X 0208:1997 in G1.
##   NOTE(review): Ginvoke_to_left and reset.Ginvoke are interpreted
##   by Encode::ISO2022; their precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 7;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{J};  ## JIS X 0201:1997 Latin set
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{'B@'};  ## JIS X 0208:1997
  $conf->{option}{Ginvoke_to_left} = [1, 1, 1, 1];
  $conf->{option}{reset}{Ginvoke} = 1;
    ## JIS X 0208:1997 does not specify this limitation.
  return $conf;
}

## __2022_encode ($obj)
##   Encoder-side settings: the common ones with GR set to undef.
sub __2022_encode ($) {
  my ($self) = @_;
  my $conf = $self->__2022__common;
  $conf->{GR} = undef;
  return $conf;
}
268
package Encode::ISO2022::JIS::JISX0208LatinKanji8;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx0208-1997-latin-kanji-8bit');

=item jisx0208-1997-latin-kanji-8bit

JIS X 0208:1997 7.3.2 8-bit code for Latin and Kanji

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 8
##   with the JIS X 0201 Latin set in G0 and JIS X 0208:1997 in G1.
##   NOTE(review): C1invoke_to_right is interpreted by Encode::ISO2022;
##   its precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 8;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{J};  ## JIS X 0201:1997 Latin set
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{'B@'};  ## JIS X 0208:1997
  $conf->{option}{C1invoke_to_right} = 1;
  return $conf;
}
288
package Encode::ISO2022::JIS::JISX0213Kanji7;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx0213-2000-kanji-7bit');

=item jisx0213-2000-kanji-7bit

JIS X 0213:2000 7.1.1 7-bit code for Kanji

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 7
##   with JIS X 0213 plane 1 in G0 and plane 2 in G1, and takes the
##   undefined-character substitute "\x22\x2E" from plane 1.
##   NOTE(review): Ginvoke_to_left and reset.Ginvoke are interpreted
##   by Encode::ISO2022; their precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 7;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94n}->{O};  ## plane 1
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{P};  ## plane 2
  $conf->{option}{Ginvoke_to_left} = [1, 1, 1, 1];
  $conf->{option}{reset}{Ginvoke} = 1;
    ## JIS X 0213:2000 does not specify this limitation.
  $conf->{option}{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}];
  return $conf;
}

## __2022_encode ($obj)
##   Encoder-side settings: the common ones with GR set to undef.
sub __2022_encode ($) {
  my ($self) = @_;
  my $conf = $self->__2022__common;
  $conf->{GR} = undef;
  return $conf;
}
316
package Encode::ISO2022::JIS::JISX0213Kanji8;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define (qw/jisx0213-2000-kanji-8bit/);

=item jisx0213-2000-kanji-8bit

JIS X 0213:2000 7.1.2 8-bit code for Kanji

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 8
##   with JIS X 0213 plane 1 in G0 and plane 2 in G1, and takes the
##   undefined-character substitute "\x22\x2E" from plane 1.
##   NOTE(review): C1invoke_to_right is interpreted by Encode::ISO2022;
##   its precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  ## This is the 8-bit code; the value was 7 before, unlike every
  ## other 8-bit encoding of this module, which all set 8.
  $conf->{bit} = 8;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94n}->{O};  ## plane 1
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{P};  ## plane 2
  $conf->{option}->{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}];
  $conf->{option}->{C1invoke_to_right} = 1;
  return $conf;
}
337
package Encode::ISO2022::JIS::JISX0213IRVKanji7;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx0213-2000-irv-kanji-7bit');

=item jisx0213-2000-irv-kanji-7bit

JIS X 0213:2000 7.2.1 7-bit code for IRV and Kanji

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 7
##   with ISO/IEC 646 IRV in G0, JIS X 0213 plane 1 in G1 and plane 2
##   in G3, and takes the undefined-character substitute "\x22\x2E"
##   from plane 1.
##   NOTE(review): Ginvoke_by_single_shift, Ginvoke_to_left and
##   reset.Ginvoke are interpreted by Encode::ISO2022; their precise
##   effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 7;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{B};  ## ISO/IEC 646 IRV
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O};  ## plane 1
  $conf->{G3} = $Encode::ISO2022::CHARSET{G94n}->{P};  ## plane 2
  $conf->{option}{Ginvoke_by_single_shift}[3] = 1;
  $conf->{option}{Ginvoke_to_left} = [1, 1, 1, 1];
  $conf->{option}{reset}{Ginvoke} = 1;
    ## JIS X 0213:2000 does not specify this limitation.
  $conf->{option}{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}];
  return $conf;
}

## __2022_encode ($obj)
##   Encoder-side settings: the common ones with GR set to undef.
sub __2022_encode ($) {
  my ($self) = @_;
  my $conf = $self->__2022__common;
  $conf->{GR} = undef;
  return $conf;
}
367
package Encode::ISO2022::JIS::JISX0213IRVKanji8;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx0213-2000-irv-kanji-8bit');

=item jisx0213-2000-irv-kanji-8bit

JIS X 0213:2000 7.2.2 8-bit code for IRV and Kanji.
(A subset of EUC-JISX0213)

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 8
##   with ISO/IEC 646 IRV in G0, JIS X 0213 plane 1 in G1 and plane 2
##   in G3, and takes the undefined-character substitute "\x22\x2E"
##   from plane 1.
##   NOTE(review): Ginvoke_by_single_shift, Ginvoke_to_left and
##   C1invoke_to_right are interpreted by Encode::ISO2022; their
##   precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 8;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{B};  ## ISO/IEC 646 IRV
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O};  ## plane 1
  $conf->{G3} = $Encode::ISO2022::CHARSET{G94n}->{P};  ## plane 2
  $conf->{option}{Ginvoke_by_single_shift}[3] = 1;
  $conf->{option}{Ginvoke_to_left} = [1, 0, 0, 0];
  $conf->{option}{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}];
  $conf->{option}{C1invoke_to_right} = 1;
  return $conf;
}
392
package Encode::ISO2022::JIS::JISX0213LatinKanji7;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx0213-2000-latin-kanji-7bit');

=item jisx0213-2000-latin-kanji-7bit

JIS X 0213:2000 7.3.1 7-bit code for Latin and Kanji

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 7
##   with the JIS X 0201 Latin set in G0, JIS X 0213 plane 1 in G1 and
##   plane 2 in G3, and takes the undefined-character substitute
##   "\x22\x2E" from plane 1.
##   NOTE(review): Ginvoke_by_single_shift, Ginvoke_to_left and
##   reset.Ginvoke are interpreted by Encode::ISO2022; their precise
##   effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 7;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{J};  ## JIS X 0201:1997 Latin set
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O};  ## plane 1
  $conf->{G3} = $Encode::ISO2022::CHARSET{G94n}->{P};  ## plane 2
  $conf->{option}{Ginvoke_by_single_shift}[3] = 1;
  $conf->{option}{Ginvoke_to_left} = [1, 1, 1, 1];
  $conf->{option}{reset}{Ginvoke} = 1;
    ## JIS X 0213:2000 does not specify this limitation.
  $conf->{option}{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}];
  return $conf;
}

## __2022_encode ($obj)
##   Encoder-side settings: the common ones with GR set to undef.
sub __2022_encode ($) {
  my ($self) = @_;
  my $conf = $self->__2022__common;
  $conf->{GR} = undef;
  return $conf;
}
422
package Encode::ISO2022::JIS::JISX0213LatinKanji8;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx0213-2000-latin-kanji-8bit');

=item jisx0213-2000-latin-kanji-8bit

JIS X 0213:2000 7.3.2 8-bit code for Latin and Kanji

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 8
##   with the JIS X 0201 Latin set in G0, JIS X 0213 plane 1 in G1 and
##   plane 2 in G3, and takes the undefined-character substitute
##   "\x22\x2E" from plane 1.
##   NOTE(review): Ginvoke_by_single_shift, Ginvoke_to_left and
##   C1invoke_to_right are interpreted by Encode::ISO2022; their
##   precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 8;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94}->{J};  ## JIS X 0201:1997 Latin set
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O};  ## plane 1
  $conf->{G3} = $Encode::ISO2022::CHARSET{G94n}->{P};  ## plane 2
  $conf->{option}{Ginvoke_by_single_shift}[3] = 1;
  $conf->{option}{Ginvoke_to_left} = [1, 0, 0, 0];
  $conf->{option}{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}];
  $conf->{option}{C1invoke_to_right} = 1;
  return $conf;
}
446
package Encode::ISO2022::JIS::JISX4001Text7;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS';
__PACKAGE__->Define ('jisx4001-text-7bit');

=item jisx4001-text-7bit

JIS X 4001-1989 text (7-bit code)

=cut

## __2022__common ($obj)
##   Starts from the parent class's settings, then selects bit = 7,
##   puts JIS X 0208-1983 in G0 and sets the designate_to entries of
##   JIS X 0201 Roman and JIS X 0208-1983 to 0 (the parent class's
##   defaults are -1).
##   NOTE(review): the meaning of the designate_to values is defined
##   by Encode::ISO2022 and is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{bit} = 7;
  $conf->{G0} = $Encode::ISO2022::CHARSET{G94n}->{B};  ## JIS X 0208-1983
  $conf->{option}{designate_to}{G94}{J} = 0;  ## JIS X 0201 Roman
  $conf->{option}{designate_to}{G94n}{B} = 0;  ## JIS X 0208-1983
  return $conf;
}
466
package Encode::ISO2022::JIS::JISX4001Text8;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::JIS::JISX4001Text7';
__PACKAGE__->Define (qw/jisx4001-text-8bit/);

=item jisx4001-text-8bit

JIS X 4001-1989 text (8-bit code)

=cut

## __2022__common ($obj)
##   Starts from the settings of the 7-bit variant (the parent class)
##   and turns them into the 8-bit ones.
##   NOTE(review): C1invoke_to_right is interpreted by Encode::ISO2022;
##   its precise effect is not visible here.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  ## The parent class already sets 7; the value was re-set to 7 here
  ## before, which left this "8-bit" encoding identical in width to
  ## the 7-bit one.  Every other 8-bit encoding of this module uses 8.
  $conf->{bit} = 8;
  $conf->{option}->{C1invoke_to_right} = 1;
  return $conf;
}
484
485 1;
486 __END__
487
488 =back
489
490 Note that although other JISes such as JIS X 0212 and JIS X 9010
491 define ISO/IEC 2022-conforming coded character sets,
492 these standards do not define a complete coding system (they only
493 define sets to be used in an ISO/IEC 2022 environment), so this module
494 does not include those coded character sets. (IETF RFC 1345
495 and IANAREG give charset names to coded character sets
496 consisting of such standards. But those are defined by RFC 1345,
497 not by JIS. Such coded character sets should be implemented
498 in Encode::ISO2022::RFC1345.)
499
500 =head1 LICENSE
501
502 Copyright 2002 Wakaba <w@suika.fam.cx>
503
504 This library is free software; you can redistribute it
505 and/or modify it under the same terms as Perl itself.
506
507 =cut
508
509 # $Date: 2002/09/16 11:00:41 $
510 ### JIS.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24