/[suikacvs]/perl/lib/Encode/ISO2022/JIS.pm
Suika

Contents of /perl/lib/Encode/ISO2022/JIS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Mon Sep 16 06:34:35 2002 UTC (22 years, 2 months ago) by wakaba
Branch: MAIN
2002-09-16  Wakaba <w@suika.fam.cx>

	* EightBit.pm: New module.
	* JIS.pm: New module.
	* SevenBit.pm: Don't invoke G1 to GR when encoding.

1 wakaba 1.1 =head1 NAME
2    
3     C<Encode::ISO2022::JIS> --- Encoder and decoder for ISO/IEC 2022
4     based encodings defined by JIS (Japanese Industrial Standards),
5     other than the RFC 1468 coded representation, the C<ISO-2022-JP-3>
6     coded representations and the C<EUC-JISX0213> coded representations
7    
8     =head1 ENCODINGS
9    
10     =over 4
11    
12     =cut
13    
14     require 5.7.3;
15     use strict;
16     package Encode::ISO2022::JIS;
17     use vars qw($VERSION);
18     $VERSION=do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
19     use base qw(Encode::Encoding);
20     require Encode::ISO2022;
21    
22     sub encode ($$;$) {
       ## Encoding entry point.  Arguments: the encoding object, the string
       ## to convert, and an optional CHECK flag.  The conversion is delegated
       ## to Encode::ISO2022::internal_to_iso2022, driven by the state object
       ## that $obj->__2022_encode builds.  Returns the converted string.
23     my ($obj, $str, $chk) = @_;
24     $_[1] = '' if $chk; ## @_ aliases the caller's variable, so a true CHECK empties the caller's source string
25     $str = &Encode::ISO2022::internal_to_iso2022 ($str, $obj->__2022_encode);
26     return $str;
27     }
28    
29     sub decode ($$;$) {
       ## Decoding entry point.  Arguments: the encoding object, the string
       ## to convert, and an optional CHECK flag.  Returns whatever
       ## Encode::ISO2022::iso2022_to_internal produces for $str with the
       ## state object that $obj->__2022_decode builds.
30     my ($obj, $str, $chk) = @_;
31     $_[1] = '' if $chk; ## @_ aliases the caller's variable, so a true CHECK empties the caller's source string
32     return &Encode::ISO2022::iso2022_to_internal ($str, $obj->__2022_decode);
33     }
34    
35     sub __2022__common ($) {
       ## Build the Encode::ISO2022 state object shared by encoder and decoder.
       ## The per-encoding packages later in this file call this through
       ## SUPER:: and then set {bit}, the G0..G3 sets and further options.
       ## Returns the state object.
36     my $C = Encode::ISO2022->new_object;
       ## Every designate_to default (and G94 final byte "B") is set to -1.
       ## NOTE(review): -1 presumably forbids designating that kind of set;
       ## confirm against Encode::ISO2022.
37     $C->{option}->{designate_to}->{C0}->{default} = -1;
38     $C->{option}->{designate_to}->{C1}->{default} = -1;
39     $C->{option}->{designate_to}->{G94}->{default} = -1;
40     $C->{option}->{designate_to}->{G94}->{B} = -1;
41     $C->{option}->{designate_to}->{G94n}->{default} = -1;
42     $C->{option}->{designate_to}->{G96}->{default} = -1;
43     $C->{option}->{designate_to}->{G96n}->{default} = -1;
44     $C->{G1} = $Encode::ISO2022::CHARSET{G94}->{"\x7E"}; ## empty
       ## Both reset options are switched off here; several 7-bit packages
       ## in this file set reset->Ginvoke back to 1.
45     $C->{option}->{reset}->{Gdesignation} = 0;
46     $C->{option}->{reset}->{Ginvoke} = 0;
       ## Default undef_char entry: bytes plus the set they belong to.
       ## Presumably used as the substitute for unrepresentable characters
       ## (TODO confirm); several packages in this file replace it.
47     $C->{option}->{undef_char} = ["\x22\x2E", ## GETA MARK
48     {type => 'G94n', charset => 'B', revision => '@'}];
49     $C;
50     }
51     sub __2022_encode ($) {
       ## State object passed to the encoder by encode().  In this base class
       ## it is just the common configuration; some packages in this file
       ## override this method to clear {GR} as well.
52     my $C = shift->__2022__common;
53     $C;
54     }
55     sub __2022_decode ($) {
       ## State object passed to the decoder by decode(): the common
       ## configuration of whichever package the method was invoked on.
56     my $C = shift->__2022__common;
57     $C;
58     }
59    
60     package Encode::ISO2022::EightBit::JISX0201Latin7;
       ## 7-bit Latin-only code.  encode/decode are inherited from
       ## Encode::ISO2022::JIS; only the state configuration is overridden.
61     use vars qw/@ISA/;
62     push @ISA, 'Encode::ISO2022::JIS';
       ## Register the encoding: the first name is the primary one, the
       ## remaining names are additional names for the same object.
63     __PACKAGE__->Define (qw/jisx0201-1997-latin-7bit JIS_C6220-1969-ro
64     iso-ir-14 jp ISO646-JP csISO14JISC6220ro/);
65    
66     =item jisx0201-1997-latin-7bit
67    
68     JIS X 0201:1997 6.1 7-bit code for Latin.
69     (Alias: C<JIS_C6220-1969-ro> (RFC 1345), C<iso-ir-14> (RFC 1345),
70     C<jp> (RFC 1345), C<ISO646-JP> (RFC 1345), C<csISO14JISC6220ro> (IANA))
71    
72     =cut
73    
74     sub __2022__common ($) {
       ## Start from the Encode::ISO2022::JIS configuration, then select
       ## 7-bit mode with the Latin set in G0.  Returns the state object.
75     my $C = shift->SUPER::__2022__common;
76     $C->{bit} = 7;
77     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin set
78     $C->{option}->{undef_char} = ["\x3F", {type => 'G94', charset => 'J'}]; ## replaces the inherited GETA MARK entry; 0x3F is "?"
79     $C;
80     }
81    
82     package Encode::ISO2022::EightBit::JISX0201Katakana7;
       ## 7-bit Katakana-only code.  encode/decode are inherited from
       ## Encode::ISO2022::JIS; only the state configuration is overridden.
83     use vars qw/@ISA/;
84     push @ISA, 'Encode::ISO2022::JIS';
       ## Register the encoding: the first name is the primary one, the
       ## remaining names are additional names for the same object.
85     __PACKAGE__->Define (qw/jisx0201-1997-katakana-7bit JIS_C6220-1969-jp JIS_C6220-1969
86     iso-ir-13 katakana x0201-7 csISO13JISC6220jp/);
87    
88     =item jisx0201-1997-katakana-7bit
89    
90     JIS X 0201:1997 6.2 7-bit code for Katakana
91     (Alias: JIS_C6220-1969-jp (RFC 1345), JIS_C6220-1969 (RFC 1345),
92     iso-ir-13 (RFC 1345), katakana (RFC 1345), x0201-7 (RFC 1345),
93     csISO13JISC6220jp (IANA))
94    
95     =cut
96    
97     sub __2022__common ($) {
       ## Start from the Encode::ISO2022::JIS configuration, then select
       ## 7-bit mode with the Katakana set in G0.  Returns the state object.
98     my $C = shift->SUPER::__2022__common;
99     $C->{bit} = 7;
100     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{I}; ## JIS X 0201:1997 Katakana set
101     $C->{option}->{undef_char} = ["\x25", {type => 'G94', charset => 'I'}]; ## replaces the inherited GETA MARK entry; 0x25 is presumably the katakana middle dot -- TODO confirm
102     $C;
103     }
104    
105     package Encode::ISO2022::EightBit::JISX0201LatinKatakana7;
        ## 7-bit code holding both the Latin and the Katakana set.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
106     use vars qw/@ISA/;
107     push @ISA, 'Encode::ISO2022::JIS';
        ## Register the encoding: the first name is the primary one, the
        ## remaining names are additional names for the same object.
108     __PACKAGE__->Define (qw/jisx0201-1997-latin-katakana-7bit
109     JIS_X0201 X0201 csHalfWidthKatakana/);
110    
111     =item jisx0201-1997-latin-katakana-7bit
112    
113     JIS X 0201:1997 6.3 7-bit code for Latin and Katakana
114     (Alias: JIS_X0201 (RFC 1345), X0201 (RFC 1345), csHalfWidthKatakana (IANA))
115    
116     =cut
117    
118     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 7-bit mode with Latin in G0 and Katakana in G1.
119     my $C = shift->SUPER::__2022__common;
120     $C->{bit} = 7;
121     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin set
122     $C->{G1} = $Encode::ISO2022::CHARSET{G94}->{I}; ## JIS X 0201:1997 Katakana set
        ## NOTE(review): the four flags presumably mean G0..G3 are all invoked
        ## into the left (GL) area -- confirm against Encode::ISO2022.
123     $C->{option}->{Ginvoke_to_left} = [1,1,1,1];
124     $C->{option}->{reset}->{Ginvoke} = 1; ## the base class sets this to 0
125     ## JIS X 0201:1997 does not specify this limitation.
126     $C->{option}->{undef_char} = ["\x3F", {type => 'G94', charset => 'J'}]; ## replaces the inherited GETA MARK entry; 0x3F is "?"
127     $C;
128     }
129     sub __2022_encode ($) {
        ## Encoder state: the common configuration with {GR} cleared.
130     my $C = shift->__2022__common;
131     $C->{GR} = undef;
132     $C;
133     }
134    
135     package Encode::ISO2022::EightBit::JISX0201LatinKatakana8;
        ## 8-bit code holding both the Latin and the Katakana set.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
136     use vars qw/@ISA/;
137     push @ISA, 'Encode::ISO2022::JIS';
        ## The documented name comes first.  The misspelled
        ## "latin-latin" name that used to be the only registered name is
        ## kept as an additional name so existing callers keep working.
138     __PACKAGE__->Define (qw/jisx0201-1997-latin-katakana-8bit jisx0201-1997-latin-latin-8bit/);
139    
140     =item jisx0201-1997-latin-katakana-8bit
141    
142     JIS X 0201:1997 6.4 8-bit code for Latin and Katakana
        (Alias: jisx0201-1997-latin-latin-8bit (misspelled name kept for
        compatibility with earlier revisions of this module))
143    
144     =cut
145    
146     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 8-bit mode with Latin in G0 and Katakana in G1.
147     my $C = shift->SUPER::__2022__common;
148     $C->{bit} = 8;
149     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin set
150     $C->{G1} = $Encode::ISO2022::CHARSET{G94}->{I}; ## JIS X 0201:1997 Katakana set
151     $C->{option}->{undef_char} = ["\x3F", {type => 'G94', charset => 'J'}]; ## replaces the inherited GETA MARK entry; 0x3F is "?"
152     $C;
153     }
154    
155     package Encode::ISO2022::EightBit::JISX0208Kanji7;
        ## 7-bit Kanji-only code.  encode/decode are inherited from
        ## Encode::ISO2022::JIS; only the state configuration is overridden.
156     use vars qw/@ISA/;
157     push @ISA, 'Encode::ISO2022::JIS';
158     __PACKAGE__->Define (qw/jisx0208-1997-kanji-7bit/); ## registers the single name of this encoding
159    
160     =item jisx0208-1997-kanji-7bit
161    
162     JIS X 0208:1997 7.1.1 7-bit code for Kanji
163    
164     =cut
165    
166     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 7-bit mode with the Kanji set in G0.  undef_char is left as set
        ## by the base class.
167     my $C = shift->SUPER::__2022__common;
168     $C->{bit} = 7;
169     $C->{G0} = $Encode::ISO2022::CHARSET{G94n}->{'B@'}; ## JIS X 0208:1997
170     $C;
171     }
172    
173     package Encode::ISO2022::EightBit::JISX0208Kanji8;
        ## 8-bit Kanji-only code.  encode/decode are inherited from
        ## Encode::ISO2022::JIS; only the state configuration is overridden.
174     use vars qw/@ISA/;
175     push @ISA, 'Encode::ISO2022::JIS';
176     __PACKAGE__->Define (qw/jisx0208-1997-kanji-8bit/); ## registers the single name of this encoding
177    
178     =item jisx0208-1997-kanji-8bit
179    
180     JIS X 0208:1997 7.1.2 8-bit code for Kanji
181    
182     =cut
183    
184     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 8-bit mode with the Kanji set in G0.  undef_char is left as set
        ## by the base class.
185     my $C = shift->SUPER::__2022__common;
186     $C->{bit} = 8;
187     $C->{G0} = $Encode::ISO2022::CHARSET{G94n}->{'B@'}; ## JIS X 0208:1997
188     $C->{G1} = $Encode::ISO2022::CHARSET{G94}->{"\x7E"}; ## empty
        ## (The base class already puts the same empty set in G1.)
        ## NOTE(review): presumably lets C1 controls occupy the right-hand
        ## area -- confirm against Encode::ISO2022.
189     $C->{option}->{C1invoke_to_right} = 1;
190     $C;
191     }
192    
193     package Encode::ISO2022::EightBit::JISX0208IRVKanji7;
        ## 7-bit code holding the ISO/IEC 646 IRV set and the Kanji set.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
194     use vars qw/@ISA/;
195     push @ISA, 'Encode::ISO2022::JIS';
196     __PACKAGE__->Define (qw/jisx0208-1997-irv-kanji-7bit/); ## registers the single name of this encoding
197    
198     =item jisx0208-1997-irv-kanji-7bit
199    
200     JIS X 0208:1997 7.2.1 7-bit code for IRV and Kanji
201    
202     =cut
203    
204     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 7-bit mode with IRV in G0 and Kanji in G1.  undef_char is left as
        ## set by the base class.
205     my $C = shift->SUPER::__2022__common;
206     $C->{bit} = 7;
207     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{B}; ## ISO/IEC 646 IRV
208     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{'B@'}; ## JIS X 0208:1997
        ## NOTE(review): the four flags presumably mean G0..G3 are all invoked
        ## into the left (GL) area -- confirm against Encode::ISO2022.
209     $C->{option}->{Ginvoke_to_left} = [1,1,1,1];
210     $C->{option}->{reset}->{Ginvoke} = 1; ## the base class sets this to 0
211     ## JIS X 0208:1997 does not specify this limitation.
212     $C;
213     }
214     sub __2022_encode ($) {
        ## Encoder state: the common configuration with {GR} cleared.
215     my $C = shift->__2022__common;
216     $C->{GR} = undef;
217     $C;
218     }
219    
220     package Encode::ISO2022::EightBit::JISX0208IRVKanji8;
        ## 8-bit code holding the ISO/IEC 646 IRV set and the Kanji set.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
221     use vars qw/@ISA/;
222     push @ISA, 'Encode::ISO2022::JIS';
223     __PACKAGE__->Define (qw/jisx0208-1997-irv-kanji-8bit/); ## registers the single name of this encoding
224    
225     =item jisx0208-1997-irv-kanji-8bit
226    
227     JIS X 0208:1997 7.2.2 8-bit code for IRV and Kanji.
228     (A subset of EUC-japan)
229    
230     =cut
231    
232     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 8-bit mode with IRV in G0 and Kanji in G1.  undef_char is left as
        ## set by the base class.
233     my $C = shift->SUPER::__2022__common;
234     $C->{bit} = 8;
235     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{B}; ## ISO/IEC 646 IRV
236     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{'B@'}; ## JIS X 0208:1997
237     $C;
238     }
239    
240     package Encode::ISO2022::EightBit::JISX0208LatinKanji7;
        ## 7-bit code holding the JIS X 0201 Latin set and the Kanji set.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
241     use vars qw/@ISA/;
242     push @ISA, 'Encode::ISO2022::JIS';
243     __PACKAGE__->Define (qw/jisx0208-1997-latin-kanji-7bit/); ## registers the single name of this encoding
244    
245     =item jisx0208-1997-latin-kanji-7bit
246    
247     JIS X 0208:1997 7.3.1 7-bit code for Latin and Kanji
248    
249     =cut
250    
251     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 7-bit mode with Latin in G0 and Kanji in G1.  undef_char is left
        ## as set by the base class.
252     my $C = shift->SUPER::__2022__common;
253     $C->{bit} = 7;
254     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin set
255     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{'B@'}; ## JIS X 0208:1997
        ## NOTE(review): the four flags presumably mean G0..G3 are all invoked
        ## into the left (GL) area -- confirm against Encode::ISO2022.
256     $C->{option}->{Ginvoke_to_left} = [1,1,1,1];
257     $C->{option}->{reset}->{Ginvoke} = 1; ## the base class sets this to 0
258     ## JIS X 0208:1997 does not specify this limitation.
259     $C;
260     }
261     sub __2022_encode ($) {
        ## Encoder state: the common configuration with {GR} cleared.
262     my $C = shift->__2022__common;
263     $C->{GR} = undef;
264     $C;
265     }
266    
267     package Encode::ISO2022::EightBit::JISX0208LatinKanji8;
        ## 8-bit code holding the JIS X 0201 Latin set and the Kanji set.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
268     use vars qw/@ISA/;
269     push @ISA, 'Encode::ISO2022::JIS';
270     __PACKAGE__->Define (qw/jisx0208-1997-latin-kanji-8bit/); ## registers the single name of this encoding
271    
272     =item jisx0208-1997-latin-kanji-8bit
273    
274     JIS X 0208:1997 7.3.2 8-bit code for Latin and Kanji
275    
276     =cut
277    
278     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 8-bit mode with Latin in G0 and Kanji in G1.  undef_char is left
        ## as set by the base class.
279     my $C = shift->SUPER::__2022__common;
280     $C->{bit} = 8;
281     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin set
282     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{'B@'}; ## JIS X 0208:1997
283     $C;
284     }
285    
286     package Encode::ISO2022::EightBit::JISX0213Kanji7;
        ## 7-bit code holding the two JIS X 0213 planes.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
287     use vars qw/@ISA/;
288     push @ISA, 'Encode::ISO2022::JIS';
289     __PACKAGE__->Define (qw/jisx0213-2000-kanji-7bit/); ## registers the single name of this encoding
290    
291     =item jisx0213-2000-kanji-7bit
292    
293     JIS X 0213:2000 7.1.1 7-bit code for Kanji
294    
295     =cut
296    
297     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 7-bit mode with plane 1 in G0 and plane 2 in G1.
298     my $C = shift->SUPER::__2022__common;
299     $C->{bit} = 7;
300     $C->{G0} = $Encode::ISO2022::CHARSET{G94n}->{O}; ## plane 1
301     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{P}; ## plane 2
        ## NOTE(review): the four flags presumably mean G0..G3 are all invoked
        ## into the left (GL) area -- confirm against Encode::ISO2022.
302     $C->{option}->{Ginvoke_to_left} = [1,1,1,1];
303     $C->{option}->{reset}->{Ginvoke} = 1; ## the base class sets this to 0
304     ## JIS X 0213:2000 does not specify this limitation.
305     $C->{option}->{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}]; ## same bytes as the base class entry, but tied to plane 1
306     $C;
307     }
308     sub __2022_encode ($) {
        ## Encoder state: the common configuration with {GR} cleared.
309     my $C = shift->__2022__common;
310     $C->{GR} = undef;
311     $C;
312     }
313    
314     package Encode::ISO2022::EightBit::JISX0213Kanji8;
        ## 8-bit code holding the two JIS X 0213 planes.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
315     use vars qw/@ISA/;
316     push @ISA, 'Encode::ISO2022::JIS';
317     __PACKAGE__->Define (qw/jisx0213-2000-kanji-8bit/); ## registers the single name of this encoding
318    
319     =item jisx0213-2000-kanji-8bit
320    
321     JIS X 0213:2000 7.1.2 8-bit code for Kanji
322    
323     =cut
324    
325     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 8-bit mode with plane 1 in G0 and plane 2 in G1.
326     my $C = shift->SUPER::__2022__common;
327     $C->{bit} = 8; ## this is the 8-bit code; the value 7 was copied from the 7-bit package
328     $C->{G0} = $Encode::ISO2022::CHARSET{G94n}->{O}; ## plane 1
329     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{P}; ## plane 2
330     $C->{option}->{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}]; ## same bytes as the base class entry, but tied to plane 1
331     $C;
332     }
333    
334     package Encode::ISO2022::EightBit::JISX0213IRVKanji7;
        ## 7-bit code holding ISO/IEC 646 IRV and the two JIS X 0213 planes.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
335     use vars qw/@ISA/;
336     push @ISA, 'Encode::ISO2022::JIS';
337     __PACKAGE__->Define (qw/jisx0213-2000-irv-kanji-7bit/); ## registers the single name of this encoding
338    
339     =item jisx0213-2000-irv-kanji-7bit
340    
341     JIS X 0213:2000 7.2.1 7-bit code for IRV and Kanji
342    
343     =cut
344    
345     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 7-bit mode with IRV in G0, plane 1 in G1 and plane 2 in G3.
346     my $C = shift->SUPER::__2022__common;
347     $C->{bit} = 7;
348     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{B}; ## ISO/IEC 646 IRV
349     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O}; ## plane 1
350     $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{P}; ## plane 2
351     $C->{option}->{Ginvoke_by_single_shift}->[3] = 1; ## index 3 matches G3, which holds plane 2
        ## NOTE(review): the four flags presumably mean G0..G3 are all invoked
        ## into the left (GL) area -- confirm against Encode::ISO2022.
352     $C->{option}->{Ginvoke_to_left} = [1,1,1,1];
353     $C->{option}->{reset}->{Ginvoke} = 1; ## the base class sets this to 0
354     ## JIS X 0213:2000 does not specify this limitation.
355     $C->{option}->{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}]; ## same bytes as the base class entry, but tied to plane 1
356     $C;
357     }
358     sub __2022_encode ($) {
        ## Encoder state: the common configuration with {GR} cleared.
359     my $C = shift->__2022__common;
360     $C->{GR} = undef;
361     $C;
362     }
363    
364     package Encode::ISO2022::EightBit::JISX0213IRVKanji8;
        ## 8-bit code holding ISO/IEC 646 IRV and the two JIS X 0213 planes.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
365     use vars qw/@ISA/;
366     push @ISA, 'Encode::ISO2022::JIS';
367     __PACKAGE__->Define (qw/jisx0213-2000-irv-kanji-8bit/); ## registers the single name of this encoding
368    
369     =item jisx0213-2000-irv-kanji-8bit
370    
371     JIS X 0213:2000 7.2.2 8-bit code for IRV and Kanji.
372     (A subset of EUC-JISX0213)
373    
374     =cut
375    
376     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 8-bit mode with IRV in G0, plane 1 in G1 and plane 2 in G3.
377     my $C = shift->SUPER::__2022__common;
378     $C->{bit} = 8;
379     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{B}; ## ISO/IEC 646 IRV
380     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O}; ## plane 1
381     $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{P}; ## plane 2
382     $C->{option}->{Ginvoke_by_single_shift}->[3] = 1; ## index 3 matches G3, which holds plane 2
        ## NOTE(review): presumably only G0 is invoked into the left (GL) area
        ## here, unlike the 7-bit variant's [1,1,1,1] -- confirm against
        ## Encode::ISO2022.
383     $C->{option}->{Ginvoke_to_left} = [1,0,0,0];
384     $C->{option}->{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}]; ## same bytes as the base class entry, but tied to plane 1
385     $C;
386     }
387    
388     package Encode::ISO2022::EightBit::JISX0213LatinKanji7;
        ## 7-bit code holding JIS X 0201 Latin and the two JIS X 0213 planes.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
389     use vars qw/@ISA/;
390     push @ISA, 'Encode::ISO2022::JIS';
391     __PACKAGE__->Define (qw/jisx0213-2000-latin-kanji-7bit/); ## registers the single name of this encoding
392    
393     =item jisx0213-2000-latin-kanji-7bit
394    
395     JIS X 0213:2000 7.3.1 7-bit code for Latin and Kanji
396    
397     =cut
398    
399     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 7-bit mode with Latin in G0, plane 1 in G1 and plane 2 in G3.
400     my $C = shift->SUPER::__2022__common;
401     $C->{bit} = 7;
402     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin set
403     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O}; ## plane 1
404     $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{P}; ## plane 2
405     $C->{option}->{Ginvoke_by_single_shift}->[3] = 1; ## index 3 matches G3, which holds plane 2
        ## NOTE(review): the four flags presumably mean G0..G3 are all invoked
        ## into the left (GL) area -- confirm against Encode::ISO2022.
406     $C->{option}->{Ginvoke_to_left} = [1,1,1,1];
407     $C->{option}->{reset}->{Ginvoke} = 1; ## the base class sets this to 0
408     ## JIS X 0213:2000 does not specify this limitation.
409     $C->{option}->{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}]; ## same bytes as the base class entry, but tied to plane 1
410     $C;
411     }
412     sub __2022_encode ($) {
        ## Encoder state: the common configuration with {GR} cleared.
413     my $C = shift->__2022__common;
414     $C->{GR} = undef;
415     $C;
416     }
417    
418     package Encode::ISO2022::EightBit::JISX0213LatinKanji8;
        ## 8-bit code holding JIS X 0201 Latin and the two JIS X 0213 planes.
        ## encode/decode are inherited from Encode::ISO2022::JIS.
419     use vars qw/@ISA/;
420     push @ISA, 'Encode::ISO2022::JIS';
421     __PACKAGE__->Define (qw/jisx0213-2000-latin-kanji-8bit/); ## registers the single name of this encoding
422    
423     =item jisx0213-2000-latin-kanji-8bit
424    
425     JIS X 0213:2000 7.3.2 8-bit code for Latin and Kanji
426    
427     =cut
428    
429     sub __2022__common ($) {
        ## Start from the Encode::ISO2022::JIS configuration, then select
        ## 8-bit mode with Latin in G0, plane 1 in G1 and plane 2 in G3.
430     my $C = shift->SUPER::__2022__common;
431     $C->{bit} = 8;
432     $C->{G0} = $Encode::ISO2022::CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin set
433     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O}; ## plane 1
434     $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{P}; ## plane 2
435     $C->{option}->{Ginvoke_by_single_shift}->[3] = 1; ## index 3 matches G3, which holds plane 2
        ## NOTE(review): presumably only G0 is invoked into the left (GL) area
        ## here, unlike the 7-bit variant's [1,1,1,1] -- confirm against
        ## Encode::ISO2022.
436     $C->{option}->{Ginvoke_to_left} = [1,0,0,0];
437     $C->{option}->{undef_char} = ["\x22\x2E", {type => 'G94n', charset => 'O'}]; ## same bytes as the base class entry, but tied to plane 1
438     $C;
439     }
440    
441     1;
442     __END__
443    
444     =back
445    
446     Note that although other JISes such as JIS X 0212 and JIS X 9010
447     define ISO/IEC 2022-conforming coded character sets,
448     these standards do not define a complete coding system (they only define
449     sets to be used in an ISO/IEC 2022 environment), so this module
450     does not include those coded character sets. (IETF RFC 1345
451     and IANAREG give charset names to coded character sets
452     consisting of such standards, but those are defined by RFC 1345,
453     not by JIS. Such coded character sets should be implemented
454     in Encode::ISO2022::RFC1345.)
455    
456     =head1 LICENSE
457    
458     Copyright 2002 wakaba <w@suika.fam.cx>
459    
460     This library is free software; you can redistribute it
461     and/or modify it under the same terms as Perl itself.
462    
463     =cut
464    
465     # $Date: 2002/09/15 05:08:13 $
466     ### JIS.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24