/[suikacvs]/perl/lib/Encode/ISO2022/EightBit.pm
Suika

Contents of /perl/lib/Encode/ISO2022/EightBit.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download)
Fri Sep 20 14:01:45 2002 UTC (22 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +15 -3 lines
2002-09-20  Wakaba <w@suika.fam.cx>

	* ISO2022.pm:
	- (iso2022_to_internal): New function.
	- (_iso2022_to_internal): Renamed from iso2022_to_internal.
	- (iso2022_to_internal): Experimental support of DOCS.
	- (internal_to_iso2022): Output in UCS coding systems
	if the character is unable to be encoded in ISO/IEC 2022
	coded character sets.
	- (_i2o): New procedure.
	- ($C->{option}->{designate_to}->{coding_system}): New option
	property object.
	- ($C->{coding_system}): New property.
	- (%CODING_SYSTEM): New hash.  (Alias to Encode::Charset's one.)
	* Charset.pm (make_initial_coding_system): Set 'reset_state'
	property with 1 value to coding systems of DOCS with 02/14 I byte.

1 wakaba 1.1 =head1 NAME
2    
3     Encode::ISO2022::EightBit --- Encode and decode of 8-bit ISO/IEC 2022
4     based encodings (most of them are also known as EUCs)
5    
6     =head1 ENCODINGS
7    
8     =over 4
9    
10     =cut
11    
12     require 5.7.3;
13     use strict;
14     package Encode::ISO2022::EightBit;
15     use vars qw($VERSION);
16 wakaba 1.2 $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
17 wakaba 1.1 use base qw(Encode::Encoding);
18     require Encode::ISO2022;
19    
20     sub encode ($$;$) {  ## Encode::Encoding-style encoder: ($obj, $str [, $chk]) -> result of internal_to_iso2022
21     my ($obj, $str, $chk) = @_;
22     $_[1] = '' if $chk;  ## @_ aliases the caller's argument: a true $chk empties the caller's source string
23     $str = &Encode::ISO2022::internal_to_iso2022 ($str, $obj->__2022_encode);  ## convert using the state object built by this class's __2022_encode
24     return $str;
25     }
26    
27     sub decode ($$;$) {  ## Encode::Encoding-style decoder: ($obj, $str [, $chk]) -> result of iso2022_to_internal
28     my ($obj, $str, $chk) = @_;
29     $_[1] = '' if $chk;  ## @_ aliases the caller's argument: a true $chk empties the caller's source string
30     return &Encode::ISO2022::iso2022_to_internal ($str, $obj->__2022_decode);  ## convert using the state object built by this class's __2022_decode
31     }
32    
33     ## prototype for EUCs
34    
35     sub __2022__common ($) {  ## Build the Encode::ISO2022 state object used as the base by the EUC subclasses; returns it
36     my $C = Encode::ISO2022->new_object;
37     $C->{bit} = 8;
38     $C->{option}->{designate_to}->{C0}->{default} = -1;  ## NOTE(review): -1 presumably means "designation not permitted" -- confirm in Encode::ISO2022
39     $C->{option}->{designate_to}->{C0}->{"\x40"} = 0;  ## the "\x40" entry overrides the C0 default with 0
40     $C->{option}->{designate_to}->{C1}->{default} = -1;
41     $C->{option}->{designate_to}->{G94}->{default} = -1;
42     $C->{option}->{designate_to}->{G94n}->{default} = -1;
43     $C->{option}->{designate_to}->{G96}->{default} = -1;
44     $C->{option}->{designate_to}->{G96n}->{default} = -1;
45     $C->{option}->{Ginvoke_to_left} = [1,0,0,0];  ## four flags, presumably one per G0..G3 -- TODO confirm
46     $C->{option}->{Ginvoke_by_single_shift} = [0,0,1,1];  ## set for the last two elements only (presumably G2 and G3 -- TODO confirm)
47     $C->{option}->{C1invoke_to_right} = 1;
48     $C->{option}->{reset}->{Gdesignation} = 0;
49     $C->{option}->{reset}->{Ginvoke} = 0;
50     $C;  ## return value: the configured object
51     }
52     sub __2022_encode ($) {  ## State object for encode(): the __2022__common object, unmodified
53     my $C = shift->__2022__common;  ## method call on the invocant, so a subclass's __2022__common is used
54     $C;
55     }
56     sub __2022_decode ($) {  ## State object for decode(): the __2022__common object, unmodified (CompoundText overrides this)
57     my $C = shift->__2022__common;  ## method call on the invocant, so a subclass's __2022__common is used
58     $C;
59     }
60    
61     package Encode::ISO2022::EightBit::EUCJapanOld;
62     use vars qw/@ISA/;
63     push @ISA, 'Encode::ISO2022::EightBit';
64     __PACKAGE__->Define (qw/ujis x-ujis deckanji/);
65    
66     =item ujis
67    
68     EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese,
69     old version (pre-1990). (Alias: x-ujis, deckanji)
70    
71     =cut
72    
73     sub __2022__common ($) {  ## ujis: parent (EightBit) settings plus the G1..G3 sets below
74     my $C = shift->SUPER::__2022__common;
75     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{B}; ## JIS X 0208-1983
76     $C->{G2} = $Encode::ISO2022::CHARSET{G94}->{I}; ## JIS X 0201 Katakana
77     $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{' @'}; ## Gaiji (undefined)
78     $C;
79     }
80    
81     package Encode::ISO2022::EightBit::EUCJapan;
82     use vars qw/@ISA/;
83     push @ISA, 'Encode::ISO2022::EightBit';
84     __PACKAGE__->Define (qw/euc-japan euc-japan-1990 euc-jp eucjp euc_jp x-euc-jp
85     Extended_UNIX_Code_Packed_Format_for_Japanese csEUCPkdFmtJapanese
86     japanese-iso-8bit cp51932 japanese_euc
87     ajec eucjp-open ibm-eucjp cp33722 33722 sdeckanji/);
88    
89     =item euc-japan
90    
91     EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese.
92     (Alias: euc-japan-1990 (emacsen), euc-jp (IANA),
93     eucjp (locale), euc_jp, x-euc-jp,
94     extended_unix_code_packed_format_for_japanese (IANA),
95     cseucpkdfmtjapanese (IANA), japanese-iso-8bit (emacsen),
96     cp51932 (M$), japanese_euc, ajec, eucjp-open, ibm-eucjp, cp33722, 33722, sdeckanji)
97    
98     =cut
99    
100     sub __2022__common ($) {  ## euc-japan: parent (EightBit) settings plus the G1..G3 sets below
101     my $C = shift->SUPER::__2022__common;
102     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{'B@'}; ## JIS X 0208-1990
103     $C->{G2} = $Encode::ISO2022::CHARSET{G94}->{I}; ## JIS X 0201 Katakana
104     $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{D}; ## JIS X 0212-1990
105     $C;
106     }
107    
108     package Encode::ISO2022::EightBit::EUCJISX0213;
109     use vars qw/@ISA/;
110     push @ISA, 'Encode::ISO2022::EightBit';
111     __PACKAGE__->Define (qw/euc-jisx0213 x-euc-jisx0213
112     x-euc-jisx0213-packed deckanji2000/);
113    
114     =item euc-jisx0213
115    
116     EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
117     with JIS X 0213:2000, defined by JIS X 0213:2000.
118     (Alias: x-euc-jisx0213, x-euc-jisx0213-packed, deckanji2000)
119    
120     =cut
121    
122     sub __2022__common ($) {  ## euc-jisx0213: parent (EightBit) settings plus the G1..G3 sets below
123     my $C = shift->SUPER::__2022__common;
124     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O}; ## JIS X 0213:2000 plane 1
125     $C->{G2} = $Encode::ISO2022::CHARSET{G94}->{I}; ## JIS X 0201 Katakana
126     $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{P}; ## JIS X 0213:2000 plane 2
127     $C;
128     }
129    
130     package Encode::ISO2022::EightBit::EUCJISX0213Plane1;
131     use vars qw/@ISA/;
132     push @ISA, 'Encode::ISO2022::EightBit';
133     __PACKAGE__->Define (qw/euc-jisx0213-plane1/);
134    
135     =item euc-jisx0213-plane1
136    
137     EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
138     with JIS X 0213:2000 plane 1, defined by JIS X 0213:2000
139    
140     =cut
141    
142     sub __2022__common ($) {  ## euc-jisx0213-plane1: parent (EightBit) settings with G3 replaced by the set below
143     my $C = shift->SUPER::__2022__common;  ## NOTE(review): @ISA is the generic EightBit, so G1/G2 are not set to JIS X 0213 plane 1 / JIS X 0201 Katakana here -- confirm this is intended
144     $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{"\x7E"}; ## empty
145     $C;
146     }
147    
148     package Encode::ISO2022::EightBit::EUCCHINA;
149     use vars qw/@ISA/;
150     push @ISA, 'Encode::ISO2022::EightBit';
151     __PACKAGE__->Define (qw/euc-china euc-cn cn-gb cn-gb-2312 chinese-iso-8bit
152     gb2312 csgb2312 x-euc-cn cp51936 ibm-euccn CP1383 1383/);
153    
154     =item euc-china
155    
156     EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese.
157     (Alias: euc-cn (emacsen), cn-gb (RFC 1922), cn-gb-2312 (RFC 1922),
158     chinese-iso-8bit (emacsen), gb2312 (IANA), csgb2312 (IANA),
159     x-euc-cn, CP51936 (M$), ibm-euccn, CP1383, 1383)
160    
161     =cut
162    
163     sub __2022__common ($) {  ## euc-china: parent (EightBit) settings plus the G1 set below
164     my $C = shift->SUPER::__2022__common;
165     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{A}; ## GB 2312
166     $C;
167     }
168    
169     package Encode::ISO2022::EightBit::EUCCHINA165;
170     use vars qw/@ISA/;
171     push @ISA, 'Encode::ISO2022::EightBit';
172     __PACKAGE__->Define (qw/cn-gb-isoir165/);
173    
174     =item cn-gb-isoir165
175    
176     EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
177     with ISO-IR 165. (Name: cn-gb-isoir165 (RFC 1922))
178    
179     =cut
180    
181     sub __2022__common ($) {  ## cn-gb-isoir165: parent (EightBit) settings plus the G1 set below
182     my $C = shift->SUPER::__2022__common;
183     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{"\x45"}; ## ISO-IR 165
184     $C;
185     }
186    
187     package Encode::ISO2022::EightBit::EUCKorea;
188     use vars qw/@ISA/;
189     push @ISA, 'Encode::ISO2022::EightBit';
190     __PACKAGE__->Define (qw/euc-korea euc-kr cp970 cp51949 ibm-euckr x-euc-kr
191     cseuckr korean-iso-8bit/);
192    
193     =item euc-korea
194    
195     EUC (ISO/IEC 2022 based 8-bit encoding) for Korean
196     (Alias: euc-kr (IANA), cp970, cp51949 (M$), ibm-euckr,
197     x-euc-kr, cseuckr (IANA), korean-iso-8bit (emacsen))
198    
199     =cut
200    
201     sub __2022__common ($) {  ## euc-korea: parent (EightBit) settings plus the G1 set below
202     my $C = shift->SUPER::__2022__common;
203     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{C}; ## KS X 1001
204     $C;
205     }
206    
207     package Encode::ISO2022::EightBit::EUCTaiwan;
208     use vars qw/@ISA/;
209     push @ISA, 'Encode::ISO2022::EightBit';
210     __PACKAGE__->Define (qw/euc-taiwan euc-tw x-euc-tw cns11643 ibm-euctw cp964/);
211    
212     =item euc-taiwan
213    
214     EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
215     with CNS 11643. (Alias: euc-tw, x-euc-tw, cns11643, ibm-euctw, cp964)
216    
217     =cut
218    
219     sub __2022__common ($) {  ## euc-taiwan: parent (EightBit) settings plus the G1..G3 sets below
220     my $C = shift->SUPER::__2022__common;
221     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{G}; ## CNS 11643 plane 1
222     $C->{G2} = $Encode::ISO2022::CHARSET{G94n}->{H}; ## CNS 11643 plane 2
223     #$C->{G3} = ## BUG: planes 3-16 are not supported yet
224     $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{' `'}; # 3byte DRCS (temporary stand-in for the unsupported planes)
225     $C;
226     }
227    
228     package Encode::ISO2022::EightBit::EUCKPS9566;
229     use vars qw/@ISA/;
230     push @ISA, 'Encode::ISO2022::EightBit';
231     __PACKAGE__->Define (qw/euc-kps9566/);
232    
233     =item euc-kps9566
234    
235     EUC (ISO/IEC 2022 based 8-bit encoding) for Korean
236     with KPS 9566-97
237    
238     =cut
239    
240     sub __2022__common ($) {  ## euc-kps9566: parent (EightBit) settings plus the G1 set below
241     my $C = shift->SUPER::__2022__common;
242     $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{N}; ## KPS 9566-97
243     $C;
244     }
245    
246     package Encode::ISO2022::EightBit::SS2;
247     use vars qw/@ISA/;
248     push @ISA, 'Encode::ISO2022::EightBit';
249     __PACKAGE__->Define (qw/iso-2022-8bit-ss2/);
250    
251     =item iso-2022-8bit-ss2
252    
253     ISO/IEC 2022 based 8-bit encoding using SS2 for 96-charset
254    
255     =cut
256    
257     sub __2022__common ($) {  ## iso-2022-8bit-ss2: state object built from scratch (does not call SUPER)
258     my $C = Encode::ISO2022->new_object;  ## NOTE(review): parent settings such as {bit} = 8 are not applied on this path -- confirm new_object's defaults
259     $C->{option}->{designate_to}->{G96}->{default} = 2;  ## 96-charsets get the value 2 by default (POD: "using SS2 for 96-charset")
260     $C->{option}->{designate_to}->{G96n}->{default} = 2;
261     $C->{option}->{G94n_designate_long} = 1;
262     $C->{option}->{Ginvoke_by_single_shift}->[2] = 1;  ## only index 2 is set; other elements keep new_object's values
263     $C->{option}->{Ginvoke_to_left}->[2] = 0;
264     $C->{option}->{C1invoke_to_right} = 1;
265     $C;
266     }
267    
268     package Encode::ISO2022::EightBit::CompoundText;
269     use vars qw/@ISA/;
270     push @ISA, 'Encode::ISO2022::EightBit';
271     __PACKAGE__->Define (qw/compound-text ctext x-ctext/);
272    
273     =item compound-text
274    
275     ISO/IEC 2022 based 8-bit encoding used in inter-client
276     communication of X Window System (Alias: ctext (emacsen), x-ctext (emacsen))
277    
278 wakaba 1.2 Strictly, x-ctext, extended compound text (X Compound Text
279     based encoding for unknown ISO/IEC 2022 based encoding) is a
280     different coding system from X Compound Text. See
281     [mule-jp:7455] <mid:rsqsoa5s2hr.fsf@crane.jaist.ac.jp> and
282     [mule-jp:7457] <mid:rsq4smlky85.fsf@crane.jaist.ac.jp>.
283    
284 wakaba 1.1 =cut
285    
286     sub __2022__common ($) {  ## compound-text: state object built from scratch (does not call SUPER)
287     my $C = Encode::ISO2022->new_object;  ## NOTE(review): parent settings such as {bit} = 8 are not applied on this path -- confirm new_object's defaults
288     $C->{option}->{designate_to}->{C0}->{default} = -1;  ## NOTE(review): -1 presumably means "designation not permitted" -- confirm in Encode::ISO2022
289     $C->{option}->{designate_to}->{C1}->{default} = -1;
290     $C->{option}->{designate_to}->{G94}->{I} = 1;
291     $C->{option}->{designate_to}->{G96}->{default} = 1;
292     $C->{option}->{designate_to}->{G96n}->{default} = -1;
293     for my $t (qw/G94 G96 G94n/) {  ## per charset type: assign -1 to the key ranges below
294     for (0x30..0x3F) {  ## keys ending in a byte 0x30..0x3F, bare or prefixed with '!', '"', '#' or ' '
295     my $F = chr $_;
296     $C->{option}->{designate_to}->{$t}->{$F} = -1;
297     $C->{option}->{designate_to}->{$t}->{'!'.$F} = -1;
298     $C->{option}->{designate_to}->{$t}->{'"'.$F} = -1;
299     $C->{option}->{designate_to}->{$t}->{'#'.$F} = -1;
300     $C->{option}->{designate_to}->{$t}->{' '.$F} = -1;
301     }
302     for (0x40..0x7E) {  ## keys made of ' ' followed by a byte 0x40..0x7E
303     $C->{option}->{designate_to}->{$t}->{' '.chr $_} = -1;
304     }
305     }
306     $C->{option}->{G94n_designate_long} = 1;
307     $C->{option}->{Ginvoke_to_left}->[1] = 0;
308     $C->{option}->{C1invoke_to_right} = 1;
309     $C->{option}->{reset}->{Ginvoke} = 0;
310     $C;
311     }
312 wakaba 1.2 sub __2022_decode ($) {  ## Decode-only variant: common compound-text object with G1 preset (encode uses the inherited __2022_encode)
313     my $C = shift->__2022__common;
314     ## Emacsen's x-ctext
315     $C->{G1} = $Encode::ISO2022::CHARSET{G96}->{A}; ## ISO/IEC 8859-1
316     $C;
317     }
318 wakaba 1.1
319    
320     1;
321     __END__
322    
323     =back
324    
325     =head1 LICENSE
326    
327 wakaba 1.2 Copyright 2002 Wakaba <w@suika.fam.cx>
328 wakaba 1.1
329     This library is free software; you can redistribute it
330     and/or modify it under the same terms as Perl itself.
331    
332     =cut
333    
334 wakaba 1.2 # $Date: 2002/09/16 02:17:48 $
335 wakaba 1.1 ### EightBit.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24