/[suikacvs]/perl/lib/Encode/ISO2022/EightBit.pm
Suika

Contents of /perl/lib/Encode/ISO2022/EightBit.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations) (download)
Fri Oct 4 23:58:04 2002 UTC (22 years, 2 months ago) by wakaba
Branch: MAIN
Changes since 1.3: +6 -6 lines
2002-10-05  Nanashi-san

	* Table.pm: New module.
	(Commited by Wakaba <w@suika.fam.cx>.)

1 wakaba 1.1 =head1 NAME
2    
3     Encode::ISO2022::EightBit --- Encoding and decoding of 8-bit ISO/IEC 2022
4     based encodings (most of which are also known as EUCs)
5    
6     =head1 ENCODINGS
7    
8     =over 4
9    
10     =cut
11    
12     require 5.7.3;
13     use strict;
14     package Encode::ISO2022::EightBit;
15     use vars qw($VERSION);
16 wakaba 1.4 $VERSION=do{my @r=(q$Revision: 1.3 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
17 wakaba 1.1 use base qw(Encode::Encoding);
18     require Encode::ISO2022;
19    
## encode ($obj, $str, $chk)
##
## Converts the internal string $str into this 8-bit ISO/IEC 2022 based
## encoding.  The conversion state is obtained from the object's
## __2022_encode method and handed, together with the string, to
## Encode::ISO2022::internal_to_iso2022.
##
##   $obj - encoding object (invocant)
##   $str - string to convert
##   $chk - optional; when true, the caller's source variable is
##          replaced by the empty string
##
## Returns the value produced by internal_to_iso2022, which is called in
## scalar context.
sub encode ($$;$) {
  my ($self, $source, $check) = @_;
  if ($check) {
    $_[1] = '';   # @_ aliases the caller's variable, so this empties it
  }
  my $encoded = &Encode::ISO2022::internal_to_iso2022 (
    $source, $self->__2022_encode
  );
  return $encoded;
}
26    
## decode ($obj, $str, $chk)
##
## Converts $str, encoded in this 8-bit ISO/IEC 2022 based encoding, into
## the internal representation.  The conversion state is obtained from
## the object's __2022_decode method and handed, together with the
## string, to Encode::ISO2022::iso2022_to_internal.
##
##   $obj - encoding object (invocant)
##   $str - string to convert
##   $chk - optional; when true, the caller's source variable is
##          replaced by the empty string
##
## Returns whatever iso2022_to_internal returns (called in the caller's
## context).
sub decode ($$;$) {
  my ($self, $octets, $check) = @_;
  if ($check) {
    $_[1] = '';   # @_ aliases the caller's variable, so this empties it
  }
  return &Encode::ISO2022::iso2022_to_internal (
    $octets, $self->__2022_decode
  );
}
32    
33     ## prototype for EUCs
34    
## __2022__common ($obj)
##
## Builds the ISO/IEC 2022 state object shared by the EUC-style
## encodings of this module: a fresh Encode::ISO2022 object configured
## for 8 bits, with every designation table defaulting to -1 (except C0
## final byte "\x40", which maps to 0), G0 invoked to the left, G2/G3
## invoked by single shift, C1 invoked to the right, and the reset
## options for G designation and G invocation switched off.
##
## The invocant is not used here; subclasses call this through SUPER::
## and then fill in their G1..G3 character sets.
##
## Returns the configured state object.
sub __2022__common ($) {
  my $C = Encode::ISO2022->new_object;
  $C->{bit} = 8;

  ## Default designation target for every kind of coded character set
  for my $kind (qw/C0 C1 G94 G94n G96 G96n/) {
    $C->{option}->{designate_to}->{$kind}->{default} = -1;
  }
  $C->{option}->{designate_to}->{C0}->{"\x40"} = 0;

  ## $C->{option} is guaranteed to exist after the assignments above
  my $option = $C->{option};
  $option->{Ginvoke_to_left} = [1, 0, 0, 0];
  $option->{Ginvoke_by_single_shift} = [0, 0, 1, 1];
  $option->{C1invoke_to_right} = 1;
  $option->{reset}->{Gdesignation} = 0;
  $option->{reset}->{Ginvoke} = 0;

  return $C;
}
## __2022_encode ($obj)
##
## Returns the state object used when encoding; here it is simply the
## object built by the invocant's __2022__common.
sub __2022_encode ($) {
  my ($self) = @_;
  return scalar $self->__2022__common;
}
## __2022_decode ($obj)
##
## Returns the state object used when decoding; here it is simply the
## object built by the invocant's __2022__common.
sub __2022_decode ($) {
  my ($self) = @_;
  return scalar $self->__2022__common;
}
60    
package Encode::ISO2022::EightBit::EUCJapanOld;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/ujis x-ujis deckanji/);

=item ujis

Old (pre-1990) version of the EUC (ISO/IEC 2022 based 8-bit
encoding) for Japanese.  (Alias: x-ujis, deckanji)

=cut

## Returns the common EUC state with the character sets of the old
## Japanese EUC designated to G1, G2 and G3.
sub __2022__common ($) {
  my $self = shift;
  my $C = $self->SUPER::__2022__common;
  @{$C}{qw/G1 G2 G3/} = (
    $Encode::ISO2022::CHARSET{G94n}->{B},     ## JIS X 0208-1983
    $Encode::ISO2022::CHARSET{G94}->{I},      ## JIS X 0201 Katakana
    $Encode::ISO2022::CHARSET{G94n}->{' @'},  ## Gaiji (undefined)
  );
  return $C;
}
80    
package Encode::ISO2022::EightBit::EUCJapan;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
  euc-japan euc-japan-1990 euc-jp euc-j eucjp euc_jp x-euc-jp
  x-eucjp eucjis euc-jis eucj
  Extended_UNIX_Code_Packed_Format_for_Japanese csEUCPkdFmtJapanese
  japanese-iso-8bit cp51932 japanese_euc
  ajec eucjp-open ibm-eucjp cp33722 33722 sdeckanji
/);

=item euc-japan

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese.
(Alias: euc-japan-1990 (emacsen), euc-jp (IANA),
euc-j, eucjp (X), euc_jp, eucj, x-eucjp, x-euc-jp, eucjis, euc-jis,
extended_unix_code_packed_format_for_japanese (IANA),
cseucpkdfmtjapanese (IANA), japanese-iso-8bit (emacsen),
cp51932 (M$), japanese_euc, ajec, eucjp-open, ibm-eucjp,
cp33722, 33722, sdeckanji)

=cut

## Returns the common EUC state with the character sets of the
## Japanese EUC designated to G1, G2 and G3.
sub __2022__common ($) {
  my $self = shift;
  my $C = $self->SUPER::__2022__common;
  @{$C}{qw/G1 G2 G3/} = (
    $Encode::ISO2022::CHARSET{G94n}->{'B@'},  ## JIS X 0208-1990
    $Encode::ISO2022::CHARSET{G94}->{I},      ## JIS X 0201 Katakana
    $Encode::ISO2022::CHARSET{G94n}->{D},     ## JIS X 0212-1990
  );
  return $C;
}
107    
package Encode::ISO2022::EightBit::EUCJISX0213;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
  euc-jisx0213 x-euc-jisx0213 euc_jisx0213 eucjp0213
  euc-jp-3 x-euc-jisx0213-packed deckanji2000
/);

=item euc-jisx0213

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese using
JIS X 0213:2000, as defined by JIS X 0213:2000.
(Alias: x-euc-jisx0213, euc_jisx0213, eucjp0213, euc-jp-3,
x-euc-jisx0213-packed, deckanji2000)

=cut

## Returns the common EUC state with both planes of JIS X 0213:2000
## and JIS X 0201 Katakana designated to G1, G3 and G2 respectively.
sub __2022__common ($) {
  my $self = shift;
  my $C = $self->SUPER::__2022__common;
  @{$C}{qw/G1 G2 G3/} = (
    $Encode::ISO2022::CHARSET{G94n}->{O},  ## JIS X 0213:2000 plane 1
    $Encode::ISO2022::CHARSET{G94}->{I},   ## JIS X 0201 Katakana
    $Encode::ISO2022::CHARSET{G94n}->{P},  ## JIS X 0213:2000 plane 2
  );
  return $C;
}
129    
package Encode::ISO2022::EightBit::EUCJISX0213Plane1;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/euc-jisx0213-plane1/);

=item euc-jisx0213-plane1

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
with JIS X 0213:2000 plane 1 only, defined by JIS X 0213:2000.
Plane 2 is not available in this encoding.

=cut

## Returns the common EUC state set up like euc-jisx0213, except that
## G3 holds an empty set instead of JIS X 0213:2000 plane 2.
##
## G1 and G2 are designated explicitly: the parent class used here
## (Encode::ISO2022::EightBit) does not designate any character set, so
## without these assignments plane 1 would never be selected.
sub __2022__common ($) {
  my $C = shift->SUPER::__2022__common;
  $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O};	## JIS X 0213:2000 plane 1
  $C->{G2} = $Encode::ISO2022::CHARSET{G94}->{I};	## JIS X 0201 Katakana
  $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{"\x7E"};	## empty
  $C;
}
147    
package Encode::ISO2022::EightBit::EUCCHINA;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
  euc-china euc-cn euccn euc-gb
  cn-gb cn-gb-2312 chinese-iso-8bit ugb
  gb2312 csgb2312 x-euc-cn cp51936 ibm-euccn CP1383 1383
/);

=item euc-china

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese.
(Alias: euc-cn (emacsen), euccn, euc-gb, cn-gb (RFC 1922),
cn-gb-2312 (RFC 1922), chinese-iso-8bit (emacsen), ugb,
gb2312 (IANA), csgb2312 (IANA), x-euc-cn, CP51936 (M$),
ibm-euccn, CP1383, 1383)

=cut

## Returns the common EUC state with GB 2312 designated to G1.
sub __2022__common ($) {
  my $self = shift;
  my $C = $self->SUPER::__2022__common;
  $C->{G1} = $Encode::ISO2022::CHARSET{G94n}{A};  ## GB 2312
  return $C;
}
169    
package Encode::ISO2022::EightBit::EUCCHINA165;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/cn-gb-isoir165 iso-ir-165/);

=item cn-gb-isoir165

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese using
ISO-IR 165.  (Alias: cn-gb-isoir165 (RFC 1922), ISO-IR-165)

=cut

## Returns the common EUC state with ISO-IR 165 designated to G1.
sub __2022__common ($) {
  my $self = shift;
  my $C = $self->SUPER::__2022__common;
  $C->{G1} = $Encode::ISO2022::CHARSET{G94n}{"\x45"};  ## ISO-IR 165
  return $C;
}
188    
package Encode::ISO2022::EightBit::EUCcwnn;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/euc-cwnn cwnn-iso-8bit/);

=item euc-cwnn

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese using
GB 2312, as used by the cwnn input system.  (Alias: cwnn-iso-8bit)

=cut

# See <http://www.tomo.gr.jp/users/wnn/9912ml/msg00088.html>

## Returns the common EUC state with GB 2312 designated to G1 and the
## omron_udc_zh (sisheng) set designated to G2.
sub __2022__common ($) {
  my $self = shift;
  my $C = $self->SUPER::__2022__common;
  @{$C}{qw/G1 G2/} = (
    $Encode::ISO2022::CHARSET{G94n}->{A},   ## GB 2312
    $Encode::ISO2022::CHARSET{G94}->{'0'},  # omron_udc_zh (sisheng)
  );
  ## TODO: Implement by private set support
  return $C;
}
210    
211     ## cn-gb-12345, gb12345, euc-gb12345
212    
package Encode::ISO2022::EightBit::EUCKorea;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
  euc-korea euc-kr euckr cp970 cp51949 ibm-euckr x-euc-kr
  cseuckr korean-iso-8bit
/);

=item euc-korea

EUC (ISO/IEC 2022 based 8-bit encoding) for Korean.
(Alias: euc-kr (IANA), euckr, cp970, cp51949 (M$), ibm-euckr,
x-euc-kr, cseuckr (IANA), korean-iso-8bit (emacsen))

=cut

## Returns the common EUC state with KS X 1001 designated to G1.
sub __2022__common ($) {
  my $self = shift;
  my $C = $self->SUPER::__2022__common;
  $C->{G1} = $Encode::ISO2022::CHARSET{G94n}{C};  ## KS X 1001
  return $C;
}
232    
package Encode::ISO2022::EightBit::EUCTaiwan;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
  euc-taiwan euc-tw euctw x-euc-tw cns11643 cseuctw
  ibm-euctw cp964
/);

=item euc-taiwan

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese using
CNS 11643.  (Alias: euc-tw, euctw, x-euc-tw, cseuctw, cns11643,
ibm-euctw, cp964)

=cut

## Returns the common EUC state with CNS 11643 plane 1 designated to G1.
##
## BUG: planes 2-16 are not supported yet; for the time being a 3-byte
## DRCS is designated to G2 in their place.
sub __2022__common ($) {
  my $self = shift;
  my $C = $self->SUPER::__2022__common;
  @{$C}{qw/G1 G2/} = (
    $Encode::ISO2022::CHARSET{G94n}->{G},     ## plane 1
    $Encode::ISO2022::CHARSET{G94n}->{' `'},  # 3byte DRCS (temporary)
  );
  return $C;
}
253    
package Encode::ISO2022::EightBit::EUCtwnn;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/euc-twnn twnn-iso-8bit/);

=item euc-twnn

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese using
CNS 11643, as used by the twnn input system.  (Alias: twnn-iso-8bit)

=cut

# See <http://www.tomo.gr.jp/users/wnn/9912ml/msg00088.html>

## Returns the common EUC state with CNS 11643 planes 1 and 2
## designated to G1 and G3, and the omron_udc_zh (sisheng) set
## designated to G2.
sub __2022__common ($) {
  my $self = shift;
  my $C = $self->SUPER::__2022__common;
  @{$C}{qw/G1 G2 G3/} = (
    $Encode::ISO2022::CHARSET{G94n}->{G},   ## plane 1
    $Encode::ISO2022::CHARSET{G94}->{'0'},  # omron_udc_zh (sisheng)
    $Encode::ISO2022::CHARSET{G94n}->{H},   ## plane 2
  );
  ## TODO: Implement by private set support
  return $C;
}
276    
package Encode::ISO2022::EightBit::EUCKPS9566;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/euc-kps9566/);

=item euc-kps9566

EUC (ISO/IEC 2022 based 8-bit encoding) for Korean using
KPS 9566-97.

=cut

## Returns the common EUC state with KPS 9566-97 designated to G1.
sub __2022__common ($) {
  my $self = shift;
  my $C = $self->SUPER::__2022__common;
  $C->{G1} = $Encode::ISO2022::CHARSET{G94n}{N};  ## KPS 9566-97
  return $C;
}
294    
package Encode::ISO2022::EightBit::SS2;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/iso-2022-8bit-ss2/);

=item iso-2022-8bit-ss2

ISO/IEC 2022 based 8-bit encoding that uses SS2 for 96-charsets.

=cut

## Returns the state object for iso-2022-8bit-ss2.
##
## Unlike the EUC classes, this does not start from the parent's
## __2022__common; it configures a fresh Encode::ISO2022 object:
## 96-charsets (single and multibyte) are designated to G2, G2 is
## invoked by single shift rather than to the left, and C1 is invoked
## to the right.
sub __2022__common ($) {
  my $C = Encode::ISO2022->new_object;

  for my $kind (qw/G96 G96n/) {
    $C->{option}->{designate_to}->{$kind}->{default} = 2;
  }

  ## $C->{option} is guaranteed to exist after the assignments above
  my $option = $C->{option};
  $option->{G94n_designate_long} = 1;
  $option->{Ginvoke_by_single_shift}->[2] = 1;
  $option->{Ginvoke_to_left}->[2] = 0;
  $option->{C1invoke_to_right} = 1;

  return $C;
}
316    
package Encode::ISO2022::EightBit::CompoundText;
our @ISA;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
  compound-text compound_text
  x-compound-text ctext x-ctext
/);

=item compound-text

ISO/IEC 2022 based 8-bit encoding used for inter-client
communication in the X Window System.  (Alias: ctext (emacsen),
x-ctext (emacsen), compound_text, x-compound-text)

Strictly speaking, x-ctext, the extended compound text (an
X Compound Text based encoding for unknown ISO/IEC 2022 based
encodings), is a different coding system from X Compound Text.  See
[mule-jp:7455] <mid:rsqsoa5s2hr.fsf@crane.jaist.ac.jp> and
[mule-jp:7457] <mid:rsq4smlky85.fsf@crane.jaist.ac.jp>.

=cut

## Returns the state object for compound text.
##
## This does not start from the parent's __2022__common; it configures
## a fresh Encode::ISO2022 object.  C0, C1 and multibyte 96-charsets
## default to designation target -1, G94 set "I" and 96-charsets go to
## G1, and for G94, G96 and G94n every final byte 0x30..0x3F (bare or
## after "!", '"', "#" or " ") as well as every " "-prefixed final byte
## 0x40..0x7E is mapped to -1.
sub __2022__common ($) {
  my $C = Encode::ISO2022->new_object;

  $C->{option}->{designate_to}->{C0}->{default} = -1;
  ## Both hashes exist after the assignment above
  my $option = $C->{option};
  my $designate = $option->{designate_to};

  $designate->{C1}->{default} = -1;
  $designate->{G94}->{I} = 1;
  $designate->{G96}->{default} = 1;
  $designate->{G96n}->{default} = -1;

  for my $kind (qw/G94 G96 G94n/) {
    for my $final (map { chr } 0x30 .. 0x3F) {
      for my $intermediate ('', '!', '"', '#', ' ') {
        $designate->{$kind}->{$intermediate . $final} = -1;
      }
    }
    for my $final (map { chr } 0x40 .. 0x7E) {
      $designate->{$kind}->{' ' . $final} = -1;
    }
  }

  $option->{G94n_designate_long} = 1;
  $option->{Ginvoke_to_left}->[1] = 0;
  $option->{C1invoke_to_right} = 1;
  $option->{reset}->{Ginvoke} = 0;

  return $C;
}

## Returns the state object used when decoding: the common compound
## text state with ISO/IEC 8859-1 designated to G1 (Emacsen's x-ctext).
sub __2022_decode ($) {
  my ($self) = @_;
  my $C = $self->__2022__common;
  $C->{G1} = $Encode::ISO2022::CHARSET{G96}{A};  ## ISO/IEC 8859-1
  return $C;
}
369 wakaba 1.1
370    
371     1;
372     __END__
373    
374     =back
375    
376     =head1 LICENSE
377    
378 wakaba 1.2 Copyright 2002 Wakaba <w@suika.fam.cx>
379 wakaba 1.1
380     This library is free software; you can redistribute it
381     and/or modify it under the same terms as Perl itself.
382    
383     =cut
384    
385 wakaba 1.4 # $Date: 2002/09/22 11:08:23 $
386 wakaba 1.1 ### EightBit.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24