/[pub]/test/oldencodeutils/lib/Encode/ISO2022/EightBit.pm
Suika

Contents of /test/oldencodeutils/lib/Encode/ISO2022/EightBit.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (show annotations) (download)
Fri Oct 4 23:58:04 2002 UTC (23 years, 3 months ago) by wakaba
Branch: MAIN
Changes since 1.3: +6 -6 lines
2002-10-05  Nanashi-san

	* Table.pm: New module.
	(Committed by Wakaba <w@suika.fam.cx>.)

1 =head1 NAME
2
3 Encode::ISO2022::EightBit --- Encode and decode of 8-bit ISO/IEC 2022
4 based encodings (most of them are also known as EUCs)
5
6 =head1 ENCODINGS
7
8 =over 4
9
10 =cut
11
12 require 5.7.3;
13 use strict;
14 package Encode::ISO2022::EightBit;
15 use vars qw($VERSION);
16 $VERSION=do{my @r=(q$Revision: 1.3 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
17 use base qw(Encode::Encoding);
18 require Encode::ISO2022;
19
## encode ($obj, $str [, $chk])
##   Converts $str with Encode::ISO2022::internal_to_iso2022, using the
##   state object supplied by this object's __2022_encode method, and
##   returns the converted string.
##   When $chk is true, the caller's source variable (aliased through
##   $_[1]) is emptied before the conversion runs.
sub encode ($$;$) {
    my ($self, $source, $check) = @_;
    if ($check) {
        $_[1] = '';    # writes through @_ into the caller's variable
    }
    my $encoded
        = &Encode::ISO2022::internal_to_iso2022 ($source, $self->__2022_encode);
    return $encoded;
}
26
## decode ($obj, $str [, $chk])
##   Converts $str with Encode::ISO2022::iso2022_to_internal, using the
##   state object supplied by this object's __2022_decode method, and
##   returns whatever that function returns.
##   When $chk is true, the caller's source variable (aliased through
##   $_[1]) is emptied before the conversion runs.
sub decode ($$;$) {
    my ($self, $source, $check) = @_;
    if ($check) {
        $_[1] = '';    # writes through @_ into the caller's variable
    }
    return &Encode::ISO2022::iso2022_to_internal ($source, $self->__2022_decode);
}
32
33 ## prototype for EUCs
34
## __2022__common ($obj)
##   Creates a fresh Encode::ISO2022 state object and fills in the
##   settings shared by the EUC-style subclasses in this file.  Subclasses
##   call this through SUPER:: and then assign their own G1..G3 sets.
##   NOTE(review): the meaning of the individual option values (-1, 0, 1)
##   is defined by Encode::ISO2022, not here -- confirm there.
sub __2022__common ($) {
    my $conf = Encode::ISO2022->new_object;
    $conf->{bit} = 8;

    ## The first assignment goes through the full path so that the nested
    ## option hashes exist on $conf before shortcuts to them are taken.
    $conf->{option}->{designate_to}->{C0}->{"\x40"} = 0;
    my $option       = $conf->{option};
    my $designate_to = $option->{designate_to};
    for my $type (qw/C0 C1 G94 G94n G96 G96n/) {
        $designate_to->{$type}->{default} = -1;
    }

    $option->{Ginvoke_to_left}         = [1, 0, 0, 0];
    $option->{Ginvoke_by_single_shift} = [0, 0, 1, 1];
    $option->{C1invoke_to_right}       = 1;
    $option->{reset}->{Gdesignation}   = 0;
    $option->{reset}->{Ginvoke}        = 0;
    return $conf;
}
## __2022_encode ($obj)
##   Returns the state object used by encode; here it is exactly what
##   __2022__common produces.
sub __2022_encode ($) {
    my ($self) = @_;
    my $conf = $self->__2022__common;
    return $conf;
}
## __2022_decode ($obj)
##   Returns the state object used by decode; here it is exactly what
##   __2022__common produces.
sub __2022_decode ($) {
    my ($self) = @_;
    my $conf = $self->__2022__common;
    return $conf;
}
60
package Encode::ISO2022::EightBit::EUCJapanOld;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/ujis x-ujis deckanji/);

=item ujis

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese,
old version (pre-1990).  (Alias: x-ujis, deckanji)

=cut

## Takes the parent's shared state object and assigns G1..G3 for the
## pre-1990 Japanese EUC.
sub __2022__common ($) {
    my ($self) = @_;
    my $conf    = $self->SUPER::__2022__common;
    my $charset = \%Encode::ISO2022::CHARSET;
    $conf->{G1} = $charset->{G94n}->{B};       ## JIS X 0208-1983
    $conf->{G2} = $charset->{G94}->{I};        ## JIS X 0201 Katakana
    $conf->{G3} = $charset->{G94n}->{' @'};    ## Gaiji (undefined)
    return $conf;
}
80
package Encode::ISO2022::EightBit::EUCJapan;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
    euc-japan euc-japan-1990 euc-jp euc-j eucjp euc_jp x-euc-jp
    x-eucjp eucjis euc-jis eucj
    Extended_UNIX_Code_Packed_Format_for_Japanese csEUCPkdFmtJapanese
    japanese-iso-8bit cp51932 japanese_euc
    ajec eucjp-open ibm-eucjp cp33722 33722 sdeckanji
/);

=item euc-japan

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese.
(Alias: euc-japan-1990 (emacsen), euc-jp (IANA),
euc-j, eucjp (X), euc_jp, eucj, x-eucjp, x-euc-jp, eucjis, euc-jis,
Extended_UNIX_Code_Packed_Format_for_Japanese (IANA),
csEUCPkdFmtJapanese (IANA), japanese-iso-8bit (emacsen),
cp51932 (M$), japanese_euc, ajec, eucjp-open, ibm-eucjp,
cp33722, 33722, sdeckanji)

=cut

## Takes the parent's shared state object and assigns G1..G3 for
## Japanese EUC.
sub __2022__common ($) {
    my ($self) = @_;
    my $conf    = $self->SUPER::__2022__common;
    my $charset = \%Encode::ISO2022::CHARSET;
    $conf->{G1} = $charset->{G94n}->{'B@'};    ## JIS X 0208-1990
    $conf->{G2} = $charset->{G94}->{I};        ## JIS X 0201 Katakana
    $conf->{G3} = $charset->{G94n}->{D};       ## JIS X 0212-1990
    return $conf;
}
107
package Encode::ISO2022::EightBit::EUCJISX0213;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
    euc-jisx0213 x-euc-jisx0213 euc_jisx0213 eucjp0213
    euc-jp-3 x-euc-jisx0213-packed deckanji2000
/);

=item euc-jisx0213

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
with JIS X 0213:2000, defined by JIS X 0213:2000.
(Alias: x-euc-jisx0213, euc_jisx0213, eucjp0213, euc-jp-3,
x-euc-jisx0213-packed, deckanji2000)

=cut

## Takes the parent's shared state object and assigns G1..G3 for
## EUC with JIS X 0213:2000 (both planes).
sub __2022__common ($) {
    my ($self) = @_;
    my $conf    = $self->SUPER::__2022__common;
    my $charset = \%Encode::ISO2022::CHARSET;
    $conf->{G1} = $charset->{G94n}->{O};    ## JIS X 0213:2000 plane 1
    $conf->{G2} = $charset->{G94}->{I};     ## JIS X 0201 Katakana
    $conf->{G3} = $charset->{G94n}->{P};    ## JIS X 0213:2000 plane 2
    return $conf;
}
129
package Encode::ISO2022::EightBit::EUCJISX0213Plane1;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/euc-jisx0213-plane1/);

=item euc-jisx0213-plane1

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
with JIS X 0213:2000 plane 1, defined by JIS X 0213:2000

=cut

## Takes the parent's shared state object and assigns G1..G3 for the
## plane-1-only variant of EUC with JIS X 0213:2000.
##
## This class inherits from Encode::ISO2022::EightBit directly, not from
## EUCJISX0213, so G1 and G2 must be assigned here as well; emptying G3
## alone would leave G1/G2 at whatever Encode::ISO2022->new_object
## provides.  G1/G2 mirror the euc-jisx0213 encoding; only G3 differs
## (empty set instead of plane 2).
## NOTE(review): G2 = JIS X 0201 Katakana is copied from euc-jisx0213 --
## confirm that the plane-1 subset is meant to keep it.
sub __2022__common ($) {
    my ($self) = @_;
    my $conf = $self->SUPER::__2022__common;
    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O};         ## JIS X 0213:2000 plane 1
    $conf->{G2} = $Encode::ISO2022::CHARSET{G94}->{I};          ## JIS X 0201 Katakana
    $conf->{G3} = $Encode::ISO2022::CHARSET{G94n}->{"\x7E"};    ## empty
    return $conf;
}
147
package Encode::ISO2022::EightBit::EUCCHINA;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
    euc-china euc-cn euccn euc-gb
    cn-gb cn-gb-2312 chinese-iso-8bit ugb
    gb2312 csgb2312 x-euc-cn cp51936 ibm-euccn CP1383 1383
/);

=item euc-china

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese.
(Alias: euc-cn (emacsen), euccn, euc-gb, cn-gb (RFC 1922),
cn-gb-2312 (RFC 1922), chinese-iso-8bit (emacsen), ugb,
gb2312 (IANA), csgb2312 (IANA), x-euc-cn, cp51936 (M$),
ibm-euccn, CP1383, 1383)

=cut

## Takes the parent's shared state object and assigns G1 for Chinese EUC.
sub __2022__common ($) {
    my ($self) = @_;
    my $conf = $self->SUPER::__2022__common;
    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{A};    ## GB 2312
    return $conf;
}
169
package Encode::ISO2022::EightBit::EUCCHINA165;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/cn-gb-isoir165 iso-ir-165/);

=item cn-gb-isoir165

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
with ISO-IR 165.  (Alias: cn-gb-isoir165 (RFC 1922),
iso-ir-165)

=cut

## Takes the parent's shared state object and assigns G1 for Chinese EUC
## with ISO-IR 165.
sub __2022__common ($) {
    my ($self) = @_;
    my $conf = $self->SUPER::__2022__common;
    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{"\x45"};    ## ISO-IR 165
    return $conf;
}
188
package Encode::ISO2022::EightBit::EUCcwnn;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/euc-cwnn cwnn-iso-8bit/);

=item euc-cwnn

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
with GB 2312, used by cwnn input system (Alias: cwnn-iso-8bit)

=cut

# See <http://www.tomo.gr.jp/users/wnn/9912ml/msg00088.html>

## Takes the parent's shared state object and assigns G1 and G2 for the
## cwnn flavour of Chinese EUC.
sub __2022__common ($) {
    my ($self) = @_;
    my $conf    = $self->SUPER::__2022__common;
    my $charset = \%Encode::ISO2022::CHARSET;
    $conf->{G1} = $charset->{G94n}->{A};     ## GB 2312
    $conf->{G2} = $charset->{G94}->{'0'};    # omron_udc_zh (sisheng)
    ## TODO: Implement by private set support
    return $conf;
}
210
211 ## cn-gb-12345, gb12345, euc-gb12345
212
package Encode::ISO2022::EightBit::EUCKorea;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
    euc-korea euc-kr euckr cp970 cp51949 ibm-euckr x-euc-kr
    cseuckr korean-iso-8bit
/);

=item euc-korea

EUC (ISO/IEC 2022 based 8-bit encoding) for Korean
(Alias: euc-kr (IANA), euckr, cp970, cp51949 (M$), ibm-euckr,
x-euc-kr, cseuckr (IANA), korean-iso-8bit (emacsen))

=cut

## Takes the parent's shared state object and assigns G1 for Korean EUC.
sub __2022__common ($) {
    my ($self) = @_;
    my $conf = $self->SUPER::__2022__common;
    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{C};    ## KS X 1001
    return $conf;
}
232
package Encode::ISO2022::EightBit::EUCTaiwan;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
    euc-taiwan euc-tw euctw x-euc-tw cns11643 cseuctw
    ibm-euctw cp964
/);

=item euc-taiwan

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
with CNS 11643.  (Alias: euc-tw, euctw, x-euc-tw, cseuctw, cns11643,
ibm-euctw, cp964)

=cut

## Takes the parent's shared state object and assigns G1 and G2 for
## EUC with CNS 11643.
sub __2022__common ($) {
    my ($self) = @_;
    my $conf    = $self->SUPER::__2022__common;
    my $charset = \%Encode::ISO2022::CHARSET;
    $conf->{G1} = $charset->{G94n}->{G};    ## plane 1
    ## BUG: planes 2-16 are not supported yet, so G2 temporarily holds
    ## a 3-byte DRCS instead of a real G2 set.
    $conf->{G2} = $charset->{G94n}->{' `'};    # 3byte DRCS (temporary)
    return $conf;
}
253
package Encode::ISO2022::EightBit::EUCtwnn;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/euc-twnn twnn-iso-8bit/);

=item euc-twnn

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
with CNS 11643, used by twnn input system (Alias: twnn-iso-8bit)

=cut

# See <http://www.tomo.gr.jp/users/wnn/9912ml/msg00088.html>

## Takes the parent's shared state object and assigns G1..G3 for the
## twnn flavour of EUC with CNS 11643.
sub __2022__common ($) {
    my ($self) = @_;
    my $conf    = $self->SUPER::__2022__common;
    my $charset = \%Encode::ISO2022::CHARSET;
    $conf->{G1} = $charset->{G94n}->{G};     ## plane 1
    $conf->{G2} = $charset->{G94}->{'0'};    # omron_udc_zh (sisheng)
    ## TODO: Implement by private set support
    $conf->{G3} = $charset->{G94n}->{H};     ## plane 2
    return $conf;
}
276
package Encode::ISO2022::EightBit::EUCKPS9566;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/euc-kps9566/);

=item euc-kps9566

EUC (ISO/IEC 2022 based 8-bit encoding) for Korean
with KPS 9566-97

=cut

## Takes the parent's shared state object and assigns G1 for Korean EUC
## with KPS 9566-97.
sub __2022__common ($) {
    my ($self) = @_;
    my $conf = $self->SUPER::__2022__common;
    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{N};    ## KPS 9566-97
    return $conf;
}
294
package Encode::ISO2022::EightBit::SS2;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/iso-2022-8bit-ss2/);

=item iso-2022-8bit-ss2

ISO/IEC 2022 based 8-bit encoding using SS2 for 96-charset

=cut

## Builds the state object for iso-2022-8bit-ss2.  Unlike the EUC
## classes, this does not start from the parent's __2022__common: it
## takes a fresh Encode::ISO2022 object and sets only the options below.
## NOTE(review): the meaning of the option values is defined by
## Encode::ISO2022 -- confirm there.
sub __2022__common ($) {
    my $conf = Encode::ISO2022->new_object;

    ## The first assignment goes through the full path so that the nested
    ## option hash exists on $conf before a shortcut to it is taken.
    $conf->{option}->{designate_to}->{G96}->{default} = 2;
    my $option = $conf->{option};
    $option->{designate_to}->{G96n}->{default} = 2;
    $option->{G94n_designate_long}             = 1;
    $option->{Ginvoke_by_single_shift}->[2]    = 1;
    $option->{Ginvoke_to_left}->[2]            = 0;
    $option->{C1invoke_to_right}               = 1;
    return $conf;
}
316
package Encode::ISO2022::EightBit::CompoundText;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/
    compound-text compound_text
    x-compound-text ctext x-ctext
/);

=item compound-text

ISO/IEC 2022 based 8-bit encoding used in inter-client
communication of X Window System (Alias: ctext (emacsen), x-ctext (emacsen),
compound_text, x-compound-text)

Strictly, x-ctext, extended compound text (X Compound Text
based encoding for unknown ISO/IEC 2022 based encoding) is a
different coding system from X Compound Text.  See
[mule-jp:7455] <mid:rsqsoa5s2hr.fsf@crane.jaist.ac.jp> and
[mule-jp:7457] <mid:rsq4smlky85.fsf@crane.jaist.ac.jp>.

=cut

## Builds the state object for compound text.  This does not start from
## the parent's __2022__common: it takes a fresh Encode::ISO2022 object
## and sets only the options below.
## NOTE(review): presumably a designate_to value of -1 disables that
## designation -- confirm against Encode::ISO2022.
sub __2022__common ($) {
    my $conf = Encode::ISO2022->new_object;

    ## The first assignment goes through the full path so that the nested
    ## option hashes exist on $conf before shortcuts to them are taken.
    $conf->{option}->{designate_to}->{C0}->{default} = -1;
    my $option       = $conf->{option};
    my $designate_to = $option->{designate_to};
    $designate_to->{C1}->{default}   = -1;
    $designate_to->{G94}->{I}        = 1;
    $designate_to->{G96}->{default}  = 1;
    $designate_to->{G96n}->{default} = -1;

    for my $type (qw/G94 G96 G94n/) {
        ## Characters 0x30..0x3F: alone and prefixed by '!', '"', '#'
        ## or a space.
        for my $code (0x30 .. 0x3F) {
            my $final = chr $code;
            for my $prefix ('', '!', '"', '#', ' ') {
                $designate_to->{$type}->{$prefix . $final} = -1;
            }
        }
        ## Characters 0x40..0x7E: only the space-prefixed form.
        for my $code (0x40 .. 0x7E) {
            $designate_to->{$type}->{' ' . chr $code} = -1;
        }
    }

    $option->{G94n_designate_long}  = 1;
    $option->{Ginvoke_to_left}->[1] = 0;
    $option->{C1invoke_to_right}    = 1;
    $option->{reset}->{Ginvoke}     = 0;
    return $conf;
}

## Decoding state: the common compound-text setup plus an initial G1 set.
sub __2022_decode ($) {
    my ($self) = @_;
    my $conf = $self->__2022__common;
    ## Emacsen's x-ctext
    $conf->{G1} = $Encode::ISO2022::CHARSET{G96}->{A};    ## ISO/IEC 8859-1
    return $conf;
}
369
370
371 1;
372 __END__
373
374 =back
375
376 =head1 LICENSE
377
378 Copyright 2002 Wakaba <w@suika.fam.cx>
379
380 This library is free software; you can redistribute it
381 and/or modify it under the same terms as Perl itself.
382
383 =cut
384
385 # $Date: 2002/09/22 11:08:23 $
386 ### EightBit.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24