1 |
wakaba |
1.1 |
=head1 NAME |
2 |
|
|
|
3 |
|
|
Encode::ISO2022::EightBit --- Encode and decode of 8-bit ISO/IEC 2022 |
4 |
|
|
based encodings (most of them are also known as EUCs) |
5 |
|
|
|
6 |
|
|
=head1 ENCODINGS |
7 |
|
|
|
8 |
|
|
=over 4 |
9 |
|
|
|
10 |
|
|
=cut |
11 |
|
|
|
12 |
|
|
require 5.7.3;
use strict;

package Encode::ISO2022::EightBit;

use vars qw($VERSION);

# Derive the module version from the CVS revision keyword: the first
# number is printed as-is and every following number is zero-padded to
# two digits (revision "1.2" yields "1.02").
$VERSION = do {
    my @rev = (q$Revision: 1.2 $ =~ /\d+/g);
    sprintf '%d.' . '%02d' x $#rev, @rev;
};

# Encode::Encoding supplies the encoding-object base class;
# Encode::ISO2022 supplies the conversion routines and charset table
# used by the subs in this file.
use base qw(Encode::Encoding);
require Encode::ISO2022;
19 |
|
|
|
20 |
|
|
# encode($obj, $str [, $chk])
#
# Converts $str from Perl's internal representation to the ISO/IEC 2022
# based byte sequence, using the context object returned by
# $obj->__2022_encode.  Returns the converted string.
#
# When $chk is true the caller's source variable (aliased through @_) is
# emptied -- presumably to report that all input was consumed, following
# the Encode CHECK convention.
sub encode ($$;$) {
    my ($self, $text, $check) = @_;

    # The local copy was taken above, so clearing the caller's variable
    # does not affect the text being converted.
    if ($check) {
        $_[1] = '';
    }

    # The &-form call is kept deliberately so that any prototype on the
    # converter is bypassed exactly as before.
    my $encoded
        = &Encode::ISO2022::internal_to_iso2022($text, $self->__2022_encode);
    return $encoded;
}
26 |
|
|
|
27 |
|
|
# decode($obj, $str [, $chk])
#
# Converts the ISO/IEC 2022 based byte sequence $str to Perl's internal
# representation, using the context object returned by
# $obj->__2022_decode.  The converter's result is returned directly.
#
# When $chk is true the caller's source variable (aliased through @_) is
# emptied -- presumably to report that all input was consumed, following
# the Encode CHECK convention.
sub decode ($$;$) {
    my ($self, $octets, $check) = @_;

    # The local copy was taken above, so clearing the caller's variable
    # does not affect the data being converted.
    if ($check) {
        $_[1] = '';
    }

    # The &-form call is kept deliberately so that any prototype on the
    # converter is bypassed exactly as before.
    return &Encode::ISO2022::iso2022_to_internal($octets, $self->__2022_decode);
}
32 |
|
|
|
33 |
|
|
## prototype for EUCs |
34 |
|
|
|
35 |
|
|
# Builds the ISO/IEC 2022 context object shared by encoding and decoding
# for the EUC-style encodings.  Subclasses call this through SUPER:: and
# then fill in the G1..G3 charsets.
#
# NOTE(review): the meaning of the option values is defined by
# Encode::ISO2022, which is not visible here; -1 presumably means
# "designation not allowed" and 0..3 a G0..G3 / C0 slot -- confirm.
sub __2022__common ($) {
    my $conf = Encode::ISO2022->new_object;
    $conf->{bit} = 8;

    # Same default for every charset type ...
    for my $type (qw(C0 C1 G94 G94n G96 G96n)) {
        $conf->{option}{designate_to}{$type}{default} = -1;
    }
    # ... except the C0 set with final byte 0x40.
    $conf->{option}{designate_to}{C0}{"\x40"} = 0;

    my $option = $conf->{option};
    $option->{Ginvoke_to_left}         = [1, 0, 0, 0];
    $option->{Ginvoke_by_single_shift} = [0, 0, 1, 1];
    $option->{C1invoke_to_right}       = 1;
    $option->{reset}{Gdesignation}     = 0;
    $option->{reset}{Ginvoke}          = 0;

    return $conf;
}
52 |
|
|
# Returns the context object used by encode(); currently just the
# common context, unmodified.
sub __2022_encode ($) {
    my ($self) = @_;
    my $conf = $self->__2022__common;
    return $conf;
}
56 |
|
|
# Returns the context object used by decode(); currently just the
# common context, unmodified.
sub __2022_decode ($) {
    my ($self) = @_;
    my $conf = $self->__2022__common;
    return $conf;
}
60 |
|
|
|
61 |
|
|
package Encode::ISO2022::EightBit::EUCJapanOld;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';

# Register the encoding names handled by this class.
__PACKAGE__->Define(qw(ujis x-ujis deckanji));

=item ujis

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese,
old version (pre-1990).  (Alias: x-ujis, deckanji)

=cut

# Common context for old Japanese EUC: the base EUC context with the
# G1..G3 charsets filled in.
sub __2022__common ($) {
    my $self = shift;
    my $conf = $self->SUPER::__2022__common;

    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}{B};       ## JIS X 0208-1983
    $conf->{G2} = $Encode::ISO2022::CHARSET{G94}{I};        ## JIS X 0201 Katakana
    $conf->{G3} = $Encode::ISO2022::CHARSET{G94n}{' @'};    ## Gaiji (undefined)

    return $conf;
}
80 |
|
|
|
81 |
|
|
package Encode::ISO2022::EightBit::EUCJapan;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';

# Register the encoding names handled by this class.
__PACKAGE__->Define(qw(
    euc-japan euc-japan-1990 euc-jp eucjp euc_jp x-euc-jp
    Extended_UNIX_Code_Packed_Format_for_Japanese csEUCPkdFmtJapanese
    japanese-iso-8bit cp51932 japanese_euc
    ajec eucjp-open ibm-eucjp cp33722 33722 sdeckanji
));

=item euc-japan

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese.
(Alias: euc-japan-1990 (emacsen), euc-jp (IANA),
eucjp (locale), euc_jp, x-euc-jp,
extended_unix_code_packed_format_for_japanese (IANA),
cseucpkdfmtjapanese (IANA), japanese-iso-8bit (emacsen),
cp51932 (Microsoft), japanese_euc, ajec, eucjp-open,
ibm-eucjp, cp33722, 33722, sdeckanji)

=cut

# Common context for Japanese EUC: the base EUC context with the
# G1..G3 charsets filled in.
sub __2022__common ($) {
    my $self = shift;
    my $conf = $self->SUPER::__2022__common;

    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}{'B@'};    ## JIS X 0208-1990
    $conf->{G2} = $Encode::ISO2022::CHARSET{G94}{I};        ## JIS X 0201 Katakana
    $conf->{G3} = $Encode::ISO2022::CHARSET{G94n}{D};       ## JIS X 0212-1990

    return $conf;
}
107 |
|
|
|
108 |
|
|
package Encode::ISO2022::EightBit::EUCJISX0213;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';

# Register the encoding names handled by this class.
__PACKAGE__->Define(qw(
    euc-jisx0213 x-euc-jisx0213
    x-euc-jisx0213-packed deckanji2000
));

=item euc-jisx0213

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
with JIS X 0213:2000, defined by JIS X 0213:2000.
(Alias: x-euc-jisx0213, x-euc-jisx0213-packed, deckanji2000)

=cut

# Common context for EUC-JISX0213: the base EUC context with the
# G1..G3 charsets filled in.
sub __2022__common ($) {
    my $self = shift;
    my $conf = $self->SUPER::__2022__common;

    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}{O};   ## JIS X 0213:2000 plane 1
    $conf->{G2} = $Encode::ISO2022::CHARSET{G94}{I};    ## JIS X 0201 Katakana
    $conf->{G3} = $Encode::ISO2022::CHARSET{G94n}{P};   ## JIS X 0213:2000 plane 2

    return $conf;
}
129 |
|
|
|
130 |
|
|
package Encode::ISO2022::EightBit::EUCJISX0213Plane1;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';

# Register the encoding name handled by this class.
__PACKAGE__->Define(qw(euc-jisx0213-plane1));

=item euc-jisx0213-plane1

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
with JIS X 0213:2000 plane 1, defined by JIS X 0213:2000

=cut

# Common context for the plane-1-only variant of EUC-JISX0213.
#
# This class derives from the generic EUC base, whose context does not
# assign G1 or G2.  They are therefore set here explicitly; previously
# only G3 was assigned, so JIS X 0213 plane 1 was never placed in G1
# despite the documented purpose of this encoding.
sub __2022__common ($) {
    my $self = shift;
    my $conf = $self->SUPER::__2022__common;

    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}{O};   ## JIS X 0213:2000 plane 1
    # NOTE(review): G2 mirrors euc-jisx0213; confirm that the plane-1
    # subset is meant to keep JIS X 0201 Katakana.
    $conf->{G2} = $Encode::ISO2022::CHARSET{G94}{I};    ## JIS X 0201 Katakana
    $conf->{G3} = $Encode::ISO2022::CHARSET{G94n}{"\x7E"};  ## empty (no plane 2)

    return $conf;
}
147 |
|
|
|
148 |
|
|
package Encode::ISO2022::EightBit::EUCCHINA;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';

# Register the encoding names handled by this class.
__PACKAGE__->Define(qw(
    euc-china euc-cn cn-gb cn-gb-2312 chinese-iso-8bit
    gb2312 csgb2312 x-euc-cn cp51936 ibm-euccn CP1383 1383
));

=item euc-china

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese.
(Alias: euc-cn (emacsen), cn-gb (RFC 1922), cn-gb-2312 (RFC 1922),
chinese-iso-8bit (emacsen), gb2312 (IANA), csgb2312 (IANA),
x-euc-cn, CP51936 (Microsoft), ibm-euccn, CP1383, 1383)

=cut

# Common context for Chinese EUC: the base EUC context with G1 set.
sub __2022__common ($) {
    my $self = shift;
    my $conf = $self->SUPER::__2022__common;

    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}{A};   ## GB 2312

    return $conf;
}
168 |
|
|
|
169 |
|
|
package Encode::ISO2022::EightBit::EUCCHINA165;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';

# Register the encoding name handled by this class.
__PACKAGE__->Define(qw(cn-gb-isoir165));

=item cn-gb-isoir165

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
with ISO-IR 165.  (Defined by RFC 1922)

=cut

# Common context for Chinese EUC with ISO-IR 165: the base EUC context
# with G1 set.
sub __2022__common ($) {
    my $self = shift;
    my $conf = $self->SUPER::__2022__common;

    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}{"\x45"};  ## ISO-IR 165

    return $conf;
}
186 |
|
|
|
187 |
|
|
package Encode::ISO2022::EightBit::EUCKorea;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';

# Register the encoding names handled by this class.
__PACKAGE__->Define(qw(
    euc-korea euc-kr cp970 cp51949 ibm-euckr x-euc-kr
    cseuckr korean-iso-8bit
));

=item euc-korea

EUC (ISO/IEC 2022 based 8-bit encoding) for Korean
(Alias: euc-kr (IANA), cp970, cp51949 (Microsoft), ibm-euckr,
x-euc-kr, cseuckr (IANA), korean-iso-8bit (emacsen))

=cut

# Common context for Korean EUC: the base EUC context with G1 set.
sub __2022__common ($) {
    my $self = shift;
    my $conf = $self->SUPER::__2022__common;

    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}{C};   ## KS X 1001

    return $conf;
}
206 |
|
|
|
207 |
|
|
package Encode::ISO2022::EightBit::EUCTaiwan;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';

# Register the encoding names handled by this class.
__PACKAGE__->Define(qw(euc-taiwan euc-tw x-euc-tw cns11643 ibm-euctw cp964));

=item euc-taiwan

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
with CNS 11643.  (Alias: euc-tw, x-euc-tw, cns11643, ibm-euctw, cp964)

=cut

# Common context for Taiwanese EUC: the base EUC context with the
# G1..G3 charsets filled in.
sub __2022__common ($) {
    my $self = shift;
    my $conf = $self->SUPER::__2022__common;

    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}{G};   ## plane 1
    $conf->{G2} = $Encode::ISO2022::CHARSET{G94n}{H};   ## plane 2

    ## BUG: planes 3-16 are not supported yet; G3 is a temporary
    ## 3-byte DRCS placeholder instead.
    $conf->{G3} = $Encode::ISO2022::CHARSET{G94n}{' `'};

    return $conf;
}
227 |
|
|
|
228 |
|
|
package Encode::ISO2022::EightBit::EUCKPS9566;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';

# Register the encoding name handled by this class.
__PACKAGE__->Define(qw(euc-kps9566));

=item euc-kps9566

EUC (ISO/IEC 2022 based 8-bit encoding) for Korean
with KPS 9566-97

=cut

# Common context for KPS 9566 EUC: the base EUC context with G1 set.
sub __2022__common ($) {
    my $self = shift;
    my $conf = $self->SUPER::__2022__common;

    $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}{N};   ## KPS 9566-97

    return $conf;
}
245 |
|
|
|
246 |
|
|
package Encode::ISO2022::EightBit::SS2;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';

# Register the encoding name handled by this class.
__PACKAGE__->Define(qw(iso-2022-8bit-ss2));

=item iso-2022-8bit-ss2

ISO/IEC 2022 based 8-bit encoding using SS2 for 96-charset

=cut

# Common context for iso-2022-8bit-ss2.  Unlike the EUC classes this
# starts from a fresh Encode::ISO2022 object rather than the base
# class's EUC context.
#
# NOTE(review): option semantics are defined by Encode::ISO2022 (not
# visible here); the values below presumably route 96-charsets to G2
# and make G2 reachable by single shift only -- confirm.
sub __2022__common ($) {
    my $conf = Encode::ISO2022->new_object;

    for my $type (qw(G96 G96n)) {
        $conf->{option}{designate_to}{$type}{default} = 2;
    }

    my $option = $conf->{option};
    $option->{G94n_designate_long}          = 1;
    $option->{Ginvoke_by_single_shift}[2]   = 1;
    $option->{Ginvoke_to_left}[2]           = 0;
    $option->{C1invoke_to_right}            = 1;

    return $conf;
}
267 |
|
|
|
268 |
|
|
package Encode::ISO2022::EightBit::CompoundText;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';

# Register the encoding names handled by this class.
__PACKAGE__->Define(qw(compound-text ctext x-ctext));

=item compound-text

ISO/IEC 2022 based 8-bit encoding used in inter-client
communication of X Window System (Alias: ctext (emacsen), x-ctext (emacsen))

=cut

# Common context for Compound Text.  Starts from a fresh
# Encode::ISO2022 object rather than the base class's EUC context.
#
# NOTE(review): option semantics are defined by Encode::ISO2022 (not
# visible here); -1 presumably means "designation not allowed" and
# 0..3 a G0..G3 slot -- confirm.
sub __2022__common ($) {
    my $conf = Encode::ISO2022->new_object;

    $conf->{option}{designate_to}{C0}{default}   = -1;
    $conf->{option}{designate_to}{C1}{default}   = -1;
    $conf->{option}{designate_to}{G94}{I}        = 1;
    $conf->{option}{designate_to}{G96}{default}  = 1;
    $conf->{option}{designate_to}{G96n}{default} = -1;

    my $designate_to = $conf->{option}{designate_to};
    for my $type (qw(G94 G96 G94n)) {
        # Final bytes 0x30..0x3F, bare and with each of the prefixes
        # '!', '"', '#' and ' ' (presumably the private-use final bytes
        # of ISO/IEC 2022 -- confirm).
        for my $code (0x30 .. 0x3F) {
            my $final = chr $code;
            for my $prefix ('', '!', '"', '#', ' ') {
                $designate_to->{$type}{$prefix . $final} = -1;
            }
        }
        # Space-prefixed final bytes 0x40..0x7E (presumably DRCS
        # designations -- confirm).
        for my $code (0x40 .. 0x7E) {
            $designate_to->{$type}{' ' . chr($code)} = -1;
        }
    }

    my $option = $conf->{option};
    $option->{G94n_designate_long} = 1;
    $option->{Ginvoke_to_left}[1]  = 0;
    $option->{C1invoke_to_right}   = 1;
    $option->{reset}{Ginvoke}      = 0;

    return $conf;
}
306 |
|
|
|
307 |
|
|
|
308 |
|
|
1; |
309 |
|
|
__END__ |
310 |
|
|
|
311 |
|
|
=back |
312 |
|
|
|
313 |
|
|
=head1 LICENSE |
314 |
|
|
|
315 |
|
|
Copyright 2002 wakaba <w@suika.fam.cx> |
316 |
|
|
|
317 |
|
|
This library is free software; you can redistribute it |
318 |
|
|
and/or modify it under the same terms as Perl itself. |
319 |
|
|
|
320 |
|
|
=cut |
321 |
|
|
|
322 |
|
|
# $Date: 2002/09/15 05:08:13 $ |
323 |
|
|
### EightBit.pm ends here |