1 |
wakaba |
1.1 |
=head1 NAME |
2 |
|
|
|
3 |
|
|
Encode::ISO2022::EightBit --- Encode and decode of 8-bit ISO/IEC 2022 |
4 |
|
|
based encodings (most of them are also known as EUCs) |
5 |
|
|
|
6 |
|
|
=head1 ENCODINGS |
7 |
|
|
|
8 |
|
|
=over 4 |
9 |
|
|
|
10 |
|
|
=cut |
11 |
|
|
|
12 |
|
|
require 5.7.3; |
13 |
|
|
use strict; |
14 |
|
|
package Encode::ISO2022::EightBit;
use vars qw($VERSION);
$VERSION=do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
use base qw(Encode::Encoding);
require Encode::ISO2022;

## encode ($obj, $str [, $chk])
##   Converts $str with the tables named by __encode_map (unless the
##   object's _encode_mapping slot is defined and false), then serializes
##   the result with Encode::ISO2022::internal_to_iso2022 using the
##   configuration returned by __2022_encode.  When $chk is true the
##   caller's source string ($_[1]) is emptied.  Returns the encoded string.
sub encode ($$;$) {
  my ($obj, $str, $chk) = @_;
  $_[1] = '' if $chk;

  ## Table mapping is on by default; only a defined-but-false value
  ## switches it off.
  my $mapping = $obj->{_encode_mapping};
  if (not defined $mapping or $mapping) {
    require Encode::Table;
    my $autoload = $obj->{_encode_mapping_autoload};
    $autoload = 1 unless defined $autoload;
    $str = Encode::Table::convert ($str, $obj->__encode_map,
                                   -autoload => $autoload);
  }

  $str = &Encode::ISO2022::internal_to_iso2022 ($str, $obj->__2022_encode);
  return $str;
}

## decode ($obj, $str [, $chk])
##   Mirror image of encode: parses $str with
##   Encode::ISO2022::iso2022_to_internal using the configuration returned
##   by __2022_decode, then applies the tables named by __decode_map
##   (unless _decode_mapping is defined and false).  When $chk is true the
##   caller's source string ($_[1]) is emptied.  Returns the decoded string.
sub decode ($$;$) {
  my ($obj, $str, $chk) = @_;
  $_[1] = '' if $chk;

  $str = &Encode::ISO2022::iso2022_to_internal ($str, $obj->__2022_decode);

  my $mapping = $obj->{_decode_mapping};
  if (not defined $mapping or $mapping) {
    require Encode::Table;
    my $autoload = $obj->{_decode_mapping_autoload};
    $autoload = 1 unless defined $autoload;
    $str = Encode::Table::convert ($str, $obj->__decode_map,
                                   -autoload => $autoload);
  }

  return $str;
}

## prototype for EUCs

## __2022__common
##   Builds a fresh Encode::ISO2022 object configured for 8-bit use.
##   Subclasses call this through SUPER and then fill in G1..G3.
sub __2022__common ($) {
  my $conf = Encode::ISO2022->new_object;
  $conf->{bit} = 8;

  my $option = $conf->{option} ||= {};
  my $designate_to = $option->{designate_to} ||= {};

  ## Every charset type gets a default of -1 ...
  $designate_to->{$_}->{default} = -1 for (qw/C0 C1 G94 G94n G96 G96n/);
  ## ... and the C0 set with final byte 0x40 is mapped to 0.
  $designate_to->{C0}->{"\x40"} = 0;

  $option->{Ginvoke_to_left} = [1, 0, 0, 0];
  $option->{Ginvoke_by_single_shift} = [0, 0, 1, 1];
  $option->{C1invoke_to_right} = 1;
  @{ $option->{reset} }{qw/Gdesignation Ginvoke/} = (0, 0);

  return $conf;
}

## __2022_encode
##   ISO/IEC 2022 configuration used by encode; by default identical to
##   the one built by __2022__common.
sub __2022_encode ($) {
  my ($self) = @_;
  my $conf = $self->__2022__common;
  return $conf;
}

## __2022_decode
##   ISO/IEC 2022 configuration used by decode; by default identical to
##   the one built by __2022__common.
sub __2022_decode ($) {
  my ($self) = @_;
  my $conf = $self->__2022__common;
  return $conf;
}

## __encode_map
##   Names of the tables handed to Encode::Table::convert by encode.
##   The base class has none.
sub __encode_map ($) {
  return [];
}

## __decode_map
##   Names of the tables handed to Encode::Table::convert by decode.
##   The base class has none.
sub __decode_map ($) {
  return [];
}

## __clone
##   Returns a shallow copy of the object, blessed into the same class.
sub __clone ($) {
  my ($self) = @_;
  my %copy = %$self;
  return bless \%copy, ref $self;
}
84 |
|
|
|
85 |
|
|
package Encode::ISO2022::EightBit::EUCJapanOldDefinition;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(
  ujis x-ujis euc-jp-1983 euc-japan-1983 deckanji
));

=item ujis

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
with JIS X 0208 (JIS C 6226)-1983
(obsoleted definition of pre-1990 days).  (Alias: x-ujis,
euc-japan-1983, euc-jp-1983, deckanji)

=cut

## Parent configuration plus the graphic sets of this encoding.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  my $charset = \%Encode::ISO2022::CHARSET;
  @$conf{qw/G1 G2 G3/} = (
    $charset->{G94n}->{B},     ## JIS X 0208-1983
    $charset->{G94}->{I},      ## JIS X 0201 Katakana
    $charset->{G94n}->{' @'},  ## Gaiji (undefined)
  );
  return $conf;
}

## Table names applied by encode before ISO/IEC 2022 serialization.
sub __encode_map ($) {
  return [
    'ucs_to_ascii',
    'ucs_to_jisx0208_1983',
    'ucs_to_jisx0201_katakana',
  ];
}

## Table names applied by decode after ISO/IEC 2022 parsing.
sub __decode_map ($) {
  return [
    'jisx0208_1983_to_ucs',
    'jisx0201_katakana_to_ucs',
  ];
}
112 |
wakaba |
1.1 |
|
113 |
|
|
package Encode::ISO2022::EightBit::EUCJapan;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(
  euc-japan euc-japan-1990 euc-japan-1997
  euc-jp euc-jp-1990 euc-jp-1997
  euc-j eucjp euc_jp x-euc-jp x-eucjp
  eucjis euc-jis eucj
  Extended_UNIX_Code_Packed_Format_for_Japanese
  csEUCPkdFmtJapanese eujis
  japanese-iso-8bit cp51932 japanese_euc
  ajec eucjp-open ibm-eucjp cp33722 33722 sdeckanji
));

=item euc-japan

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
with JIS X 0208-1990.
(Alias: euc-japan-1990 (emacsen), euc-japan-1997, euc-jp (IANA), euc-jp-1990,
euc-jp-1997, eujis (locale),
euc-j, eucjp (locale), euc_jp, eucj, x-eucjp, x-euc-jp, eucjis, euc-jis,
extended_unix_code_packed_format_for_japanese (IANA),
cseucpkdfmtjapanese (IANA), japanese-iso-8bit (emacsen),
cp51932 (M$), japanese_euc)

=cut

## Parent configuration plus the graphic sets of this encoding.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  my $charset = \%Encode::ISO2022::CHARSET;
  @$conf{qw/G1 G2 G3/} = (
    $charset->{G94n}->{'B@'},  ## JIS X 0208-1990
    $charset->{G94}->{I},      ## JIS X 0201 Katakana
    $charset->{G94n}->{D},     ## JIS X 0212-1990
  );
  return $conf;
}

## Table names applied by encode before ISO/IEC 2022 serialization.
sub __encode_map ($) {
  return [
    'ucs_to_ascii',
    'ucs_to_jisx0208_1990',
    'ucs_to_jisx0212_1990',
    'ucs_to_jisx0201_katakana',
  ];
}

## Table names applied by decode after ISO/IEC 2022 parsing.
sub __decode_map ($) {
  return [
    'jisx0208_1990_to_ucs',
    'jisx0212_1990_to_ucs',
    'jisx0201_katakana_to_ucs',
  ];
}
149 |
|
|
|
150 |
|
|
package Encode::ISO2022::EightBit::EUCJapan1978;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(euc-japan-1978 euc-jp-1978));

=item euc-japan-1978

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
with JIS X 0208 (JIS C 6226)-1978.  (Alias: euc-jp-1978)

=cut

## Parent configuration plus the graphic sets of this encoding.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  my $charset = \%Encode::ISO2022::CHARSET;
  @$conf{qw/G1 G2 G3/} = (
    $charset->{G94n}->{'@'},  ## JIS X 0208-1978
    $charset->{G94}->{I},     ## JIS X 0201 Katakana
    $charset->{G94n}->{D},    ## JIS X 0212-1990
  );
  return $conf;
}

## Table names applied by encode before ISO/IEC 2022 serialization.
sub __encode_map ($) {
  return [
    'ucs_to_ascii',
    'ucs_to_jisx0208_1978',
    'ucs_to_jisx0212_1990',
    'ucs_to_jisx0201_katakana',
  ];
}

## Table names applied by decode after ISO/IEC 2022 parsing.
sub __decode_map ($) {
  return [
    'jisx0208_1978_to_ucs',
    'jisx0212_1990_to_ucs',
    'jisx0201_katakana_to_ucs',
  ];
}
175 |
wakaba |
1.1 |
|
176 |
|
|
package Encode::ISO2022::EightBit::EUCJISX0213;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(
  euc-jisx0213 x-euc-jisx0213 euc_jisx0213 eucjp0213
  euc-jp-3 euc-japan-2000 euc-jp-2000 x-euc-jisx0213-packed deckanji2000
));

=item euc-jisx0213

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
with JIS X 0213:2000, defined by JIS X 0213:2000.
(Alias: x-euc-jisx0213, euc_jisx0213, eucjp0213, euc-jp-3,
euc-japan-2000, euc-jp-2000)

=cut

## Parent configuration plus the graphic sets of this encoding.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  my $charset = \%Encode::ISO2022::CHARSET;
  @$conf{qw/G1 G2 G3/} = (
    $charset->{G94n}->{O},  ## JIS X 0213:2000 plane 1
    $charset->{G94}->{I},   ## JIS X 0201 Katakana
    $charset->{G94n}->{P},  ## JIS X 0213:2000 plane 2
  );
  return $conf;
}

## Table names applied by encode before ISO/IEC 2022 serialization.
sub __encode_map ($) {
  return [
    'ucs_to_ascii',
    'ucs_to_jisx0213_2000_1',
    'ucs_to_jisx0213_2000_2',
    'ucs_to_jisx0201_katakana',
  ];
}
201 |
|
|
## Table names applied by decode after ISO/IEC 2022 parsing.
## The plane 2 entry mirrors the encoder's ucs_to_jisx0213_2000_2; the
## former name "jisx0212_0213_to_ucs" did not follow the X_to_ucs /
## ucs_to_X naming used by every other table in this file.
sub __decode_map ($) {
  [qw/jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
}
204 |
wakaba |
1.1 |
|
205 |
|
|
package Encode::ISO2022::EightBit::EUCJISX0213Plane1;
use vars qw/@ISA/;
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw/euc-jisx0213-plane1/);

=item euc-jisx0213-plane1

EUC (ISO/IEC 2022 based 8-bit encoding) for Japanese
with JIS X 0213:2000 plane 1, defined by JIS X 0213:2000

=cut

## Parent configuration plus the graphic sets matching this package's
## mapping tables.  Without this override neither encode nor decode had
## any G1..G3 set assigned.
sub __2022__common ($) {
  my $C = shift->SUPER::__2022__common;
  $C->{G1} = $Encode::ISO2022::CHARSET{G94n}->{O};  ## JIS X 0213:2000 plane 1
  $C->{G2} = $Encode::ISO2022::CHARSET{G94}->{I};   ## JIS X 0201 Katakana
  $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{P};  ## JIS X 0213:2000 plane 2
  $C;
}

## Configuration used by encode: same as __2022__common, but G3 is
## replaced by the empty set so that only plane 1 is generated.
## (This is the hook the inherited encode method actually calls; it was
## previously misspelled "__2022__encode" and therefore never invoked.)
sub __2022_encode ($) {
  my $C = shift->__2022__common;
  $C->{G3} = $Encode::ISO2022::CHARSET{G94n}->{"\x7E"};  ## empty
  $C;
}

## Former (misspelled) name of __2022_encode, retained so that any
## existing caller of the old name keeps working.
sub __2022__encode ($) {
  shift->__2022_encode;
}

## Table names applied by encode before ISO/IEC 2022 serialization.
sub __encode_map ($) {
  [qw/ucs_to_ascii ucs_to_jisx0213_2000_1 ucs_to_jisx0201_katakana/];
}

## Table names applied by decode after ISO/IEC 2022 parsing.
## The plane 2 entry follows the X_to_ucs naming of the other tables
## (was "jisx0212_0213_to_ucs").
sub __decode_map ($) {
  [qw/jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
}
228 |
wakaba |
1.1 |
|
229 |
|
|
package Encode::ISO2022::EightBit::EUCCHINA;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(
  euc-china euc-cn euccn euc-gb
  cn-gb cn-gb-2312 chinese-iso-8bit ugb
  gb2312 csgb2312 x-euc-cn cp51936 ibm-euccn CP1383 1383
));

=item euc-china

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese.
(Alias: euc-cn (emacsen), euccn, euc-gb, cn-gb (RFC 1922), cn-gb-2312 (RFC 1922),
chinese-iso-8bit (emacsen), ugb (locale), gb2312 (IANA), csgb2312 (IANA),
x-euc-cn, CP51936 (M$))

=cut

## Parent configuration with GB 2312 as G1.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{A};  ## GB 2312
  return $conf;
}

## Table names applied by encode before ISO/IEC 2022 serialization.
sub __encode_map ($) {
  return [
    'ucs_to_ascii',
    'ucs_to_gb2312_1980',
  ];
}

## Table names applied by decode after ISO/IEC 2022 parsing.
sub __decode_map ($) {
  return ['gb2312_1980_to_ucs'];
}
256 |
wakaba |
1.1 |
|
257 |
|
|
package Encode::ISO2022::EightBit::EUCCHINA165;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(cn-gb-isoir165 iso-ir-165));

=item cn-gb-isoir165

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
with ISO-IR 165.  (Alias: cn-gb-isoir165 (RFC 1922),
ISO-IR-165)

=cut

## Parent configuration with ISO-IR 165 as G1.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{"\x45"};  ## ISO-IR 165
  return $conf;
}

## Table names applied by encode before ISO/IEC 2022 serialization.
sub __encode_map ($) {
  return [
    'ucs_to_ascii',
    'ucs_to_iso_ir_165',
  ];
}

## Table names applied by decode after ISO/IEC 2022 parsing.
sub __decode_map ($) {
  return ['iso_ir_165_to_ucs'];
}
281 |
wakaba |
1.1 |
|
282 |
wakaba |
1.3 |
package Encode::ISO2022::EightBit::EUCcwnn;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(euc-cwnn cwnn-iso-8bit));

=item euc-cwnn

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
with GB 2312, used by cwnn input system (Alias: cwnn-iso-8bit).
See <http://www.tomo.gr.jp/users/wnn/9912ml/msg00088.html>.

=cut

## Parent configuration plus the graphic sets of this encoding.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  my $charset = \%Encode::ISO2022::CHARSET;
  @$conf{qw/G1 G2/} = (
    $charset->{G94n}->{A},   ## GB 2312
    $charset->{G94}->{'0'},  # omron_udc_zh (sisheng)
  );
  ## TODO: Implement by private set support
  return $conf;
}

## Table names applied by encode before ISO/IEC 2022 serialization.
sub __encode_map ($) {
  return [
    'ucs_to_ascii',
    'ucs_to_gb2312_1980',
    'ucs_to_omron_udc_zh',
  ];
}

## Table names applied by decode after ISO/IEC 2022 parsing.
sub __decode_map ($) {
  return [
    'gb2312_1980_to_ucs',
    'omron_udc_zh_to_ucs',
  ];
}
308 |
wakaba |
1.3 |
|
309 |
|
|
## cn-gb-12345, gb12345, euc-gb12345 |
310 |
|
|
|
311 |
wakaba |
1.1 |
package Encode::ISO2022::EightBit::EUCKorea;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(
  euc-korea euc-kr euckr cp970 cp51949 ibm-euckr x-euc-kr
  cseuckr korean-iso-8bit
));

=item euc-korea

EUC (ISO/IEC 2022 based 8-bit encoding) for Korean
(Alias: euc-kr (IANA), euckr, cp970, cp51949 (M$), ibm-euckr,
x-euc-kr, cseuckr (IANA), korean-iso-8bit (emacsen))

=cut

## Parent configuration with KS X 1001 as G1.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{C};  ## KS X 1001
  return $conf;
}
330 |
wakaba |
1.5 |
## Table names applied by encode before ISO/IEC 2022 serialization.
## The second entry is the UCS -> KS X 1001 table (the counterpart of
## ksx1001_1992_to_ucs used for decoding); it previously read
## "ksx1001_1992_katakana", which matches no other table name in this file.
sub __encode_map ($) {
  [qw/ucs_to_ascii ucs_to_ksx1001_1992/];
}
333 |
|
|
## Table names applied by decode after ISO/IEC 2022 parsing.
sub __decode_map ($) {
  my @tables = ('ksx1001_1992_to_ucs');
  return \@tables;
}
336 |
wakaba |
1.1 |
|
337 |
|
|
package Encode::ISO2022::EightBit::EUCTaiwan;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(
  euc-taiwan euc-tw euctw x-euc-tw cns11643 cseuctw
  ibm-euctw cp964
));

=item euc-taiwan

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
with CNS 11643.  (Alias: euc-tw, euctw, x-euc-tw, cseuctw, cns11643)

=cut

## Parent configuration plus the graphic sets of this encoding.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  my $charset = \%Encode::ISO2022::CHARSET;
  $conf->{G1} = $charset->{G94n}->{G};  ## plane 1
  #$conf->{G2} =  ## BUG: does not support plane 2-16 yet
  $conf->{G2} = $charset->{G94n}->{' `'};  # 3byte DRCS (temporary)
  return $conf;
}

## Table names applied by encode before ISO/IEC 2022 serialization:
## ASCII followed by CNS 11643 planes 1 to 16, in that order.
sub __encode_map ($) {
  my @tables = ('ucs_to_ascii');
  push @tables, "ucs_to_cns11643_$_" for (1 .. 16);
  return \@tables;
}

## Table names applied by decode after ISO/IEC 2022 parsing:
## CNS 11643 planes 1 to 16, in that order.
sub __decode_map ($) {
  my @tables;
  push @tables, "cns11643_${_}_to_ucs" for (1 .. 16);
  return \@tables;
}
363 |
wakaba |
1.3 |
|
364 |
|
|
package Encode::ISO2022::EightBit::EUCtwnn;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(euc-twnn twnn-iso-8bit));

=item euc-twnn

EUC (ISO/IEC 2022 based 8-bit encoding) for Chinese
with CNS 11643, used by twnn input system (Alias: twnn-iso-8bit)
See <http://www.tomo.gr.jp/users/wnn/9912ml/msg00088.html>.

=cut

## Parent configuration plus the graphic sets of this encoding.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  my $charset = \%Encode::ISO2022::CHARSET;
  $conf->{G1} = $charset->{G94n}->{G};   ## plane 1
  $conf->{G2} = $charset->{G94}->{'0'};  # omron_udc_zh (sisheng)
  ## TODO: Implement by private set support
  $conf->{G3} = $charset->{G94n}->{H};   ## plane 2
  return $conf;
}

## Table names applied by encode before ISO/IEC 2022 serialization.
sub __encode_map ($) {
  return [
    'ucs_to_ascii',
    'ucs_to_cns11643_1',
    'ucs_to_cns11643_2',
    'ucs_to_omron_udc_zh',
  ];
}

## Table names applied by decode after ISO/IEC 2022 parsing.
sub __decode_map ($) {
  return [
    'cns11643_1_to_ucs',
    'cns11643_2_to_ucs',
    'omron_udc_zh_to_ucs',
  ];
}
391 |
wakaba |
1.1 |
|
392 |
|
|
package Encode::ISO2022::EightBit::EUCKPS9566;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(euc-kps9566));

=item euc-kps9566

EUC (ISO/IEC 2022 based 8-bit encoding) for Korean
with KPS 9566-97

=cut

## Parent configuration with KPS 9566-97 as G1.
sub __2022__common ($) {
  my ($self) = @_;
  my $conf = $self->SUPER::__2022__common;
  $conf->{G1} = $Encode::ISO2022::CHARSET{G94n}->{N};  ## KPS 9566-97
  return $conf;
}

## Table names applied by encode before ISO/IEC 2022 serialization.
sub __encode_map ($) {
  return [
    'ucs_to_ascii',
    'ucs_to_kps9566_1997',
  ];
}

## Table names applied by decode after ISO/IEC 2022 parsing.
sub __decode_map ($) {
  return ['kps9566_1997_to_ucs'];
}
415 |
wakaba |
1.1 |
|
416 |
|
|
package Encode::ISO2022::EightBit::SS2;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(iso-2022-8bit-ss2));

=item iso-2022-8bit-ss2

ISO/IEC 2022 based 8-bit encoding using SS2 for 96-charset

=cut

## Builds the configuration from a fresh Encode::ISO2022 object (the
## parent class's __2022__common is not used): 96 and 96^n sets are
## designated to element 2, which is invoked by single shift.
sub __2022__common ($) {
  my $conf = Encode::ISO2022->new_object;
  my $option = $conf->{option} ||= {};
  my $designate_to = $option->{designate_to} ||= {};

  $designate_to->{$_}->{default} = 2 for (qw/G96 G96n/);
  $option->{G94n_designate_long} = 1;
  $option->{Ginvoke_by_single_shift}->[2] = 1;
  $option->{Ginvoke_to_left}->[2] = 0;
  $option->{C1invoke_to_right} = 1;

  return $conf;
}
437 |
|
|
|
438 |
|
|
package Encode::ISO2022::EightBit::CompoundText;
use vars qw(@ISA);
push @ISA, 'Encode::ISO2022::EightBit';
__PACKAGE__->Define (qw(
  compound-text compound_text
  x-compound-text ctext x-ctext ct
));

=item compound-text

ISO/IEC 2022 based 8-bit encoding used in inter-client
communication of X Window System (Alias: ctext (emacsen), x-ctext (emacsen),
compound_text, x-compound-text, ct (locale))

Strictly, x-ctext, extended compound text (X Compound Text
based encoding for unknown ISO/IEC 2022 based encoding) is a
different coding system from X Compound Text.  See
[mule-jp:7455] <mid:rsqsoa5s2hr.fsf@crane.jaist.ac.jp> and
[mule-jp:7457] <mid:rsq4smlky85.fsf@crane.jaist.ac.jp>.
Mule's ctext allows private final bytes and 96^n sets.

XFree86 extended compound text allows to use UTF-8
with ESC 02/05 04/07.  See
<http://cvsweb.xfree86.org/cvsweb/xc/doc/specs/CTEXT/ctext.tbl.ms>.

=cut

## TODO: separate mule ctext and xfree86 ctext

## Builds the configuration from a fresh Encode::ISO2022 object (the
## parent class's __2022__common is not used).
sub __2022__common ($) {
  my $conf = Encode::ISO2022->new_object;
  my $option = $conf->{option} ||= {};
  my $designate_to = $option->{designate_to} ||= {};

  $designate_to->{C0}->{default} = -1;
  $designate_to->{C1}->{default} = -1;
  $designate_to->{G94}->{I} = 1;  ## JIS X 0201 Katakana
  $designate_to->{G96}->{default} = 1;
  $designate_to->{G96n}->{default} = -1;

  for my $type (qw/G94 G96 G94n/) {
    my $slot = $designate_to->{$type} ||= {};
    ## Final bytes 0x30..0x3F combined with an intermediate byte are
    ## mapped to -1.  The same final bytes without an intermediate byte
    ## are not touched here (the statement for them is commented out):
    ##   #$slot->{$final} = -1;
    for my $final (map { chr $_ } 0x30 .. 0x3F) {
      $slot->{$_ . $final} = -1 for ('!', '"', '#', ' ');
    }
    ## Every final byte 0x40..0x7E preceded by a space intermediate.
    $slot->{' ' . chr $_} = -1 for (0x40 .. 0x7E);
  }

  $option->{G94n_designate_long} = 1;
  $option->{Ginvoke_to_left}->[1] = 0;
  $option->{C1invoke_to_right} = 1;
  $option->{reset}->{Ginvoke} = 0;
  $option->{use_revision} = 0;

  ## XFree86 extension
  $designate_to->{G94n}->{"\x42\x40"} = 0;  ## JIS X 0208-1990
  my $coding_system = $designate_to->{coding_system} ||= {};
  $coding_system->{"\x40"} = 1;  ## ISO/IEC 2022
  $coding_system->{"\x47"} = 1;  ## UTF-8

  return $conf;
}

## Configuration used by decode: the common one with ISO/IEC 8859-1
## preset as G1.
sub __2022_decode ($) {
  my ($self) = @_;
  my $conf = $self->__2022__common;
  ## Emacsen's x-ctext
  $conf->{G1} = $Encode::ISO2022::CHARSET{G96}->{A};  ## ISO/IEC 8859-1
  return $conf;
}

## Table names applied by encode before ISO/IEC 2022 serialization.
sub __encode_map ($) {
  return [qw(
    ucs_to_ascii
    ucs_to_isoiec8859_1 ucs_to_isoiec8859_2 ucs_to_isoiec8859_3
    ucs_to_isoiec8859_4 ucs_to_isoiec8859_5 ucs_to_isoiec8859_6
    ucs_to_isoiec8859_7 ucs_to_isoiec8859_8 ucs_to_isoiec8859_9
    ucs_to_gb2312_1980 ucs_to_jisx0208_1983 ucs_to_ksx1001_1992
    ucs_to_jisx0212_1990
    ucs_to_jisx0201_latin ucs_to_jisx0201_katakana
  )];
}

## Table names applied by decode after ISO/IEC 2022 parsing.
sub __decode_map ($) {
  return [qw(
    isoiec8859_2_to_ucs isoiec8859_3_to_ucs isoiec8859_4_to_ucs
    isoiec8859_5_to_ucs isoiec8859_6_to_ucs isoiec8859_7_to_ucs
    isoiec8859_8_to_ucs isoiec8859_9_to_ucs
    gb2312_1980_to_ucs jisx0208_1983_to_ucs ksx1001_1992_to_ucs
    jisx0212_1990_to_ucs
  )];
}
508 |
wakaba |
1.1 |
|
509 |
|
|
|
510 |
|
|
1; |
511 |
|
|
__END__ |
512 |
|
|
|
513 |
|
|
=back |
514 |
|
|
|
515 |
|
|
=head1 LICENSE |
516 |
|
|
|
517 |
wakaba |
1.2 |
Copyright 2002 Wakaba <w@suika.fam.cx> |
518 |
wakaba |
1.1 |
|
519 |
|
|
This library is free software; you can redistribute it |
520 |
|
|
and/or modify it under the same terms as Perl itself. |
521 |
|
|
|
522 |
|
|
=cut |
523 |
|
|
|
524 |
wakaba |
1.5 |
# $Date: 2002/10/04 23:58:04 $ |
525 |
wakaba |
1.1 |
### EightBit.pm ends here |