1 |
|
2 |
=head1 NAME |
3 |
|
4 |
Encode::ISO2022 --- ISO/IEC 2022 encoder and decoder |
5 |
|
6 |
=cut |
7 |
|
8 |
require v5.7.3; |
9 |
package Encode::ISO2022; |
10 |
use strict; |
11 |
use vars qw(%CHARSET $VERSION); |
12 |
$VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
13 |
use base qw(Encode::Encoding); |
14 |
__PACKAGE__->Define (qw/iso-2022 iso2022 2022 cp2022/); |
15 |
|
16 |
### --- Intialization |
17 |
|
18 |
my %_CHARS_to_RANGE = ( |
19 |
l94 => q/[\x21-\x7E]/, |
20 |
l96 => q/[\x20-\x7F]/, |
21 |
l128 => q/[\x00-\x7F]/, |
22 |
l256 => q/[\x00-\xFF]/, |
23 |
r94 => q/[\xA1-\xFE]/, |
24 |
r96 => q/[\xA0-\xFF]/, |
25 |
r128 => q/[\x80-\xFF]/, |
26 |
r256 => q/[\x80-\xFF]/, |
27 |
b94 => q/[\x21-\x7E\xA1-\xFE]/, |
28 |
b96 => q/[\x20-\x7F\xA0-\xFF]/, |
29 |
b128 => q/[\x00-\xFF]/, |
30 |
b256 => q/[\x00-\xFF]/, |
31 |
); |
32 |
|
33 |
## --- Make initial charset definitions |
34 |
&_make_initial_charsets; |
35 |
sub _make_initial_charsets () { |
36 |
for my $f (0x30..0x7E) { |
37 |
my $F = pack 'C', $f; |
38 |
for ('', '!', '"', '#') { |
39 |
$CHARSET{G94}->{ $_.$F }->{dimension} = 1; |
40 |
$CHARSET{G94}->{ $_.$F }->{chars} = 94; |
41 |
$CHARSET{G94}->{ $_.$F }->{ucs} = |
42 |
{'' => 0xE90940, '!' => 0xE944A0, '"' => 0xE98000, '#' => 0xE9BB60}->{ $_ } |
43 |
+ 94 * ($f-0x30); |
44 |
|
45 |
$CHARSET{G96}->{ $_.$F }->{dimension} = 1; |
46 |
$CHARSET{G96}->{ $_.$F }->{chars} = 96; |
47 |
$CHARSET{G96}->{ $_.$F }->{ucs} = |
48 |
{'' => 0xE926A0, '!' => 0xE96200, '"' => 0xE99D60, '#' => 0xE9D8C0}->{ $_ } |
49 |
+ 96 * ($f-0x30); |
50 |
|
51 |
$CHARSET{C0}->{ $_.$F }->{dimension} = 1; |
52 |
$CHARSET{C0}->{ $_.$F }->{chars} = 32; |
53 |
$CHARSET{C0}->{ $_.$F }->{ucs} = |
54 |
{'' => 0x70000000, '!' => 0x70001400, |
55 |
'"' => 0x70002800, '#' => 0x70003C00}->{ $_ } + 32 * ($f-0x30); |
56 |
|
57 |
$CHARSET{C1}->{ $_.$F }->{dimension} = 1; |
58 |
$CHARSET{C1}->{ $_.$F }->{chars} = 32; |
59 |
$CHARSET{C1}->{ $_.$F }->{ucs} = |
60 |
{'' => 0x70000A00, '!' => 0x70001E00, |
61 |
'"' => 0x70003200, '#' => 0x70004600}->{ $_ } + 32 * ($f-0x30); |
62 |
|
63 |
$CHARSET{G94}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS |
64 |
$CHARSET{G94}->{ ' '.$_.$F }->{chars} = 94; |
65 |
$CHARSET{G94}->{ ' '.$_.$F }->{ucs} = |
66 |
{'' => 0x70090940, '!' => 0x700944A0, |
67 |
'"' => 0x70098000, '#' => 0x7009BB60}->{ $_ } + 94 * ($f-0x30); |
68 |
|
69 |
$CHARSET{G96}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS |
70 |
$CHARSET{G96}->{ ' '.$_.$F }->{chars} = 96; |
71 |
$CHARSET{G96}->{ ' '.$_.$F }->{ucs} = |
72 |
{'' => 0x700926A0, '!' => 0x70096200, |
73 |
'"' => 0x70099D60, '#' => 0x7009D8C0}->{ $_ } + 96 * ($f-0x30); |
74 |
} |
75 |
} |
76 |
for my $f (0x30..0x5F, 0x7E) { |
77 |
my $F = pack 'C', $f; |
78 |
for ('', '!', '"', '#') { |
79 |
$CHARSET{G94n}->{ $_.$F }->{dimension} = 2; |
80 |
$CHARSET{G94n}->{ $_.$F }->{chars} = 94; |
81 |
$CHARSET{G94n}->{ $_.$F }->{ucs} = |
82 |
({'' => 0xE9F6C0}->{ $_ }||0) + 94*94 * ($f-0x30); |
83 |
## BUG: 94^n DRCSes with I byte have no mapping area |
84 |
|
85 |
$CHARSET{G96n}->{ $_.$F }->{dimension} = 2; |
86 |
$CHARSET{G96n}->{ $_.$F }->{chars} = 96; |
87 |
$CHARSET{G96n}->{ $_.$F }->{ucs} = |
88 |
({'' => 0xF4C000}->{ $_ }||0) + 96*96 * ($f-0x30); |
89 |
## BUG: 94^n DRCSes with I byte have no mapping area |
90 |
} |
91 |
} |
92 |
for (0x60..0x6F) { |
93 |
my $F = pack 'C', $_; |
94 |
## BUG: 9x^3 sets have no mapping area |
95 |
for ('', '!', '"', '#', ' ') { |
96 |
$CHARSET{G94n}->{ $_.$F }->{dimension} = 3; |
97 |
$CHARSET{G94n}->{ $_.$F }->{chars} = 94; |
98 |
|
99 |
$CHARSET{G96n}->{ $_.$F }->{dimension} = 3; |
100 |
$CHARSET{G96n}->{ $_.$F }->{chars} = 96; |
101 |
} |
102 |
} |
103 |
for (0x70..0x7D) { |
104 |
my $F = pack 'C', $_; |
105 |
## BUG: 9x^4 sets have no mapping area |
106 |
for ('', '!', '"', '#', ' ') { |
107 |
$CHARSET{G94n}->{ $_.$F }->{dimension} = 4; |
108 |
$CHARSET{G94n}->{ $_.$F }->{chars} = 94; |
109 |
|
110 |
$CHARSET{G96n}->{ $_.$F }->{dimension} = 4; |
111 |
$CHARSET{G96n}->{ $_.$F }->{chars} = 96; |
112 |
} |
113 |
} |
114 |
for my $f (0x40..0x4E) { |
115 |
my $F = pack 'C', $f; |
116 |
$CHARSET{G96n}->{ ' '.$F }->{dimension} = 2; |
117 |
$CHARSET{G96n}->{ ' '.$F }->{chars} = 96; |
118 |
$CHARSET{G96n}->{ ' '.$F }->{ucs} = 0xF0000 + 96*96*($f-0x40); |
119 |
## U+F0000-U+10F7FF (private) -> ESC 02/04 02/00 <I> (04/00-04/14) (DRCS) |
120 |
} |
121 |
|
122 |
$CHARSET{G94}->{B}->{ucs} = 0x21; ## ASCII |
123 |
$CHARSET{G96}->{A}->{ucs} = 0xA0; ## ISO 8859-1 |
124 |
|
125 |
$CHARSET{G94n}->{'B@'}->{dimension} = 2; ## JIS X 0208-1990 |
126 |
$CHARSET{G94n}->{'B@'}->{chars} = 94; |
127 |
$CHARSET{G94n}->{'B@'}->{ucs} = 0xE9F6C0 + 94*94*79; |
128 |
|
129 |
## -- Control character sets |
130 |
$CHARSET{C0}->{'@'}->{ucs} = 0x00; ## ISO/IEC 6429 C0 |
131 |
for ("\x40", "\x43", "\x44", "\x45", "\x46", "\x49", "\x4A", "\x4B", "\x4C") { |
132 |
$CHARSET{C0}->{$_}->{C_LS0} = "\x0F"; |
133 |
$CHARSET{C0}->{$_}->{C_LS1} = "\x0E"; |
134 |
$CHARSET{C0}->{$_}->{r_LS0} = '\x0F'; |
135 |
$CHARSET{C0}->{$_}->{r_LS1} = '\x0E'; |
136 |
} |
137 |
for ("\x40", "\x44", "\x45", "\x46", "\x48", "\x4C") { |
138 |
$CHARSET{C0}->{$_}->{reset_all} = {"\x0A" => 1, "\x0B" => 1, |
139 |
"\x0C" => 1, "\x0D" => 1}; |
140 |
} |
141 |
$CHARSET{C0}->{"\x43"}->{reset_all} = {"\x0A" => 1}; |
142 |
$CHARSET{C0}->{"\x44"}->{C_SS2} = "\x1C"; |
143 |
$CHARSET{C0}->{"\x44"}->{r_SS2} = '\x1C'; |
144 |
for ("\x45", "\x49", "\x4A", "\x4B") { |
145 |
$CHARSET{C0}->{$_}->{C_SS2} = "\x19"; |
146 |
$CHARSET{C0}->{$_}->{C_SS3} = "\x1D"; |
147 |
$CHARSET{C0}->{$_}->{r_SS2} = '\x19'; |
148 |
$CHARSET{C0}->{$_}->{r_SS3} = '\x1D'; |
149 |
} |
150 |
$CHARSET{C0}->{"\x4C"}->{C_SS2} = "\x19"; |
151 |
$CHARSET{C0}->{"\x4C"}->{r_SS2} = '\x19'; |
152 |
|
153 |
$CHARSET{C1}->{'64291991C1'}->{dimension} = 1; ## ISO/IEC 6429:1991 C1 |
154 |
$CHARSET{C1}->{'64291991C1'}->{chars} = 32; |
155 |
$CHARSET{C1}->{'64291991C1'}->{ucs} = 0x80; |
156 |
for ("\x43", "\x45", "\x47", '64291991C1') { |
157 |
$CHARSET{C1}->{$_}->{C_SS2} = "\x8E"; |
158 |
$CHARSET{C1}->{$_}->{C_SS3} = "\x8F"; |
159 |
$CHARSET{C1}->{$_}->{r_SS2} = '\x8E'; |
160 |
$CHARSET{C1}->{$_}->{r_SS3} = '\x8F'; |
161 |
$CHARSET{C1}->{$_}->{r_SS2_ESC} = '\x1B\x4E'; |
162 |
$CHARSET{C1}->{$_}->{r_SS3_ESC} = '\x1B\x4F'; |
163 |
} |
164 |
for ("\x43", '64291991C1') { |
165 |
$CHARSET{C1}->{$_}->{r_CSI} = '\x9B'; |
166 |
$CHARSET{C1}->{$_}->{r_CSI_ESC} = '\x1B\x5B'; |
167 |
$CHARSET{C1}->{$_}->{r_DCS} = '\x90'; |
168 |
$CHARSET{C1}->{$_}->{r_ST} = '\x9C'; |
169 |
$CHARSET{C1}->{$_}->{r_OSC} = '\x9D'; |
170 |
$CHARSET{C1}->{$_}->{r_PM} = '\x9E'; |
171 |
$CHARSET{C1}->{$_}->{r_APC} = '\x9F'; |
172 |
$CHARSET{C1}->{$_}->{reset_all} = {"\x85"=>1, "\x90"=>1, |
173 |
"\x9C"=>1, "\x9D"=>1, "\x9E"=>1, "\x9F"=>1}; |
174 |
} |
175 |
$CHARSET{C1}->{'64291991C1'}->{r_SCI} = '\x9A'; |
176 |
|
177 |
$CHARSET{single_control}->{Fs} ={ucs => 0x70005000, chars => 32, dimension => 1}; |
178 |
$CHARSET{single_control}->{'3F'} ={ucs => 0x70005020, chars => 80, dimension => 1}; |
179 |
$CHARSET{single_control}->{'3F!'}={ucs => 0x70005070, chars => 80, dimension => 1}; |
180 |
$CHARSET{single_control}->{'3F"'}={ucs => 0x700050C0, chars => 80, dimension => 1}; |
181 |
$CHARSET{single_control}->{'3F#'}={ucs => 0x70005110, chars => 80, dimension => 1}; |
182 |
} |
183 |
|
184 |
|
185 |
### --- Perl Encode module common functions |
186 |
|
187 |
sub encode ($$;$) { |
188 |
my ($obj, $str, $chk) = @_; |
189 |
$_[1] = '' if $chk; |
190 |
$str = &internal_to_iso2022 ($str); |
191 |
return $str; |
192 |
} |
193 |
|
194 |
sub decode ($$;$) { |
195 |
my ($obj, $str, $chk) = @_; |
196 |
$_[1] = '' if $chk; |
197 |
return &iso2022_to_internal ($str); |
198 |
} |
199 |
|
200 |
### --- Encode::ISO2022 unique functions |
201 |
|
202 |
## Make a new ISO/IEC 2022-buffers object with default status |
203 |
sub new_object { |
204 |
my %C; |
205 |
$C{bit} = 8; |
206 |
$C{CL} = 'C0'; $C{CR} = 'C1'; $C{ESC_Fe} = 'C1'; |
207 |
$C{C0} = $CHARSET{C0}->{"\x40"}; ## ISO/IEC 6429:1991 C0 |
208 |
$C{C1} = $CHARSET{C1}->{'64291991C1'}; ## ISO/IEC 6429:1991 C1 |
209 |
$C{GL} = 'G0'; $C{GR} = 'G1'; |
210 |
$C{G0} = $CHARSET{G94}->{"\x42"}; ## ISO/IEC 646:1991 IRV |
211 |
#$C{G1} = $CHARSET{G96}->{"\x41"}; ## ISO/IEC 8859-1 GR |
212 |
$C{G1} = $CHARSET{G94}->{"\x7E"}; ## empty set |
213 |
$C{G2} = $CHARSET{G94}->{"\x7E"}; ## empty set |
214 |
$C{G3} = $CHARSET{G94}->{"\x7E"}; ## empty set |
215 |
$C{option} = { |
216 |
C1invoke_to_right => 0, ## C1 invoked to: (0: ESC Fe, 1: CR) |
217 |
G94n_designate_long => 0, ## (1: ESC 02/04 02/08 04/00..02) |
218 |
designate_to => { ## Designated G buffer (-1: not be outputed) |
219 |
C0 => { |
220 |
default => 0, |
221 |
}, |
222 |
C1 => { |
223 |
default => 1, |
224 |
}, |
225 |
G94 => { |
226 |
"\x42" => 0, |
227 |
default => 0, |
228 |
}, |
229 |
G96 => { |
230 |
default => 1, |
231 |
}, |
232 |
G94n => { |
233 |
default => 0, |
234 |
}, |
235 |
G96n => { |
236 |
default => 1, |
237 |
}, |
238 |
}, |
239 |
Ginvoke_by_single_shift => [0,0,0,0], ## Invoked by SS |
240 |
Ginvoke_to_left => [1,1,1,1], ## Which invoked to? (1: L, 0: R) |
241 |
private_set => { ## Private set vs Final byte |
242 |
C0 => [], |
243 |
C1 => [], |
244 |
G94 => [], |
245 |
G94n => [[],[],[],[],[]], |
246 |
G96 => [], |
247 |
#G96n => [], ## (not implemented) |
248 |
U96n => [], ## mule-unicode sets |
249 |
XC1 => { |
250 |
'64291991C1' => undef, ## ISO/IEC 6429:1991 C1 |
251 |
}, |
252 |
}, |
253 |
reset => { ## Reset status at top of line |
254 |
Gdesignation => "\x42", ## F of designation or 0 |
255 |
Ginvoke => 1, |
256 |
}, |
257 |
use_revision => 1, ## Output IRR |
258 |
}; |
259 |
\%C; |
260 |
} |
261 |
|
262 |
sub iso2022_to_internal ($;\%) { |
263 |
my ($s, $C) = @_; |
264 |
my %_GB_to_GN = ( |
265 |
"\x28"=>'G0',"\x29"=>'G1',"\x2A"=>'G2',"\x2B"=>'G3', |
266 |
"\x2C"=>'G0',"\x2D"=>'G1',"\x2E"=>'G2',"\x2F"=>'G3', |
267 |
); |
268 |
$C ||= &new_object; |
269 |
|
270 |
use re 'eval'; |
271 |
$s =~ s{ |
272 |
((??{ $_CHARS_to_RANGE{'l'.$C->{$C->{GL}}->{chars}} |
273 |
. qq/{$C->{$C->{GL}}->{dimension},$C->{$C->{GL}}->{dimension}}/ })) |
274 |
|((??{ $_CHARS_to_RANGE{'r'.$C->{$C->{GL}}->{chars}} |
275 |
. qq/{$C->{$C->{GR}}->{dimension},$C->{$C->{GR}}->{dimension}}/ })) |
276 |
|
277 |
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS2} || '(?!)') |
278 |
. ($C->{$C->{ESC_Fe}}->{r_SS2_ESC} ? |
279 |
qq/|$C->{$C->{ESC_Fe}}->{r_SS2_ESC}/ : '') |
280 |
. ($C->{$C->{CL}}->{r_SS2} ? qq/|$C->{$C->{CL}}->{r_SS2}/ : '') . q/)/ |
281 |
. ( $C->{$C->{CL}}->{r_LS0} |
282 |
||$C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
283 |
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
284 |
}) |
285 |
((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}} |
286 |
. qq/{$C->{G2}->{dimension},$C->{G2}->{dimension}}/ })) |
287 |
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS3} || '(?!)') |
288 |
. ($C->{$C->{ESC_Fe}}->{r_SS3_ESC} ? |
289 |
qq/|$C->{$C->{ESC_Fe}}->{r_SS3_ESC}/ : '') |
290 |
. ($C->{$C->{CL}}->{r_SS3} ? qq/|$C->{$C->{CL}}->{r_SS3}/ : '') . q/)/ |
291 |
. ( $C->{$C->{CL}}->{r_LS0} |
292 |
|| $C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
293 |
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
294 |
}) |
295 |
((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}} |
296 |
. qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ })) |
297 |
|
298 |
|((??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' })) ## GL = G0 |
299 |
|((??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' })) ## GL = G1 |
300 |
|
301 |
## Control sequence |
302 |
|(??{ '(?:'.($C->{$C->{CR}}->{r_CSI}||'(?!)') |
303 |
.($C->{$C->{ESC_Fe}}->{r_CSI_ESC} ? |
304 |
qq/|$C->{$C->{ESC_Fe}}->{r_CSI_ESC}/: '') |
305 |
.')' |
306 |
}) |
307 |
((??{ qq/[\x30-\x3F$C->{$C->{CL}}->{LS0}$C->{$C->{CL}}->{LS1}\xB0-\xBF]*/ |
308 |
.qq/[\x20-\x2F$C->{$C->{CL}}->{LS0}$C->{$C->{CL}}->{LS1}\xA0-\xAF]*/ |
309 |
}) [\x40-\x7E\xD0-\xFE]) |
310 |
|
311 |
## Other escape sequence |
312 |
|(\x1B[\x20-\x2F]*[\x30-\x7E]) |
313 |
|
314 |
## Misc. sequence (SP, control, or broken data) |
315 |
|([\x00-\xFF]) |
316 |
}{ |
317 |
my ($gl,$gr,$ss2,$ss3,$ls0,$ls1,$csi,$esc,$misc) |
318 |
= ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10); |
319 |
$C->{_irr} = undef unless defined $esc; |
320 |
## GL graphic character |
321 |
if (defined $gl) { |
322 |
my $c = 0; |
323 |
my $m = $C->{$C->{GL}}->{chars}==94?0x21:$C->{$C->{GL}}->{chars}==96?0x20:0; |
324 |
for (split //, $gl) { |
325 |
$c = $c * $C->{$C->{GL}}->{chars} + unpack ('C', $_) - $m; |
326 |
} |
327 |
chr ($C->{$C->{GL}}->{ucs} + $c); |
328 |
## Control, SP, or broken data |
329 |
## TODO: support control sets other than ISO/IEC 6429's |
330 |
} elsif (defined $misc) { |
331 |
$misc; |
332 |
## GR graphic character |
333 |
} elsif ($gr) { |
334 |
my $c = 0; |
335 |
my $m = $C->{$C->{GR}}->{chars}==94?0xA1:$C->{$C->{GR}}->{chars}==96?0xA0:0x80; |
336 |
for (split //, $gr) { |
337 |
$c = $c * $C->{$C->{GR}}->{chars} + unpack ('C', $_) - $m; |
338 |
} |
339 |
chr ($C->{$C->{GR}}->{ucs} + $c); |
340 |
## Graphic character with SS2 |
341 |
} elsif ($ss2) { |
342 |
$ss2 =~ tr/\x80-\xFF/\x00-\x7F/; |
343 |
my $c = 0; my $m = $C->{G2}->{chars}==94?0x21:$C->{G2}->{chars}==96?0x20:0; |
344 |
for (split //, $ss2) { |
345 |
$c = $c * $C->{G2}->{chars} + unpack ('C', $_) - $m; |
346 |
} |
347 |
chr ($C->{G2}->{ucs} + $c); |
348 |
## Graphic character with SS3 |
349 |
} elsif ($ss3) { |
350 |
$ss3 =~ tr/\x80-\xFF/\x00-\x7F/; |
351 |
my $c = 0; my $m = $C->{G3}->{chars}==94?0x21:$C->{G3}->{chars}==96?0x20:0; |
352 |
for (split //, $ss3) { |
353 |
$c = $c * $C->{G3}->{chars} + unpack ('C', $_) - $m; |
354 |
} |
355 |
chr ($C->{G3}->{ucs} + $c); |
356 |
## Locking shifts |
357 |
} elsif ($ls0) { |
358 |
$C->{GL} = 'G0'; ''; |
359 |
} elsif ($ls1) { |
360 |
$C->{GL} = 'G1'; ''; |
361 |
## Escape sequence |
362 |
} elsif ($esc) { |
363 |
## IRR (revision number) |
364 |
if ($esc =~ /\x1B\x26([\x40-\x7E])/) { |
365 |
$C->{_irr} = $1; $esc = ''; |
366 |
} else { |
367 |
$esc =~ s{ |
368 |
\x1B([\x28-\x2B])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 94^1 |
369 |
|\x1B\x24([\x28-\x2B]?)(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 94^n |
370 |
|
371 |
|\x1B([\x2C-\x2F])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 96^1 |
372 |
|\x1B\x24([\x2C-\x2F])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 96^n |
373 |
|
374 |
|\x1B([\x40-\x5F]) ## ESC Fe |
375 |
|
376 |
|\x1B\x21([\x21-\x23]?[\x30-\x7E]) ## CL = C0 |
377 |
|\x1B\x22([\x21-\x23]?[\x30-\x7E]) ## CR & ESC Fe = C1 |
378 |
|
379 |
|\x1B([\x60-\x7E]) ## Single control functions |
380 |
|\x1B\x23([\x21-\x23]?)([\x30-\x7E]) |
381 |
|
382 |
|\x1B\x20([\x40-\x7E]) ## Announcer |
383 |
}{ |
384 |
my ($g94_g,$g94_f,$g94n_g,$g94n_f,$g96_g,$g96_f,$g96n_g,$g96n_f,$Fe, |
385 |
$CZD, $C1D, $Fs, $sI, $sF,$ACS) |
386 |
= ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15); |
387 |
my $rev = $C->{_irr} || ''; |
388 |
if ($g94_g) { |
389 |
$C->{ $_GB_to_GN{ $g94_g } } = $CHARSET{G94}->{ $g94_f.$rev } |
390 |
|| $CHARSET{G94}->{ $g94_f } |
391 |
|| $CHARSET{G94}->{ "\x7E" }; ''; |
392 |
} elsif (defined $g94n_f) { |
393 |
$C->{ $_GB_to_GN{ $g94n_g } || 'G0' } = $CHARSET{G94n}->{ $g94n_f.$rev } |
394 |
|| $CHARSET{G94n}->{ $g94n_f } |
395 |
|| $CHARSET{G94n}->{ "\x7E" }; ''; |
396 |
} elsif ($g96_g) { |
397 |
$C->{ $_GB_to_GN{ $g96_g } } = $CHARSET{G96}->{ $g96_f.$rev } |
398 |
|| $CHARSET{G96}->{ $g96_f } |
399 |
|| $CHARSET{G96}->{ "\x7E" }; ''; |
400 |
} elsif (defined $g96n_f) { |
401 |
$C->{ $_GB_to_GN{ $g96n_g } } = $CHARSET{G96n}->{ $g96n_f.$rev } |
402 |
|| $CHARSET{G96n}->{ $g96n_f } |
403 |
|| $CHARSET{G96n}->{ "\x7E" }; ''; |
404 |
} elsif ($Fe) { ## ESC Fe => C1 |
405 |
chr ($C->{ $C->{ESC_Fe} }->{ucs} + (ord ($Fe) - 0x40)); |
406 |
} elsif ($CZD) { |
407 |
$C->{C0} = $CHARSET{C0}->{ $CZD.$rev } |
408 |
|| $CHARSET{C0}->{ $CZD } || $CHARSET{C0}->{ "\x7E" }; ''; |
409 |
} elsif ($C1D) { |
410 |
$C->{C1} = $CHARSET{C1}->{ $C1D.$rev } |
411 |
|| $CHARSET{C1}->{ $C1D } || $CHARSET{C1}->{ "\x7E" }; ''; |
412 |
} elsif ($Fs) { |
413 |
if ($Fs eq "\x6E") { ## LS2 |
414 |
$C->{GL} = 'G2'; ''; |
415 |
} elsif ($Fs eq "\x6F") { ## LS3 |
416 |
$C->{GL} = 'G3'; ''; |
417 |
} elsif ($Fs eq "\x7E") { ## LS1R |
418 |
$C->{GR} = 'G1'; $C->{GL} = 'G1' if $C->{bit} == 7; ''; |
419 |
} elsif ($Fs eq "\x7D") { ## LS2R |
420 |
$C->{GR} = 'G2'; $C->{GL} = 'G2' if $C->{bit} == 7; ''; |
421 |
} elsif ($Fs eq "\x7C") { ## LS3R |
422 |
$C->{GR} = 'G3'; $C->{GL} = 'G3' if $C->{bit} == 7; ''; |
423 |
} else { |
424 |
chr ($CHARSET{single_control}->{Fs}->{ucs} + (ord ($Fs) - 0x60)); |
425 |
} |
426 |
} elsif ($sI) { |
427 |
chr ($CHARSET{single_control}->{'3F'.$sI}->{ucs} + (ord ($sF) - 0x30)); |
428 |
} elsif ($ACS) { ## Announcer |
429 |
if ($ACS eq "\x4A") { $C->{bit} = 7 } |
430 |
elsif ($ACS eq "\x4B") { $C->{bit} = 8 } |
431 |
''; |
432 |
} |
433 |
}gex; |
434 |
$C->{_irr} = undef; |
435 |
} |
436 |
$esc; |
437 |
## Control sequence |
438 |
} elsif ($csi) { |
439 |
$csi =~ tr/\xA0-\xFF/\x20-\x7F/d; |
440 |
$csi =~ s/$C->{$C->{CL}}->{LS0}//g if $C->{$C->{CL}}->{LS0}; |
441 |
$csi =~ s/$C->{$C->{CL}}->{LS1}//g if $C->{$C->{CL}}->{LS1}; |
442 |
"\x9B".$csi; |
443 |
} |
444 |
}gex; |
445 |
$s; |
446 |
} |
447 |
|
448 |
sub internal_to_iso2022 ($\%) { |
449 |
my ($s, $C) = @_; |
450 |
$C ||= &new_object; |
451 |
|
452 |
my $r = ''; |
453 |
for my $c (split //, $s) { |
454 |
my $cc = ord $c; |
455 |
my $t; |
456 |
if ($cc <= 0x1F) { |
457 |
$t = _i2c ($c, $C, type => 'C0', charset => '@'); |
458 |
} elsif ($cc == 0x20 || $cc == 0x7F) { |
459 |
$t = _back2ascii ($C) . $c; |
460 |
} elsif ($cc < 0x7F) { |
461 |
$t = _i2g ($c, $C, type => 'G94', charset => 'B'); |
462 |
} elsif ($cc <= 0x9F) { |
463 |
$t = _i2c ($c, $C, type => 'C1', charset_id => '64291991C1', |
464 |
charset => $C->{private_set}->{XC1}->{'64291991C1'}); |
465 |
} elsif ($cc <= 0xFF) { |
466 |
$t = _i2g (chr($cc-0x80), $C, type => 'G96', charset => 'A'); |
467 |
} elsif ($cc <= 0x24FF) { |
468 |
my $c = $cc - 0x100; |
469 |
my $final = $C->{private_set}->{U96n}->[0]; |
470 |
if (length $final) { |
471 |
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
472 |
type => 'G96n', charset => $final); |
473 |
} |
474 |
} elsif ($cc <= 0x33FF) { |
475 |
my $c = $cc - 0x2500; |
476 |
my $final = $C->{private_set}->{U96n}->[1]; |
477 |
if (length $final) { |
478 |
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
479 |
type => 'G96n', charset => $final); |
480 |
} |
481 |
} elsif (0xE000 <= $cc && $cc <= 0xFFFF) { |
482 |
my $c = $cc - 0xE000; |
483 |
my $final = $C->{private_set}->{U96n}->[2]; |
484 |
if (length $final) { |
485 |
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
486 |
type => 'G96n', charset => $final); |
487 |
} |
488 |
|
489 |
} elsif (0xE9F6C0 <= $cc && $cc <= 0xF06F80) { |
490 |
my $c = $cc - 0xE9F6C0; |
491 |
$t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C, |
492 |
type => 'G94n', charset => chr(($c / 8836)+0x30)); |
493 |
} elsif (0xF49D7C <= $cc && $cc <= 0xF4BFFF) { |
494 |
my $c = $cc - 0xF49D7C; |
495 |
$t = _i2g (chr(($c / 94)+0x21).chr(($c % 94)+0x21), $C, |
496 |
type => 'G94n', charset => 'B', revision => '@'); |
497 |
|
498 |
} elsif (0xF0000 <= $cc && $cc <= 0x10F7FF) { |
499 |
my $c = $cc - 0xF0000; |
500 |
$t = _i2g (chr((($c % 9216) / 96)+0x20).chr(($c % 96)+0x20), $C, |
501 |
type => 'G96n', charset => "\x20".chr(($c / 9216)+0x40)); |
502 |
} elsif (0xE90940 <= $cc && $cc <= 0xE92641) { |
503 |
my $c = $cc - 0xE90940; |
504 |
$t = _i2g (chr(($c % 94)+0x21), $C, |
505 |
type => 'G94', charset => chr(($c / 94)+0x30)); |
506 |
} elsif (0xE92642 <= $cc && $cc <= 0xE9269F) { |
507 |
my $c = $cc - 0xE92642; |
508 |
$t = _i2g (chr($c+0x21),$C,type => 'G94', charset => "\x64", revision => '@'); |
509 |
} elsif (0xE926A0 <= $cc && $cc <= 0xE9443F) { |
510 |
my $c = $cc - 0xE926A0; |
511 |
$t = _i2g (chr(($c % 96)+0x20), $C, |
512 |
type => 'G96', charset => chr(($c / 96)+0x30)); |
513 |
} elsif (0xE944A0 <= $cc && $cc <= 0xE961A1) { |
514 |
my $c = $cc - 0xE944A0; |
515 |
$t = _i2g (chr(($c % 94)+0x21), $C, |
516 |
type => 'G94', charset => '!'.chr(($c / 94)+0x30)); |
517 |
} elsif (0xE96200 <= $cc && $cc <= 0xE97F9F) { |
518 |
my $c = $cc - 0xE96200; |
519 |
$t = _i2g (chr(($c % 96)+0x20), $C, |
520 |
type => 'G96', charset => '!'.chr(($c / 96)+0x30)); |
521 |
} elsif (0xE98000 <= $cc && $cc <= 0xE99D01) { |
522 |
my $c = $cc - 0xE98000; |
523 |
$t = _i2g (chr(($c % 94)+0x21), $C, |
524 |
type => 'G94', charset => '"'.chr(($c / 94)+0x30)); |
525 |
} elsif (0xE99D60 <= $cc && $cc <= 0xE9BAFF) { |
526 |
my $c = $cc - 0xE99D60; |
527 |
$t = _i2g (chr(($c % 96)+0x20), $C, |
528 |
type => 'G96', charset => '"'.chr(($c / 96)+0x30)); |
529 |
} elsif (0xE9BB60 <= $cc && $cc <= 0xE9D861) { |
530 |
my $c = $cc - 0xE9BB60; |
531 |
$t = _i2g (chr(($c % 94)+0x21), $C, |
532 |
type => 'G94', charset => '#'.chr(($c / 94)+0x30)); |
533 |
} elsif (0xE9D8C0 <= $cc && $cc <= 0xE9F65F) { |
534 |
my $c = $cc - 0xE9D8C0; |
535 |
$t = _i2g (chr(($c % 96)+0x20), $C, |
536 |
type => 'G96', charset => '#'.chr(($c / 96)+0x30)); |
537 |
} elsif (0x70090940 <= $cc && $cc <= 0x70092641) { |
538 |
my $c = $cc - 0x70090940; |
539 |
$t = _i2g (chr(($c % 94)+0x21), $C, |
540 |
type => 'G94', charset => "\x20".chr(($c / 94)+0x30)); |
541 |
} elsif (0x700926A0 <= $cc && $cc <= 0x7009443F) { |
542 |
my $c = $cc - 0x700926A0; |
543 |
$t = _i2g (chr(($c % 96)+0x20), $C, |
544 |
type => 'G96', charset => "\x20".chr(($c / 96)+0x30)); |
545 |
## TODO: DRCS with I byte: U+700944A0-U+7009F6BF |
546 |
} elsif (0x70400000 <= $cc && $cc <= 0x7040FFED) { |
547 |
my $c = $cc - 0x70400000; |
548 |
$t = _i2g (chr(($c % 94)+0x21), $C, charset_id => 'P'.int ($c / 94), |
549 |
type => 'G94', charset => $C->{private_set}->{G94}->[ $c / 94 ]); |
550 |
} elsif (0x70410000 <= $cc && $cc <= 0x7041FFBF) { |
551 |
my $c = $cc - 0x70410000; |
552 |
$t = _i2g (chr(($c % 96)+0x20), $C, charset_id => 'P'.int ($c / 96), |
553 |
type => 'G96', charset => $C->{private_set}->{G96}->[ $c / 96 ]); |
554 |
} elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) { |
555 |
my $c = $cc % 0x10000; |
556 |
$t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C, |
557 |
type => 'G94n', |
558 |
charset_id => 'P'.int(($cc / 0x10000) - 0x7042).'_'.int($c / 8836), |
559 |
charset => $C->{private_set}->{G94n}->[ ($cc / 0x10000) - 0x7042 ] |
560 |
->[ $c / 8836 ]); |
561 |
} |
562 |
if (defined $t) { |
563 |
$r .= $t; |
564 |
} else { |
565 |
$r .= _i2g ("\x3F", $C, type => 'G94', charset => 'B'); |
566 |
} |
567 |
} |
568 |
$r . _back2ascii ($C); |
569 |
} |
570 |
|
571 |
## $O{charset} eq undef means that charset is same as the current designated one. |
572 |
sub _i2c ($%%) { |
573 |
my ($s, $C, %O) = @_; |
574 |
my $r = ''; |
575 |
if ($O{type} eq 'C0') { |
576 |
if (defined $O{charset}) { |
577 |
if ( $C->{C0} ne $CHARSET{C0}->{$O{charset}} |
578 |
&& $C->{C0} ne $CHARSET{C0}->{$O{charset_id}}) { |
579 |
for ($C->{option}->{designate_to}->{C0}->{$O{charset}}, |
580 |
$C->{option}->{designate_to}->{C0}->{default}) { |
581 |
if (defined $_) { return undef if $_ == -1; last } |
582 |
} |
583 |
$r .= "\x1B\x21".$O{charset}; |
584 |
$C->{C0} = $CHARSET{C0}->{$O{charset}}; |
585 |
} |
586 |
} elsif (defined $O{charset_id}) { |
587 |
if ($C->{C0} ne $CHARSET{C0}->{$O{charset_id}}) { |
588 |
return undef; ## Control set is not designated nor has F byte |
589 |
} |
590 |
} |
591 |
$r .= _back2ascii ($C, reset_all => $C->{C0}->{reset_all}->{$s}); |
592 |
return $r . $s; |
593 |
} elsif ($O{type} eq 'C1') { |
594 |
if (defined $O{charset}) { |
595 |
if ( $C->{C1} ne $CHARSET{C1}->{$O{charset}} |
596 |
&& $C->{C1} ne $CHARSET{C1}->{$O{charset_id}}) { |
597 |
for ($C->{option}->{designate_to}->{C1}->{$O{charset}}, |
598 |
$C->{option}->{designate_to}->{C1}->{default}) { |
599 |
if (defined $_) { return undef if $_ == -1; last } |
600 |
} |
601 |
$r .= "\x1B\x22".$O{charset}; |
602 |
$C->{C1} = $CHARSET{C1}->{$O{charset}}; |
603 |
} |
604 |
} elsif (defined $O{charset_id}) { |
605 |
if ($C->{C1} ne $CHARSET{C1}->{$O{charset_id}}) { |
606 |
return undef; ## Control set is not designated nor has F byte |
607 |
} |
608 |
} |
609 |
$r .= _back2ascii ($C, reset_all => $C->{C1}->{reset_all}->{$s}); |
610 |
unless ($C->{option}->{C1invoke_to_right}) { ## ESC Fe |
611 |
$s =~ s/([\x80-\x9F])/"\x1B" . chr (ord ($1) - 0x40)/ge; |
612 |
} |
613 |
return $r . $s; |
614 |
} |
615 |
} |
616 |
sub _i2g ($%%) { |
617 |
my ($s, $C, %O) = @_; |
618 |
my $r = ''; |
619 |
my $set = $CHARSET{$O{type}}->{$O{charset}}; |
620 |
my $set0 = $CHARSET{$O{type}}->{$O{charset_id}}; |
621 |
## -- designate character set |
622 |
my $G = 0; |
623 |
if ($C->{G0} eq $set || $C->{G0} eq $set0) { $G = 0 } |
624 |
elsif ($C->{G1} eq $set || $C->{G1} eq $set0) { $G = 1 } |
625 |
elsif ($C->{G2} eq $set || $C->{G2} eq $set0) { $G = 2 } |
626 |
elsif ($C->{G3} eq $set || $C->{G3} eq $set0) { $G = 3 } |
627 |
else { |
628 |
return undef unless $set; ## charset does not have F byte |
629 |
$G = 1 if $O{type} eq 'G96' || $O{type} eq 'G96n'; |
630 |
for ($C->{option}->{designate_to}->{$O{type}}->{$O{charset}}, |
631 |
$C->{option}->{designate_to}->{$O{type}}->{default}) { |
632 |
if (defined $_) { |
633 |
$G = $_; last; |
634 |
} |
635 |
} |
636 |
if ($G == -1) { |
637 |
return undef; |
638 |
} |
639 |
if ($O{type} eq 'G94') { |
640 |
$r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'') |
641 |
."\x1B".("\x28","\x29","\x2A","\x2B")[$G].$O{charset}; |
642 |
} elsif ($O{type} eq 'G94n') { |
643 |
if ($G == 0 && !$C->{option}->{G94n_designate_long} |
644 |
&& ($O{charset} eq '@' || $O{charset} eq 'A' || $O{charset} eq 'B')) { |
645 |
$r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'') |
646 |
."\x1B\x24".$O{charset}; |
647 |
} else { |
648 |
$r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'') |
649 |
."\x1B\x24".("\x28","\x29","\x2A","\x2B")[$G].$O{charset}; |
650 |
} |
651 |
} elsif ($O{type} eq 'G96') { |
652 |
$r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'') |
653 |
."\x1B".("\x2C","\x2D","\x2E","\x2F")[$G].$O{charset}; |
654 |
} elsif ($O{type} eq 'G96n') { |
655 |
$r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'') |
656 |
."\x1B\x24".("\x2C","\x2D","\x2E","\x2F")[$G].$O{charset}; |
657 |
} |
658 |
$C->{'G'.$G} = $CHARSET{$O{type}}->{$O{charset}}; |
659 |
} |
660 |
## -- invoke G buffer |
661 |
my $left = $C->{option}->{Ginvoke_to_left}->[$G]; |
662 |
if ($C->{GL} eq 'G'.$G) { |
663 |
$left = 1; |
664 |
} elsif ($C->{GR} eq 'G'.$G) { |
665 |
$left = 0; |
666 |
} else { |
667 |
if ($C->{option}->{Ginvoke_by_single_shift}->[$G]) { |
668 |
if ($C->{C1}->{'C_SS'.$G}) { |
669 |
$r .= _i2c ($C->{C1}->{'C_SS'.$G}, $C, type => 'C1') || return undef; |
670 |
} elsif ($C->{C0}->{'C_SS'.$G}) { |
671 |
$r .= _i2c ($C->{C0}->{'C_SS'.$G}, $C, type => 'C0') || return undef; |
672 |
} else { ## Both C0 and C1 set do not have SS2/3. |
673 |
$r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G; |
674 |
} |
675 |
} else { |
676 |
$left = 0 if $G == 1 && !$C->{C0}->{C_LS1}; |
677 |
$r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G; |
678 |
} |
679 |
} |
680 |
$s =~ tr/\x00-\x7F/\x80-\xFF/ unless $left; |
681 |
$r . $s; |
682 |
} |
683 |
sub _back2ascii (%) { |
684 |
my ($C, %O) = @_; |
685 |
my $r = ''; |
686 |
if ($C->{option}->{reset}->{Gdesignation}) { |
687 |
my $F = $C->{option}->{reset}->{Gdesignation}; # \x42 |
688 |
$r .= "\x1B\x28".$F unless $C->{G0} eq $CHARSET{G94}->{$F}; |
689 |
$C->{G0} = $CHARSET{G94}->{$F}; |
690 |
if ($O{reset_all}) { |
691 |
$C->{G1} = $CHARSET{G94}->{"\x7E"}; |
692 |
$C->{G2} = $CHARSET{G94}->{"\x7E"}; |
693 |
$C->{G3} = $CHARSET{G94}->{"\x7E"}; |
694 |
} |
695 |
} |
696 |
if ($C->{option}->{reset}->{Ginvoke}) { |
697 |
if ($C->{GL} ne 'G0') { |
698 |
$r .= $C->{C0}->{C_LS0} || ($C->{C0} = $CHARSET{C0}->{'@'},"\x1B\x21\x40\x0F"); |
699 |
$C->{GL} = 'G0'; |
700 |
} |
701 |
$C->{GR} = undef if $O{reset_all}; |
702 |
} |
703 |
$r; |
704 |
} |
705 |
## __invoke (\%C, $G, $left_or_right) |
706 |
sub __invoke (\%$$) { |
707 |
my ($C, $G) = @_; |
708 |
if ($_[2]) { |
709 |
$C->{GL} = 'G'.$G; |
710 |
return ($C->{C0}->{C_LS0} |
711 |
|| scalar ($C->{C0}=$CHARSET{C0}->{'@'},"\x1B\x21\x40\x0F"), |
712 |
$C->{C0}->{C_LS1}, "\x1B\x6E", "\x1B\x6F")[$G]; |
713 |
} else { |
714 |
$C->{GR} = 'G'.$G; |
715 |
return ("", "\x1B\x7E", "\x1B\x7D", "\x1B\x7C")[$G]; |
716 |
} |
717 |
''; |
718 |
} |
719 |
|
720 |
sub make_charset (%) { |
721 |
## TODO: support private charset ID such as 'X0' |
722 |
my %set = @_; |
723 |
my $setid = qq($set{I}$set{F}$set{revision}); |
724 |
my $settype = $set{type} || 'G94'; |
725 |
delete $set{type}, $set{I}, $set{F}, $set{revision}; |
726 |
$CHARSET{ $settype }->{ $setid } = \%set; |
727 |
} |
728 |
|
729 |
1; |
730 |
__END__ |
731 |
|
732 |
=head1 SEE ALSO |
733 |
|
734 |
ISO/IEC 646:1991, "7-bit coded graphic character set for intormation interchange". |
735 |
|
736 |
ISO/IEC 2022:1994, "Character Code Structure and Extension Techniques". |
737 |
(IDT with ECMA 35, JIS X 0202:1998) |
738 |
|
739 |
ISO/IEC 4873:1991, "8-Bit Coded Character Set Structure and Rules". |
740 |
(IDT with ECMA 43) |
741 |
|
742 |
ISO/IEC 6429:1992, "Control Functions for Coded Character Sets". |
743 |
(IDT with ECMA 48:1991, JIS X 0211:1998) |
744 |
|
745 |
ISO/IEC 8859, "8-Bit Single-Byte Coded Graphic Character Sets". |
746 |
|
747 |
Encode, perlunicode |
748 |
|
749 |
=head1 LICENSE |
750 |
|
751 |
Copyright 2002 wakaba <w@suika.fam.cx> |
752 |
|
753 |
This library is free software; you can redistribute it |
754 |
and/or modify it under the same terms as Perl itself. |
755 |
|
756 |
=cut |
757 |
|
758 |
# $Date: 2002/09/15 04:15:51 $ |
759 |
### ISO2022.pm ends here |