/[suikacvs]/perl/lib/Encode/ISO2022.pm
Suika

Contents of /perl/lib/Encode/ISO2022.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (show annotations) (download)
Mon Sep 16 02:20:18 2002 UTC (22 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.2: +22 -13 lines
2002-09-16  Wakaba <w@suika.fam.cx>

	* ISO2022.pm:
	- (iso2022_to_internal): Invoke G1,G2,G3 by locking
	shifts of ESC Fs style.
	- (make_initial_charset): Create charset definition
	of 94^2 DRCSes.

1
2 =head1 NAME
3
4 Encode::ISO2022 --- ISO/IEC 2022 encoder and decoder
5
6 =cut
7
8 require v5.7.3;
9 package Encode::ISO2022;
10 use strict;
11 use vars qw(%CHARSET $VERSION);
12 $VERSION=do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
13 use base qw(Encode::Encoding);
14 __PACKAGE__->Define (qw/iso-2022 iso2022 2022 cp2022/);
15
16 ### --- Intialization
17
18 my %_CHARS_to_RANGE = (
19 l94 => q/[\x21-\x7E]/,
20 l96 => q/[\x20-\x7F]/,
21 l128 => q/[\x00-\x7F]/,
22 l256 => q/[\x00-\xFF]/,
23 r94 => q/[\xA1-\xFE]/,
24 r96 => q/[\xA0-\xFF]/,
25 r128 => q/[\x80-\xFF]/,
26 r256 => q/[\x80-\xFF]/,
27 b94 => q/[\x21-\x7E\xA1-\xFE]/,
28 b96 => q/[\x20-\x7F\xA0-\xFF]/,
29 b128 => q/[\x00-\xFF]/,
30 b256 => q/[\x00-\xFF]/,
31 );
32
33 ## --- Make initial charset definitions
34 &_make_initial_charsets;
35 sub _make_initial_charsets () {
36 for my $f (0x30..0x7E) {
37 my $F = pack 'C', $f;
38 for ('', '!', '"', '#') {
39 $CHARSET{G94}->{ $_.$F }->{dimension} = 1;
40 $CHARSET{G94}->{ $_.$F }->{chars} = 94;
41 $CHARSET{G94}->{ $_.$F }->{ucs} =
42 {'' => 0xE90940, '!' => 0xE944A0, '"' => 0xE98000, '#' => 0xE9BB60}->{ $_ }
43 + 94 * ($f-0x30);
44
45 $CHARSET{G96}->{ $_.$F }->{dimension} = 1;
46 $CHARSET{G96}->{ $_.$F }->{chars} = 96;
47 $CHARSET{G96}->{ $_.$F }->{ucs} =
48 {'' => 0xE926A0, '!' => 0xE96200, '"' => 0xE99D60, '#' => 0xE9D8C0}->{ $_ }
49 + 96 * ($f-0x30);
50
51 $CHARSET{C0}->{ $_.$F }->{dimension} = 1;
52 $CHARSET{C0}->{ $_.$F }->{chars} = 32;
53 $CHARSET{C0}->{ $_.$F }->{ucs} =
54 {'' => 0x70000000, '!' => 0x70001400,
55 '"' => 0x70002800, '#' => 0x70003C00}->{ $_ } + 32 * ($f-0x30);
56
57 $CHARSET{C1}->{ $_.$F }->{dimension} = 1;
58 $CHARSET{C1}->{ $_.$F }->{chars} = 32;
59 $CHARSET{C1}->{ $_.$F }->{ucs} =
60 {'' => 0x70000A00, '!' => 0x70001E00,
61 '"' => 0x70003200, '#' => 0x70004600}->{ $_ } + 32 * ($f-0x30);
62
63 $CHARSET{G94}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS
64 $CHARSET{G94}->{ ' '.$_.$F }->{chars} = 94;
65 $CHARSET{G94}->{ ' '.$_.$F }->{ucs} =
66 {'' => 0x70090940, '!' => 0x700944A0,
67 '"' => 0x70098000, '#' => 0x7009BB60}->{ $_ } + 94 * ($f-0x30);
68
69 $CHARSET{G96}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS
70 $CHARSET{G96}->{ ' '.$_.$F }->{chars} = 96;
71 $CHARSET{G96}->{ ' '.$_.$F }->{ucs} =
72 {'' => 0x700926A0, '!' => 0x70096200,
73 '"' => 0x70099D60, '#' => 0x7009D8C0}->{ $_ } + 96 * ($f-0x30);
74 }
75 }
76 for my $f (0x30..0x5F, 0x7E) {
77 my $F = pack 'C', $f;
78 for ('', '!', '"', '#', ' ') {
79 $CHARSET{G94n}->{ $_.$F }->{dimension} = 2;
80 $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
81 $CHARSET{G94n}->{ $_.$F }->{ucs} =
82 ({'' => 0xE9F6C0}->{ $_ }||0) + 94*94 * ($f-0x30);
83 ## BUG: 94^n sets with I byte have no mapping area
84
85 $CHARSET{G96n}->{ $_.$F }->{dimension} = 2;
86 $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
87 $CHARSET{G96n}->{ $_.$F }->{ucs} =
88 ({'' => 0xF4C000}->{ $_ }||0) + 96*96 * ($f-0x30);
89 ## BUG: 94^n DRCSes with I byte have no mapping area
90 }
91 }
92 for (0x60..0x6F) {
93 my $F = pack 'C', $_;
94 ## BUG: 9x^3 sets have no mapping area
95 for ('', '!', '"', '#', ' ') {
96 $CHARSET{G94n}->{ $_.$F }->{dimension} = 3;
97 $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
98
99 $CHARSET{G96n}->{ $_.$F }->{dimension} = 3;
100 $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
101 }
102 }
103 for (0x70..0x7D) {
104 my $F = pack 'C', $_;
105 ## BUG: 9x^4 sets have no mapping area
106 for ('', '!', '"', '#', ' ') {
107 $CHARSET{G94n}->{ $_.$F }->{dimension} = 4;
108 $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
109
110 $CHARSET{G96n}->{ $_.$F }->{dimension} = 4;
111 $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
112 }
113 }
114 for my $f (0x40..0x4E) {
115 my $F = pack 'C', $f;
116 $CHARSET{G96n}->{ ' '.$F }->{dimension} = 2;
117 $CHARSET{G96n}->{ ' '.$F }->{chars} = 96;
118 $CHARSET{G96n}->{ ' '.$F }->{ucs} = 0xF0000 + 96*96*($f-0x40);
119 ## U+F0000-U+10F7FF (private) -> ESC 02/04 02/00 <I> (04/00-04/14) (DRCS)
120 }
121
122 $CHARSET{G94}->{B}->{ucs} = 0x21; ## ASCII
123 $CHARSET{G96}->{A}->{ucs} = 0xA0; ## ISO 8859-1
124
125 $CHARSET{G94n}->{'B@'}->{dimension} = 2; ## JIS X 0208-1990
126 $CHARSET{G94n}->{'B@'}->{chars} = 94;
127 $CHARSET{G94n}->{'B@'}->{ucs} = 0xE9F6C0 + 94*94*79;
128
129 ## -- Control character sets
130 $CHARSET{C0}->{'@'}->{ucs} = 0x00; ## ISO/IEC 6429 C0
131 for ("\x40", "\x43", "\x44", "\x45", "\x46", "\x49", "\x4A", "\x4B", "\x4C") {
132 $CHARSET{C0}->{$_}->{C_LS0} = "\x0F";
133 $CHARSET{C0}->{$_}->{C_LS1} = "\x0E";
134 $CHARSET{C0}->{$_}->{r_LS0} = '\x0F';
135 $CHARSET{C0}->{$_}->{r_LS1} = '\x0E';
136 }
137 for ("\x40", "\x44", "\x45", "\x46", "\x48", "\x4C") {
138 $CHARSET{C0}->{$_}->{reset_all} = {"\x0A" => 1, "\x0B" => 1,
139 "\x0C" => 1, "\x0D" => 1};
140 }
141 $CHARSET{C0}->{"\x43"}->{reset_all} = {"\x0A" => 1};
142 $CHARSET{C0}->{"\x44"}->{C_SS2} = "\x1C";
143 $CHARSET{C0}->{"\x44"}->{r_SS2} = '\x1C';
144 for ("\x45", "\x49", "\x4A", "\x4B") {
145 $CHARSET{C0}->{$_}->{C_SS2} = "\x19";
146 $CHARSET{C0}->{$_}->{C_SS3} = "\x1D";
147 $CHARSET{C0}->{$_}->{r_SS2} = '\x19';
148 $CHARSET{C0}->{$_}->{r_SS3} = '\x1D';
149 }
150 $CHARSET{C0}->{"\x4C"}->{C_SS2} = "\x19";
151 $CHARSET{C0}->{"\x4C"}->{r_SS2} = '\x19';
152
153 $CHARSET{C1}->{'64291991C1'}->{dimension} = 1; ## ISO/IEC 6429:1991 C1
154 $CHARSET{C1}->{'64291991C1'}->{chars} = 32;
155 $CHARSET{C1}->{'64291991C1'}->{ucs} = 0x80;
156 for ("\x43", "\x45", "\x47", '64291991C1') {
157 $CHARSET{C1}->{$_}->{C_SS2} = "\x8E";
158 $CHARSET{C1}->{$_}->{C_SS3} = "\x8F";
159 $CHARSET{C1}->{$_}->{r_SS2} = '\x8E';
160 $CHARSET{C1}->{$_}->{r_SS3} = '\x8F';
161 $CHARSET{C1}->{$_}->{r_SS2_ESC} = '\x1B\x4E';
162 $CHARSET{C1}->{$_}->{r_SS3_ESC} = '\x1B\x4F';
163 }
164 for ("\x43", '64291991C1') {
165 $CHARSET{C1}->{$_}->{r_CSI} = '\x9B';
166 $CHARSET{C1}->{$_}->{r_CSI_ESC} = '\x1B\x5B';
167 $CHARSET{C1}->{$_}->{r_DCS} = '\x90';
168 $CHARSET{C1}->{$_}->{r_ST} = '\x9C';
169 $CHARSET{C1}->{$_}->{r_OSC} = '\x9D';
170 $CHARSET{C1}->{$_}->{r_PM} = '\x9E';
171 $CHARSET{C1}->{$_}->{r_APC} = '\x9F';
172 $CHARSET{C1}->{$_}->{reset_all} = {"\x85"=>1, "\x90"=>1,
173 "\x9C"=>1, "\x9D"=>1, "\x9E"=>1, "\x9F"=>1};
174 }
175 $CHARSET{C1}->{'64291991C1'}->{r_SCI} = '\x9A';
176
177 $CHARSET{single_control}->{Fs} ={ucs => 0x70005000, chars => 32, dimension => 1};
178 $CHARSET{single_control}->{'3F'} ={ucs => 0x70005020, chars => 80, dimension => 1};
179 $CHARSET{single_control}->{'3F!'}={ucs => 0x70005070, chars => 80, dimension => 1};
180 $CHARSET{single_control}->{'3F"'}={ucs => 0x700050C0, chars => 80, dimension => 1};
181 $CHARSET{single_control}->{'3F#'}={ucs => 0x70005110, chars => 80, dimension => 1};
182 }
183
184
185 ### --- Perl Encode module common functions
186
187 sub encode ($$;$) {
188 my ($obj, $str, $chk) = @_;
189 $_[1] = '' if $chk;
190 $str = &internal_to_iso2022 ($str);
191 return $str;
192 }
193
194 sub decode ($$;$) {
195 my ($obj, $str, $chk) = @_;
196 $_[1] = '' if $chk;
197 return &iso2022_to_internal ($str);
198 }
199
200 ### --- Encode::ISO2022 unique functions
201
202 ## Make a new ISO/IEC 2022-buffers object with default status
203 sub new_object {
204 my %C;
205 $C{bit} = 8;
206 $C{CL} = 'C0'; $C{CR} = 'C1'; $C{ESC_Fe} = 'C1';
207 $C{C0} = $CHARSET{C0}->{"\x40"}; ## ISO/IEC 6429:1991 C0
208 $C{C1} = $CHARSET{C1}->{'64291991C1'}; ## ISO/IEC 6429:1991 C1
209 $C{GL} = 'G0'; $C{GR} = 'G1';
210 $C{G0} = $CHARSET{G94}->{"\x42"}; ## ISO/IEC 646:1991 IRV
211 #$C{G1} = $CHARSET{G96}->{"\x41"}; ## ISO/IEC 8859-1 GR
212 $C{G1} = $CHARSET{G94}->{"\x7E"}; ## empty set
213 $C{G2} = $CHARSET{G94}->{"\x7E"}; ## empty set
214 $C{G3} = $CHARSET{G94}->{"\x7E"}; ## empty set
215 $C{option} = {
216 C1invoke_to_right => 0, ## C1 invoked to: (0: ESC Fe, 1: CR)
217 G94n_designate_long => 0, ## (1: ESC 02/04 02/08 04/00..02)
218 designate_to => { ## Designated G buffer (-1: not be outputed)
219 C0 => {
220 default => 0,
221 },
222 C1 => {
223 default => 1,
224 },
225 G94 => {
226 "\x42" => 0,
227 default => 0,
228 },
229 G96 => {
230 default => 1,
231 },
232 G94n => {
233 default => 0,
234 },
235 G96n => {
236 default => 1,
237 },
238 },
239 Ginvoke_by_single_shift => [0,0,0,0], ## Invoked by SS
240 Ginvoke_to_left => [1,1,1,1], ## Which invoked to? (1: L, 0: R)
241 private_set => { ## Private set vs Final byte
242 C0 => [],
243 C1 => [],
244 G94 => [],
245 G94n => [[],[],[],[],[]],
246 G96 => [],
247 #G96n => [], ## (not implemented)
248 U96n => [], ## mule-unicode sets
249 XC1 => {
250 '64291991C1' => undef, ## ISO/IEC 6429:1991 C1
251 },
252 },
253 reset => { ## Reset status at top of line
254 Gdesignation => "\x42", ## F of designation or 0
255 Ginvoke => 1,
256 },
257 use_revision => 1, ## Output IRR
258 };
259 \%C;
260 }
261
262 sub iso2022_to_internal ($;\%) {
263 my ($s, $C) = @_;
264 my %_GB_to_GN = (
265 "\x28"=>'G0',"\x29"=>'G1',"\x2A"=>'G2',"\x2B"=>'G3',
266 "\x2C"=>'G0',"\x2D"=>'G1',"\x2E"=>'G2',"\x2F"=>'G3',
267 );
268 $C ||= &new_object;
269
270 use re 'eval';
271 $s =~ s{
272 ((??{ $_CHARS_to_RANGE{'l'.$C->{$C->{GL}}->{chars}}
273 . qq/{$C->{$C->{GL}}->{dimension},$C->{$C->{GL}}->{dimension}}/ }))
274 |((??{ $_CHARS_to_RANGE{'r'.$C->{$C->{GL}}->{chars}}
275 . qq/{$C->{$C->{GR}}->{dimension},$C->{$C->{GR}}->{dimension}}/ }))
276
277 | (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS2} || '(?!)')
278 . ($C->{$C->{ESC_Fe}}->{r_SS2_ESC} ?
279 qq/|$C->{$C->{ESC_Fe}}->{r_SS2_ESC}/ : '')
280 . ($C->{$C->{CL}}->{r_SS2} ? qq/|$C->{$C->{CL}}->{r_SS2}/ : '') . q/)/
281 . ( $C->{$C->{CL}}->{r_LS0}
282 ||$C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9
283 qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'')
284 })
285 ((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}}
286 . qq/{$C->{G2}->{dimension},$C->{G2}->{dimension}}/ }))
287 | (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS3} || '(?!)')
288 . ($C->{$C->{ESC_Fe}}->{r_SS3_ESC} ?
289 qq/|$C->{$C->{ESC_Fe}}->{r_SS3_ESC}/ : '')
290 . ($C->{$C->{CL}}->{r_SS3} ? qq/|$C->{$C->{CL}}->{r_SS3}/ : '') . q/)/
291 . ( $C->{$C->{CL}}->{r_LS0}
292 || $C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9
293 qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'')
294 })
295 ((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}}
296 . qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ }))
297
298 ## Locking shift
299 |( \x1B[\x6E\x6F\x7C-\x7E]
300 |(??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' })
301 |(??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' })
302 )
303
304 ## Control sequence
305 |(??{ '(?:'.($C->{$C->{CR}}->{r_CSI}||'(?!)')
306 .($C->{$C->{ESC_Fe}}->{r_CSI_ESC} ?
307 qq/|$C->{$C->{ESC_Fe}}->{r_CSI_ESC}/: '')
308 .')'
309 })
310 ((??{ qq/[\x30-\x3F$C->{$C->{CL}}->{LS0}$C->{$C->{CL}}->{LS1}\xB0-\xBF]*/
311 .qq/[\x20-\x2F$C->{$C->{CL}}->{LS0}$C->{$C->{CL}}->{LS1}\xA0-\xAF]*/
312 }) [\x40-\x7E\xD0-\xFE])
313
314 ## Other escape sequence
315 |(\x1B[\x20-\x2F]*[\x30-\x7E])
316
317 ## Misc. sequence (SP, control, or broken data)
318 |([\x00-\xFF])
319 }{
320 my ($gl,$gr,$ss2,$ss3,$ls,$csi,$esc,$misc)
321 = ($1,$2,$3,$4,$5,$6,$7,$8,$9);
322 $C->{_irr} = undef unless defined $esc;
323 ## GL graphic character
324 if (defined $gl) {
325 my $c = 0;
326 my $m = $C->{$C->{GL}}->{chars}==94?0x21:$C->{$C->{GL}}->{chars}==96?0x20:0;
327 for (split //, $gl) {
328 $c = $c * $C->{$C->{GL}}->{chars} + unpack ('C', $_) - $m;
329 }
330 chr ($C->{$C->{GL}}->{ucs} + $c);
331 ## Control, SP, or broken data
332 ## TODO: support control sets other than ISO/IEC 6429's
333 } elsif (defined $misc) {
334 $misc;
335 ## GR graphic character
336 } elsif ($gr) {
337 my $c = 0;
338 my $m = $C->{$C->{GR}}->{chars}==94?0xA1:$C->{$C->{GR}}->{chars}==96?0xA0:0x80;
339 for (split //, $gr) {
340 $c = $c * $C->{$C->{GR}}->{chars} + unpack ('C', $_) - $m;
341 }
342 chr ($C->{$C->{GR}}->{ucs} + $c);
343 ## Graphic character with SS2
344 } elsif ($ss2) {
345 $ss2 =~ tr/\x80-\xFF/\x00-\x7F/;
346 my $c = 0; my $m = $C->{G2}->{chars}==94?0x21:$C->{G2}->{chars}==96?0x20:0;
347 for (split //, $ss2) {
348 $c = $c * $C->{G2}->{chars} + unpack ('C', $_) - $m;
349 }
350 chr ($C->{G2}->{ucs} + $c);
351 ## Graphic character with SS3
352 } elsif ($ss3) {
353 $ss3 =~ tr/\x80-\xFF/\x00-\x7F/;
354 my $c = 0; my $m = $C->{G3}->{chars}==94?0x21:$C->{G3}->{chars}==96?0x20:0;
355 for (split //, $ss3) {
356 $c = $c * $C->{G3}->{chars} + unpack ('C', $_) - $m;
357 }
358 chr ($C->{G3}->{ucs} + $c);
359 ## Escape sequence
360 } elsif ($esc) {
361 ## IRR (revision number)
362 if ($esc =~ /\x1B\x26([\x40-\x7E])/) {
363 $C->{_irr} = $1; $esc = '';
364 } else {
365 $esc =~ s{
366 \x1B([\x28-\x2B])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 94^1
367 |\x1B\x24([\x28-\x2B]?)(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 94^n
368
369 |\x1B([\x2C-\x2F])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 96^1
370 |\x1B\x24([\x2C-\x2F])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 96^n
371
372 |\x1B([\x40-\x5F]) ## ESC Fe
373
374 |\x1B\x21([\x21-\x23]?[\x30-\x7E]) ## CL = C0
375 |\x1B\x22([\x21-\x23]?[\x30-\x7E]) ## CR & ESC Fe = C1
376
377 |\x1B([\x60-\x7E]) ## Single control functions
378 |\x1B\x23([\x21-\x23]?)([\x30-\x7E])
379
380 |\x1B\x20([\x40-\x7E]) ## Announcer
381 }{
382 my ($g94_g,$g94_f,$g94n_g,$g94n_f,$g96_g,$g96_f,$g96n_g,$g96n_f,$Fe,
383 $CZD, $C1D, $Fs, $sI, $sF,$ACS)
384 = ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15);
385 my $rev = $C->{_irr} || '';
386 if ($g94_g) {
387 $C->{ $_GB_to_GN{ $g94_g } } = $CHARSET{G94}->{ $g94_f.$rev }
388 || $CHARSET{G94}->{ $g94_f }
389 || $CHARSET{G94}->{ "\x7E" }; '';
390 } elsif (defined $g94n_f) {
391 $C->{ $_GB_to_GN{ $g94n_g } || 'G0' } = $CHARSET{G94n}->{ $g94n_f.$rev }
392 || $CHARSET{G94n}->{ $g94n_f }
393 || $CHARSET{G94n}->{ "\x7E" }; '';
394 } elsif ($g96_g) {
395 $C->{ $_GB_to_GN{ $g96_g } } = $CHARSET{G96}->{ $g96_f.$rev }
396 || $CHARSET{G96}->{ $g96_f }
397 || $CHARSET{G96}->{ "\x7E" }; '';
398 } elsif (defined $g96n_f) {
399 $C->{ $_GB_to_GN{ $g96n_g } } = $CHARSET{G96n}->{ $g96n_f.$rev }
400 || $CHARSET{G96n}->{ $g96n_f }
401 || $CHARSET{G96n}->{ "\x7E" }; '';
402 } elsif ($Fe) { ## ESC Fe => C1
403 chr ($C->{ $C->{ESC_Fe} }->{ucs} + (ord ($Fe) - 0x40));
404 } elsif ($CZD) {
405 $C->{C0} = $CHARSET{C0}->{ $CZD.$rev }
406 || $CHARSET{C0}->{ $CZD } || $CHARSET{C0}->{ "\x7E" }; '';
407 } elsif ($C1D) {
408 $C->{C1} = $CHARSET{C1}->{ $C1D.$rev }
409 || $CHARSET{C1}->{ $C1D } || $CHARSET{C1}->{ "\x7E" }; '';
410 } elsif ($Fs) {
411 if ($Fs eq "\x6E") { ## LS2
412 $C->{GL} = 'G2'; '';
413 } elsif ($Fs eq "\x6F") { ## LS3
414 $C->{GL} = 'G3'; '';
415 } elsif ($Fs eq "\x7E") { ## LS1R
416 $C->{GR} = 'G1'; $C->{GL} = 'G1' if $C->{bit} == 7; '';
417 } elsif ($Fs eq "\x7D") { ## LS2R
418 $C->{GR} = 'G2'; $C->{GL} = 'G2' if $C->{bit} == 7; '';
419 } elsif ($Fs eq "\x7C") { ## LS3R
420 $C->{GR} = 'G3'; $C->{GL} = 'G3' if $C->{bit} == 7; '';
421 } else {
422 chr ($CHARSET{single_control}->{Fs}->{ucs} + (ord ($Fs) - 0x60));
423 }
424 } elsif ($sI) {
425 chr ($CHARSET{single_control}->{'3F'.$sI}->{ucs} + (ord ($sF) - 0x30));
426 } elsif ($ACS) { ## Announcer
427 if ($ACS eq "\x4A") { $C->{bit} = 7 }
428 elsif ($ACS eq "\x4B") { $C->{bit} = 8 }
429 '';
430 }
431 }gex;
432 $C->{_irr} = undef;
433 }
434 $esc;
435 ## Locking shifts
436 } elsif ($ls) {
437 if ($ls eq $C->{$C->{CL}}->{LS0}) {
438 $C->{GL} = 'G0'; '';
439 } elsif ($ls eq $C->{$C->{CL}}->{LS1}) {
440 $C->{GL} = 'G1'; '';
441 } elsif ($ls =~ /\x1B([\x6E\x6F])/) {
442 $C->{GL} = {"\x6E"=>2, "\x6F"=>3}->{$1}; '';
443 } elsif ($ls =~ /\x1B([\x7C-\x7E])/) {
444 $C->{GR} = {"\x7E"=>1, "\x7D"=>2, "\x7C"=>3}->{$1}; '';
445 }
446 ## Control sequence
447 } elsif ($csi) {
448 $csi =~ tr/\xA0-\xFF/\x20-\x7F/d;
449 $csi =~ s/$C->{$C->{CL}}->{LS0}//g if $C->{$C->{CL}}->{LS0};
450 $csi =~ s/$C->{$C->{CL}}->{LS1}//g if $C->{$C->{CL}}->{LS1};
451 "\x9B".$csi;
452 }
453 }gex;
454 $s;
455 }
456
457 sub internal_to_iso2022 ($\%) {
458 my ($s, $C) = @_;
459 $C ||= &new_object;
460
461 my $r = '';
462 for my $c (split //, $s) {
463 my $cc = ord $c;
464 my $t;
465 if ($cc <= 0x1F) {
466 $t = _i2c ($c, $C, type => 'C0', charset => '@');
467 } elsif ($cc == 0x20 || $cc == 0x7F) {
468 $t = _back2ascii ($C) . $c;
469 } elsif ($cc < 0x7F) {
470 $t = _i2g ($c, $C, type => 'G94', charset => 'B');
471 } elsif ($cc <= 0x9F) {
472 $t = _i2c ($c, $C, type => 'C1', charset_id => '64291991C1',
473 charset => $C->{private_set}->{XC1}->{'64291991C1'});
474 } elsif ($cc <= 0xFF) {
475 $t = _i2g (chr($cc-0x80), $C, type => 'G96', charset => 'A');
476 } elsif ($cc <= 0x24FF) {
477 my $c = $cc - 0x100;
478 my $final = $C->{private_set}->{U96n}->[0];
479 if (length $final) {
480 $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,
481 type => 'G96n', charset => $final);
482 }
483 } elsif ($cc <= 0x33FF) {
484 my $c = $cc - 0x2500;
485 my $final = $C->{private_set}->{U96n}->[1];
486 if (length $final) {
487 $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,
488 type => 'G96n', charset => $final);
489 }
490 } elsif (0xE000 <= $cc && $cc <= 0xFFFF) {
491 my $c = $cc - 0xE000;
492 my $final = $C->{private_set}->{U96n}->[2];
493 if (length $final) {
494 $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,
495 type => 'G96n', charset => $final);
496 }
497
498 } elsif (0xE9F6C0 <= $cc && $cc <= 0xF06F80) {
499 my $c = $cc - 0xE9F6C0;
500 $t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C,
501 type => 'G94n', charset => chr(($c / 8836)+0x30));
502 } elsif (0xF49D7C <= $cc && $cc <= 0xF4BFFF) {
503 my $c = $cc - 0xF49D7C;
504 $t = _i2g (chr(($c / 94)+0x21).chr(($c % 94)+0x21), $C,
505 type => 'G94n', charset => 'B', revision => '@');
506
507 } elsif (0xF0000 <= $cc && $cc <= 0x10F7FF) {
508 my $c = $cc - 0xF0000;
509 $t = _i2g (chr((($c % 9216) / 96)+0x20).chr(($c % 96)+0x20), $C,
510 type => 'G96n', charset => "\x20".chr(($c / 9216)+0x40));
511 } elsif (0xE90940 <= $cc && $cc <= 0xE92641) {
512 my $c = $cc - 0xE90940;
513 $t = _i2g (chr(($c % 94)+0x21), $C,
514 type => 'G94', charset => chr(($c / 94)+0x30));
515 } elsif (0xE92642 <= $cc && $cc <= 0xE9269F) {
516 my $c = $cc - 0xE92642;
517 $t = _i2g (chr($c+0x21),$C,type => 'G94', charset => "\x64", revision => '@');
518 } elsif (0xE926A0 <= $cc && $cc <= 0xE9443F) {
519 my $c = $cc - 0xE926A0;
520 $t = _i2g (chr(($c % 96)+0x20), $C,
521 type => 'G96', charset => chr(($c / 96)+0x30));
522 } elsif (0xE944A0 <= $cc && $cc <= 0xE961A1) {
523 my $c = $cc - 0xE944A0;
524 $t = _i2g (chr(($c % 94)+0x21), $C,
525 type => 'G94', charset => '!'.chr(($c / 94)+0x30));
526 } elsif (0xE96200 <= $cc && $cc <= 0xE97F9F) {
527 my $c = $cc - 0xE96200;
528 $t = _i2g (chr(($c % 96)+0x20), $C,
529 type => 'G96', charset => '!'.chr(($c / 96)+0x30));
530 } elsif (0xE98000 <= $cc && $cc <= 0xE99D01) {
531 my $c = $cc - 0xE98000;
532 $t = _i2g (chr(($c % 94)+0x21), $C,
533 type => 'G94', charset => '"'.chr(($c / 94)+0x30));
534 } elsif (0xE99D60 <= $cc && $cc <= 0xE9BAFF) {
535 my $c = $cc - 0xE99D60;
536 $t = _i2g (chr(($c % 96)+0x20), $C,
537 type => 'G96', charset => '"'.chr(($c / 96)+0x30));
538 } elsif (0xE9BB60 <= $cc && $cc <= 0xE9D861) {
539 my $c = $cc - 0xE9BB60;
540 $t = _i2g (chr(($c % 94)+0x21), $C,
541 type => 'G94', charset => '#'.chr(($c / 94)+0x30));
542 } elsif (0xE9D8C0 <= $cc && $cc <= 0xE9F65F) {
543 my $c = $cc - 0xE9D8C0;
544 $t = _i2g (chr(($c % 96)+0x20), $C,
545 type => 'G96', charset => '#'.chr(($c / 96)+0x30));
546 } elsif (0x70090940 <= $cc && $cc <= 0x70092641) {
547 my $c = $cc - 0x70090940;
548 $t = _i2g (chr(($c % 94)+0x21), $C,
549 type => 'G94', charset => "\x20".chr(($c / 94)+0x30));
550 } elsif (0x700926A0 <= $cc && $cc <= 0x7009443F) {
551 my $c = $cc - 0x700926A0;
552 $t = _i2g (chr(($c % 96)+0x20), $C,
553 type => 'G96', charset => "\x20".chr(($c / 96)+0x30));
554 ## TODO: DRCS with I byte: U+700944A0-U+7009F6BF
555 } elsif (0x70400000 <= $cc && $cc <= 0x7040FFED) {
556 my $c = $cc - 0x70400000;
557 $t = _i2g (chr(($c % 94)+0x21), $C, charset_id => 'P'.int ($c / 94),
558 type => 'G94', charset => $C->{private_set}->{G94}->[ $c / 94 ]);
559 } elsif (0x70410000 <= $cc && $cc <= 0x7041FFBF) {
560 my $c = $cc - 0x70410000;
561 $t = _i2g (chr(($c % 96)+0x20), $C, charset_id => 'P'.int ($c / 96),
562 type => 'G96', charset => $C->{private_set}->{G96}->[ $c / 96 ]);
563 } elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) {
564 my $c = $cc % 0x10000;
565 $t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C,
566 type => 'G94n',
567 charset_id => 'P'.int(($cc / 0x10000) - 0x7042).'_'.int($c / 8836),
568 charset => $C->{private_set}->{G94n}->[ ($cc / 0x10000) - 0x7042 ]
569 ->[ $c / 8836 ]);
570 }
571 if (defined $t) {
572 $r .= $t;
573 } else {
574 $r .= _i2g ("\x3F", $C, type => 'G94', charset => 'B');
575 }
576 }
577 $r . _back2ascii ($C);
578 }
579
580 ## $O{charset} eq undef means that charset is same as the current designated one.
581 sub _i2c ($%%) {
582 my ($s, $C, %O) = @_;
583 my $r = '';
584 if ($O{type} eq 'C0') {
585 if (defined $O{charset}) {
586 if ( $C->{C0} ne $CHARSET{C0}->{$O{charset}}
587 && $C->{C0} ne $CHARSET{C0}->{$O{charset_id}}) {
588 for ($C->{option}->{designate_to}->{C0}->{$O{charset}},
589 $C->{option}->{designate_to}->{C0}->{default}) {
590 if (defined $_) { return undef if $_ == -1; last }
591 }
592 $r .= "\x1B\x21".$O{charset};
593 $C->{C0} = $CHARSET{C0}->{$O{charset}};
594 }
595 } elsif (defined $O{charset_id}) {
596 if ($C->{C0} ne $CHARSET{C0}->{$O{charset_id}}) {
597 return undef; ## Control set is not designated nor has F byte
598 }
599 }
600 $r .= _back2ascii ($C, reset_all => $C->{C0}->{reset_all}->{$s});
601 return $r . $s;
602 } elsif ($O{type} eq 'C1') {
603 if (defined $O{charset}) {
604 if ( $C->{C1} ne $CHARSET{C1}->{$O{charset}}
605 && $C->{C1} ne $CHARSET{C1}->{$O{charset_id}}) {
606 for ($C->{option}->{designate_to}->{C1}->{$O{charset}},
607 $C->{option}->{designate_to}->{C1}->{default}) {
608 if (defined $_) { return undef if $_ == -1; last }
609 }
610 $r .= "\x1B\x22".$O{charset};
611 $C->{C1} = $CHARSET{C1}->{$O{charset}};
612 }
613 } elsif (defined $O{charset_id}) {
614 if ($C->{C1} ne $CHARSET{C1}->{$O{charset_id}}) {
615 return undef; ## Control set is not designated nor has F byte
616 }
617 }
618 $r .= _back2ascii ($C, reset_all => $C->{C1}->{reset_all}->{$s});
619 unless ($C->{option}->{C1invoke_to_right}) { ## ESC Fe
620 $s =~ s/([\x80-\x9F])/"\x1B" . chr (ord ($1) - 0x40)/ge;
621 }
622 return $r . $s;
623 }
624 }
625 sub _i2g ($%%) {
626 my ($s, $C, %O) = @_;
627 my $r = '';
628 my $set = $CHARSET{$O{type}}->{$O{charset}};
629 my $set0 = $CHARSET{$O{type}}->{$O{charset_id}};
630 ## -- designate character set
631 my $G = 0;
632 if ($C->{G0} eq $set || $C->{G0} eq $set0) { $G = 0 }
633 elsif ($C->{G1} eq $set || $C->{G1} eq $set0) { $G = 1 }
634 elsif ($C->{G2} eq $set || $C->{G2} eq $set0) { $G = 2 }
635 elsif ($C->{G3} eq $set || $C->{G3} eq $set0) { $G = 3 }
636 else {
637 return undef unless $set; ## charset does not have F byte
638 $G = 1 if $O{type} eq 'G96' || $O{type} eq 'G96n';
639 for ($C->{option}->{designate_to}->{$O{type}}->{$O{charset}},
640 $C->{option}->{designate_to}->{$O{type}}->{default}) {
641 if (defined $_) {
642 $G = $_; last;
643 }
644 }
645 if ($G == -1) {
646 return undef;
647 }
648 if ($O{type} eq 'G94') {
649 $r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'')
650 ."\x1B".("\x28","\x29","\x2A","\x2B")[$G].$O{charset};
651 } elsif ($O{type} eq 'G94n') {
652 if ($G == 0 && !$C->{option}->{G94n_designate_long}
653 && ($O{charset} eq '@' || $O{charset} eq 'A' || $O{charset} eq 'B')) {
654 $r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'')
655 ."\x1B\x24".$O{charset};
656 } else {
657 $r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'')
658 ."\x1B\x24".("\x28","\x29","\x2A","\x2B")[$G].$O{charset};
659 }
660 } elsif ($O{type} eq 'G96') {
661 $r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'')
662 ."\x1B".("\x2C","\x2D","\x2E","\x2F")[$G].$O{charset};
663 } elsif ($O{type} eq 'G96n') {
664 $r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'')
665 ."\x1B\x24".("\x2C","\x2D","\x2E","\x2F")[$G].$O{charset};
666 }
667 $C->{'G'.$G} = $CHARSET{$O{type}}->{$O{charset}};
668 }
669 ## -- invoke G buffer
670 my $left = $C->{option}->{Ginvoke_to_left}->[$G];
671 if ($C->{GL} eq 'G'.$G) {
672 $left = 1;
673 } elsif ($C->{GR} eq 'G'.$G) {
674 $left = 0;
675 } else {
676 if ($C->{option}->{Ginvoke_by_single_shift}->[$G]) {
677 if ($C->{C1}->{'C_SS'.$G}) {
678 $r .= _i2c ($C->{C1}->{'C_SS'.$G}, $C, type => 'C1') || return undef;
679 } elsif ($C->{C0}->{'C_SS'.$G}) {
680 $r .= _i2c ($C->{C0}->{'C_SS'.$G}, $C, type => 'C0') || return undef;
681 } else { ## Both C0 and C1 set do not have SS2/3.
682 $r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G;
683 }
684 } else {
685 $left = 0 if $G == 1 && !$C->{C0}->{C_LS1};
686 $r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G;
687 }
688 }
689 $s =~ tr/\x00-\x7F/\x80-\xFF/ unless $left;
690 $r . $s;
691 }
692 sub _back2ascii (%) {
693 my ($C, %O) = @_;
694 my $r = '';
695 if ($C->{option}->{reset}->{Gdesignation}) {
696 my $F = $C->{option}->{reset}->{Gdesignation}; # \x42
697 $r .= "\x1B\x28".$F unless $C->{G0} eq $CHARSET{G94}->{$F};
698 $C->{G0} = $CHARSET{G94}->{$F};
699 if ($O{reset_all}) {
700 $C->{G1} = $CHARSET{G94}->{"\x7E"};
701 $C->{G2} = $CHARSET{G94}->{"\x7E"};
702 $C->{G3} = $CHARSET{G94}->{"\x7E"};
703 }
704 }
705 if ($C->{option}->{reset}->{Ginvoke}) {
706 if ($C->{GL} ne 'G0') {
707 $r .= $C->{C0}->{C_LS0} || ($C->{C0} = $CHARSET{C0}->{'@'},"\x1B\x21\x40\x0F");
708 $C->{GL} = 'G0';
709 }
710 $C->{GR} = undef if $O{reset_all};
711 }
712 $r;
713 }
714 ## __invoke (\%C, $G, $left_or_right)
715 sub __invoke (\%$$) {
716 my ($C, $G) = @_;
717 if ($_[2]) {
718 $C->{GL} = 'G'.$G;
719 return ($C->{C0}->{C_LS0}
720 || scalar ($C->{C0}=$CHARSET{C0}->{'@'},"\x1B\x21\x40\x0F"),
721 $C->{C0}->{C_LS1}, "\x1B\x6E", "\x1B\x6F")[$G];
722 } else {
723 $C->{GR} = 'G'.$G;
724 return ("", "\x1B\x7E", "\x1B\x7D", "\x1B\x7C")[$G];
725 }
726 '';
727 }
728
729 sub make_charset (%) {
730 ## TODO: support private charset ID such as 'X0'
731 my %set = @_;
732 my $setid = qq($set{I}$set{F}$set{revision});
733 my $settype = $set{type} || 'G94';
734 delete $set{type}, $set{I}, $set{F}, $set{revision};
735 $CHARSET{ $settype }->{ $setid } = \%set;
736 }
737
738 1;
739 __END__
740
741 =head1 SEE ALSO
742
743 ISO/IEC 646:1991, "7-bit coded graphic character set for intormation interchange".
744
745 ISO/IEC 2022:1994, "Character Code Structure and Extension Techniques".
746 (IDT with ECMA 35, JIS X 0202:1998)
747
748 ISO/IEC 4873:1991, "8-Bit Coded Character Set Structure and Rules".
749 (IDT with ECMA 43)
750
751 ISO/IEC 6429:1992, "Control Functions for Coded Character Sets".
752 (IDT with ECMA 48:1991, JIS X 0211:1998)
753
754 ISO/IEC 8859, "8-Bit Single-Byte Coded Graphic Character Sets".
755
756 Encode, perlunicode
757
758 =head1 LICENSE
759
760 Copyright 2002 wakaba <w@suika.fam.cx>
761
762 This library is free software; you can redistribute it
763 and/or modify it under the same terms as Perl itself.
764
765 =cut
766
767 # $Date: 2002/09/15 05:08:13 $
768 ### ISO2022.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24