/[suikacvs]/perl/lib/Encode/ISO2022.pm
Suika

Contents of /perl/lib/Encode/ISO2022.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (show annotations) (download)
Sun Sep 15 05:08:13 2002 UTC (22 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.1: +4 -4 lines
2002-09-15  Wakaba <w@suika.fam.cx>

	* ISO2022.pm: New module.

1
2 =head1 NAME
3
4 Encode::ISO2022 --- ISO/IEC 2022 encoder and decoder
5
6 =cut
7
8 require v5.7.3;
9 package Encode::ISO2022;
10 use strict;
11 use vars qw(%CHARSET $VERSION);
12 $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
13 use base qw(Encode::Encoding);
14 __PACKAGE__->Define (qw/iso-2022 iso2022 2022 cp2022/);
15
16 ### --- Intialization
17
18 my %_CHARS_to_RANGE = (
19 l94 => q/[\x21-\x7E]/,
20 l96 => q/[\x20-\x7F]/,
21 l128 => q/[\x00-\x7F]/,
22 l256 => q/[\x00-\xFF]/,
23 r94 => q/[\xA1-\xFE]/,
24 r96 => q/[\xA0-\xFF]/,
25 r128 => q/[\x80-\xFF]/,
26 r256 => q/[\x80-\xFF]/,
27 b94 => q/[\x21-\x7E\xA1-\xFE]/,
28 b96 => q/[\x20-\x7F\xA0-\xFF]/,
29 b128 => q/[\x00-\xFF]/,
30 b256 => q/[\x00-\xFF]/,
31 );
32
33 ## --- Make initial charset definitions
34 &_make_initial_charsets;
35 sub _make_initial_charsets () {
36 for my $f (0x30..0x7E) {
37 my $F = pack 'C', $f;
38 for ('', '!', '"', '#') {
39 $CHARSET{G94}->{ $_.$F }->{dimension} = 1;
40 $CHARSET{G94}->{ $_.$F }->{chars} = 94;
41 $CHARSET{G94}->{ $_.$F }->{ucs} =
42 {'' => 0xE90940, '!' => 0xE944A0, '"' => 0xE98000, '#' => 0xE9BB60}->{ $_ }
43 + 94 * ($f-0x30);
44
45 $CHARSET{G96}->{ $_.$F }->{dimension} = 1;
46 $CHARSET{G96}->{ $_.$F }->{chars} = 96;
47 $CHARSET{G96}->{ $_.$F }->{ucs} =
48 {'' => 0xE926A0, '!' => 0xE96200, '"' => 0xE99D60, '#' => 0xE9D8C0}->{ $_ }
49 + 96 * ($f-0x30);
50
51 $CHARSET{C0}->{ $_.$F }->{dimension} = 1;
52 $CHARSET{C0}->{ $_.$F }->{chars} = 32;
53 $CHARSET{C0}->{ $_.$F }->{ucs} =
54 {'' => 0x70000000, '!' => 0x70001400,
55 '"' => 0x70002800, '#' => 0x70003C00}->{ $_ } + 32 * ($f-0x30);
56
57 $CHARSET{C1}->{ $_.$F }->{dimension} = 1;
58 $CHARSET{C1}->{ $_.$F }->{chars} = 32;
59 $CHARSET{C1}->{ $_.$F }->{ucs} =
60 {'' => 0x70000A00, '!' => 0x70001E00,
61 '"' => 0x70003200, '#' => 0x70004600}->{ $_ } + 32 * ($f-0x30);
62
63 $CHARSET{G94}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS
64 $CHARSET{G94}->{ ' '.$_.$F }->{chars} = 94;
65 $CHARSET{G94}->{ ' '.$_.$F }->{ucs} =
66 {'' => 0x70090940, '!' => 0x700944A0,
67 '"' => 0x70098000, '#' => 0x7009BB60}->{ $_ } + 94 * ($f-0x30);
68
69 $CHARSET{G96}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS
70 $CHARSET{G96}->{ ' '.$_.$F }->{chars} = 96;
71 $CHARSET{G96}->{ ' '.$_.$F }->{ucs} =
72 {'' => 0x700926A0, '!' => 0x70096200,
73 '"' => 0x70099D60, '#' => 0x7009D8C0}->{ $_ } + 96 * ($f-0x30);
74 }
75 }
76 for my $f (0x30..0x5F, 0x7E) {
77 my $F = pack 'C', $f;
78 for ('', '!', '"', '#') {
79 $CHARSET{G94n}->{ $_.$F }->{dimension} = 2;
80 $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
81 $CHARSET{G94n}->{ $_.$F }->{ucs} =
82 ({'' => 0xE9F6C0}->{ $_ }||0) + 94*94 * ($f-0x30);
83 ## BUG: 94^n DRCSes with I byte have no mapping area
84
85 $CHARSET{G96n}->{ $_.$F }->{dimension} = 2;
86 $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
87 $CHARSET{G96n}->{ $_.$F }->{ucs} =
88 ({'' => 0xF4C000}->{ $_ }||0) + 96*96 * ($f-0x30);
89 ## BUG: 94^n DRCSes with I byte have no mapping area
90 }
91 }
92 for (0x60..0x6F) {
93 my $F = pack 'C', $_;
94 ## BUG: 9x^3 sets have no mapping area
95 for ('', '!', '"', '#', ' ') {
96 $CHARSET{G94n}->{ $_.$F }->{dimension} = 3;
97 $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
98
99 $CHARSET{G96n}->{ $_.$F }->{dimension} = 3;
100 $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
101 }
102 }
103 for (0x70..0x7D) {
104 my $F = pack 'C', $_;
105 ## BUG: 9x^4 sets have no mapping area
106 for ('', '!', '"', '#', ' ') {
107 $CHARSET{G94n}->{ $_.$F }->{dimension} = 4;
108 $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
109
110 $CHARSET{G96n}->{ $_.$F }->{dimension} = 4;
111 $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
112 }
113 }
114 for my $f (0x40..0x4E) {
115 my $F = pack 'C', $f;
116 $CHARSET{G96n}->{ ' '.$F }->{dimension} = 2;
117 $CHARSET{G96n}->{ ' '.$F }->{chars} = 96;
118 $CHARSET{G96n}->{ ' '.$F }->{ucs} = 0xF0000 + 96*96*($f-0x40);
119 ## U+F0000-U+10F7FF (private) -> ESC 02/04 02/00 <I> (04/00-04/14) (DRCS)
120 }
121
122 $CHARSET{G94}->{B}->{ucs} = 0x21; ## ASCII
123 $CHARSET{G96}->{A}->{ucs} = 0xA0; ## ISO 8859-1
124
125 $CHARSET{G94n}->{'B@'}->{dimension} = 2; ## JIS X 0208-1990
126 $CHARSET{G94n}->{'B@'}->{chars} = 94;
127 $CHARSET{G94n}->{'B@'}->{ucs} = 0xE9F6C0 + 94*94*79;
128
129 ## -- Control character sets
130 $CHARSET{C0}->{'@'}->{ucs} = 0x00; ## ISO/IEC 6429 C0
131 for ("\x40", "\x43", "\x44", "\x45", "\x46", "\x49", "\x4A", "\x4B", "\x4C") {
132 $CHARSET{C0}->{$_}->{C_LS0} = "\x0F";
133 $CHARSET{C0}->{$_}->{C_LS1} = "\x0E";
134 $CHARSET{C0}->{$_}->{r_LS0} = '\x0F';
135 $CHARSET{C0}->{$_}->{r_LS1} = '\x0E';
136 }
137 for ("\x40", "\x44", "\x45", "\x46", "\x48", "\x4C") {
138 $CHARSET{C0}->{$_}->{reset_all} = {"\x0A" => 1, "\x0B" => 1,
139 "\x0C" => 1, "\x0D" => 1};
140 }
141 $CHARSET{C0}->{"\x43"}->{reset_all} = {"\x0A" => 1};
142 $CHARSET{C0}->{"\x44"}->{C_SS2} = "\x1C";
143 $CHARSET{C0}->{"\x44"}->{r_SS2} = '\x1C';
144 for ("\x45", "\x49", "\x4A", "\x4B") {
145 $CHARSET{C0}->{$_}->{C_SS2} = "\x19";
146 $CHARSET{C0}->{$_}->{C_SS3} = "\x1D";
147 $CHARSET{C0}->{$_}->{r_SS2} = '\x19';
148 $CHARSET{C0}->{$_}->{r_SS3} = '\x1D';
149 }
150 $CHARSET{C0}->{"\x4C"}->{C_SS2} = "\x19";
151 $CHARSET{C0}->{"\x4C"}->{r_SS2} = '\x19';
152
153 $CHARSET{C1}->{'64291991C1'}->{dimension} = 1; ## ISO/IEC 6429:1991 C1
154 $CHARSET{C1}->{'64291991C1'}->{chars} = 32;
155 $CHARSET{C1}->{'64291991C1'}->{ucs} = 0x80;
156 for ("\x43", "\x45", "\x47", '64291991C1') {
157 $CHARSET{C1}->{$_}->{C_SS2} = "\x8E";
158 $CHARSET{C1}->{$_}->{C_SS3} = "\x8F";
159 $CHARSET{C1}->{$_}->{r_SS2} = '\x8E';
160 $CHARSET{C1}->{$_}->{r_SS3} = '\x8F';
161 $CHARSET{C1}->{$_}->{r_SS2_ESC} = '\x1B\x4E';
162 $CHARSET{C1}->{$_}->{r_SS3_ESC} = '\x1B\x4F';
163 }
164 for ("\x43", '64291991C1') {
165 $CHARSET{C1}->{$_}->{r_CSI} = '\x9B';
166 $CHARSET{C1}->{$_}->{r_CSI_ESC} = '\x1B\x5B';
167 $CHARSET{C1}->{$_}->{r_DCS} = '\x90';
168 $CHARSET{C1}->{$_}->{r_ST} = '\x9C';
169 $CHARSET{C1}->{$_}->{r_OSC} = '\x9D';
170 $CHARSET{C1}->{$_}->{r_PM} = '\x9E';
171 $CHARSET{C1}->{$_}->{r_APC} = '\x9F';
172 $CHARSET{C1}->{$_}->{reset_all} = {"\x85"=>1, "\x90"=>1,
173 "\x9C"=>1, "\x9D"=>1, "\x9E"=>1, "\x9F"=>1};
174 }
175 $CHARSET{C1}->{'64291991C1'}->{r_SCI} = '\x9A';
176
177 $CHARSET{single_control}->{Fs} ={ucs => 0x70005000, chars => 32, dimension => 1};
178 $CHARSET{single_control}->{'3F'} ={ucs => 0x70005020, chars => 80, dimension => 1};
179 $CHARSET{single_control}->{'3F!'}={ucs => 0x70005070, chars => 80, dimension => 1};
180 $CHARSET{single_control}->{'3F"'}={ucs => 0x700050C0, chars => 80, dimension => 1};
181 $CHARSET{single_control}->{'3F#'}={ucs => 0x70005110, chars => 80, dimension => 1};
182 }
183
184
185 ### --- Perl Encode module common functions
186
187 sub encode ($$;$) {
188 my ($obj, $str, $chk) = @_;
189 $_[1] = '' if $chk;
190 $str = &internal_to_iso2022 ($str);
191 return $str;
192 }
193
194 sub decode ($$;$) {
195 my ($obj, $str, $chk) = @_;
196 $_[1] = '' if $chk;
197 return &iso2022_to_internal ($str);
198 }
199
200 ### --- Encode::ISO2022 unique functions
201
202 ## Make a new ISO/IEC 2022-buffers object with default status
203 sub new_object {
204 my %C;
205 $C{bit} = 8;
206 $C{CL} = 'C0'; $C{CR} = 'C1'; $C{ESC_Fe} = 'C1';
207 $C{C0} = $CHARSET{C0}->{"\x40"}; ## ISO/IEC 6429:1991 C0
208 $C{C1} = $CHARSET{C1}->{'64291991C1'}; ## ISO/IEC 6429:1991 C1
209 $C{GL} = 'G0'; $C{GR} = 'G1';
210 $C{G0} = $CHARSET{G94}->{"\x42"}; ## ISO/IEC 646:1991 IRV
211 #$C{G1} = $CHARSET{G96}->{"\x41"}; ## ISO/IEC 8859-1 GR
212 $C{G1} = $CHARSET{G94}->{"\x7E"}; ## empty set
213 $C{G2} = $CHARSET{G94}->{"\x7E"}; ## empty set
214 $C{G3} = $CHARSET{G94}->{"\x7E"}; ## empty set
215 $C{option} = {
216 C1invoke_to_right => 0, ## C1 invoked to: (0: ESC Fe, 1: CR)
217 G94n_designate_long => 0, ## (1: ESC 02/04 02/08 04/00..02)
218 designate_to => { ## Designated G buffer (-1: not be outputed)
219 C0 => {
220 default => 0,
221 },
222 C1 => {
223 default => 1,
224 },
225 G94 => {
226 "\x42" => 0,
227 default => 0,
228 },
229 G96 => {
230 default => 1,
231 },
232 G94n => {
233 default => 0,
234 },
235 G96n => {
236 default => 1,
237 },
238 },
239 Ginvoke_by_single_shift => [0,0,0,0], ## Invoked by SS
240 Ginvoke_to_left => [1,1,1,1], ## Which invoked to? (1: L, 0: R)
241 private_set => { ## Private set vs Final byte
242 C0 => [],
243 C1 => [],
244 G94 => [],
245 G94n => [[],[],[],[],[]],
246 G96 => [],
247 #G96n => [], ## (not implemented)
248 U96n => [], ## mule-unicode sets
249 XC1 => {
250 '64291991C1' => undef, ## ISO/IEC 6429:1991 C1
251 },
252 },
253 reset => { ## Reset status at top of line
254 Gdesignation => "\x42", ## F of designation or 0
255 Ginvoke => 1,
256 },
257 use_revision => 1, ## Output IRR
258 };
259 \%C;
260 }
261
262 sub iso2022_to_internal ($;\%) {
263 my ($s, $C) = @_;
264 my %_GB_to_GN = (
265 "\x28"=>'G0',"\x29"=>'G1',"\x2A"=>'G2',"\x2B"=>'G3',
266 "\x2C"=>'G0',"\x2D"=>'G1',"\x2E"=>'G2',"\x2F"=>'G3',
267 );
268 $C ||= &new_object;
269
270 use re 'eval';
271 $s =~ s{
272 ((??{ $_CHARS_to_RANGE{'l'.$C->{$C->{GL}}->{chars}}
273 . qq/{$C->{$C->{GL}}->{dimension},$C->{$C->{GL}}->{dimension}}/ }))
274 |((??{ $_CHARS_to_RANGE{'r'.$C->{$C->{GL}}->{chars}}
275 . qq/{$C->{$C->{GR}}->{dimension},$C->{$C->{GR}}->{dimension}}/ }))
276
277 | (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS2} || '(?!)')
278 . ($C->{$C->{ESC_Fe}}->{r_SS2_ESC} ?
279 qq/|$C->{$C->{ESC_Fe}}->{r_SS2_ESC}/ : '')
280 . ($C->{$C->{CL}}->{r_SS2} ? qq/|$C->{$C->{CL}}->{r_SS2}/ : '') . q/)/
281 . ( $C->{$C->{CL}}->{r_LS0}
282 ||$C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9
283 qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'')
284 })
285 ((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}}
286 . qq/{$C->{G2}->{dimension},$C->{G2}->{dimension}}/ }))
287 | (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS3} || '(?!)')
288 . ($C->{$C->{ESC_Fe}}->{r_SS3_ESC} ?
289 qq/|$C->{$C->{ESC_Fe}}->{r_SS3_ESC}/ : '')
290 . ($C->{$C->{CL}}->{r_SS3} ? qq/|$C->{$C->{CL}}->{r_SS3}/ : '') . q/)/
291 . ( $C->{$C->{CL}}->{r_LS0}
292 || $C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9
293 qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'')
294 })
295 ((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}}
296 . qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ }))
297
298 |((??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' })) ## GL = G0
299 |((??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' })) ## GL = G1
300
301 ## Control sequence
302 |(??{ '(?:'.($C->{$C->{CR}}->{r_CSI}||'(?!)')
303 .($C->{$C->{ESC_Fe}}->{r_CSI_ESC} ?
304 qq/|$C->{$C->{ESC_Fe}}->{r_CSI_ESC}/: '')
305 .')'
306 })
307 ((??{ qq/[\x30-\x3F$C->{$C->{CL}}->{LS0}$C->{$C->{CL}}->{LS1}\xB0-\xBF]*/
308 .qq/[\x20-\x2F$C->{$C->{CL}}->{LS0}$C->{$C->{CL}}->{LS1}\xA0-\xAF]*/
309 }) [\x40-\x7E\xD0-\xFE])
310
311 ## Other escape sequence
312 |(\x1B[\x20-\x2F]*[\x30-\x7E])
313
314 ## Misc. sequence (SP, control, or broken data)
315 |([\x00-\xFF])
316 }{
317 my ($gl,$gr,$ss2,$ss3,$ls0,$ls1,$csi,$esc,$misc)
318 = ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10);
319 $C->{_irr} = undef unless defined $esc;
320 ## GL graphic character
321 if (defined $gl) {
322 my $c = 0;
323 my $m = $C->{$C->{GL}}->{chars}==94?0x21:$C->{$C->{GL}}->{chars}==96?0x20:0;
324 for (split //, $gl) {
325 $c = $c * $C->{$C->{GL}}->{chars} + unpack ('C', $_) - $m;
326 }
327 chr ($C->{$C->{GL}}->{ucs} + $c);
328 ## Control, SP, or broken data
329 ## TODO: support control sets other than ISO/IEC 6429's
330 } elsif (defined $misc) {
331 $misc;
332 ## GR graphic character
333 } elsif ($gr) {
334 my $c = 0;
335 my $m = $C->{$C->{GR}}->{chars}==94?0xA1:$C->{$C->{GR}}->{chars}==96?0xA0:0x80;
336 for (split //, $gr) {
337 $c = $c * $C->{$C->{GR}}->{chars} + unpack ('C', $_) - $m;
338 }
339 chr ($C->{$C->{GR}}->{ucs} + $c);
340 ## Graphic character with SS2
341 } elsif ($ss2) {
342 $ss2 =~ tr/\x80-\xFF/\x00-\x7F/;
343 my $c = 0; my $m = $C->{G2}->{chars}==94?0x21:$C->{G2}->{chars}==96?0x20:0;
344 for (split //, $ss2) {
345 $c = $c * $C->{G2}->{chars} + unpack ('C', $_) - $m;
346 }
347 chr ($C->{G2}->{ucs} + $c);
348 ## Graphic character with SS3
349 } elsif ($ss3) {
350 $ss3 =~ tr/\x80-\xFF/\x00-\x7F/;
351 my $c = 0; my $m = $C->{G3}->{chars}==94?0x21:$C->{G3}->{chars}==96?0x20:0;
352 for (split //, $ss3) {
353 $c = $c * $C->{G3}->{chars} + unpack ('C', $_) - $m;
354 }
355 chr ($C->{G3}->{ucs} + $c);
356 ## Locking shifts
357 } elsif ($ls0) {
358 $C->{GL} = 'G0'; '';
359 } elsif ($ls1) {
360 $C->{GL} = 'G1'; '';
361 ## Escape sequence
362 } elsif ($esc) {
363 ## IRR (revision number)
364 if ($esc =~ /\x1B\x26([\x40-\x7E])/) {
365 $C->{_irr} = $1; $esc = '';
366 } else {
367 $esc =~ s{
368 \x1B([\x28-\x2B])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 94^1
369 |\x1B\x24([\x28-\x2B]?)(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 94^n
370
371 |\x1B([\x2C-\x2F])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 96^1
372 |\x1B\x24([\x2C-\x2F])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 96^n
373
374 |\x1B([\x40-\x5F]) ## ESC Fe
375
376 |\x1B\x21([\x21-\x23]?[\x30-\x7E]) ## CL = C0
377 |\x1B\x22([\x21-\x23]?[\x30-\x7E]) ## CR & ESC Fe = C1
378
379 |\x1B([\x60-\x7E]) ## Single control functions
380 |\x1B\x23([\x21-\x23]?)([\x30-\x7E])
381
382 |\x1B\x20([\x40-\x7E]) ## Announcer
383 }{
384 my ($g94_g,$g94_f,$g94n_g,$g94n_f,$g96_g,$g96_f,$g96n_g,$g96n_f,$Fe,
385 $CZD, $C1D, $Fs, $sI, $sF,$ACS)
386 = ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15);
387 my $rev = $C->{_irr} || '';
388 if ($g94_g) {
389 $C->{ $_GB_to_GN{ $g94_g } } = $CHARSET{G94}->{ $g94_f.$rev }
390 || $CHARSET{G94}->{ $g94_f }
391 || $CHARSET{G94}->{ "\x7E" }; '';
392 } elsif (defined $g94n_f) {
393 $C->{ $_GB_to_GN{ $g94n_g } || 'G0' } = $CHARSET{G94n}->{ $g94n_f.$rev }
394 || $CHARSET{G94n}->{ $g94n_f }
395 || $CHARSET{G94n}->{ "\x7E" }; '';
396 } elsif ($g96_g) {
397 $C->{ $_GB_to_GN{ $g96_g } } = $CHARSET{G96}->{ $g96_f.$rev }
398 || $CHARSET{G96}->{ $g96_f }
399 || $CHARSET{G96}->{ "\x7E" }; '';
400 } elsif (defined $g96n_f) {
401 $C->{ $_GB_to_GN{ $g96n_g } } = $CHARSET{G96n}->{ $g96n_f.$rev }
402 || $CHARSET{G96n}->{ $g96n_f }
403 || $CHARSET{G96n}->{ "\x7E" }; '';
404 } elsif ($Fe) { ## ESC Fe => C1
405 chr ($C->{ $C->{ESC_Fe} }->{ucs} + (ord ($Fe) - 0x40));
406 } elsif ($CZD) {
407 $C->{C0} = $CHARSET{C0}->{ $CZD.$rev }
408 || $CHARSET{C0}->{ $CZD } || $CHARSET{C0}->{ "\x7E" }; '';
409 } elsif ($C1D) {
410 $C->{C1} = $CHARSET{C1}->{ $C1D.$rev }
411 || $CHARSET{C1}->{ $C1D } || $CHARSET{C1}->{ "\x7E" }; '';
412 } elsif ($Fs) {
413 if ($Fs eq "\x6E") { ## LS2
414 $C->{GL} = 'G2'; '';
415 } elsif ($Fs eq "\x6F") { ## LS3
416 $C->{GL} = 'G3'; '';
417 } elsif ($Fs eq "\x7E") { ## LS1R
418 $C->{GR} = 'G1'; $C->{GL} = 'G1' if $C->{bit} == 7; '';
419 } elsif ($Fs eq "\x7D") { ## LS2R
420 $C->{GR} = 'G2'; $C->{GL} = 'G2' if $C->{bit} == 7; '';
421 } elsif ($Fs eq "\x7C") { ## LS3R
422 $C->{GR} = 'G3'; $C->{GL} = 'G3' if $C->{bit} == 7; '';
423 } else {
424 chr ($CHARSET{single_control}->{Fs}->{ucs} + (ord ($Fs) - 0x60));
425 }
426 } elsif ($sI) {
427 chr ($CHARSET{single_control}->{'3F'.$sI}->{ucs} + (ord ($sF) - 0x30));
428 } elsif ($ACS) { ## Announcer
429 if ($ACS eq "\x4A") { $C->{bit} = 7 }
430 elsif ($ACS eq "\x4B") { $C->{bit} = 8 }
431 '';
432 }
433 }gex;
434 $C->{_irr} = undef;
435 }
436 $esc;
437 ## Control sequence
438 } elsif ($csi) {
439 $csi =~ tr/\xA0-\xFF/\x20-\x7F/d;
440 $csi =~ s/$C->{$C->{CL}}->{LS0}//g if $C->{$C->{CL}}->{LS0};
441 $csi =~ s/$C->{$C->{CL}}->{LS1}//g if $C->{$C->{CL}}->{LS1};
442 "\x9B".$csi;
443 }
444 }gex;
445 $s;
446 }
447
448 sub internal_to_iso2022 ($\%) {
449 my ($s, $C) = @_;
450 $C ||= &new_object;
451
452 my $r = '';
453 for my $c (split //, $s) {
454 my $cc = ord $c;
455 my $t;
456 if ($cc <= 0x1F) {
457 $t = _i2c ($c, $C, type => 'C0', charset => '@');
458 } elsif ($cc == 0x20 || $cc == 0x7F) {
459 $t = _back2ascii ($C) . $c;
460 } elsif ($cc < 0x7F) {
461 $t = _i2g ($c, $C, type => 'G94', charset => 'B');
462 } elsif ($cc <= 0x9F) {
463 $t = _i2c ($c, $C, type => 'C1', charset_id => '64291991C1',
464 charset => $C->{private_set}->{XC1}->{'64291991C1'});
465 } elsif ($cc <= 0xFF) {
466 $t = _i2g (chr($cc-0x80), $C, type => 'G96', charset => 'A');
467 } elsif ($cc <= 0x24FF) {
468 my $c = $cc - 0x100;
469 my $final = $C->{private_set}->{U96n}->[0];
470 if (length $final) {
471 $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,
472 type => 'G96n', charset => $final);
473 }
474 } elsif ($cc <= 0x33FF) {
475 my $c = $cc - 0x2500;
476 my $final = $C->{private_set}->{U96n}->[1];
477 if (length $final) {
478 $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,
479 type => 'G96n', charset => $final);
480 }
481 } elsif (0xE000 <= $cc && $cc <= 0xFFFF) {
482 my $c = $cc - 0xE000;
483 my $final = $C->{private_set}->{U96n}->[2];
484 if (length $final) {
485 $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,
486 type => 'G96n', charset => $final);
487 }
488
489 } elsif (0xE9F6C0 <= $cc && $cc <= 0xF06F80) {
490 my $c = $cc - 0xE9F6C0;
491 $t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C,
492 type => 'G94n', charset => chr(($c / 8836)+0x30));
493 } elsif (0xF49D7C <= $cc && $cc <= 0xF4BFFF) {
494 my $c = $cc - 0xF49D7C;
495 $t = _i2g (chr(($c / 94)+0x21).chr(($c % 94)+0x21), $C,
496 type => 'G94n', charset => 'B', revision => '@');
497
498 } elsif (0xF0000 <= $cc && $cc <= 0x10F7FF) {
499 my $c = $cc - 0xF0000;
500 $t = _i2g (chr((($c % 9216) / 96)+0x20).chr(($c % 96)+0x20), $C,
501 type => 'G96n', charset => "\x20".chr(($c / 9216)+0x40));
502 } elsif (0xE90940 <= $cc && $cc <= 0xE92641) {
503 my $c = $cc - 0xE90940;
504 $t = _i2g (chr(($c % 94)+0x21), $C,
505 type => 'G94', charset => chr(($c / 94)+0x30));
506 } elsif (0xE92642 <= $cc && $cc <= 0xE9269F) {
507 my $c = $cc - 0xE92642;
508 $t = _i2g (chr($c+0x21),$C,type => 'G94', charset => "\x64", revision => '@');
509 } elsif (0xE926A0 <= $cc && $cc <= 0xE9443F) {
510 my $c = $cc - 0xE926A0;
511 $t = _i2g (chr(($c % 96)+0x20), $C,
512 type => 'G96', charset => chr(($c / 96)+0x30));
513 } elsif (0xE944A0 <= $cc && $cc <= 0xE961A1) {
514 my $c = $cc - 0xE944A0;
515 $t = _i2g (chr(($c % 94)+0x21), $C,
516 type => 'G94', charset => '!'.chr(($c / 94)+0x30));
517 } elsif (0xE96200 <= $cc && $cc <= 0xE97F9F) {
518 my $c = $cc - 0xE96200;
519 $t = _i2g (chr(($c % 96)+0x20), $C,
520 type => 'G96', charset => '!'.chr(($c / 96)+0x30));
521 } elsif (0xE98000 <= $cc && $cc <= 0xE99D01) {
522 my $c = $cc - 0xE98000;
523 $t = _i2g (chr(($c % 94)+0x21), $C,
524 type => 'G94', charset => '"'.chr(($c / 94)+0x30));
525 } elsif (0xE99D60 <= $cc && $cc <= 0xE9BAFF) {
526 my $c = $cc - 0xE99D60;
527 $t = _i2g (chr(($c % 96)+0x20), $C,
528 type => 'G96', charset => '"'.chr(($c / 96)+0x30));
529 } elsif (0xE9BB60 <= $cc && $cc <= 0xE9D861) {
530 my $c = $cc - 0xE9BB60;
531 $t = _i2g (chr(($c % 94)+0x21), $C,
532 type => 'G94', charset => '#'.chr(($c / 94)+0x30));
533 } elsif (0xE9D8C0 <= $cc && $cc <= 0xE9F65F) {
534 my $c = $cc - 0xE9D8C0;
535 $t = _i2g (chr(($c % 96)+0x20), $C,
536 type => 'G96', charset => '#'.chr(($c / 96)+0x30));
537 } elsif (0x70090940 <= $cc && $cc <= 0x70092641) {
538 my $c = $cc - 0x70090940;
539 $t = _i2g (chr(($c % 94)+0x21), $C,
540 type => 'G94', charset => "\x20".chr(($c / 94)+0x30));
541 } elsif (0x700926A0 <= $cc && $cc <= 0x7009443F) {
542 my $c = $cc - 0x700926A0;
543 $t = _i2g (chr(($c % 96)+0x20), $C,
544 type => 'G96', charset => "\x20".chr(($c / 96)+0x30));
545 ## TODO: DRCS with I byte: U+700944A0-U+7009F6BF
546 } elsif (0x70400000 <= $cc && $cc <= 0x7040FFED) {
547 my $c = $cc - 0x70400000;
548 $t = _i2g (chr(($c % 94)+0x21), $C, charset_id => 'P'.int ($c / 94),
549 type => 'G94', charset => $C->{private_set}->{G94}->[ $c / 94 ]);
550 } elsif (0x70410000 <= $cc && $cc <= 0x7041FFBF) {
551 my $c = $cc - 0x70410000;
552 $t = _i2g (chr(($c % 96)+0x20), $C, charset_id => 'P'.int ($c / 96),
553 type => 'G96', charset => $C->{private_set}->{G96}->[ $c / 96 ]);
554 } elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) {
555 my $c = $cc % 0x10000;
556 $t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C,
557 type => 'G94n',
558 charset_id => 'P'.int(($cc / 0x10000) - 0x7042).'_'.int($c / 8836),
559 charset => $C->{private_set}->{G94n}->[ ($cc / 0x10000) - 0x7042 ]
560 ->[ $c / 8836 ]);
561 }
562 if (defined $t) {
563 $r .= $t;
564 } else {
565 $r .= _i2g ("\x3F", $C, type => 'G94', charset => 'B');
566 }
567 }
568 $r . _back2ascii ($C);
569 }
570
571 ## $O{charset} eq undef means that charset is same as the current designated one.
572 sub _i2c ($%%) {
573 my ($s, $C, %O) = @_;
574 my $r = '';
575 if ($O{type} eq 'C0') {
576 if (defined $O{charset}) {
577 if ( $C->{C0} ne $CHARSET{C0}->{$O{charset}}
578 && $C->{C0} ne $CHARSET{C0}->{$O{charset_id}}) {
579 for ($C->{option}->{designate_to}->{C0}->{$O{charset}},
580 $C->{option}->{designate_to}->{C0}->{default}) {
581 if (defined $_) { return undef if $_ == -1; last }
582 }
583 $r .= "\x1B\x21".$O{charset};
584 $C->{C0} = $CHARSET{C0}->{$O{charset}};
585 }
586 } elsif (defined $O{charset_id}) {
587 if ($C->{C0} ne $CHARSET{C0}->{$O{charset_id}}) {
588 return undef; ## Control set is not designated nor has F byte
589 }
590 }
591 $r .= _back2ascii ($C, reset_all => $C->{C0}->{reset_all}->{$s});
592 return $r . $s;
593 } elsif ($O{type} eq 'C1') {
594 if (defined $O{charset}) {
595 if ( $C->{C1} ne $CHARSET{C1}->{$O{charset}}
596 && $C->{C1} ne $CHARSET{C1}->{$O{charset_id}}) {
597 for ($C->{option}->{designate_to}->{C1}->{$O{charset}},
598 $C->{option}->{designate_to}->{C1}->{default}) {
599 if (defined $_) { return undef if $_ == -1; last }
600 }
601 $r .= "\x1B\x22".$O{charset};
602 $C->{C1} = $CHARSET{C1}->{$O{charset}};
603 }
604 } elsif (defined $O{charset_id}) {
605 if ($C->{C1} ne $CHARSET{C1}->{$O{charset_id}}) {
606 return undef; ## Control set is not designated nor has F byte
607 }
608 }
609 $r .= _back2ascii ($C, reset_all => $C->{C1}->{reset_all}->{$s});
610 unless ($C->{option}->{C1invoke_to_right}) { ## ESC Fe
611 $s =~ s/([\x80-\x9F])/"\x1B" . chr (ord ($1) - 0x40)/ge;
612 }
613 return $r . $s;
614 }
615 }
616 sub _i2g ($%%) {
617 my ($s, $C, %O) = @_;
618 my $r = '';
619 my $set = $CHARSET{$O{type}}->{$O{charset}};
620 my $set0 = $CHARSET{$O{type}}->{$O{charset_id}};
621 ## -- designate character set
622 my $G = 0;
623 if ($C->{G0} eq $set || $C->{G0} eq $set0) { $G = 0 }
624 elsif ($C->{G1} eq $set || $C->{G1} eq $set0) { $G = 1 }
625 elsif ($C->{G2} eq $set || $C->{G2} eq $set0) { $G = 2 }
626 elsif ($C->{G3} eq $set || $C->{G3} eq $set0) { $G = 3 }
627 else {
628 return undef unless $set; ## charset does not have F byte
629 $G = 1 if $O{type} eq 'G96' || $O{type} eq 'G96n';
630 for ($C->{option}->{designate_to}->{$O{type}}->{$O{charset}},
631 $C->{option}->{designate_to}->{$O{type}}->{default}) {
632 if (defined $_) {
633 $G = $_; last;
634 }
635 }
636 if ($G == -1) {
637 return undef;
638 }
639 if ($O{type} eq 'G94') {
640 $r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'')
641 ."\x1B".("\x28","\x29","\x2A","\x2B")[$G].$O{charset};
642 } elsif ($O{type} eq 'G94n') {
643 if ($G == 0 && !$C->{option}->{G94n_designate_long}
644 && ($O{charset} eq '@' || $O{charset} eq 'A' || $O{charset} eq 'B')) {
645 $r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'')
646 ."\x1B\x24".$O{charset};
647 } else {
648 $r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'')
649 ."\x1B\x24".("\x28","\x29","\x2A","\x2B")[$G].$O{charset};
650 }
651 } elsif ($O{type} eq 'G96') {
652 $r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'')
653 ."\x1B".("\x2C","\x2D","\x2E","\x2F")[$G].$O{charset};
654 } elsif ($O{type} eq 'G96n') {
655 $r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'')
656 ."\x1B\x24".("\x2C","\x2D","\x2E","\x2F")[$G].$O{charset};
657 }
658 $C->{'G'.$G} = $CHARSET{$O{type}}->{$O{charset}};
659 }
660 ## -- invoke G buffer
661 my $left = $C->{option}->{Ginvoke_to_left}->[$G];
662 if ($C->{GL} eq 'G'.$G) {
663 $left = 1;
664 } elsif ($C->{GR} eq 'G'.$G) {
665 $left = 0;
666 } else {
667 if ($C->{option}->{Ginvoke_by_single_shift}->[$G]) {
668 if ($C->{C1}->{'C_SS'.$G}) {
669 $r .= _i2c ($C->{C1}->{'C_SS'.$G}, $C, type => 'C1') || return undef;
670 } elsif ($C->{C0}->{'C_SS'.$G}) {
671 $r .= _i2c ($C->{C0}->{'C_SS'.$G}, $C, type => 'C0') || return undef;
672 } else { ## Both C0 and C1 set do not have SS2/3.
673 $r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G;
674 }
675 } else {
676 $left = 0 if $G == 1 && !$C->{C0}->{C_LS1};
677 $r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G;
678 }
679 }
680 $s =~ tr/\x00-\x7F/\x80-\xFF/ unless $left;
681 $r . $s;
682 }
683 sub _back2ascii (%) {
684 my ($C, %O) = @_;
685 my $r = '';
686 if ($C->{option}->{reset}->{Gdesignation}) {
687 my $F = $C->{option}->{reset}->{Gdesignation}; # \x42
688 $r .= "\x1B\x28".$F unless $C->{G0} eq $CHARSET{G94}->{$F};
689 $C->{G0} = $CHARSET{G94}->{$F};
690 if ($O{reset_all}) {
691 $C->{G1} = $CHARSET{G94}->{"\x7E"};
692 $C->{G2} = $CHARSET{G94}->{"\x7E"};
693 $C->{G3} = $CHARSET{G94}->{"\x7E"};
694 }
695 }
696 if ($C->{option}->{reset}->{Ginvoke}) {
697 if ($C->{GL} ne 'G0') {
698 $r .= $C->{C0}->{C_LS0} || ($C->{C0} = $CHARSET{C0}->{'@'},"\x1B\x21\x40\x0F");
699 $C->{GL} = 'G0';
700 }
701 $C->{GR} = undef if $O{reset_all};
702 }
703 $r;
704 }
705 ## __invoke (\%C, $G, $left_or_right)
706 sub __invoke (\%$$) {
707 my ($C, $G) = @_;
708 if ($_[2]) {
709 $C->{GL} = 'G'.$G;
710 return ($C->{C0}->{C_LS0}
711 || scalar ($C->{C0}=$CHARSET{C0}->{'@'},"\x1B\x21\x40\x0F"),
712 $C->{C0}->{C_LS1}, "\x1B\x6E", "\x1B\x6F")[$G];
713 } else {
714 $C->{GR} = 'G'.$G;
715 return ("", "\x1B\x7E", "\x1B\x7D", "\x1B\x7C")[$G];
716 }
717 '';
718 }
719
720 sub make_charset (%) {
721 ## TODO: support private charset ID such as 'X0'
722 my %set = @_;
723 my $setid = qq($set{I}$set{F}$set{revision});
724 my $settype = $set{type} || 'G94';
725 delete $set{type}, $set{I}, $set{F}, $set{revision};
726 $CHARSET{ $settype }->{ $setid } = \%set;
727 }
728
729 1;
730 __END__
731
732 =head1 SEE ALSO
733
734 ISO/IEC 646:1991, "7-bit coded graphic character set for intormation interchange".
735
736 ISO/IEC 2022:1994, "Character Code Structure and Extension Techniques".
737 (IDT with ECMA 35, JIS X 0202:1998)
738
739 ISO/IEC 4873:1991, "8-Bit Coded Character Set Structure and Rules".
740 (IDT with ECMA 43)
741
742 ISO/IEC 6429:1992, "Control Functions for Coded Character Sets".
743 (IDT with ECMA 48:1991, JIS X 0211:1998)
744
745 ISO/IEC 8859, "8-Bit Single-Byte Coded Graphic Character Sets".
746
747 Encode, perlunicode
748
749 =head1 LICENSE
750
751 Copyright 2002 wakaba <w@suika.fam.cx>
752
753 This library is free software; you can redistribute it
754 and/or modify it under the same terms as Perl itself.
755
756 =cut
757
758 # $Date: 2002/09/15 04:15:51 $
759 ### ISO2022.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24