3 |
|
|
4 |
Encode::ISO2022 --- ISO/IEC 2022 encoder and decoder |
Encode::ISO2022 --- ISO/IEC 2022 encoder and decoder |
5 |
|
|
6 |
|
=head1 ENCODINGS |
7 |
|
|
8 |
|
=over 4 |
9 |
|
|
10 |
|
=item iso2022 |
11 |
|
|
12 |
|
ISO/IEC 2022:1994. Default status is: |
13 |
|
|
14 |
|
=over 2 |
15 |
|
|
16 |
|
=item CL = C0 = ISO/IEC 6429:1991 C0 set |
17 |
|
|
18 |
|
=item CR = C1 = ISO/IEC 6429:1991 C1 set |
19 |
|
|
20 |
|
=item GL = G0 = ISO/IEC 646:1991 IRV GL(G0) set |
21 |
|
|
22 |
|
=item GR = G1 = empty set |
23 |
|
|
24 |
|
=item G2 = empty set |
25 |
|
|
26 |
|
=item G3 = empty set |
27 |
|
|
28 |
|
=back |
29 |
|
|
30 |
|
(Alias: iso/iec2022, iso-2022, 2022, cp2022) |
31 |
|
|
32 |
|
=back |
33 |
|
|
34 |
|
Note that ISO/IEC 2022 based encodings are found in |
35 |
|
Encode::ISO2022::* modules. This module, Encode::ISO2022 |
36 |
|
only provides a general ISO/IEC 2022 encoder/decoder. |
37 |
|
|
38 |
=cut |
=cut |
39 |
|
|
40 |
require v5.7.3; |
require v5.7.3; |
43 |
use vars qw(%CHARSET %CODING_SYSTEM $VERSION); |
use vars qw(%CHARSET %CODING_SYSTEM $VERSION); |
44 |
$VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
$VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
45 |
use base qw(Encode::Encoding); |
use base qw(Encode::Encoding); |
46 |
__PACKAGE__->Define (qw/iso-2022 iso2022 2022 cp2022/); |
__PACKAGE__->Define (qw!iso-2022 iso/iec2022 iso2022 2022 cp2022!); |
47 |
require Encode::Charset; |
require Encode::Charset; |
48 |
*CHARSET = \%Encode::Charset::CHARSET; |
*CHARSET = \%Encode::Charset::CHARSET; |
49 |
*CODING_SYSTEM = \%Encode::Charset::CODING_SYSTEM; |
*CODING_SYSTEM = \%Encode::Charset::CODING_SYSTEM; |
50 |
|
|
|
### --- Intialization |
|
|
|
|
|
my %_CHARS_to_RANGE = ( |
|
|
l94 => q/[\x21-\x7E]/, |
|
|
l96 => q/[\x20-\x7F]/, |
|
|
l128 => q/[\x00-\x7F]/, |
|
|
l256 => q/[\x00-\xFF]/, |
|
|
r94 => q/[\xA1-\xFE]/, |
|
|
r96 => q/[\xA0-\xFF]/, |
|
|
r128 => q/[\x80-\xFF]/, |
|
|
r256 => q/[\x80-\xFF]/, |
|
|
b94 => q/[\x21-\x7E\xA1-\xFE]/, |
|
|
b96 => q/[\x20-\x7F\xA0-\xFF]/, |
|
|
b128 => q/[\x00-\xFF]/, |
|
|
b256 => q/[\x00-\xFF]/, |
|
|
); |
|
|
|
|
51 |
### --- Perl Encode module common functions |
### --- Perl Encode module common functions |
52 |
|
|
53 |
sub encode ($$;$) { |
sub encode ($$;$) { |
64 |
} |
} |
65 |
|
|
66 |
### --- Encode::ISO2022 unique functions |
### --- Encode::ISO2022 unique functions |
67 |
|
*new_object = \&Encode::Charset::new_object; |
68 |
|
|
69 |
## Make a new ISO/IEC 2022-buffers object with default status |
sub iso2022_to_internal ($;%) { |
|
sub new_object { |
|
|
my %C; |
|
|
$C{bit} = 8; |
|
|
$C{CL} = 'C0'; $C{CR} = 'C1'; $C{ESC_Fe} = 'C1'; |
|
|
$C{C0} = $CHARSET{C0}->{"\x40"}; ## ISO/IEC 6429:1991 C0 |
|
|
$C{C1} = $CHARSET{C1}->{'64291991C1'}; ## ISO/IEC 6429:1991 C1 |
|
|
$C{GL} = 'G0'; $C{GR} = 'G1'; |
|
|
$C{G0} = $CHARSET{G94}->{"\x42"}; ## ISO/IEC 646:1991 IRV |
|
|
#$C{G1} = $CHARSET{G96}->{"\x41"}; ## ISO/IEC 8859-1 GR |
|
|
$C{G1} = $CHARSET{G94}->{"\x7E"}; ## empty set |
|
|
$C{G2} = $CHARSET{G94}->{"\x7E"}; ## empty set |
|
|
$C{G3} = $CHARSET{G94}->{"\x7E"}; ## empty set |
|
|
$C{coding_system} = $CODING_SYSTEM{"\x40"}; ## ISO/IEC 2022 |
|
|
$C{option} = { |
|
|
C1invoke_to_right => 0, ## C1 invoked to: (0: ESC Fe, 1: CR) |
|
|
G94n_designate_long => 0, ## (1: ESC 02/04 02/08 04/00..02) |
|
|
designate_to => { ## Designated G buffer (-1: not be outputed) |
|
|
C0 => { |
|
|
default => 0, |
|
|
}, |
|
|
C1 => { |
|
|
default => 1, |
|
|
}, |
|
|
G94 => { |
|
|
"\x42" => 0, |
|
|
default => 0, |
|
|
}, |
|
|
G96 => { |
|
|
default => 1, |
|
|
}, |
|
|
G94n => { |
|
|
default => 0, |
|
|
}, |
|
|
G96n => { |
|
|
default => 1, |
|
|
}, |
|
|
coding_system => { |
|
|
default => -1, |
|
|
}, |
|
|
}, |
|
|
Ginvoke_by_single_shift => [0,0,0,0], ## Invoked by SS |
|
|
Ginvoke_to_left => [1,1,1,1], ## Which invoked to? (1: L, 0: R) |
|
|
private_set => { ## Private set vs Final byte |
|
|
C0 => [], |
|
|
C1 => [], |
|
|
G94 => [], |
|
|
G94n => [[],[],[],[],[]], |
|
|
G96 => [], |
|
|
#G96n => [], ## (not implemented) |
|
|
U96n => [], ## mule-unicode sets |
|
|
XC1 => { |
|
|
'64291991C1' => undef, ## ISO/IEC 6429:1991 C1 |
|
|
}, |
|
|
}, |
|
|
reset => { ## Reset status at top of line |
|
|
Gdesignation => "\x42", ## F of designation or 0 |
|
|
Ginvoke => 1, |
|
|
}, |
|
|
undef_char => ["\x3F", {type => 'G94', charset => 'B'}], |
|
|
use_revision => 1, ## Output IRR |
|
|
}; |
|
|
\%C; |
|
|
} |
|
|
|
|
|
sub iso2022_to_internal ($;\%) { |
|
70 |
my ($s, $C) = @_; |
my ($s, $C) = @_; |
71 |
|
$C ||= &new_object; |
72 |
my $t = ''; |
my $t = ''; |
73 |
$s =~ s{ |
$s =~ s{^((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*)}{ |
|
^((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
|
|
}{ |
|
74 |
my $i2 = $1; |
my $i2 = $1; |
75 |
$t = _iso2022_to_internal ($i2, $C); |
$t = _iso2022_to_internal ($i2, $C); |
76 |
''; |
''; |
77 |
}gesx; |
}es; |
78 |
|
my $pad = ''; |
79 |
|
use re 'eval'; |
80 |
$s =~ s{ |
$s =~ s{ |
81 |
## ISO/IEC 2022 |
## ISO/IEC 2022 |
82 |
\x1B\x25\x40((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
(??{"$pad\x1B$pad\x25$pad\x40"})((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
83 |
## UTF-8 |
## UTF-8 |
84 |
|\x1B\x25(?:\x47|\x2F[\x47-\x49])((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
|(??{"$pad\x1B$pad\x25$pad(?:\x47|\x2F$pad"."[\x47-\x49])"}) |
85 |
|
((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
86 |
## UCS-2, UTF-16 |
## UCS-2, UTF-16 |
87 |
|\x1B\x25\x2F[\x40\x43\x45\x4A-\x4C] |
|(??{"$pad\x1B$pad\x25$pad\x2F$pad"})([\x40\x43\x45\x4A-\x4C]) |
88 |
((?!\x00\x1B\x00\x25\x00\x2F?\x00[\x30-\x7E].)*) |
((?:(?!\x00\x1B\x00\x25(?:\x00\x2F)?\x00[\x30-\x7E])..)*) |
89 |
## UCS-4 |
## UCS-4 |
90 |
|\x1B\x25\x2F[\x41\x44\x46] |
|(??{"$pad\x1B$pad\x25$pad\x2F$pad"})[\x41\x44\x46] |
91 |
((?!\x00\x00\x00\x1B\x00\x00\x00\x25\x00\x00\x00\x2F? |
((?:(?!\x00\x00\x00\x1B\x00\x00\x00\x25(?:\x00\x00\x00\x2F)? |
92 |
\x00\x00\x00[\x30-\x7E].)*) |
\x00\x00\x00[\x30-\x7E])....)*) |
93 |
## with standard return |
## with standard return |
94 |
|\x1B\x25([\x30-\x7E])((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
|(??{"$pad\x1B$pad\x25$pad"})([\x30-\x7E]) |
95 |
|
((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
96 |
## without standard return |
## without standard return |
97 |
|\x1B\x25\x2F([\x30-\x7E])(.*) |
|(??{"$pad\x1B$pad\x25$pad\x2F$pad"})([\x30-\x7E])(.*) |
98 |
}{ |
}{ |
99 |
my ($i2,$u8,$Fu2,$u2,$u4,$Fsr,$sr,$Fnsr,$nsr) = ($1,$2,$3,$4,$5,$6,$7,$8,$9); |
my ($i2,$u8,$Fu2,$u2,$u4,$Fsr,$sr,$Fnsr,$nsr) = ($1,$2,$3,$4,$5,$6,$7,$8,$9); |
100 |
my $r = ''; |
my $r = ''; |
101 |
if (defined $i2) { |
if (defined $i2) { |
102 |
$r = _iso2022_to_internal ($i2, $C); |
$r = _iso2022_to_internal ($i2, $C); $pad = ''; |
103 |
} elsif (defined $u8) { |
} elsif (defined $u8) { |
104 |
$r = Encode::decode ('utf8', $u8); |
$r = Encode::decode ('utf8', $u8); $pad = ''; |
105 |
} elsif ($Fu2) { |
} elsif ($Fu2) { |
106 |
if (ord ($Fu2) > 0x49) { |
if (ord ($Fu2) > 0x49) { |
107 |
$r = Encode::decode ('utf-16be', $u2); |
$r = Encode::decode ('utf-16be', $u2); |
108 |
} else { |
} else { |
109 |
$r = Encode::decode ('ucs-2be', $u2); |
$r = Encode::decode ('ucs-2be', $u2); |
110 |
} |
} |
111 |
|
$pad = "\x00"; |
112 |
} elsif (defined $u4) { |
} elsif (defined $u4) { |
113 |
$r = Encode::decode ('ucs-4be', $u2); |
$r = Encode::decode ('ucs-4be', $u2); $pad = "\x00\x00\x00"; |
114 |
|
} elsif (defined $Fsr && $CODING_SYSTEM{$Fsr}->{perl_name}) { |
115 |
|
$r = Encode::decode ($CODING_SYSTEM{$Fsr}->{perl_name}, $sr); $pad = ''; |
116 |
|
} elsif (defined $Fnsr && $CODING_SYSTEM{$Fnsr}->{perl_name}) { |
117 |
|
$r = Encode::decode ($CODING_SYSTEM{$Fnsr}->{perl_name}, $nsr); $pad = ''; |
118 |
} else { ## temporary |
} else { ## temporary |
119 |
$r = '?+'; |
$r = '?' x length ($sr.$nsr); $pad = ''; |
120 |
} |
} |
121 |
$r; |
$r; |
122 |
}gesx; |
}gesx; |
123 |
$t . $s; |
$t . $s; |
124 |
} |
} |
125 |
|
|
126 |
sub _iso2022_to_internal ($;\%) { |
# this is very very trickey. my perl 5.8.0 does not process |
127 |
|
# regex with eval except the first time (i think it's a bug |
128 |
|
# of perl), so we redefine this function whenever being called! |
129 |
|
# when this unexpected behavior is fixed or someone finds |
130 |
|
# better way to avoid it, we will rewrite this code. |
131 |
|
&_iso2022_to_internal (undef); |
132 |
|
sub _iso2022_to_internal ($;%) { |
133 |
|
eval q{ sub __iso2022_to_internal ($;%) { 0 } }; |
134 |
|
eval q{ |
135 |
|
sub __iso2022_to_internal ($;%) { |
136 |
|
use re 'eval'; |
137 |
my ($s, $C) = @_; |
my ($s, $C) = @_; |
138 |
my %_GB_to_GN = ( |
my %_GB_to_GN = ( |
139 |
"\x28"=>'G0',"\x29"=>'G1',"\x2A"=>'G2',"\x2B"=>'G3', |
"\x28"=>'G0',"\x29"=>'G1',"\x2A"=>'G2',"\x2B"=>'G3', |
140 |
"\x2C"=>'G0',"\x2D"=>'G1',"\x2E"=>'G2',"\x2F"=>'G3', |
"\x2C"=>'G0',"\x2D"=>'G1',"\x2E"=>'G2',"\x2F"=>'G3', |
141 |
); |
); |
142 |
$C ||= &new_object; |
my %_CHARS_to_RANGE = ( |
143 |
|
l94 => q/[\x21-\x7E]/, l96 => q/[\x20-\x7F]/, |
144 |
|
l128 => q/[\x00-\x7F]/, l256 => q/[\x00-\xFF]/, |
145 |
|
r94 => q/[\xA1-\xFE]/, r96 => q/[\xA0-\xFF]/, |
146 |
|
r128 => q/[\x80-\xFF]/, r256 => q/[\x80-\xFF]/, |
147 |
|
b94 => q/[\x21-\x7E\xA1-\xFE]/, b96 => q/[\x20-\x7F\xA0-\xFF]/, |
148 |
|
b128 => q/[\x00-\xFF]/, b256 => q/[\x00-\xFF]/, |
149 |
|
); |
150 |
|
|
|
use re 'eval'; |
|
151 |
$s =~ s{ |
$s =~ s{ |
152 |
((??{ $_CHARS_to_RANGE{'l'.$C->{$C->{GL}}->{chars}} |
((??{ $_CHARS_to_RANGE{'l'.$C->{$C->{GL}}->{chars}} |
153 |
. qq/{$C->{$C->{GL}}->{dimension},$C->{$C->{GL}}->{dimension}}/ })) |
. qq/{$C->{$C->{GL}}->{dimension},$C->{$C->{GL}}->{dimension}}/ })) |
154 |
|((??{ $_CHARS_to_RANGE{'r'.$C->{$C->{GL}}->{chars}} |
|((??{ $_CHARS_to_RANGE{'r'.$C->{$C->{GR}}->{chars}} |
155 |
. qq/{$C->{$C->{GR}}->{dimension},$C->{$C->{GR}}->{dimension}}/ })) |
. qq/{$C->{$C->{GR}}->{dimension},$C->{$C->{GR}}->{dimension}}/ })) |
|
|
|
156 |
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS2} || '(?!)') |
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS2} || '(?!)') |
157 |
. ($C->{$C->{ESC_Fe}}->{r_SS2_ESC} ? |
. ($C->{$C->{ESC_Fe}}->{r_SS2_ESC} ? |
158 |
qq/|$C->{$C->{ESC_Fe}}->{r_SS2_ESC}/ : '') |
qq/|$C->{$C->{ESC_Fe}}->{r_SS2_ESC}/ : '') |
161 |
||$C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
||$C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
162 |
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
163 |
}) |
}) |
164 |
((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}} |
((??{ $_CHARS_to_RANGE{'b'.$C->{G2}->{chars}} |
165 |
. qq/{$C->{G2}->{dimension},$C->{G2}->{dimension}}/ })) |
. qq/{$C->{G2}->{dimension},$C->{G2}->{dimension}}/ })) |
166 |
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS3} || '(?!)') |
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS3} || '(?!)') |
167 |
. ($C->{$C->{ESC_Fe}}->{r_SS3_ESC} ? |
. ($C->{$C->{ESC_Fe}}->{r_SS3_ESC} ? |
168 |
qq/|$C->{$C->{ESC_Fe}}->{r_SS3_ESC}/ : '') |
qq/|$C->{$C->{ESC_Fe}}->{r_SS3_ESC}/ : '') |
171 |
|| $C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
|| $C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
172 |
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
173 |
}) |
}) |
174 |
((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}} |
((??{ $_CHARS_to_RANGE{'b'.$C->{G3}->{chars}} |
175 |
. qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ })) |
. qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ })) |
176 |
|
|
177 |
## Locking shift |
## Locking shift |
178 |
|( \x1B[\x6E\x6F\x7C-\x7E] |
|( (??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' }) |
|
|(??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' }) |
|
179 |
|(??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' }) |
|(??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' }) |
180 |
) |
) |
181 |
|
|
206 |
$c = $c * $C->{$C->{GL}}->{chars} + unpack ('C', $_) - $m; |
$c = $c * $C->{$C->{GL}}->{chars} + unpack ('C', $_) - $m; |
207 |
} |
} |
208 |
chr ($C->{$C->{GL}}->{ucs} + $c); |
chr ($C->{$C->{GL}}->{ucs} + $c); |
|
## Control, SP, or broken data |
|
|
## TODO: support control sets other than ISO/IEC 6429's |
|
|
} elsif (defined $misc) { |
|
|
$misc; |
|
209 |
## GR graphic character |
## GR graphic character |
210 |
} elsif ($gr) { |
} elsif ($gr) { |
211 |
my $c = 0; |
my $c = 0; |
214 |
$c = $c * $C->{$C->{GR}}->{chars} + unpack ('C', $_) - $m; |
$c = $c * $C->{$C->{GR}}->{chars} + unpack ('C', $_) - $m; |
215 |
} |
} |
216 |
chr ($C->{$C->{GR}}->{ucs} + $c); |
chr ($C->{$C->{GR}}->{ucs} + $c); |
217 |
|
## Control, SP, or broken data |
218 |
|
## TODO: support control sets other than ISO/IEC 6429's |
219 |
|
} elsif (defined $misc) { |
220 |
|
$misc; |
221 |
## Graphic character with SS2 |
## Graphic character with SS2 |
222 |
} elsif ($ss2) { |
} elsif ($ss2) { |
223 |
$ss2 =~ tr/\x80-\xFF/\x00-\x7F/; |
$ss2 =~ tr/\x80-\xFF/\x00-\x7F/; |
236 |
chr ($C->{G3}->{ucs} + $c); |
chr ($C->{G3}->{ucs} + $c); |
237 |
## Escape sequence |
## Escape sequence |
238 |
} elsif ($esc) { |
} elsif ($esc) { |
239 |
## IRR (revision number) |
if ($esc =~ /\x1B\x26([\x40-\x7E])/) { ## 6F (IRR) = ESC 02/06 Ft |
|
if ($esc =~ /\x1B\x26([\x40-\x7E])/) { |
|
240 |
$C->{_irr} = $1; $esc = ''; |
$C->{_irr} = $1; $esc = ''; |
241 |
} else { |
} else { |
242 |
$esc =~ s{ |
$esc =~ s{ |
260 |
$CZD, $C1D, $Fs, $sI, $sF,$ACS) |
$CZD, $C1D, $Fs, $sI, $sF,$ACS) |
261 |
= ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15); |
= ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15); |
262 |
my $rev = $C->{_irr} || ''; |
my $rev = $C->{_irr} || ''; |
263 |
if ($g94_g) { |
my $f2s = $C->{option}->{final_to_set}; |
264 |
$C->{ $_GB_to_GN{ $g94_g } } = $CHARSET{G94}->{ $g94_f.$rev } |
if ($g94_g) { ## ESC 02/08 [I] F |
265 |
|| $CHARSET{G94}->{ $g94_f } |
$C->{ $_GB_to_GN{ $g94_g } } |
266 |
|| $CHARSET{G94}->{ "\x7E" }; ''; |
= $CHARSET{G94}->{ $f2s->{G94}->{$g94_f.$rev} || $g94_f.$rev } |
267 |
} elsif (defined $g94n_f) { |
|| $CHARSET{G94}->{ $f2s->{G94}->{$g94_f} || $g94_f } |
268 |
$C->{ $_GB_to_GN{ $g94n_g } || 'G0' } = $CHARSET{G94n}->{ $g94n_f.$rev } |
|| $CHARSET{G94}->{ "\x7E" }; ''; |
269 |
|| $CHARSET{G94n}->{ $g94n_f } |
} elsif (defined $g94n_f) { ## ESC 02/04 [02/08..11] [I] F |
270 |
|| $CHARSET{G94n}->{ "\x7E" }; ''; |
$C->{ $_GB_to_GN{ $g94n_g } || 'G0' } |
271 |
} elsif ($g96_g) { |
= $CHARSET{G94n}->{ $f2s->{G94n}->{$g94n_f.$rev} || $g94n_f.$rev } |
272 |
$C->{ $_GB_to_GN{ $g96_g } } = $CHARSET{G96}->{ $g96_f.$rev } |
|| $CHARSET{G94n}->{ $f2s->{G94n}->{$g94n_f} || $g94n_f } |
273 |
|| $CHARSET{G96}->{ $g96_f } |
|| $CHARSET{G94n}->{ "\x7E" }; ''; |
274 |
|| $CHARSET{G96}->{ "\x7E" }; ''; |
} elsif ($g96_g) { ## ESC 02/12..15 [I] F |
275 |
} elsif (defined $g96n_f) { |
$C->{ $_GB_to_GN{ $g96_g } } |
276 |
$C->{ $_GB_to_GN{ $g96n_g } } = $CHARSET{G96n}->{ $g96n_f.$rev } |
= $CHARSET{G96}->{ $f2s->{G96}->{$g96_f.$rev} || $g96_f.$rev } |
277 |
|| $CHARSET{G96n}->{ $g96n_f } |
|| $CHARSET{G96}->{ $f2s->{G96}->{$g96_f} || $g96_f } |
278 |
|| $CHARSET{G96n}->{ "\x7E" }; ''; |
|| $CHARSET{G96}->{ "\x7E" }; ''; |
279 |
} elsif ($Fe) { ## ESC Fe => C1 |
} elsif ($g96n_g) { ## ESC 02/04 02/12..15 [I] F |
280 |
|
$C->{ $_GB_to_GN{ $g96n_g } } |
281 |
|
= $CHARSET{G96n}->{ $f2s->{G96n}->{$g96n_f.$rev} || $g96n_f.$rev } |
282 |
|
|| $CHARSET{G96n}->{ $f2s->{G96n}->{$g96n_f} || $g96n_f } |
283 |
|
|| $CHARSET{G96n}->{ "\x7E" }; ''; |
284 |
|
} elsif ($Fe) { ## ESC Fe = C1 |
285 |
chr ($C->{ $C->{ESC_Fe} }->{ucs} + (ord ($Fe) - 0x40)); |
chr ($C->{ $C->{ESC_Fe} }->{ucs} + (ord ($Fe) - 0x40)); |
286 |
} elsif ($CZD) { |
} elsif (defined $Fs) { ## ESC Fs |
|
$C->{C0} = $CHARSET{C0}->{ $CZD.$rev } |
|
|
|| $CHARSET{C0}->{ $CZD } || $CHARSET{C0}->{ "\x7E" }; ''; |
|
|
} elsif ($C1D) { |
|
|
$C->{C1} = $CHARSET{C1}->{ $C1D.$rev } |
|
|
|| $CHARSET{C1}->{ $C1D } || $CHARSET{C1}->{ "\x7E" }; ''; |
|
|
} elsif ($Fs) { |
|
287 |
if ($Fs eq "\x6E") { ## LS2 |
if ($Fs eq "\x6E") { ## LS2 |
288 |
$C->{GL} = 'G2'; ''; |
$C->{GL} = 'G2'; ''; |
289 |
} elsif ($Fs eq "\x6F") { ## LS3 |
} elsif ($Fs eq "\x6F") { ## LS3 |
290 |
$C->{GL} = 'G3'; ''; |
$C->{GL} = 'G3'; ''; |
291 |
} elsif ($Fs eq "\x7E") { ## LS1R |
} elsif ($Fs eq "\x7E" || $Fs eq "\x6B") { ## LS1R |
292 |
$C->{GR} = 'G1'; $C->{GL} = 'G1' if $C->{bit} == 7; ''; |
$C->{GR} = 'G1'; $C->{GL} = 'G1' if $C->{bit} == 7; ''; |
293 |
} elsif ($Fs eq "\x7D") { ## LS2R |
} elsif ($Fs eq "\x7D" || $Fs eq "\x6C") { ## LS2R |
294 |
$C->{GR} = 'G2'; $C->{GL} = 'G2' if $C->{bit} == 7; ''; |
$C->{GR} = 'G2'; $C->{GL} = 'G2' if $C->{bit} == 7; ''; |
295 |
} elsif ($Fs eq "\x7C") { ## LS3R |
} elsif ($Fs eq "\x7C" || $Fs eq "\x6D") { ## LS3R |
296 |
$C->{GR} = 'G3'; $C->{GL} = 'G3' if $C->{bit} == 7; ''; |
$C->{GR} = 'G3'; $C->{GL} = 'G3' if $C->{bit} == 7; ''; |
297 |
} else { |
} else { |
298 |
chr ($CHARSET{single_control}->{Fs}->{ucs} + (ord ($Fs) - 0x60)); |
chr ($CHARSET{single_control}->{Fs}->{ucs} + (ord ($Fs) - 0x60)); |
299 |
} |
} |
300 |
} elsif ($sI) { |
} elsif (defined $CZD) { ## 1F (CZD) = ESC 02/01 [I] F |
301 |
|
$C->{C0} = $CHARSET{C0}->{ $f2s->{C0}->{$CZD.$rev} || $CZD.$rev } |
302 |
|
|| $CHARSET{C0}->{ $f2s->{C0}->{$CZD} || $CZD } |
303 |
|
|| $CHARSET{C0}->{ "\x7E" }; ''; |
304 |
|
} elsif (defined $C1D) { ## 2F (C1D) = ESC 02/02 [I] F |
305 |
|
$C->{C1} = $CHARSET{C1}->{ $f2s->{C1}->{$C1D.$rev} || $C1D.$rev } |
306 |
|
|| $CHARSET{C1}->{ $f2s->{C1}->{$C1D} || $C1D } |
307 |
|
|| $CHARSET{C1}->{ "\x7E" }; ''; |
308 |
|
} elsif ($sI) { ## 3F = ESC 02/03 [I] F |
309 |
chr ($CHARSET{single_control}->{'3F'.$sI}->{ucs} + (ord ($sF) - 0x30)); |
chr ($CHARSET{single_control}->{'3F'.$sI}->{ucs} + (ord ($sF) - 0x30)); |
310 |
} elsif ($ACS) { ## Announcer |
} elsif ($ACS) { ## 0F (Announcer) = ESC 02/00 F |
311 |
if ($ACS eq "\x4A") { $C->{bit} = 7 } |
if ($ACS eq "\x4A") { $C->{bit} = 7 } |
312 |
elsif ($ACS eq "\x4B") { $C->{bit} = 8 } |
elsif ($ACS eq "\x4B") { $C->{bit} = 8 } |
313 |
''; |
''; |
316 |
$C->{_irr} = undef; |
$C->{_irr} = undef; |
317 |
} |
} |
318 |
$esc; |
$esc; |
319 |
## Locking shifts |
} elsif ($ls) { ## Locking shifts = LS0 / LS1 |
|
} elsif ($ls) { |
|
320 |
if ($ls eq $C->{$C->{CL}}->{LS0}) { |
if ($ls eq $C->{$C->{CL}}->{LS0}) { |
321 |
$C->{GL} = 'G0'; ''; |
$C->{GL} = 'G0'; ''; |
322 |
} elsif ($ls eq $C->{$C->{CL}}->{LS1}) { |
} elsif ($ls eq $C->{$C->{CL}}->{LS1}) { |
323 |
$C->{GL} = 'G1'; ''; |
$C->{GL} = 'G1'; ''; |
|
} elsif ($ls =~ /\x1B([\x6E\x6F])/) { |
|
|
$C->{GL} = {"\x6E"=>2, "\x6F"=>3}->{$1}; ''; |
|
|
} elsif ($ls =~ /\x1B([\x7C-\x7E])/) { |
|
|
$C->{GR} = {"\x7E"=>1, "\x7D"=>2, "\x7C"=>3}->{$1}; ''; |
|
324 |
} |
} |
325 |
## Control sequence |
} elsif ($csi) { ## Control sequence = CSI [P..] [I] F |
|
} elsif ($csi) { |
|
326 |
$csi =~ tr/\xA0-\xFF/\x20-\x7F/d; |
$csi =~ tr/\xA0-\xFF/\x20-\x7F/d; |
327 |
$csi =~ s/$C->{$C->{CL}}->{LS0}//g if $C->{$C->{CL}}->{LS0}; |
$csi =~ s/$C->{$C->{CL}}->{LS0}//g if $C->{$C->{CL}}->{LS0}; |
328 |
$csi =~ s/$C->{$C->{CL}}->{LS1}//g if $C->{$C->{CL}}->{LS1}; |
$csi =~ s/$C->{$C->{CL}}->{LS1}//g if $C->{$C->{CL}}->{LS1}; |
329 |
"\x9B".$csi; |
"\x9B".$csi; |
330 |
} |
} |
331 |
}gex; |
}gesx; |
332 |
$s; |
$s; |
333 |
} |
} # __iso2022_to_internal |
334 |
|
|
335 |
|
}; |
336 |
|
&__iso2022_to_internal (@_) if defined $_[0]; |
337 |
|
|
338 |
|
} # _iso2022_to_internal |
339 |
|
|
340 |
sub internal_to_iso2022 ($\%) { |
sub internal_to_iso2022 ($;%) { |
341 |
my ($s, $C) = @_; |
my ($s, $C) = @_; |
342 |
$C ||= &new_object; |
$C ||= &new_object; |
343 |
|
|
344 |
my $r = ''; |
my $r = ''; |
345 |
for my $c (split //, $s) { |
for my $c (split //, $s) { |
346 |
my $cc = ord $c; |
my $cc = ord $c; Encode::_utf8_off ($c); |
347 |
my $t; |
my $t; |
348 |
if ($cc <= 0x1F) { |
if ($cc <= 0x1F) { |
349 |
$t = _i2c ($c, $C, type => 'C0', charset => '@'); |
$t = _i2c ($c, $C, type => 'C0', charset => '@'); |
352 |
} elsif ($cc < 0x7F) { |
} elsif ($cc < 0x7F) { |
353 |
$t = _i2g ($c, $C, type => 'G94', charset => 'B'); |
$t = _i2g ($c, $C, type => 'G94', charset => 'B'); |
354 |
} elsif ($cc <= 0x9F) { |
} elsif ($cc <= 0x9F) { |
355 |
$t = _i2c ($c, $C, type => 'C1', charset_id => '64291991C1', |
$t = _i2c (pack ('C', $cc), $C, type => 'C1', charset_id => '64291991C1', |
356 |
charset => $C->{private_set}->{XC1}->{'64291991C1'}); |
charset => $C->{option}->{private_set}->{XC1}->{'64291991C1'}); |
357 |
} elsif ($cc <= 0xFF) { |
} elsif ($cc <= 0xFF) { |
358 |
$t = _i2g (chr($cc-0x80), $C, type => 'G96', charset => 'A'); |
$t = _i2g (pack ('C', $cc-0x80), $C, type => 'G96', charset => 'A'); |
359 |
} elsif ($cc <= 0x24FF) { |
} elsif ($cc <= 0x24FF) { |
360 |
my $c = $cc - 0x100; |
my $c = $cc - 0x100; |
361 |
my $final = $C->{private_set}->{U96n}->[0]; |
my $final = $C->{option}->{private_set}->{U96n}->[0]; |
362 |
if (length $final) { |
if (length $final) { |
363 |
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
364 |
type => 'G96n', charset => $final); |
type => 'G96n', charset => $final); |
365 |
} |
} |
366 |
} elsif ($cc <= 0x33FF) { |
} elsif ($cc <= 0x33FF) { |
367 |
my $c = $cc - 0x2500; |
my $c = $cc - 0x2500; |
368 |
my $final = $C->{private_set}->{U96n}->[1]; |
my $final = $C->{option}->{private_set}->{U96n}->[1]; |
369 |
if (length $final) { |
if (length $final) { |
370 |
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
371 |
type => 'G96n', charset => $final); |
type => 'G96n', charset => $final); |
372 |
} |
} |
373 |
} elsif (0xE000 <= $cc && $cc <= 0xFFFF) { |
} elsif (0xE000 <= $cc && $cc <= 0xFFFF) { |
374 |
my $c = $cc - 0xE000; |
my $c = $cc - 0xE000; |
375 |
my $final = $C->{private_set}->{U96n}->[2]; |
my $final = $C->{option}->{private_set}->{U96n}->[2]; |
376 |
if (length $final) { |
if (length $final) { |
377 |
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
378 |
type => 'G96n', charset => $final); |
type => 'G96n', charset => $final); |
438 |
} elsif (0x70400000 <= $cc && $cc <= 0x7040FFED) { |
} elsif (0x70400000 <= $cc && $cc <= 0x7040FFED) { |
439 |
my $c = $cc - 0x70400000; |
my $c = $cc - 0x70400000; |
440 |
$t = _i2g (chr(($c % 94)+0x21), $C, charset_id => 'P'.int ($c / 94), |
$t = _i2g (chr(($c % 94)+0x21), $C, charset_id => 'P'.int ($c / 94), |
441 |
type => 'G94', charset => $C->{private_set}->{G94}->[ $c / 94 ]); |
type => 'G94', charset => $C->{option}->{private_set}->{G94}->[ $c / 94 ]); |
442 |
} elsif (0x70410000 <= $cc && $cc <= 0x7041FFBF) { |
} elsif (0x70410000 <= $cc && $cc <= 0x7041FFBF) { |
443 |
my $c = $cc - 0x70410000; |
my $c = $cc - 0x70410000; |
444 |
$t = _i2g (chr(($c % 96)+0x20), $C, charset_id => 'P'.int ($c / 96), |
$t = _i2g (chr(($c % 96)+0x20), $C, charset_id => 'P'.int ($c / 96), |
445 |
type => 'G96', charset => $C->{private_set}->{G96}->[ $c / 96 ]); |
type => 'G96', charset => $C->{option}->{private_set}->{G96}->[ $c / 96 ]); |
446 |
} elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) { |
} elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) { |
447 |
my $c = $cc % 0x10000; |
my $c = $cc % 0x10000; |
448 |
$t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C, |
$t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C, |
449 |
type => 'G94n', |
type => 'G94n', |
450 |
charset_id => 'P'.int(($cc / 0x10000) - 0x7042).'_'.int($c / 8836), |
charset_id => 'P'.int(($cc / 0x10000) - 0x7042).'_'.int($c / 8836), |
451 |
charset => $C->{private_set}->{G94n}->[ ($cc / 0x10000) - 0x7042 ] |
charset => $C->{option}->{private_set}->{G94n} |
452 |
->[ $c / 8836 ]); |
->[ ($cc / 0x10000) - 0x7042 ]->[ $c / 8836 ]); |
453 |
} |
} |
454 |
if (defined $t) { |
if (defined $t) { |
455 |
$t = _i2o ($t, $C, cs_F => "\x40") |
$t = _i2o ($t, $C, cs_F => "\x40") |
471 |
if (defined $t) { |
if (defined $t) { |
472 |
$r .= $t; |
$r .= $t; |
473 |
} else { |
} else { |
474 |
$t = _i2g ($C->{option}->{undef_char}->[0], $C, |
unless ($C->{option}->{undef_char}->[0] eq "\x20") { |
475 |
%{ $C->{option}->{undef_char}->[1] }); |
$t = _i2g ($C->{option}->{undef_char}->[0], $C, |
476 |
|
%{ $C->{option}->{undef_char}->[1] }); |
477 |
|
} else { ## SP |
478 |
|
$t = _back2ascii ($C) . "\x20"; |
479 |
|
} |
480 |
$r .= $C->{coding_system} eq $CODING_SYSTEM{"\x40"} ? |
$r .= $C->{coding_system} eq $CODING_SYSTEM{"\x40"} ? |
481 |
$t : _i2o ($t, $C, cs_F => "\x40"); |
$t : _i2o ($t, $C, cs_F => "\x40"); |
482 |
} |
} |
524 |
} |
} |
525 |
$r .= _back2ascii ($C, reset_all => $C->{C1}->{reset_all}->{$s}); |
$r .= _back2ascii ($C, reset_all => $C->{C1}->{reset_all}->{$s}); |
526 |
unless ($C->{option}->{C1invoke_to_right}) { ## ESC Fe |
unless ($C->{option}->{C1invoke_to_right}) { ## ESC Fe |
527 |
$s =~ s/([\x80-\x9F])/"\x1B" . chr (ord ($1) - 0x40)/ge; |
$s =~ s/([\x80-\x9F])/"\x1B" . pack ('C', ord ($1) - 0x40)/ge; |
528 |
} |
} |
529 |
return $r . $s; |
return $r . $s; |
530 |
} |
} |
641 |
if ($CS ne $C->{coding_system}) { |
if ($CS ne $C->{coding_system}) { |
642 |
my $e = ''; |
my $e = ''; |
643 |
$e .= "\x1B\x25"; |
$e .= "\x1B\x25"; |
644 |
$e .= $O{cs_F} || $C->{private_set}->{coding_system}->{ $O{cs_id} } |
$e .= $O{cs_F} || $C->{option}->{private_set}->{coding_system}->{ $O{cs_id} } |
645 |
|| return undef; |
|| return undef; |
646 |
if ($C->{coding_system} eq $CODING_SYSTEM{"\x2F\x40"} |
if ($C->{coding_system} eq $CODING_SYSTEM{"\x2F\x40"} |
647 |
|| $C->{coding_system} eq $CODING_SYSTEM{"\x2F\x43"} |
|| $C->{coding_system} eq $CODING_SYSTEM{"\x2F\x43"} |
774 |
=cut |
=cut |
775 |
|
|
776 |
# $Date$ |
# $Date$ |
|
### ISO2022.pm ends here |
|