3 |
|
|
4 |
Encode::ISO2022 --- ISO/IEC 2022 encoder and decoder |
Encode::ISO2022 --- ISO/IEC 2022 encoder and decoder |
5 |
|
|
6 |
|
=head1 ENCODINGS |
7 |
|
|
8 |
|
=over 4 |
9 |
|
|
10 |
|
=item iso2022 |
11 |
|
|
12 |
|
ISO/IEC 2022:1994. Default status is: |
13 |
|
|
14 |
|
=over 2 |
15 |
|
|
16 |
|
=item CL = C0 = ISO/IEC 6429:1991 C0 set |
17 |
|
|
18 |
|
=item CR = C1 = ISO/IEC 6429:1991 C1 set |
19 |
|
|
20 |
|
=item GL = G0 = ISO/IEC 646:1991 IRV GL(G0) set |
21 |
|
|
22 |
|
=item GR = G1 = empty set |
23 |
|
|
24 |
|
=item G2 = empty set |
25 |
|
|
26 |
|
=item G3 = empty set |
27 |
|
|
28 |
|
=back |
29 |
|
|
30 |
|
(Alias: iso/iec2022, iso-2022, 2022, cp2022) |
31 |
|
|
32 |
|
=back |
33 |
|
|
34 |
|
Note that ISO/IEC 2022 based encodings are found in |
35 |
|
Encode::ISO2022::* modules. This module, Encode::ISO2022 |
36 |
|
only provides a general ISO/IEC 2022 encoder/decoder. |
37 |
|
|
38 |
=cut |
=cut |
39 |
|
|
40 |
require v5.7.3; |
require v5.7.3; |
43 |
use vars qw(%CHARSET %CODING_SYSTEM $VERSION); |
use vars qw(%CHARSET %CODING_SYSTEM $VERSION); |
44 |
$VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
$VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
45 |
use base qw(Encode::Encoding); |
use base qw(Encode::Encoding); |
46 |
__PACKAGE__->Define (qw/iso-2022 iso2022 2022 cp2022/); |
__PACKAGE__->Define (qw!iso-2022 iso/iec2022 iso2022 2022 cp2022!); |
47 |
require Encode::Charset; |
require Encode::Charset; |
48 |
*CHARSET = \%Encode::Charset::CHARSET; |
*CHARSET = \%Encode::Charset::CHARSET; |
49 |
*CODING_SYSTEM = \%Encode::Charset::CODING_SYSTEM; |
*CODING_SYSTEM = \%Encode::Charset::CODING_SYSTEM; |
81 |
} |
} |
82 |
|
|
83 |
### --- Encode::ISO2022 unique functions |
### --- Encode::ISO2022 unique functions |
84 |
|
*new_object = \&Encode::Charset::new_object; |
|
## Make a new ISO/IEC 2022-buffers object with default status |
|
|
sub new_object { |
|
|
my %C; |
|
|
$C{bit} = 8; |
|
|
$C{CL} = 'C0'; $C{CR} = 'C1'; $C{ESC_Fe} = 'C1'; |
|
|
$C{C0} = $CHARSET{C0}->{"\x40"}; ## ISO/IEC 6429:1991 C0 |
|
|
$C{C1} = $CHARSET{C1}->{'64291991C1'}; ## ISO/IEC 6429:1991 C1 |
|
|
$C{GL} = 'G0'; $C{GR} = 'G1'; |
|
|
$C{G0} = $CHARSET{G94}->{"\x42"}; ## ISO/IEC 646:1991 IRV |
|
|
#$C{G1} = $CHARSET{G96}->{"\x41"}; ## ISO/IEC 8859-1 GR |
|
|
$C{G1} = $CHARSET{G94}->{"\x7E"}; ## empty set |
|
|
$C{G2} = $CHARSET{G94}->{"\x7E"}; ## empty set |
|
|
$C{G3} = $CHARSET{G94}->{"\x7E"}; ## empty set |
|
|
$C{coding_system} = $CODING_SYSTEM{"\x40"}; ## ISO/IEC 2022 |
|
|
$C{option} = { |
|
|
C1invoke_to_right => 0, ## C1 invoked to: (0: ESC Fe, 1: CR) |
|
|
G94n_designate_long => 0, ## (1: ESC 02/04 02/08 04/00..02) |
|
|
designate_to => { ## Designated G buffer (-1: not be outputed) |
|
|
C0 => { |
|
|
default => 0, |
|
|
}, |
|
|
C1 => { |
|
|
default => 1, |
|
|
}, |
|
|
G94 => { |
|
|
"\x42" => 0, |
|
|
default => 0, |
|
|
}, |
|
|
G96 => { |
|
|
default => 1, |
|
|
}, |
|
|
G94n => { |
|
|
default => 0, |
|
|
}, |
|
|
G96n => { |
|
|
default => 1, |
|
|
}, |
|
|
coding_system => { |
|
|
default => -1, |
|
|
}, |
|
|
}, |
|
|
Ginvoke_by_single_shift => [0,0,0,0], ## Invoked by SS |
|
|
Ginvoke_to_left => [1,1,1,1], ## Which invoked to? (1: L, 0: R) |
|
|
private_set => { ## Private set vs Final byte |
|
|
C0 => [], |
|
|
C1 => [], |
|
|
G94 => [], |
|
|
G94n => [[],[],[],[],[]], |
|
|
G96 => [], |
|
|
#G96n => [], ## (not implemented) |
|
|
U96n => [], ## mule-unicode sets |
|
|
XC1 => { |
|
|
'64291991C1' => undef, ## ISO/IEC 6429:1991 C1 |
|
|
}, |
|
|
}, |
|
|
reset => { ## Reset status at top of line |
|
|
Gdesignation => "\x42", ## F of designation or 0 |
|
|
Ginvoke => 1, |
|
|
}, |
|
|
undef_char => ["\x3F", {type => 'G94', charset => 'B'}], |
|
|
use_revision => 1, ## Output IRR |
|
|
}; |
|
|
\%C; |
|
|
} |
|
85 |
|
|
86 |
sub iso2022_to_internal ($;\%) { |
sub iso2022_to_internal ($;\%) { |
87 |
my ($s, $C) = @_; |
my ($s, $C) = @_; |
88 |
|
$C ||= &new_object; |
89 |
my $t = ''; |
my $t = ''; |
90 |
$s =~ s{ |
$s =~ s{ |
91 |
^((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
^((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
94 |
$t = _iso2022_to_internal ($i2, $C); |
$t = _iso2022_to_internal ($i2, $C); |
95 |
''; |
''; |
96 |
}gesx; |
}gesx; |
97 |
|
my $pad = ''; |
98 |
|
use re 'eval'; |
99 |
$s =~ s{ |
$s =~ s{ |
100 |
## ISO/IEC 2022 |
## ISO/IEC 2022 |
101 |
\x1B\x25\x40((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
(??{"$pad\x1B$pad\x25$pad\x40"})((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
102 |
## UTF-8 |
## UTF-8 |
103 |
|\x1B\x25(?:\x47|\x2F[\x47-\x49])((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
|(??{"$pad\x1B$pad\x25$pad(?:\x47|\x2F$pad"."[\x47-\x49])"}) |
104 |
|
((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
105 |
## UCS-2, UTF-16 |
## UCS-2, UTF-16 |
106 |
|\x1B\x25\x2F[\x40\x43\x45\x4A-\x4C] |
|(??{"$pad\x1B$pad\x25$pad\x2F$pad"})([\x40\x43\x45\x4A-\x4C]) |
107 |
((?!\x00\x1B\x00\x25\x00\x2F?\x00[\x30-\x7E].)*) |
((?:(?!\x00\x1B\x00\x25(?:\x00\x2F)?\x00[\x30-\x7E])..)*) |
108 |
## UCS-4 |
## UCS-4 |
109 |
|\x1B\x25\x2F[\x41\x44\x46] |
|(??{"$pad\x1B$pad\x25$pad\x2F$pad"})[\x41\x44\x46] |
110 |
((?!\x00\x00\x00\x1B\x00\x00\x00\x25\x00\x00\x00\x2F? |
((?:(?!\x00\x00\x00\x1B\x00\x00\x00\x25(?:\x00\x00\x00\x2F)? |
111 |
\x00\x00\x00[\x30-\x7E].)*) |
\x00\x00\x00[\x30-\x7E])....)*) |
112 |
## with standard return |
## with standard return |
113 |
|\x1B\x25([\x30-\x7E])((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
|(??{"$pad\x1B$pad\x25$pad"})([\x30-\x7E]) |
114 |
|
((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*) |
115 |
## without standard return |
## without standard return |
116 |
|\x1B\x25\x2F([\x30-\x7E])(.*) |
|(??{"$pad\x1B$pad\x25$pad\x2F$pad"})([\x30-\x7E])(.*) |
117 |
}{ |
}{ |
118 |
my ($i2,$u8,$Fu2,$u2,$u4,$Fsr,$sr,$Fnsr,$nsr) = ($1,$2,$3,$4,$5,$6,$7,$8,$9); |
my ($i2,$u8,$Fu2,$u2,$u4,$Fsr,$sr,$Fnsr,$nsr) = ($1,$2,$3,$4,$5,$6,$7,$8,$9); |
119 |
my $r = ''; |
my $r = ''; |
120 |
if (defined $i2) { |
if (defined $i2) { |
121 |
$r = _iso2022_to_internal ($i2, $C); |
$r = _iso2022_to_internal ($i2, $C); $pad = ''; |
122 |
} elsif (defined $u8) { |
} elsif (defined $u8) { |
123 |
$r = Encode::decode ('utf8', $u8); |
$r = Encode::decode ('utf8', $u8); $pad = ''; |
124 |
} elsif ($Fu2) { |
} elsif ($Fu2) { |
125 |
if (ord ($Fu2) > 0x49) { |
if (ord ($Fu2) > 0x49) { |
126 |
$r = Encode::decode ('utf-16be', $u2); |
$r = Encode::decode ('utf-16be', $u2); |
127 |
} else { |
} else { |
128 |
$r = Encode::decode ('ucs-2be', $u2); |
$r = Encode::decode ('ucs-2be', $u2); |
129 |
} |
} |
130 |
|
$pad = "\x00"; |
131 |
} elsif (defined $u4) { |
} elsif (defined $u4) { |
132 |
$r = Encode::decode ('ucs-4be', $u2); |
$r = Encode::decode ('ucs-4be', $u2); $pad = "\x00\x00\x00"; |
133 |
|
} elsif (defined $Fsr && $CODING_SYSTEM{$Fsr}->{perl_name}) { |
134 |
|
$r = Encode::decode ($CODING_SYSTEM{$Fsr}->{perl_name}, $sr); $pad = ''; |
135 |
|
} elsif (defined $Fnsr && $CODING_SYSTEM{$Fnsr}->{perl_name}) { |
136 |
|
$r = Encode::decode ($CODING_SYSTEM{$Fnsr}->{perl_name}, $nsr); $pad = ''; |
137 |
} else { ## temporary |
} else { ## temporary |
138 |
$r = '?+'; |
$r = '?' x length ($sr.$nsr); $pad = ''; |
139 |
} |
} |
140 |
$r; |
$r; |
141 |
}gesx; |
}gesx; |
148 |
"\x28"=>'G0',"\x29"=>'G1',"\x2A"=>'G2',"\x2B"=>'G3', |
"\x28"=>'G0',"\x29"=>'G1',"\x2A"=>'G2',"\x2B"=>'G3', |
149 |
"\x2C"=>'G0',"\x2D"=>'G1',"\x2E"=>'G2',"\x2F"=>'G3', |
"\x2C"=>'G0',"\x2D"=>'G1',"\x2E"=>'G2',"\x2F"=>'G3', |
150 |
); |
); |
|
$C ||= &new_object; |
|
151 |
|
|
152 |
use re 'eval'; |
use re 'eval'; |
153 |
$s =~ s{ |
$s =~ s{ |
154 |
((??{ $_CHARS_to_RANGE{'l'.$C->{$C->{GL}}->{chars}} |
((??{ $_CHARS_to_RANGE{'l'.$C->{$C->{GL}}->{chars}} |
155 |
. qq/{$C->{$C->{GL}}->{dimension},$C->{$C->{GL}}->{dimension}}/ })) |
. qq/{$C->{$C->{GL}}->{dimension},$C->{$C->{GL}}->{dimension}}/ })) |
156 |
|((??{ $_CHARS_to_RANGE{'r'.$C->{$C->{GL}}->{chars}} |
|((??{ $_CHARS_to_RANGE{'r'.$C->{$C->{GR}}->{chars}} |
157 |
. qq/{$C->{$C->{GR}}->{dimension},$C->{$C->{GR}}->{dimension}}/ })) |
. qq/{$C->{$C->{GR}}->{dimension},$C->{$C->{GR}}->{dimension}}/ })) |
|
|
|
158 |
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS2} || '(?!)') |
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS2} || '(?!)') |
159 |
. ($C->{$C->{ESC_Fe}}->{r_SS2_ESC} ? |
. ($C->{$C->{ESC_Fe}}->{r_SS2_ESC} ? |
160 |
qq/|$C->{$C->{ESC_Fe}}->{r_SS2_ESC}/ : '') |
qq/|$C->{$C->{ESC_Fe}}->{r_SS2_ESC}/ : '') |
163 |
||$C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
||$C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
164 |
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
165 |
}) |
}) |
166 |
((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}} |
((??{ $_CHARS_to_RANGE{'b'.$C->{G2}->{chars}} |
167 |
. qq/{$C->{G2}->{dimension},$C->{G2}->{dimension}}/ })) |
. qq/{$C->{G2}->{dimension},$C->{G2}->{dimension}}/ })) |
168 |
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS3} || '(?!)') |
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS3} || '(?!)') |
169 |
. ($C->{$C->{ESC_Fe}}->{r_SS3_ESC} ? |
. ($C->{$C->{ESC_Fe}}->{r_SS3_ESC} ? |
173 |
|| $C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
|| $C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
174 |
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
175 |
}) |
}) |
176 |
((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}} |
((??{ $_CHARS_to_RANGE{'b'.$C->{G3}->{chars}} |
177 |
. qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ })) |
. qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ })) |
178 |
|
|
179 |
## Locking shift |
## Locking shift |
180 |
|( \x1B[\x6E\x6F\x7C-\x7E] |
|( (??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' }) |
|
|(??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' }) |
|
181 |
|(??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' }) |
|(??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' }) |
182 |
) |
) |
183 |
|
|
318 |
$C->{GL} = 'G0'; ''; |
$C->{GL} = 'G0'; ''; |
319 |
} elsif ($ls eq $C->{$C->{CL}}->{LS1}) { |
} elsif ($ls eq $C->{$C->{CL}}->{LS1}) { |
320 |
$C->{GL} = 'G1'; ''; |
$C->{GL} = 'G1'; ''; |
|
} elsif ($ls =~ /\x1B([\x6E\x6F])/) { |
|
|
$C->{GL} = {"\x6E"=>2, "\x6F"=>3}->{$1}; ''; |
|
|
} elsif ($ls =~ /\x1B([\x7C-\x7E])/) { |
|
|
$C->{GR} = {"\x7E"=>1, "\x7D"=>2, "\x7C"=>3}->{$1}; ''; |
|
321 |
} |
} |
322 |
## Control sequence |
## Control sequence |
323 |
} elsif ($csi) { |
} elsif ($csi) { |