| 1 |
wakaba |
1.1 |
|
| 2 |
|
|
=head1 NAME |
| 3 |
|
|
|
| 4 |
|
|
Encode::ISO2022 --- ISO/IEC 2022 encoder and decoder |
| 5 |
|
|
|
| 6 |
|
|
=cut |
| 7 |
|
|
|
| 8 |
|
|
require v5.7.3; |
| 9 |
|
|
package Encode::ISO2022; |
| 10 |
|
|
use strict; |
| 11 |
|
|
use vars qw(%CHARSET $VERSION); |
| 12 |
wakaba |
1.3 |
$VERSION=do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
| 13 |
wakaba |
1.1 |
use base qw(Encode::Encoding); |
| 14 |
wakaba |
1.2 |
__PACKAGE__->Define (qw/iso-2022 iso2022 2022 cp2022/); |
| 15 |
wakaba |
1.1 |
|
| 16 |
|
|
### --- Intialization |
| 17 |
|
|
|
| 18 |
|
|
my %_CHARS_to_RANGE = ( |
| 19 |
|
|
l94 => q/[\x21-\x7E]/, |
| 20 |
|
|
l96 => q/[\x20-\x7F]/, |
| 21 |
|
|
l128 => q/[\x00-\x7F]/, |
| 22 |
|
|
l256 => q/[\x00-\xFF]/, |
| 23 |
|
|
r94 => q/[\xA1-\xFE]/, |
| 24 |
|
|
r96 => q/[\xA0-\xFF]/, |
| 25 |
|
|
r128 => q/[\x80-\xFF]/, |
| 26 |
|
|
r256 => q/[\x80-\xFF]/, |
| 27 |
|
|
b94 => q/[\x21-\x7E\xA1-\xFE]/, |
| 28 |
|
|
b96 => q/[\x20-\x7F\xA0-\xFF]/, |
| 29 |
|
|
b128 => q/[\x00-\xFF]/, |
| 30 |
|
|
b256 => q/[\x00-\xFF]/, |
| 31 |
|
|
); |
| 32 |
|
|
|
| 33 |
|
|
## --- Make initial charset definitions |
| 34 |
|
|
&_make_initial_charsets; |
| 35 |
|
|
sub _make_initial_charsets () { |
| 36 |
|
|
for my $f (0x30..0x7E) { |
| 37 |
|
|
my $F = pack 'C', $f; |
| 38 |
|
|
for ('', '!', '"', '#') { |
| 39 |
|
|
$CHARSET{G94}->{ $_.$F }->{dimension} = 1; |
| 40 |
|
|
$CHARSET{G94}->{ $_.$F }->{chars} = 94; |
| 41 |
|
|
$CHARSET{G94}->{ $_.$F }->{ucs} = |
| 42 |
|
|
{'' => 0xE90940, '!' => 0xE944A0, '"' => 0xE98000, '#' => 0xE9BB60}->{ $_ } |
| 43 |
|
|
+ 94 * ($f-0x30); |
| 44 |
|
|
|
| 45 |
|
|
$CHARSET{G96}->{ $_.$F }->{dimension} = 1; |
| 46 |
|
|
$CHARSET{G96}->{ $_.$F }->{chars} = 96; |
| 47 |
|
|
$CHARSET{G96}->{ $_.$F }->{ucs} = |
| 48 |
|
|
{'' => 0xE926A0, '!' => 0xE96200, '"' => 0xE99D60, '#' => 0xE9D8C0}->{ $_ } |
| 49 |
|
|
+ 96 * ($f-0x30); |
| 50 |
|
|
|
| 51 |
|
|
$CHARSET{C0}->{ $_.$F }->{dimension} = 1; |
| 52 |
|
|
$CHARSET{C0}->{ $_.$F }->{chars} = 32; |
| 53 |
|
|
$CHARSET{C0}->{ $_.$F }->{ucs} = |
| 54 |
|
|
{'' => 0x70000000, '!' => 0x70001400, |
| 55 |
|
|
'"' => 0x70002800, '#' => 0x70003C00}->{ $_ } + 32 * ($f-0x30); |
| 56 |
|
|
|
| 57 |
|
|
$CHARSET{C1}->{ $_.$F }->{dimension} = 1; |
| 58 |
|
|
$CHARSET{C1}->{ $_.$F }->{chars} = 32; |
| 59 |
|
|
$CHARSET{C1}->{ $_.$F }->{ucs} = |
| 60 |
|
|
{'' => 0x70000A00, '!' => 0x70001E00, |
| 61 |
|
|
'"' => 0x70003200, '#' => 0x70004600}->{ $_ } + 32 * ($f-0x30); |
| 62 |
|
|
|
| 63 |
|
|
$CHARSET{G94}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS |
| 64 |
|
|
$CHARSET{G94}->{ ' '.$_.$F }->{chars} = 94; |
| 65 |
|
|
$CHARSET{G94}->{ ' '.$_.$F }->{ucs} = |
| 66 |
|
|
{'' => 0x70090940, '!' => 0x700944A0, |
| 67 |
|
|
'"' => 0x70098000, '#' => 0x7009BB60}->{ $_ } + 94 * ($f-0x30); |
| 68 |
|
|
|
| 69 |
|
|
$CHARSET{G96}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS |
| 70 |
|
|
$CHARSET{G96}->{ ' '.$_.$F }->{chars} = 96; |
| 71 |
|
|
$CHARSET{G96}->{ ' '.$_.$F }->{ucs} = |
| 72 |
|
|
{'' => 0x700926A0, '!' => 0x70096200, |
| 73 |
|
|
'"' => 0x70099D60, '#' => 0x7009D8C0}->{ $_ } + 96 * ($f-0x30); |
| 74 |
|
|
} |
| 75 |
|
|
} |
| 76 |
|
|
for my $f (0x30..0x5F, 0x7E) { |
| 77 |
|
|
my $F = pack 'C', $f; |
| 78 |
wakaba |
1.3 |
for ('', '!', '"', '#', ' ') { |
| 79 |
wakaba |
1.1 |
$CHARSET{G94n}->{ $_.$F }->{dimension} = 2; |
| 80 |
|
|
$CHARSET{G94n}->{ $_.$F }->{chars} = 94; |
| 81 |
|
|
$CHARSET{G94n}->{ $_.$F }->{ucs} = |
| 82 |
|
|
({'' => 0xE9F6C0}->{ $_ }||0) + 94*94 * ($f-0x30); |
| 83 |
wakaba |
1.3 |
## BUG: 94^n sets with I byte have no mapping area |
| 84 |
wakaba |
1.1 |
|
| 85 |
|
|
$CHARSET{G96n}->{ $_.$F }->{dimension} = 2; |
| 86 |
|
|
$CHARSET{G96n}->{ $_.$F }->{chars} = 96; |
| 87 |
|
|
$CHARSET{G96n}->{ $_.$F }->{ucs} = |
| 88 |
|
|
({'' => 0xF4C000}->{ $_ }||0) + 96*96 * ($f-0x30); |
| 89 |
|
|
## BUG: 94^n DRCSes with I byte have no mapping area |
| 90 |
|
|
} |
| 91 |
|
|
} |
| 92 |
|
|
for (0x60..0x6F) { |
| 93 |
|
|
my $F = pack 'C', $_; |
| 94 |
|
|
## BUG: 9x^3 sets have no mapping area |
| 95 |
|
|
for ('', '!', '"', '#', ' ') { |
| 96 |
|
|
$CHARSET{G94n}->{ $_.$F }->{dimension} = 3; |
| 97 |
|
|
$CHARSET{G94n}->{ $_.$F }->{chars} = 94; |
| 98 |
|
|
|
| 99 |
|
|
$CHARSET{G96n}->{ $_.$F }->{dimension} = 3; |
| 100 |
|
|
$CHARSET{G96n}->{ $_.$F }->{chars} = 96; |
| 101 |
|
|
} |
| 102 |
|
|
} |
| 103 |
|
|
for (0x70..0x7D) { |
| 104 |
|
|
my $F = pack 'C', $_; |
| 105 |
|
|
## BUG: 9x^4 sets have no mapping area |
| 106 |
|
|
for ('', '!', '"', '#', ' ') { |
| 107 |
|
|
$CHARSET{G94n}->{ $_.$F }->{dimension} = 4; |
| 108 |
|
|
$CHARSET{G94n}->{ $_.$F }->{chars} = 94; |
| 109 |
|
|
|
| 110 |
|
|
$CHARSET{G96n}->{ $_.$F }->{dimension} = 4; |
| 111 |
|
|
$CHARSET{G96n}->{ $_.$F }->{chars} = 96; |
| 112 |
|
|
} |
| 113 |
|
|
} |
| 114 |
|
|
for my $f (0x40..0x4E) { |
| 115 |
|
|
my $F = pack 'C', $f; |
| 116 |
|
|
$CHARSET{G96n}->{ ' '.$F }->{dimension} = 2; |
| 117 |
|
|
$CHARSET{G96n}->{ ' '.$F }->{chars} = 96; |
| 118 |
|
|
$CHARSET{G96n}->{ ' '.$F }->{ucs} = 0xF0000 + 96*96*($f-0x40); |
| 119 |
|
|
## U+F0000-U+10F7FF (private) -> ESC 02/04 02/00 <I> (04/00-04/14) (DRCS) |
| 120 |
|
|
} |
| 121 |
|
|
|
| 122 |
|
|
$CHARSET{G94}->{B}->{ucs} = 0x21; ## ASCII |
| 123 |
|
|
$CHARSET{G96}->{A}->{ucs} = 0xA0; ## ISO 8859-1 |
| 124 |
|
|
|
| 125 |
|
|
$CHARSET{G94n}->{'B@'}->{dimension} = 2; ## JIS X 0208-1990 |
| 126 |
|
|
$CHARSET{G94n}->{'B@'}->{chars} = 94; |
| 127 |
|
|
$CHARSET{G94n}->{'B@'}->{ucs} = 0xE9F6C0 + 94*94*79; |
| 128 |
|
|
|
| 129 |
|
|
## -- Control character sets |
| 130 |
|
|
$CHARSET{C0}->{'@'}->{ucs} = 0x00; ## ISO/IEC 6429 C0 |
| 131 |
|
|
for ("\x40", "\x43", "\x44", "\x45", "\x46", "\x49", "\x4A", "\x4B", "\x4C") { |
| 132 |
|
|
$CHARSET{C0}->{$_}->{C_LS0} = "\x0F"; |
| 133 |
|
|
$CHARSET{C0}->{$_}->{C_LS1} = "\x0E"; |
| 134 |
|
|
$CHARSET{C0}->{$_}->{r_LS0} = '\x0F'; |
| 135 |
|
|
$CHARSET{C0}->{$_}->{r_LS1} = '\x0E'; |
| 136 |
|
|
} |
| 137 |
|
|
for ("\x40", "\x44", "\x45", "\x46", "\x48", "\x4C") { |
| 138 |
|
|
$CHARSET{C0}->{$_}->{reset_all} = {"\x0A" => 1, "\x0B" => 1, |
| 139 |
|
|
"\x0C" => 1, "\x0D" => 1}; |
| 140 |
|
|
} |
| 141 |
|
|
$CHARSET{C0}->{"\x43"}->{reset_all} = {"\x0A" => 1}; |
| 142 |
|
|
$CHARSET{C0}->{"\x44"}->{C_SS2} = "\x1C"; |
| 143 |
|
|
$CHARSET{C0}->{"\x44"}->{r_SS2} = '\x1C'; |
| 144 |
|
|
for ("\x45", "\x49", "\x4A", "\x4B") { |
| 145 |
|
|
$CHARSET{C0}->{$_}->{C_SS2} = "\x19"; |
| 146 |
|
|
$CHARSET{C0}->{$_}->{C_SS3} = "\x1D"; |
| 147 |
|
|
$CHARSET{C0}->{$_}->{r_SS2} = '\x19'; |
| 148 |
|
|
$CHARSET{C0}->{$_}->{r_SS3} = '\x1D'; |
| 149 |
|
|
} |
| 150 |
|
|
$CHARSET{C0}->{"\x4C"}->{C_SS2} = "\x19"; |
| 151 |
|
|
$CHARSET{C0}->{"\x4C"}->{r_SS2} = '\x19'; |
| 152 |
|
|
|
| 153 |
|
|
$CHARSET{C1}->{'64291991C1'}->{dimension} = 1; ## ISO/IEC 6429:1991 C1 |
| 154 |
|
|
$CHARSET{C1}->{'64291991C1'}->{chars} = 32; |
| 155 |
|
|
$CHARSET{C1}->{'64291991C1'}->{ucs} = 0x80; |
| 156 |
|
|
for ("\x43", "\x45", "\x47", '64291991C1') { |
| 157 |
|
|
$CHARSET{C1}->{$_}->{C_SS2} = "\x8E"; |
| 158 |
|
|
$CHARSET{C1}->{$_}->{C_SS3} = "\x8F"; |
| 159 |
|
|
$CHARSET{C1}->{$_}->{r_SS2} = '\x8E'; |
| 160 |
|
|
$CHARSET{C1}->{$_}->{r_SS3} = '\x8F'; |
| 161 |
|
|
$CHARSET{C1}->{$_}->{r_SS2_ESC} = '\x1B\x4E'; |
| 162 |
|
|
$CHARSET{C1}->{$_}->{r_SS3_ESC} = '\x1B\x4F'; |
| 163 |
|
|
} |
| 164 |
|
|
for ("\x43", '64291991C1') { |
| 165 |
|
|
$CHARSET{C1}->{$_}->{r_CSI} = '\x9B'; |
| 166 |
|
|
$CHARSET{C1}->{$_}->{r_CSI_ESC} = '\x1B\x5B'; |
| 167 |
|
|
$CHARSET{C1}->{$_}->{r_DCS} = '\x90'; |
| 168 |
|
|
$CHARSET{C1}->{$_}->{r_ST} = '\x9C'; |
| 169 |
|
|
$CHARSET{C1}->{$_}->{r_OSC} = '\x9D'; |
| 170 |
|
|
$CHARSET{C1}->{$_}->{r_PM} = '\x9E'; |
| 171 |
|
|
$CHARSET{C1}->{$_}->{r_APC} = '\x9F'; |
| 172 |
|
|
$CHARSET{C1}->{$_}->{reset_all} = {"\x85"=>1, "\x90"=>1, |
| 173 |
|
|
"\x9C"=>1, "\x9D"=>1, "\x9E"=>1, "\x9F"=>1}; |
| 174 |
|
|
} |
| 175 |
|
|
$CHARSET{C1}->{'64291991C1'}->{r_SCI} = '\x9A'; |
| 176 |
|
|
|
| 177 |
|
|
$CHARSET{single_control}->{Fs} ={ucs => 0x70005000, chars => 32, dimension => 1}; |
| 178 |
|
|
$CHARSET{single_control}->{'3F'} ={ucs => 0x70005020, chars => 80, dimension => 1}; |
| 179 |
|
|
$CHARSET{single_control}->{'3F!'}={ucs => 0x70005070, chars => 80, dimension => 1}; |
| 180 |
|
|
$CHARSET{single_control}->{'3F"'}={ucs => 0x700050C0, chars => 80, dimension => 1}; |
| 181 |
|
|
$CHARSET{single_control}->{'3F#'}={ucs => 0x70005110, chars => 80, dimension => 1}; |
| 182 |
|
|
} |
| 183 |
|
|
|
| 184 |
|
|
|
| 185 |
|
|
### --- Perl Encode module common functions |
| 186 |
|
|
|
| 187 |
|
|
sub encode ($$;$) { |
| 188 |
|
|
my ($obj, $str, $chk) = @_; |
| 189 |
|
|
$_[1] = '' if $chk; |
| 190 |
|
|
$str = &internal_to_iso2022 ($str); |
| 191 |
|
|
return $str; |
| 192 |
|
|
} |
| 193 |
|
|
|
| 194 |
|
|
sub decode ($$;$) { |
| 195 |
|
|
my ($obj, $str, $chk) = @_; |
| 196 |
|
|
$_[1] = '' if $chk; |
| 197 |
|
|
return &iso2022_to_internal ($str); |
| 198 |
|
|
} |
| 199 |
|
|
|
| 200 |
|
|
### --- Encode::ISO2022 unique functions |
| 201 |
|
|
|
| 202 |
|
|
## Make a new ISO/IEC 2022-buffers object with default status |
| 203 |
|
|
sub new_object { |
| 204 |
|
|
my %C; |
| 205 |
|
|
$C{bit} = 8; |
| 206 |
|
|
$C{CL} = 'C0'; $C{CR} = 'C1'; $C{ESC_Fe} = 'C1'; |
| 207 |
|
|
$C{C0} = $CHARSET{C0}->{"\x40"}; ## ISO/IEC 6429:1991 C0 |
| 208 |
|
|
$C{C1} = $CHARSET{C1}->{'64291991C1'}; ## ISO/IEC 6429:1991 C1 |
| 209 |
|
|
$C{GL} = 'G0'; $C{GR} = 'G1'; |
| 210 |
|
|
$C{G0} = $CHARSET{G94}->{"\x42"}; ## ISO/IEC 646:1991 IRV |
| 211 |
|
|
#$C{G1} = $CHARSET{G96}->{"\x41"}; ## ISO/IEC 8859-1 GR |
| 212 |
|
|
$C{G1} = $CHARSET{G94}->{"\x7E"}; ## empty set |
| 213 |
|
|
$C{G2} = $CHARSET{G94}->{"\x7E"}; ## empty set |
| 214 |
|
|
$C{G3} = $CHARSET{G94}->{"\x7E"}; ## empty set |
| 215 |
|
|
$C{option} = { |
| 216 |
|
|
C1invoke_to_right => 0, ## C1 invoked to: (0: ESC Fe, 1: CR) |
| 217 |
|
|
G94n_designate_long => 0, ## (1: ESC 02/04 02/08 04/00..02) |
| 218 |
|
|
designate_to => { ## Designated G buffer (-1: not be outputed) |
| 219 |
|
|
C0 => { |
| 220 |
|
|
default => 0, |
| 221 |
|
|
}, |
| 222 |
|
|
C1 => { |
| 223 |
|
|
default => 1, |
| 224 |
|
|
}, |
| 225 |
|
|
G94 => { |
| 226 |
|
|
"\x42" => 0, |
| 227 |
|
|
default => 0, |
| 228 |
|
|
}, |
| 229 |
|
|
G96 => { |
| 230 |
|
|
default => 1, |
| 231 |
|
|
}, |
| 232 |
|
|
G94n => { |
| 233 |
|
|
default => 0, |
| 234 |
|
|
}, |
| 235 |
|
|
G96n => { |
| 236 |
|
|
default => 1, |
| 237 |
|
|
}, |
| 238 |
|
|
}, |
| 239 |
|
|
Ginvoke_by_single_shift => [0,0,0,0], ## Invoked by SS |
| 240 |
|
|
Ginvoke_to_left => [1,1,1,1], ## Which invoked to? (1: L, 0: R) |
| 241 |
|
|
private_set => { ## Private set vs Final byte |
| 242 |
|
|
C0 => [], |
| 243 |
|
|
C1 => [], |
| 244 |
|
|
G94 => [], |
| 245 |
|
|
G94n => [[],[],[],[],[]], |
| 246 |
|
|
G96 => [], |
| 247 |
|
|
#G96n => [], ## (not implemented) |
| 248 |
|
|
U96n => [], ## mule-unicode sets |
| 249 |
|
|
XC1 => { |
| 250 |
|
|
'64291991C1' => undef, ## ISO/IEC 6429:1991 C1 |
| 251 |
|
|
}, |
| 252 |
|
|
}, |
| 253 |
|
|
reset => { ## Reset status at top of line |
| 254 |
|
|
Gdesignation => "\x42", ## F of designation or 0 |
| 255 |
|
|
Ginvoke => 1, |
| 256 |
|
|
}, |
| 257 |
|
|
use_revision => 1, ## Output IRR |
| 258 |
|
|
}; |
| 259 |
|
|
\%C; |
| 260 |
|
|
} |
| 261 |
|
|
|
| 262 |
|
|
sub iso2022_to_internal ($;\%) { |
| 263 |
|
|
my ($s, $C) = @_; |
| 264 |
|
|
my %_GB_to_GN = ( |
| 265 |
|
|
"\x28"=>'G0',"\x29"=>'G1',"\x2A"=>'G2',"\x2B"=>'G3', |
| 266 |
|
|
"\x2C"=>'G0',"\x2D"=>'G1',"\x2E"=>'G2',"\x2F"=>'G3', |
| 267 |
|
|
); |
| 268 |
|
|
$C ||= &new_object; |
| 269 |
|
|
|
| 270 |
|
|
use re 'eval'; |
| 271 |
|
|
$s =~ s{ |
| 272 |
|
|
((??{ $_CHARS_to_RANGE{'l'.$C->{$C->{GL}}->{chars}} |
| 273 |
|
|
. qq/{$C->{$C->{GL}}->{dimension},$C->{$C->{GL}}->{dimension}}/ })) |
| 274 |
|
|
|((??{ $_CHARS_to_RANGE{'r'.$C->{$C->{GL}}->{chars}} |
| 275 |
|
|
. qq/{$C->{$C->{GR}}->{dimension},$C->{$C->{GR}}->{dimension}}/ })) |
| 276 |
|
|
|
| 277 |
|
|
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS2} || '(?!)') |
| 278 |
|
|
. ($C->{$C->{ESC_Fe}}->{r_SS2_ESC} ? |
| 279 |
|
|
qq/|$C->{$C->{ESC_Fe}}->{r_SS2_ESC}/ : '') |
| 280 |
|
|
. ($C->{$C->{CL}}->{r_SS2} ? qq/|$C->{$C->{CL}}->{r_SS2}/ : '') . q/)/ |
| 281 |
|
|
. ( $C->{$C->{CL}}->{r_LS0} |
| 282 |
|
|
||$C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
| 283 |
|
|
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
| 284 |
|
|
}) |
| 285 |
|
|
((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}} |
| 286 |
|
|
. qq/{$C->{G2}->{dimension},$C->{G2}->{dimension}}/ })) |
| 287 |
|
|
| (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS3} || '(?!)') |
| 288 |
|
|
. ($C->{$C->{ESC_Fe}}->{r_SS3_ESC} ? |
| 289 |
|
|
qq/|$C->{$C->{ESC_Fe}}->{r_SS3_ESC}/ : '') |
| 290 |
|
|
. ($C->{$C->{CL}}->{r_SS3} ? qq/|$C->{$C->{CL}}->{r_SS3}/ : '') . q/)/ |
| 291 |
|
|
. ( $C->{$C->{CL}}->{r_LS0} |
| 292 |
|
|
|| $C->{$C->{CL}}->{r_LS1}? ## ISO/IEC 6429:1992 9 |
| 293 |
|
|
qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'') |
| 294 |
|
|
}) |
| 295 |
|
|
((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}} |
| 296 |
|
|
. qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ })) |
| 297 |
|
|
|
| 298 |
wakaba |
1.3 |
## Locking shift |
| 299 |
|
|
|( \x1B[\x6E\x6F\x7C-\x7E] |
| 300 |
|
|
|(??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' }) |
| 301 |
|
|
|(??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' }) |
| 302 |
|
|
) |
| 303 |
wakaba |
1.1 |
|
| 304 |
|
|
## Control sequence |
| 305 |
|
|
|(??{ '(?:'.($C->{$C->{CR}}->{r_CSI}||'(?!)') |
| 306 |
|
|
.($C->{$C->{ESC_Fe}}->{r_CSI_ESC} ? |
| 307 |
|
|
qq/|$C->{$C->{ESC_Fe}}->{r_CSI_ESC}/: '') |
| 308 |
|
|
.')' |
| 309 |
|
|
}) |
| 310 |
|
|
((??{ qq/[\x30-\x3F$C->{$C->{CL}}->{LS0}$C->{$C->{CL}}->{LS1}\xB0-\xBF]*/ |
| 311 |
|
|
.qq/[\x20-\x2F$C->{$C->{CL}}->{LS0}$C->{$C->{CL}}->{LS1}\xA0-\xAF]*/ |
| 312 |
|
|
}) [\x40-\x7E\xD0-\xFE]) |
| 313 |
|
|
|
| 314 |
|
|
## Other escape sequence |
| 315 |
|
|
|(\x1B[\x20-\x2F]*[\x30-\x7E]) |
| 316 |
|
|
|
| 317 |
|
|
## Misc. sequence (SP, control, or broken data) |
| 318 |
|
|
|([\x00-\xFF]) |
| 319 |
|
|
}{ |
| 320 |
wakaba |
1.3 |
my ($gl,$gr,$ss2,$ss3,$ls,$csi,$esc,$misc) |
| 321 |
|
|
= ($1,$2,$3,$4,$5,$6,$7,$8,$9); |
| 322 |
wakaba |
1.1 |
$C->{_irr} = undef unless defined $esc; |
| 323 |
|
|
## GL graphic character |
| 324 |
|
|
if (defined $gl) { |
| 325 |
|
|
my $c = 0; |
| 326 |
|
|
my $m = $C->{$C->{GL}}->{chars}==94?0x21:$C->{$C->{GL}}->{chars}==96?0x20:0; |
| 327 |
|
|
for (split //, $gl) { |
| 328 |
|
|
$c = $c * $C->{$C->{GL}}->{chars} + unpack ('C', $_) - $m; |
| 329 |
|
|
} |
| 330 |
|
|
chr ($C->{$C->{GL}}->{ucs} + $c); |
| 331 |
|
|
## Control, SP, or broken data |
| 332 |
|
|
## TODO: support control sets other than ISO/IEC 6429's |
| 333 |
|
|
} elsif (defined $misc) { |
| 334 |
|
|
$misc; |
| 335 |
|
|
## GR graphic character |
| 336 |
|
|
} elsif ($gr) { |
| 337 |
|
|
my $c = 0; |
| 338 |
|
|
my $m = $C->{$C->{GR}}->{chars}==94?0xA1:$C->{$C->{GR}}->{chars}==96?0xA0:0x80; |
| 339 |
|
|
for (split //, $gr) { |
| 340 |
|
|
$c = $c * $C->{$C->{GR}}->{chars} + unpack ('C', $_) - $m; |
| 341 |
|
|
} |
| 342 |
|
|
chr ($C->{$C->{GR}}->{ucs} + $c); |
| 343 |
|
|
## Graphic character with SS2 |
| 344 |
|
|
} elsif ($ss2) { |
| 345 |
|
|
$ss2 =~ tr/\x80-\xFF/\x00-\x7F/; |
| 346 |
|
|
my $c = 0; my $m = $C->{G2}->{chars}==94?0x21:$C->{G2}->{chars}==96?0x20:0; |
| 347 |
|
|
for (split //, $ss2) { |
| 348 |
|
|
$c = $c * $C->{G2}->{chars} + unpack ('C', $_) - $m; |
| 349 |
|
|
} |
| 350 |
|
|
chr ($C->{G2}->{ucs} + $c); |
| 351 |
|
|
## Graphic character with SS3 |
| 352 |
|
|
} elsif ($ss3) { |
| 353 |
|
|
$ss3 =~ tr/\x80-\xFF/\x00-\x7F/; |
| 354 |
|
|
my $c = 0; my $m = $C->{G3}->{chars}==94?0x21:$C->{G3}->{chars}==96?0x20:0; |
| 355 |
|
|
for (split //, $ss3) { |
| 356 |
|
|
$c = $c * $C->{G3}->{chars} + unpack ('C', $_) - $m; |
| 357 |
|
|
} |
| 358 |
|
|
chr ($C->{G3}->{ucs} + $c); |
| 359 |
|
|
## Escape sequence |
| 360 |
|
|
} elsif ($esc) { |
| 361 |
|
|
## IRR (revision number) |
| 362 |
|
|
if ($esc =~ /\x1B\x26([\x40-\x7E])/) { |
| 363 |
|
|
$C->{_irr} = $1; $esc = ''; |
| 364 |
|
|
} else { |
| 365 |
|
|
$esc =~ s{ |
| 366 |
|
|
\x1B([\x28-\x2B])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 94^1 |
| 367 |
|
|
|\x1B\x24([\x28-\x2B]?)(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 94^n |
| 368 |
|
|
|
| 369 |
|
|
|\x1B([\x2C-\x2F])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 96^1 |
| 370 |
|
|
|\x1B\x24([\x2C-\x2F])(\x20?[\x21-\x23]?[\x30-\x7E]) ## Gx = 96^n |
| 371 |
|
|
|
| 372 |
|
|
|\x1B([\x40-\x5F]) ## ESC Fe |
| 373 |
|
|
|
| 374 |
|
|
|\x1B\x21([\x21-\x23]?[\x30-\x7E]) ## CL = C0 |
| 375 |
|
|
|\x1B\x22([\x21-\x23]?[\x30-\x7E]) ## CR & ESC Fe = C1 |
| 376 |
|
|
|
| 377 |
|
|
|\x1B([\x60-\x7E]) ## Single control functions |
| 378 |
|
|
|\x1B\x23([\x21-\x23]?)([\x30-\x7E]) |
| 379 |
|
|
|
| 380 |
|
|
|\x1B\x20([\x40-\x7E]) ## Announcer |
| 381 |
|
|
}{ |
| 382 |
|
|
my ($g94_g,$g94_f,$g94n_g,$g94n_f,$g96_g,$g96_f,$g96n_g,$g96n_f,$Fe, |
| 383 |
|
|
$CZD, $C1D, $Fs, $sI, $sF,$ACS) |
| 384 |
|
|
= ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15); |
| 385 |
|
|
my $rev = $C->{_irr} || ''; |
| 386 |
|
|
if ($g94_g) { |
| 387 |
|
|
$C->{ $_GB_to_GN{ $g94_g } } = $CHARSET{G94}->{ $g94_f.$rev } |
| 388 |
|
|
|| $CHARSET{G94}->{ $g94_f } |
| 389 |
|
|
|| $CHARSET{G94}->{ "\x7E" }; ''; |
| 390 |
|
|
} elsif (defined $g94n_f) { |
| 391 |
|
|
$C->{ $_GB_to_GN{ $g94n_g } || 'G0' } = $CHARSET{G94n}->{ $g94n_f.$rev } |
| 392 |
|
|
|| $CHARSET{G94n}->{ $g94n_f } |
| 393 |
|
|
|| $CHARSET{G94n}->{ "\x7E" }; ''; |
| 394 |
|
|
} elsif ($g96_g) { |
| 395 |
|
|
$C->{ $_GB_to_GN{ $g96_g } } = $CHARSET{G96}->{ $g96_f.$rev } |
| 396 |
|
|
|| $CHARSET{G96}->{ $g96_f } |
| 397 |
|
|
|| $CHARSET{G96}->{ "\x7E" }; ''; |
| 398 |
|
|
} elsif (defined $g96n_f) { |
| 399 |
|
|
$C->{ $_GB_to_GN{ $g96n_g } } = $CHARSET{G96n}->{ $g96n_f.$rev } |
| 400 |
|
|
|| $CHARSET{G96n}->{ $g96n_f } |
| 401 |
|
|
|| $CHARSET{G96n}->{ "\x7E" }; ''; |
| 402 |
|
|
} elsif ($Fe) { ## ESC Fe => C1 |
| 403 |
|
|
chr ($C->{ $C->{ESC_Fe} }->{ucs} + (ord ($Fe) - 0x40)); |
| 404 |
|
|
} elsif ($CZD) { |
| 405 |
|
|
$C->{C0} = $CHARSET{C0}->{ $CZD.$rev } |
| 406 |
|
|
|| $CHARSET{C0}->{ $CZD } || $CHARSET{C0}->{ "\x7E" }; ''; |
| 407 |
|
|
} elsif ($C1D) { |
| 408 |
|
|
$C->{C1} = $CHARSET{C1}->{ $C1D.$rev } |
| 409 |
|
|
|| $CHARSET{C1}->{ $C1D } || $CHARSET{C1}->{ "\x7E" }; ''; |
| 410 |
|
|
} elsif ($Fs) { |
| 411 |
|
|
if ($Fs eq "\x6E") { ## LS2 |
| 412 |
|
|
$C->{GL} = 'G2'; ''; |
| 413 |
|
|
} elsif ($Fs eq "\x6F") { ## LS3 |
| 414 |
|
|
$C->{GL} = 'G3'; ''; |
| 415 |
|
|
} elsif ($Fs eq "\x7E") { ## LS1R |
| 416 |
|
|
$C->{GR} = 'G1'; $C->{GL} = 'G1' if $C->{bit} == 7; ''; |
| 417 |
|
|
} elsif ($Fs eq "\x7D") { ## LS2R |
| 418 |
|
|
$C->{GR} = 'G2'; $C->{GL} = 'G2' if $C->{bit} == 7; ''; |
| 419 |
|
|
} elsif ($Fs eq "\x7C") { ## LS3R |
| 420 |
|
|
$C->{GR} = 'G3'; $C->{GL} = 'G3' if $C->{bit} == 7; ''; |
| 421 |
|
|
} else { |
| 422 |
|
|
chr ($CHARSET{single_control}->{Fs}->{ucs} + (ord ($Fs) - 0x60)); |
| 423 |
|
|
} |
| 424 |
|
|
} elsif ($sI) { |
| 425 |
|
|
chr ($CHARSET{single_control}->{'3F'.$sI}->{ucs} + (ord ($sF) - 0x30)); |
| 426 |
|
|
} elsif ($ACS) { ## Announcer |
| 427 |
|
|
if ($ACS eq "\x4A") { $C->{bit} = 7 } |
| 428 |
|
|
elsif ($ACS eq "\x4B") { $C->{bit} = 8 } |
| 429 |
|
|
''; |
| 430 |
|
|
} |
| 431 |
|
|
}gex; |
| 432 |
|
|
$C->{_irr} = undef; |
| 433 |
|
|
} |
| 434 |
|
|
$esc; |
| 435 |
wakaba |
1.3 |
## Locking shifts |
| 436 |
|
|
} elsif ($ls) { |
| 437 |
|
|
if ($ls eq $C->{$C->{CL}}->{LS0}) { |
| 438 |
|
|
$C->{GL} = 'G0'; ''; |
| 439 |
|
|
} elsif ($ls eq $C->{$C->{CL}}->{LS1}) { |
| 440 |
|
|
$C->{GL} = 'G1'; ''; |
| 441 |
|
|
} elsif ($ls =~ /\x1B([\x6E\x6F])/) { |
| 442 |
|
|
$C->{GL} = {"\x6E"=>2, "\x6F"=>3}->{$1}; ''; |
| 443 |
|
|
} elsif ($ls =~ /\x1B([\x7C-\x7E])/) { |
| 444 |
|
|
$C->{GR} = {"\x7E"=>1, "\x7D"=>2, "\x7C"=>3}->{$1}; ''; |
| 445 |
|
|
} |
| 446 |
wakaba |
1.1 |
## Control sequence |
| 447 |
|
|
} elsif ($csi) { |
| 448 |
|
|
$csi =~ tr/\xA0-\xFF/\x20-\x7F/d; |
| 449 |
|
|
$csi =~ s/$C->{$C->{CL}}->{LS0}//g if $C->{$C->{CL}}->{LS0}; |
| 450 |
|
|
$csi =~ s/$C->{$C->{CL}}->{LS1}//g if $C->{$C->{CL}}->{LS1}; |
| 451 |
|
|
"\x9B".$csi; |
| 452 |
|
|
} |
| 453 |
|
|
}gex; |
| 454 |
|
|
$s; |
| 455 |
|
|
} |
| 456 |
|
|
|
| 457 |
|
|
sub internal_to_iso2022 ($\%) { |
| 458 |
|
|
my ($s, $C) = @_; |
| 459 |
|
|
$C ||= &new_object; |
| 460 |
|
|
|
| 461 |
|
|
my $r = ''; |
| 462 |
|
|
for my $c (split //, $s) { |
| 463 |
|
|
my $cc = ord $c; |
| 464 |
|
|
my $t; |
| 465 |
|
|
if ($cc <= 0x1F) { |
| 466 |
|
|
$t = _i2c ($c, $C, type => 'C0', charset => '@'); |
| 467 |
|
|
} elsif ($cc == 0x20 || $cc == 0x7F) { |
| 468 |
|
|
$t = _back2ascii ($C) . $c; |
| 469 |
|
|
} elsif ($cc < 0x7F) { |
| 470 |
|
|
$t = _i2g ($c, $C, type => 'G94', charset => 'B'); |
| 471 |
|
|
} elsif ($cc <= 0x9F) { |
| 472 |
|
|
$t = _i2c ($c, $C, type => 'C1', charset_id => '64291991C1', |
| 473 |
|
|
charset => $C->{private_set}->{XC1}->{'64291991C1'}); |
| 474 |
|
|
} elsif ($cc <= 0xFF) { |
| 475 |
|
|
$t = _i2g (chr($cc-0x80), $C, type => 'G96', charset => 'A'); |
| 476 |
|
|
} elsif ($cc <= 0x24FF) { |
| 477 |
|
|
my $c = $cc - 0x100; |
| 478 |
|
|
my $final = $C->{private_set}->{U96n}->[0]; |
| 479 |
|
|
if (length $final) { |
| 480 |
|
|
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
| 481 |
|
|
type => 'G96n', charset => $final); |
| 482 |
|
|
} |
| 483 |
|
|
} elsif ($cc <= 0x33FF) { |
| 484 |
|
|
my $c = $cc - 0x2500; |
| 485 |
|
|
my $final = $C->{private_set}->{U96n}->[1]; |
| 486 |
|
|
if (length $final) { |
| 487 |
|
|
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
| 488 |
|
|
type => 'G96n', charset => $final); |
| 489 |
|
|
} |
| 490 |
|
|
} elsif (0xE000 <= $cc && $cc <= 0xFFFF) { |
| 491 |
|
|
my $c = $cc - 0xE000; |
| 492 |
|
|
my $final = $C->{private_set}->{U96n}->[2]; |
| 493 |
|
|
if (length $final) { |
| 494 |
|
|
$t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C, |
| 495 |
|
|
type => 'G96n', charset => $final); |
| 496 |
|
|
} |
| 497 |
|
|
|
| 498 |
|
|
} elsif (0xE9F6C0 <= $cc && $cc <= 0xF06F80) { |
| 499 |
|
|
my $c = $cc - 0xE9F6C0; |
| 500 |
|
|
$t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C, |
| 501 |
|
|
type => 'G94n', charset => chr(($c / 8836)+0x30)); |
| 502 |
|
|
} elsif (0xF49D7C <= $cc && $cc <= 0xF4BFFF) { |
| 503 |
|
|
my $c = $cc - 0xF49D7C; |
| 504 |
|
|
$t = _i2g (chr(($c / 94)+0x21).chr(($c % 94)+0x21), $C, |
| 505 |
|
|
type => 'G94n', charset => 'B', revision => '@'); |
| 506 |
|
|
|
| 507 |
|
|
} elsif (0xF0000 <= $cc && $cc <= 0x10F7FF) { |
| 508 |
|
|
my $c = $cc - 0xF0000; |
| 509 |
|
|
$t = _i2g (chr((($c % 9216) / 96)+0x20).chr(($c % 96)+0x20), $C, |
| 510 |
|
|
type => 'G96n', charset => "\x20".chr(($c / 9216)+0x40)); |
| 511 |
|
|
} elsif (0xE90940 <= $cc && $cc <= 0xE92641) { |
| 512 |
|
|
my $c = $cc - 0xE90940; |
| 513 |
|
|
$t = _i2g (chr(($c % 94)+0x21), $C, |
| 514 |
|
|
type => 'G94', charset => chr(($c / 94)+0x30)); |
| 515 |
|
|
} elsif (0xE92642 <= $cc && $cc <= 0xE9269F) { |
| 516 |
|
|
my $c = $cc - 0xE92642; |
| 517 |
|
|
$t = _i2g (chr($c+0x21),$C,type => 'G94', charset => "\x64", revision => '@'); |
| 518 |
|
|
} elsif (0xE926A0 <= $cc && $cc <= 0xE9443F) { |
| 519 |
|
|
my $c = $cc - 0xE926A0; |
| 520 |
|
|
$t = _i2g (chr(($c % 96)+0x20), $C, |
| 521 |
|
|
type => 'G96', charset => chr(($c / 96)+0x30)); |
| 522 |
|
|
} elsif (0xE944A0 <= $cc && $cc <= 0xE961A1) { |
| 523 |
|
|
my $c = $cc - 0xE944A0; |
| 524 |
|
|
$t = _i2g (chr(($c % 94)+0x21), $C, |
| 525 |
|
|
type => 'G94', charset => '!'.chr(($c / 94)+0x30)); |
| 526 |
|
|
} elsif (0xE96200 <= $cc && $cc <= 0xE97F9F) { |
| 527 |
|
|
my $c = $cc - 0xE96200; |
| 528 |
|
|
$t = _i2g (chr(($c % 96)+0x20), $C, |
| 529 |
|
|
type => 'G96', charset => '!'.chr(($c / 96)+0x30)); |
| 530 |
|
|
} elsif (0xE98000 <= $cc && $cc <= 0xE99D01) { |
| 531 |
|
|
my $c = $cc - 0xE98000; |
| 532 |
|
|
$t = _i2g (chr(($c % 94)+0x21), $C, |
| 533 |
|
|
type => 'G94', charset => '"'.chr(($c / 94)+0x30)); |
| 534 |
|
|
} elsif (0xE99D60 <= $cc && $cc <= 0xE9BAFF) { |
| 535 |
|
|
my $c = $cc - 0xE99D60; |
| 536 |
|
|
$t = _i2g (chr(($c % 96)+0x20), $C, |
| 537 |
|
|
type => 'G96', charset => '"'.chr(($c / 96)+0x30)); |
| 538 |
|
|
} elsif (0xE9BB60 <= $cc && $cc <= 0xE9D861) { |
| 539 |
|
|
my $c = $cc - 0xE9BB60; |
| 540 |
|
|
$t = _i2g (chr(($c % 94)+0x21), $C, |
| 541 |
|
|
type => 'G94', charset => '#'.chr(($c / 94)+0x30)); |
| 542 |
|
|
} elsif (0xE9D8C0 <= $cc && $cc <= 0xE9F65F) { |
| 543 |
|
|
my $c = $cc - 0xE9D8C0; |
| 544 |
|
|
$t = _i2g (chr(($c % 96)+0x20), $C, |
| 545 |
|
|
type => 'G96', charset => '#'.chr(($c / 96)+0x30)); |
| 546 |
|
|
} elsif (0x70090940 <= $cc && $cc <= 0x70092641) { |
| 547 |
|
|
my $c = $cc - 0x70090940; |
| 548 |
|
|
$t = _i2g (chr(($c % 94)+0x21), $C, |
| 549 |
|
|
type => 'G94', charset => "\x20".chr(($c / 94)+0x30)); |
| 550 |
|
|
} elsif (0x700926A0 <= $cc && $cc <= 0x7009443F) { |
| 551 |
|
|
my $c = $cc - 0x700926A0; |
| 552 |
|
|
$t = _i2g (chr(($c % 96)+0x20), $C, |
| 553 |
|
|
type => 'G96', charset => "\x20".chr(($c / 96)+0x30)); |
| 554 |
|
|
## TODO: DRCS with I byte: U+700944A0-U+7009F6BF |
| 555 |
|
|
} elsif (0x70400000 <= $cc && $cc <= 0x7040FFED) { |
| 556 |
|
|
my $c = $cc - 0x70400000; |
| 557 |
|
|
$t = _i2g (chr(($c % 94)+0x21), $C, charset_id => 'P'.int ($c / 94), |
| 558 |
|
|
type => 'G94', charset => $C->{private_set}->{G94}->[ $c / 94 ]); |
| 559 |
|
|
} elsif (0x70410000 <= $cc && $cc <= 0x7041FFBF) { |
| 560 |
|
|
my $c = $cc - 0x70410000; |
| 561 |
|
|
$t = _i2g (chr(($c % 96)+0x20), $C, charset_id => 'P'.int ($c / 96), |
| 562 |
|
|
type => 'G96', charset => $C->{private_set}->{G96}->[ $c / 96 ]); |
| 563 |
|
|
} elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) { |
| 564 |
|
|
my $c = $cc % 0x10000; |
| 565 |
|
|
$t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C, |
| 566 |
|
|
type => 'G94n', |
| 567 |
wakaba |
1.2 |
charset_id => 'P'.int(($cc / 0x10000) - 0x7042).'_'.int($c / 8836), |
| 568 |
wakaba |
1.1 |
charset => $C->{private_set}->{G94n}->[ ($cc / 0x10000) - 0x7042 ] |
| 569 |
|
|
->[ $c / 8836 ]); |
| 570 |
|
|
} |
| 571 |
|
|
if (defined $t) { |
| 572 |
|
|
$r .= $t; |
| 573 |
|
|
} else { |
| 574 |
|
|
$r .= _i2g ("\x3F", $C, type => 'G94', charset => 'B'); |
| 575 |
|
|
} |
| 576 |
|
|
} |
| 577 |
|
|
$r . _back2ascii ($C); |
| 578 |
|
|
} |
| 579 |
|
|
|
| 580 |
|
|
## $O{charset} eq undef means that charset is same as the current designated one. |
| 581 |
|
|
sub _i2c ($%%) { |
| 582 |
|
|
my ($s, $C, %O) = @_; |
| 583 |
|
|
my $r = ''; |
| 584 |
|
|
if ($O{type} eq 'C0') { |
| 585 |
|
|
if (defined $O{charset}) { |
| 586 |
|
|
if ( $C->{C0} ne $CHARSET{C0}->{$O{charset}} |
| 587 |
|
|
&& $C->{C0} ne $CHARSET{C0}->{$O{charset_id}}) { |
| 588 |
|
|
for ($C->{option}->{designate_to}->{C0}->{$O{charset}}, |
| 589 |
|
|
$C->{option}->{designate_to}->{C0}->{default}) { |
| 590 |
|
|
if (defined $_) { return undef if $_ == -1; last } |
| 591 |
|
|
} |
| 592 |
|
|
$r .= "\x1B\x21".$O{charset}; |
| 593 |
|
|
$C->{C0} = $CHARSET{C0}->{$O{charset}}; |
| 594 |
|
|
} |
| 595 |
|
|
} elsif (defined $O{charset_id}) { |
| 596 |
|
|
if ($C->{C0} ne $CHARSET{C0}->{$O{charset_id}}) { |
| 597 |
|
|
return undef; ## Control set is not designated nor has F byte |
| 598 |
|
|
} |
| 599 |
|
|
} |
| 600 |
|
|
$r .= _back2ascii ($C, reset_all => $C->{C0}->{reset_all}->{$s}); |
| 601 |
|
|
return $r . $s; |
| 602 |
|
|
} elsif ($O{type} eq 'C1') { |
| 603 |
|
|
if (defined $O{charset}) { |
| 604 |
|
|
if ( $C->{C1} ne $CHARSET{C1}->{$O{charset}} |
| 605 |
|
|
&& $C->{C1} ne $CHARSET{C1}->{$O{charset_id}}) { |
| 606 |
|
|
for ($C->{option}->{designate_to}->{C1}->{$O{charset}}, |
| 607 |
|
|
$C->{option}->{designate_to}->{C1}->{default}) { |
| 608 |
|
|
if (defined $_) { return undef if $_ == -1; last } |
| 609 |
|
|
} |
| 610 |
|
|
$r .= "\x1B\x22".$O{charset}; |
| 611 |
|
|
$C->{C1} = $CHARSET{C1}->{$O{charset}}; |
| 612 |
|
|
} |
| 613 |
|
|
} elsif (defined $O{charset_id}) { |
| 614 |
|
|
if ($C->{C1} ne $CHARSET{C1}->{$O{charset_id}}) { |
| 615 |
|
|
return undef; ## Control set is not designated nor has F byte |
| 616 |
|
|
} |
| 617 |
|
|
} |
| 618 |
|
|
$r .= _back2ascii ($C, reset_all => $C->{C1}->{reset_all}->{$s}); |
| 619 |
|
|
unless ($C->{option}->{C1invoke_to_right}) { ## ESC Fe |
| 620 |
|
|
$s =~ s/([\x80-\x9F])/"\x1B" . chr (ord ($1) - 0x40)/ge; |
| 621 |
|
|
} |
| 622 |
|
|
return $r . $s; |
| 623 |
|
|
} |
| 624 |
|
|
} |
| 625 |
|
|
sub _i2g ($%%) { |
| 626 |
|
|
my ($s, $C, %O) = @_; |
| 627 |
|
|
my $r = ''; |
| 628 |
|
|
my $set = $CHARSET{$O{type}}->{$O{charset}}; |
| 629 |
|
|
my $set0 = $CHARSET{$O{type}}->{$O{charset_id}}; |
| 630 |
|
|
## -- designate character set |
| 631 |
|
|
my $G = 0; |
| 632 |
|
|
if ($C->{G0} eq $set || $C->{G0} eq $set0) { $G = 0 } |
| 633 |
|
|
elsif ($C->{G1} eq $set || $C->{G1} eq $set0) { $G = 1 } |
| 634 |
|
|
elsif ($C->{G2} eq $set || $C->{G2} eq $set0) { $G = 2 } |
| 635 |
|
|
elsif ($C->{G3} eq $set || $C->{G3} eq $set0) { $G = 3 } |
| 636 |
|
|
else { |
| 637 |
|
|
return undef unless $set; ## charset does not have F byte |
| 638 |
|
|
$G = 1 if $O{type} eq 'G96' || $O{type} eq 'G96n'; |
| 639 |
|
|
for ($C->{option}->{designate_to}->{$O{type}}->{$O{charset}}, |
| 640 |
|
|
$C->{option}->{designate_to}->{$O{type}}->{default}) { |
| 641 |
|
|
if (defined $_) { |
| 642 |
|
|
$G = $_; last; |
| 643 |
|
|
} |
| 644 |
|
|
} |
| 645 |
|
|
if ($G == -1) { |
| 646 |
|
|
return undef; |
| 647 |
|
|
} |
| 648 |
|
|
if ($O{type} eq 'G94') { |
| 649 |
|
|
$r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'') |
| 650 |
|
|
."\x1B".("\x28","\x29","\x2A","\x2B")[$G].$O{charset}; |
| 651 |
|
|
} elsif ($O{type} eq 'G94n') { |
| 652 |
|
|
if ($G == 0 && !$C->{option}->{G94n_designate_long} |
| 653 |
|
|
&& ($O{charset} eq '@' || $O{charset} eq 'A' || $O{charset} eq 'B')) { |
| 654 |
|
|
$r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'') |
| 655 |
|
|
."\x1B\x24".$O{charset}; |
| 656 |
|
|
} else { |
| 657 |
|
|
$r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'') |
| 658 |
|
|
."\x1B\x24".("\x28","\x29","\x2A","\x2B")[$G].$O{charset}; |
| 659 |
|
|
} |
| 660 |
|
|
} elsif ($O{type} eq 'G96') { |
| 661 |
|
|
$r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'') |
| 662 |
|
|
."\x1B".("\x2C","\x2D","\x2E","\x2F")[$G].$O{charset}; |
| 663 |
|
|
} elsif ($O{type} eq 'G96n') { |
| 664 |
|
|
$r .= ($O{revision}&&$C->{option}->{use_revision}?"\x1B\x26".$O{revision}:'') |
| 665 |
|
|
."\x1B\x24".("\x2C","\x2D","\x2E","\x2F")[$G].$O{charset}; |
| 666 |
|
|
} |
| 667 |
|
|
$C->{'G'.$G} = $CHARSET{$O{type}}->{$O{charset}}; |
| 668 |
|
|
} |
| 669 |
|
|
## -- invoke G buffer |
| 670 |
|
|
my $left = $C->{option}->{Ginvoke_to_left}->[$G]; |
| 671 |
|
|
if ($C->{GL} eq 'G'.$G) { |
| 672 |
|
|
$left = 1; |
| 673 |
|
|
} elsif ($C->{GR} eq 'G'.$G) { |
| 674 |
|
|
$left = 0; |
| 675 |
|
|
} else { |
| 676 |
|
|
if ($C->{option}->{Ginvoke_by_single_shift}->[$G]) { |
| 677 |
|
|
if ($C->{C1}->{'C_SS'.$G}) { |
| 678 |
|
|
$r .= _i2c ($C->{C1}->{'C_SS'.$G}, $C, type => 'C1') || return undef; |
| 679 |
|
|
} elsif ($C->{C0}->{'C_SS'.$G}) { |
| 680 |
|
|
$r .= _i2c ($C->{C0}->{'C_SS'.$G}, $C, type => 'C0') || return undef; |
| 681 |
|
|
} else { ## Both C0 and C1 set do not have SS2/3. |
| 682 |
|
|
$r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G; |
| 683 |
|
|
} |
| 684 |
|
|
} else { |
| 685 |
|
|
$left = 0 if $G == 1 && !$C->{C0}->{C_LS1}; |
| 686 |
|
|
$r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G; |
| 687 |
|
|
} |
| 688 |
|
|
} |
| 689 |
|
|
$s =~ tr/\x00-\x7F/\x80-\xFF/ unless $left; |
| 690 |
|
|
$r . $s; |
| 691 |
|
|
} |
| 692 |
|
|
sub _back2ascii (%) { |
| 693 |
|
|
my ($C, %O) = @_; |
| 694 |
|
|
my $r = ''; |
| 695 |
|
|
if ($C->{option}->{reset}->{Gdesignation}) { |
| 696 |
|
|
my $F = $C->{option}->{reset}->{Gdesignation}; # \x42 |
| 697 |
|
|
$r .= "\x1B\x28".$F unless $C->{G0} eq $CHARSET{G94}->{$F}; |
| 698 |
|
|
$C->{G0} = $CHARSET{G94}->{$F}; |
| 699 |
|
|
if ($O{reset_all}) { |
| 700 |
|
|
$C->{G1} = $CHARSET{G94}->{"\x7E"}; |
| 701 |
|
|
$C->{G2} = $CHARSET{G94}->{"\x7E"}; |
| 702 |
|
|
$C->{G3} = $CHARSET{G94}->{"\x7E"}; |
| 703 |
|
|
} |
| 704 |
|
|
} |
| 705 |
|
|
if ($C->{option}->{reset}->{Ginvoke}) { |
| 706 |
|
|
if ($C->{GL} ne 'G0') { |
| 707 |
|
|
$r .= $C->{C0}->{C_LS0} || ($C->{C0} = $CHARSET{C0}->{'@'},"\x1B\x21\x40\x0F"); |
| 708 |
|
|
$C->{GL} = 'G0'; |
| 709 |
|
|
} |
| 710 |
|
|
$C->{GR} = undef if $O{reset_all}; |
| 711 |
|
|
} |
| 712 |
|
|
$r; |
| 713 |
|
|
} |
| 714 |
|
|
## __invoke (\%C, $G, $left_or_right) |
| 715 |
|
|
sub __invoke (\%$$) { |
| 716 |
|
|
my ($C, $G) = @_; |
| 717 |
|
|
if ($_[2]) { |
| 718 |
|
|
$C->{GL} = 'G'.$G; |
| 719 |
|
|
return ($C->{C0}->{C_LS0} |
| 720 |
|
|
|| scalar ($C->{C0}=$CHARSET{C0}->{'@'},"\x1B\x21\x40\x0F"), |
| 721 |
|
|
$C->{C0}->{C_LS1}, "\x1B\x6E", "\x1B\x6F")[$G]; |
| 722 |
|
|
} else { |
| 723 |
|
|
$C->{GR} = 'G'.$G; |
| 724 |
|
|
return ("", "\x1B\x7E", "\x1B\x7D", "\x1B\x7C")[$G]; |
| 725 |
|
|
} |
| 726 |
|
|
''; |
| 727 |
|
|
} |
| 728 |
|
|
|
| 729 |
|
|
sub make_charset (%) { |
| 730 |
|
|
## TODO: support private charset ID such as 'X0' |
| 731 |
|
|
my %set = @_; |
| 732 |
|
|
my $setid = qq($set{I}$set{F}$set{revision}); |
| 733 |
|
|
my $settype = $set{type} || 'G94'; |
| 734 |
|
|
delete $set{type}, $set{I}, $set{F}, $set{revision}; |
| 735 |
|
|
$CHARSET{ $settype }->{ $setid } = \%set; |
| 736 |
|
|
} |
| 737 |
|
|
|
| 738 |
|
|
1; |
| 739 |
|
|
__END__ |
| 740 |
|
|
|
| 741 |
|
|
=head1 SEE ALSO |
| 742 |
|
|
|
| 743 |
|
|
ISO/IEC 646:1991, "7-bit coded graphic character set for intormation interchange". |
| 744 |
|
|
|
| 745 |
|
|
ISO/IEC 2022:1994, "Character Code Structure and Extension Techniques". |
| 746 |
|
|
(IDT with ECMA 35, JIS X 0202:1998) |
| 747 |
|
|
|
| 748 |
|
|
ISO/IEC 4873:1991, "8-Bit Coded Character Set Structure and Rules". |
| 749 |
|
|
(IDT with ECMA 43) |
| 750 |
|
|
|
| 751 |
|
|
ISO/IEC 6429:1992, "Control Functions for Coded Character Sets". |
| 752 |
|
|
(IDT with ECMA 48:1991, JIS X 0211:1998) |
| 753 |
|
|
|
| 754 |
|
|
ISO/IEC 8859, "8-Bit Single-Byte Coded Graphic Character Sets". |
| 755 |
|
|
|
| 756 |
|
|
Encode, perlunicode |
| 757 |
|
|
|
| 758 |
|
|
=head1 LICENSE |
| 759 |
|
|
|
| 760 |
|
|
Copyright 2002 wakaba <w@suika.fam.cx> |
| 761 |
|
|
|
| 762 |
|
|
This library is free software; you can redistribute it |
| 763 |
|
|
and/or modify it under the same terms as Perl itself. |
| 764 |
|
|
|
| 765 |
|
|
=cut |
| 766 |
|
|
|
| 767 |
wakaba |
1.3 |
# $Date: 2002/09/15 05:08:13 $ |
| 768 |
wakaba |
1.1 |
### ISO2022.pm ends here |