11 |
use vars qw(%CHARSET $VERSION); |
use vars qw(%CHARSET $VERSION); |
12 |
$VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
$VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
13 |
use base qw(Encode::Encoding); |
use base qw(Encode::Encoding); |
14 |
__PACKAGE__->Define (qw/iso-2022 iso2022/); |
__PACKAGE__->Define (qw/iso-2022 iso2022 2022 cp2022/); |
15 |
|
|
16 |
### --- Intialization |
### --- Intialization |
17 |
|
|
75 |
} |
} |
76 |
for my $f (0x30..0x5F, 0x7E) { |
for my $f (0x30..0x5F, 0x7E) { |
77 |
my $F = pack 'C', $f; |
my $F = pack 'C', $f; |
78 |
for ('', '!', '"', '#') { |
for ('', '!', '"', '#', ' ') { |
79 |
$CHARSET{G94n}->{ $_.$F }->{dimension} = 2; |
$CHARSET{G94n}->{ $_.$F }->{dimension} = 2; |
80 |
$CHARSET{G94n}->{ $_.$F }->{chars} = 94; |
$CHARSET{G94n}->{ $_.$F }->{chars} = 94; |
81 |
$CHARSET{G94n}->{ $_.$F }->{ucs} = |
$CHARSET{G94n}->{ $_.$F }->{ucs} = |
82 |
({'' => 0xE9F6C0}->{ $_ }||0) + 94*94 * ($f-0x30); |
({'' => 0xE9F6C0}->{ $_ }||0) + 94*94 * ($f-0x30); |
83 |
## BUG: 94^n DRCSes with I byte have no mapping area |
## BUG: 94^n sets with I byte have no mapping area |
84 |
|
|
85 |
$CHARSET{G96n}->{ $_.$F }->{dimension} = 2; |
$CHARSET{G96n}->{ $_.$F }->{dimension} = 2; |
86 |
$CHARSET{G96n}->{ $_.$F }->{chars} = 96; |
$CHARSET{G96n}->{ $_.$F }->{chars} = 96; |
254 |
Gdesignation => "\x42", ## F of designation or 0 |
Gdesignation => "\x42", ## F of designation or 0 |
255 |
Ginvoke => 1, |
Ginvoke => 1, |
256 |
}, |
}, |
257 |
|
undef_char => ["\x3F", {type => 'G94', charset => 'B'}], |
258 |
use_revision => 1, ## Output IRR |
use_revision => 1, ## Output IRR |
259 |
}; |
}; |
260 |
\%C; |
\%C; |
296 |
((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}} |
((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}} |
297 |
. qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ })) |
. qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ })) |
298 |
|
|
299 |
|((??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' })) ## GL = G0 |
## Locking shift |
300 |
|((??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' })) ## GL = G1 |
|( \x1B[\x6E\x6F\x7C-\x7E] |
301 |
|
|(??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' }) |
302 |
|
|(??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' }) |
303 |
|
) |
304 |
|
|
305 |
## Control sequence |
## Control sequence |
306 |
|(??{ '(?:'.($C->{$C->{CR}}->{r_CSI}||'(?!)') |
|(??{ '(?:'.($C->{$C->{CR}}->{r_CSI}||'(?!)') |
318 |
## Misc. sequence (SP, control, or broken data) |
## Misc. sequence (SP, control, or broken data) |
319 |
|([\x00-\xFF]) |
|([\x00-\xFF]) |
320 |
}{ |
}{ |
321 |
my ($gl,$gr,$ss2,$ss3,$ls0,$ls1,$csi,$esc,$misc) |
my ($gl,$gr,$ss2,$ss3,$ls,$csi,$esc,$misc) |
322 |
= ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10); |
= ($1,$2,$3,$4,$5,$6,$7,$8,$9); |
323 |
$C->{_irr} = undef unless defined $esc; |
$C->{_irr} = undef unless defined $esc; |
324 |
## GL graphic character |
## GL graphic character |
325 |
if (defined $gl) { |
if (defined $gl) { |
357 |
$c = $c * $C->{G3}->{chars} + unpack ('C', $_) - $m; |
$c = $c * $C->{G3}->{chars} + unpack ('C', $_) - $m; |
358 |
} |
} |
359 |
chr ($C->{G3}->{ucs} + $c); |
chr ($C->{G3}->{ucs} + $c); |
|
## Locking shifts |
|
|
} elsif ($ls0) { |
|
|
$C->{GL} = 'G0'; ''; |
|
|
} elsif ($ls1) { |
|
|
$C->{GL} = 'G1'; ''; |
|
360 |
## Escape sequence |
## Escape sequence |
361 |
} elsif ($esc) { |
} elsif ($esc) { |
362 |
## IRR (revision number) |
## IRR (revision number) |
433 |
$C->{_irr} = undef; |
$C->{_irr} = undef; |
434 |
} |
} |
435 |
$esc; |
$esc; |
436 |
|
## Locking shifts |
437 |
|
} elsif ($ls) { |
438 |
|
if ($ls eq $C->{$C->{CL}}->{LS0}) { |
439 |
|
$C->{GL} = 'G0'; ''; |
440 |
|
} elsif ($ls eq $C->{$C->{CL}}->{LS1}) { |
441 |
|
$C->{GL} = 'G1'; ''; |
442 |
|
} elsif ($ls =~ /\x1B([\x6E\x6F])/) { |
443 |
|
$C->{GL} = {"\x6E"=>2, "\x6F"=>3}->{$1}; ''; |
444 |
|
} elsif ($ls =~ /\x1B([\x7C-\x7E])/) { |
445 |
|
$C->{GR} = {"\x7E"=>1, "\x7D"=>2, "\x7C"=>3}->{$1}; ''; |
446 |
|
} |
447 |
## Control sequence |
## Control sequence |
448 |
} elsif ($csi) { |
} elsif ($csi) { |
449 |
$csi =~ tr/\xA0-\xFF/\x20-\x7F/d; |
$csi =~ tr/\xA0-\xFF/\x20-\x7F/d; |
565 |
my $c = $cc % 0x10000; |
my $c = $cc % 0x10000; |
566 |
$t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C, |
$t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C, |
567 |
type => 'G94n', |
type => 'G94n', |
568 |
charset_id => 'P'.int(($cc / 0x10000) - 0x7042).int($c / 8836), |
charset_id => 'P'.int(($cc / 0x10000) - 0x7042).'_'.int($c / 8836), |
569 |
charset => $C->{private_set}->{G94n}->[ ($cc / 0x10000) - 0x7042 ] |
charset => $C->{private_set}->{G94n}->[ ($cc / 0x10000) - 0x7042 ] |
570 |
->[ $c / 8836 ]); |
->[ $c / 8836 ]); |
571 |
} |
} |
572 |
if (defined $t) { |
if (defined $t) { |
573 |
$r .= $t; |
$r .= $t; |
574 |
} else { |
} else { |
575 |
$r .= _i2g ("\x3F", $C, type => 'G94', charset => 'B'); |
$r .= _i2g ($C->{option}->{undef_char}->[0], $C, |
576 |
|
%{ $C->{option}->{undef_char}->[1] }); |
577 |
} |
} |
578 |
} |
} |
579 |
$r . _back2ascii ($C); |
$r . _back2ascii ($C); |
627 |
sub _i2g ($%%) { |
sub _i2g ($%%) { |
628 |
my ($s, $C, %O) = @_; |
my ($s, $C, %O) = @_; |
629 |
my $r = ''; |
my $r = ''; |
630 |
my $set = $CHARSET{$O{type}}->{$O{charset}}; |
my $set = $CHARSET{$O{type}}->{$O{charset}. |
631 |
|
($O{revision}&&$C->{option}->{use_revision}?$O{revision}:'')}; |
632 |
my $set0 = $CHARSET{$O{type}}->{$O{charset_id}}; |
my $set0 = $CHARSET{$O{type}}->{$O{charset_id}}; |
633 |
## -- designate character set |
## -- designate character set |
634 |
my $G = 0; |
my $G = 0; |
682 |
} elsif ($C->{C0}->{'C_SS'.$G}) { |
} elsif ($C->{C0}->{'C_SS'.$G}) { |
683 |
$r .= _i2c ($C->{C0}->{'C_SS'.$G}, $C, type => 'C0') || return undef; |
$r .= _i2c ($C->{C0}->{'C_SS'.$G}, $C, type => 'C0') || return undef; |
684 |
} else { ## Both C0 and C1 set do not have SS2/3. |
} else { ## Both C0 and C1 set do not have SS2/3. |
685 |
|
$left = 0 if $G == 1 && !$C->{C0}->{C_LS1}; |
686 |
$r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G; |
$r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G; |
687 |
} |
} |
688 |
} else { |
} else { |
759 |
|
|
760 |
Encode, perlunicode |
Encode, perlunicode |
761 |
|
|
762 |
|
=head1 TODO |
763 |
|
|
764 |
|
=over 4 |
765 |
|
|
766 |
|
=item NCR (coding system other than ISO/IEC 2022) support |
767 |
|
|
768 |
|
=over 2 |
769 |
|
|
770 |
|
=item ESC 02/05 02/15 03/x of X Compound Text |
771 |
|
|
772 |
|
=back |
773 |
|
|
774 |
|
=item Output of control character sets, single control functions |
775 |
|
|
776 |
|
=item Designation sequence of control character sets (input) |
777 |
|
|
778 |
|
=item Special graphic character sets such as G3 of EUC-TW |
779 |
|
|
780 |
|
=item SUPER SHIFT (SS) invoke function of old control character set |
781 |
|
|
782 |
|
=item Safe transparent of control string (ISO/IEC 6429) |
783 |
|
|
784 |
|
=item Output of unoutputable characters as alternative notation such as SGML-like entity |
785 |
|
|
786 |
|
=item C0 set invoked to CR area like ISIRI code |
787 |
|
|
788 |
|
Really need? |
789 |
|
|
790 |
|
=item special treatment of 0x20, 0x7E, 0xA0, 0xFF |
791 |
|
|
792 |
|
For example, GB mongolian sets use MSP (MONGOLIAN SPACE) |
793 |
|
with these code positions. |
794 |
|
|
795 |
|
And, no less coding systems does not use (or does ban using) DEL. |
796 |
|
|
797 |
|
=item A lot of character sets don't have pseudo-UCS mapping. |
798 |
|
|
799 |
|
Most of 9m^n (n >= 3) sets, 9m^n sets with I byte, 9m^n |
800 |
|
DRCSes do not have pseudo-UCS mapping area. It is |
801 |
|
questionable to allocate lots of code positions to these |
802 |
|
rarely-(or no-)used character sets. |
803 |
|
|
804 |
|
=item Even character sets that have pseudo-UCS mapping, some of them can't be outputed in ISO/IEC 2022. |
805 |
|
|
806 |
|
Because output of rarely-used character sets is |
807 |
|
not implemented yet. |
808 |
|
|
809 |
|
=back |
810 |
|
|
811 |
=head1 LICENSE |
=head1 LICENSE |
812 |
|
|
813 |
Copyright 2002 wakaba <w@suika.fam.cx> |
Copyright 2002 wakaba <w@suika.fam.cx> |