/[suikacvs]/perl/lib/Encode/ISO2022.pm
Suika

Diff of /perl/lib/Encode/ISO2022.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2 by wakaba, Sun Sep 15 05:08:13 2002 UTC revision 1.4 by wakaba, Mon Sep 16 06:35:16 2002 UTC
# Line 75  for my $f (0x30..0x7E) { Line 75  for my $f (0x30..0x7E) {
75  }  }
76  for my $f (0x30..0x5F, 0x7E) {  for my $f (0x30..0x5F, 0x7E) {
77    my $F = pack 'C', $f;    my $F = pack 'C', $f;
78    for ('', '!', '"', '#') {    for ('', '!', '"', '#', ' ') {
79      $CHARSET{G94n}->{ $_.$F }->{dimension} = 2;      $CHARSET{G94n}->{ $_.$F }->{dimension} = 2;
80      $CHARSET{G94n}->{ $_.$F }->{chars} = 94;      $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
81      $CHARSET{G94n}->{ $_.$F }->{ucs} =      $CHARSET{G94n}->{ $_.$F }->{ucs} =
82        ({'' => 0xE9F6C0}->{ $_ }||0) + 94*94 * ($f-0x30);        ({'' => 0xE9F6C0}->{ $_ }||0) + 94*94 * ($f-0x30);
83        ## BUG: 94^n DRCSes with I byte have no mapping area        ## BUG: 94^n sets with I byte have no mapping area
84            
85      $CHARSET{G96n}->{ $_.$F }->{dimension} = 2;      $CHARSET{G96n}->{ $_.$F }->{dimension} = 2;
86      $CHARSET{G96n}->{ $_.$F }->{chars} = 96;      $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
# Line 254  sub new_object { Line 254  sub new_object {
254                  Gdesignation    => "\x42",      ## F of designation or 0                  Gdesignation    => "\x42",      ## F of designation or 0
255                  Ginvoke => 1,                  Ginvoke => 1,
256          },          },
257            undef_char      => ["\x3F", {type => 'G94', charset => 'B'}],
258          use_revision    => 1,   ## Output IRR          use_revision    => 1,   ## Output IRR
259    };    };
260    \%C;    \%C;
# Line 295  sub iso2022_to_internal ($;\%) { Line 296  sub iso2022_to_internal ($;\%) {
296        ((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}}        ((??{ $_CHARS_to_RANGE{'b'.$C->{$C->{GL}}->{chars}}
297              . qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ }))              . qq/{$C->{G3}->{dimension},$C->{G3}->{dimension}}/ }))
298            
299      |((??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' }))  ## GL = G0      ## Locking shift
300      |((??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' }))  ## GL = G1      |( \x1B[\x6E\x6F\x7C-\x7E]
301           |(??{ $C->{$C->{CL}}->{r_LS0}||'(?!)' })
302           |(??{ $C->{$C->{CL}}->{r_LS1}||'(?!)' })
303         )
304            
305      ## Control sequence      ## Control sequence
306      |(??{ '(?:'.($C->{$C->{CR}}->{r_CSI}||'(?!)')      |(??{ '(?:'.($C->{$C->{CR}}->{r_CSI}||'(?!)')
# Line 314  sub iso2022_to_internal ($;\%) { Line 318  sub iso2022_to_internal ($;\%) {
318      ## Misc. sequence (SP, control, or broken data)      ## Misc. sequence (SP, control, or broken data)
319      |([\x00-\xFF])      |([\x00-\xFF])
320    }{    }{
321      my ($gl,$gr,$ss2,$ss3,$ls0,$ls1,$csi,$esc,$misc)      my ($gl,$gr,$ss2,$ss3,$ls,$csi,$esc,$misc)
322        = ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10);        = ($1,$2,$3,$4,$5,$6,$7,$8,$9);
323      $C->{_irr} = undef unless defined $esc;      $C->{_irr} = undef unless defined $esc;
324      ## GL graphic character      ## GL graphic character
325      if (defined $gl) {      if (defined $gl) {
# Line 353  sub iso2022_to_internal ($;\%) { Line 357  sub iso2022_to_internal ($;\%) {
357          $c = $c * $C->{G3}->{chars} + unpack ('C', $_) - $m;          $c = $c * $C->{G3}->{chars} + unpack ('C', $_) - $m;
358        }        }
359        chr ($C->{G3}->{ucs} + $c);        chr ($C->{G3}->{ucs} + $c);
     ## Locking shifts  
       } elsif ($ls0) {  
         $C->{GL} = 'G0'; '';  
       } elsif ($ls1) {  
         $C->{GL} = 'G1'; '';  
360      ## Escape sequence      ## Escape sequence
361      } elsif ($esc) {      } elsif ($esc) {
362        ## IRR (revision number)        ## IRR (revision number)
# Line 434  sub iso2022_to_internal ($;\%) { Line 433  sub iso2022_to_internal ($;\%) {
433          $C->{_irr} = undef;          $C->{_irr} = undef;
434        }        }
435        $esc;        $esc;
436        ## Locking shifts
437        } elsif ($ls) {
438          if ($ls eq $C->{$C->{CL}}->{LS0}) {
439            $C->{GL} = 'G0'; '';
440          } elsif ($ls eq $C->{$C->{CL}}->{LS1}) {
441            $C->{GL} = 'G1'; '';
442          } elsif ($ls =~ /\x1B([\x6E\x6F])/) {
443            $C->{GL} = {"\x6E"=>2, "\x6F"=>3}->{$1}; '';
444          } elsif ($ls =~ /\x1B([\x7C-\x7E])/) {
445            $C->{GR} = {"\x7E"=>1, "\x7D"=>2, "\x7C"=>3}->{$1}; '';
446          }
447      ## Control sequence      ## Control sequence
448      } elsif ($csi) {      } elsif ($csi) {
449        $csi =~ tr/\xA0-\xFF/\x20-\x7F/d;        $csi =~ tr/\xA0-\xFF/\x20-\x7F/d;
# Line 562  sub internal_to_iso2022 ($\%) { Line 572  sub internal_to_iso2022 ($\%) {
572      if (defined $t) {      if (defined $t) {
573        $r .= $t;        $r .= $t;
574      } else {      } else {
575        $r .= _i2g ("\x3F", $C, type => 'G94', charset => 'B');        $r .= _i2g ($C->{option}->{undef_char}->[0], $C,
576                      %{ $C->{option}->{undef_char}->[1] });
577      }      }
578    }    }
579    $r . _back2ascii ($C);    $r . _back2ascii ($C);
# Line 616  sub _i2c ($%%) { Line 627  sub _i2c ($%%) {
627  sub _i2g ($%%) {  sub _i2g ($%%) {
628    my ($s, $C, %O) = @_;    my ($s, $C, %O) = @_;
629    my $r = '';    my $r = '';
630    my $set = $CHARSET{$O{type}}->{$O{charset}};    my $set = $CHARSET{$O{type}}->{$O{charset}.
631        ($O{revision}&&$C->{option}->{use_revision}?$O{revision}:'')};
632    my $set0 = $CHARSET{$O{type}}->{$O{charset_id}};    my $set0 = $CHARSET{$O{type}}->{$O{charset_id}};
633    ## -- designate character set    ## -- designate character set
634    my $G = 0;    my $G = 0;
# Line 670  sub _i2g ($%%) { Line 682  sub _i2g ($%%) {
682        } elsif ($C->{C0}->{'C_SS'.$G}) {        } elsif ($C->{C0}->{'C_SS'.$G}) {
683          $r .= _i2c ($C->{C0}->{'C_SS'.$G}, $C, type => 'C0') || return undef;          $r .= _i2c ($C->{C0}->{'C_SS'.$G}, $C, type => 'C0') || return undef;
684        } else {  ## Both C0 and C1 set do not have SS2/3.        } else {  ## Both C0 and C1 set do not have SS2/3.
685            $left = 0 if $G == 1 && !$C->{C0}->{C_LS1};
686          $r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G;          $r .= __invoke ($C, $G => $left) if $C->{$left?'GL':'GR'} ne 'G'.$G;
687        }        }
688      } else {      } else {
# Line 746  ISO/IEC 8859, "8-Bit Single-Byte Coded G Line 759  ISO/IEC 8859, "8-Bit Single-Byte Coded G
759    
760  Encode, perlunicode  Encode, perlunicode
761    
762    =head1 TODO
763    
764    =over 4
765    
766    =item NCR (coding system other than ISO/IEC 2022) support
767    
768    =over 2
769    
770    =item ESC 02/05 02/15 03/x of X Compound Text
771    
772    =back
773    
774    =item Output of control character sets, single control functions
775    
776    =item Designation sequence of control character sets (input)
777    
778    =item Special graphic character sets such as G3 of EUC-TW
779    
780    =item SUPER SHIFT (SS) invoke function of old control character set
781    
782    =item Safe transparent of control string (ISO/IEC 6429)
783    
784    =item Output of unoutputable characters as alternative notation such as SGML-like entity
785    
786    =item C0 set invoked to CR area like ISIRI code
787    
788    Really need?
789    
790    =item special treatment of 0x20, 0x7E, 0xA0, 0xFF
791    
792    For example, GB mongolian sets use MSP (MONGOLIAN SPACE)
793    with these code positions.
794    
795    And, no less coding systems does not use (or does ban using) DEL.
796    
797    =item A lot of character sets don't have pseudo-UCS mapping.
798    
799    Most of 9m^n (n >= 3) sets, 9m^n sets with I byte, 9m^n
800    DRCSes do not have pseudo-UCS mapping area.  It is
801    questionable to allocate lots of code positions to these
802    rarely-(or no-)used character sets.
803    
804    =item Even character sets that have pseudo-UCS mapping, some of them can't be outputed in ISO/IEC 2022.
805    
806    Because output of rarely-used character sets is
807    not implemented yet.
808    
809    =back
810    
811  =head1 LICENSE  =head1 LICENSE
812    
813  Copyright 2002 wakaba <w@suika.fam.cx>  Copyright 2002 wakaba <w@suika.fam.cx>

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.4

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24