/[suikacvs]/perl/lib/Encode/ISO2022.pm
Suika

Diff of /perl/lib/Encode/ISO2022.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.7 by wakaba, Sun Sep 22 11:09:38 2002 UTC revision 1.9 by wakaba, Mon Oct 14 06:58:35 2002 UTC
# Line 48  require Encode::Charset; Line 48  require Encode::Charset;
48          *CHARSET = \%Encode::Charset::CHARSET;          *CHARSET = \%Encode::Charset::CHARSET;
49          *CODING_SYSTEM = \%Encode::Charset::CODING_SYSTEM;          *CODING_SYSTEM = \%Encode::Charset::CODING_SYSTEM;
50    
 ### --- Intialization  
   
 my %_CHARS_to_RANGE = (  
         l94     => q/[\x21-\x7E]/,  
         l96     => q/[\x20-\x7F]/,  
         l128    => q/[\x00-\x7F]/,  
         l256    => q/[\x00-\xFF]/,  
         r94     => q/[\xA1-\xFE]/,  
         r96     => q/[\xA0-\xFF]/,  
         r128    => q/[\x80-\xFF]/,  
         r256    => q/[\x80-\xFF]/,  
         b94     => q/[\x21-\x7E\xA1-\xFE]/,  
         b96     => q/[\x20-\x7F\xA0-\xFF]/,  
         b128    => q/[\x00-\xFF]/,  
         b256    => q/[\x00-\xFF]/,  
 );  
   
51  ### --- Perl Encode module common functions  ### --- Perl Encode module common functions
52    
53  sub encode ($$;$) {  sub encode ($$;$) {
# Line 87  sub iso2022_to_internal ($;\%) { Line 70  sub iso2022_to_internal ($;\%) {
70    my ($s, $C) = @_;    my ($s, $C) = @_;
71    $C ||= &new_object;    $C ||= &new_object;
72    my $t = '';    my $t = '';
73    $s =~ s{    $s =~ s{^((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*)}{
     ^((?:(?!\x1B\x25\x2F?[\x30-\x7E]).)*)  
   }{  
74      my $i2 = $1;      my $i2 = $1;
75      $t = _iso2022_to_internal ($i2, $C);      $t = _iso2022_to_internal ($i2, $C);
76      '';      '';
77    }gesx;    }es;
78    my $pad = '';    my $pad = '';
79    use re 'eval';    use re 'eval';
80    $s =~ s{    $s =~ s{
# Line 142  sub iso2022_to_internal ($;\%) { Line 123  sub iso2022_to_internal ($;\%) {
123    $t . $s;    $t . $s;
124  }  }
125    
126    # this is very very trickey.  my perl 5.8.0 does not process
127    # regex with eval except the first time (i think it's a bug
128    # of perl), so we redefine this function whenever being called!
129    # when this unexpected behavior is fixed or someone finds
130    # better way to avoid it, we will rewrite this code.
131    &_iso2022_to_internal (undef);
132  sub _iso2022_to_internal ($;\%) {  sub _iso2022_to_internal ($;\%) {
133      eval q{ sub __iso2022_to_internal ($;\%) { 0 } };
134      eval q{
135    sub __iso2022_to_internal ($;\%) {
136      use re 'eval';
137    my ($s, $C) = @_;    my ($s, $C) = @_;
138    my %_GB_to_GN = (    my %_GB_to_GN = (
139      "\x28"=>'G0',"\x29"=>'G1',"\x2A"=>'G2',"\x2B"=>'G3',      "\x28"=>'G0',"\x29"=>'G1',"\x2A"=>'G2',"\x2B"=>'G3',
140      "\x2C"=>'G0',"\x2D"=>'G1',"\x2E"=>'G2',"\x2F"=>'G3',      "\x2C"=>'G0',"\x2D"=>'G1',"\x2E"=>'G2',"\x2F"=>'G3',
141    );    );
142      my %_CHARS_to_RANGE = (
143            l94     => q/[\x21-\x7E]/,      l96     => q/[\x20-\x7F]/,
144            l128    => q/[\x00-\x7F]/,      l256    => q/[\x00-\xFF]/,
145            r94     => q/[\xA1-\xFE]/,      r96     => q/[\xA0-\xFF]/,
146            r128    => q/[\x80-\xFF]/,      r256    => q/[\x80-\xFF]/,
147            b94     => q/[\x21-\x7E\xA1-\xFE]/,     b96     => q/[\x20-\x7F\xA0-\xFF]/,
148            b128    => q/[\x00-\xFF]/,      b256    => q/[\x00-\xFF]/,
149      );
150        
   use re 'eval';  
151    $s =~ s{    $s =~ s{
152       ((??{ $_CHARS_to_RANGE{'l'.$C->{$C->{GL}}->{chars}}       ((??{ $_CHARS_to_RANGE{'l'.$C->{$C->{GL}}->{chars}}
153           . qq/{$C->{$C->{GL}}->{dimension},$C->{$C->{GL}}->{dimension}}/ }))           . qq/{$C->{$C->{GL}}->{dimension},$C->{$C->{GL}}->{dimension}}/ }))
154      |((??{ $_CHARS_to_RANGE{'r'.$C->{$C->{GR}}->{chars}}      |((??{ $_CHARS_to_RANGE{'r'.$C->{$C->{GR}}->{chars}}
155           . qq/{$C->{$C->{GR}}->{dimension},$C->{$C->{GR}}->{dimension}}/ }))           . qq/{$C->{$C->{GR}}->{dimension},$C->{$C->{GR}}->{dimension}}/  }))
156      |  (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS2} || '(?!)')      |  (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS2} || '(?!)')
157               . ($C->{$C->{ESC_Fe}}->{r_SS2_ESC} ?               . ($C->{$C->{ESC_Fe}}->{r_SS2_ESC} ?
158                   qq/|$C->{$C->{ESC_Fe}}->{r_SS2_ESC}/ : '')                   qq/|$C->{$C->{ESC_Fe}}->{r_SS2_ESC}/ : '')
# Line 164  sub _iso2022_to_internal ($;\%) { Line 162  sub _iso2022_to_internal ($;\%) {
162               qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'')               qq/[$C->{$C->{CL}}->{r_LS0}$C->{$C->{CL}}->{r_LS1}]*/:'')
163          })          })
164        ((??{ $_CHARS_to_RANGE{'b'.$C->{G2}->{chars}}        ((??{ $_CHARS_to_RANGE{'b'.$C->{G2}->{chars}}
165              . qq/{$C->{G2}->{dimension},$C->{G2}->{dimension}}/ }))           . qq/{$C->{G2}->{dimension},$C->{G2}->{dimension}}/ }))
166      |  (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS3} || '(?!)')      |  (??{ q/(?:/ . ($C->{$C->{CR}}->{r_SS3} || '(?!)')
167               . ($C->{$C->{ESC_Fe}}->{r_SS3_ESC} ?               . ($C->{$C->{ESC_Fe}}->{r_SS3_ESC} ?
168                  qq/|$C->{$C->{ESC_Fe}}->{r_SS3_ESC}/ : '')                  qq/|$C->{$C->{ESC_Fe}}->{r_SS3_ESC}/ : '')
# Line 326  sub _iso2022_to_internal ($;\%) { Line 324  sub _iso2022_to_internal ($;\%) {
324        $csi =~ s/$C->{$C->{CL}}->{LS1}//g if $C->{$C->{CL}}->{LS1};        $csi =~ s/$C->{$C->{CL}}->{LS1}//g if $C->{$C->{CL}}->{LS1};
325        "\x9B".$csi;        "\x9B".$csi;
326      }      }
327    }gex;    }gesx;
328    $s;    $s;
329  }  } # __iso2022_to_internal
330    
331      };
332      &__iso2022_to_internal (@_) if defined $_[0];
333    
334    } # _iso2022_to_internal
335    
336  sub internal_to_iso2022 ($\%) {  sub internal_to_iso2022 ($\%) {
337    my ($s, $C) = @_;    my ($s, $C) = @_;
# Line 336  sub internal_to_iso2022 ($\%) { Line 339  sub internal_to_iso2022 ($\%) {
339        
340    my $r = '';    my $r = '';
341    for my $c (split //, $s) {    for my $c (split //, $s) {
342      my $cc = ord $c;      my $cc = ord $c;  Encode::_utf8_off ($c);
343      my $t;      my $t;
344      if ($cc <= 0x1F) {      if ($cc <= 0x1F) {
345        $t = _i2c ($c, $C, type => 'C0', charset => '@');        $t = _i2c ($c, $C, type => 'C0', charset => '@');
# Line 346  sub internal_to_iso2022 ($\%) { Line 349  sub internal_to_iso2022 ($\%) {
349        $t = _i2g ($c, $C, type => 'G94', charset => 'B');        $t = _i2g ($c, $C, type => 'G94', charset => 'B');
350      } elsif ($cc <= 0x9F) {      } elsif ($cc <= 0x9F) {
351        $t = _i2c ($c, $C, type => 'C1', charset_id => '64291991C1',        $t = _i2c ($c, $C, type => 'C1', charset_id => '64291991C1',
352          charset => $C->{private_set}->{XC1}->{'64291991C1'});          charset => $C->{option}->{private_set}->{XC1}->{'64291991C1'});
353      } elsif ($cc <= 0xFF) {      } elsif ($cc <= 0xFF) {
354        $t = _i2g (chr($cc-0x80), $C, type => 'G96', charset => 'A');        $t = _i2g (chr($cc-0x80), $C, type => 'G96', charset => 'A');
355      } elsif ($cc <= 0x24FF) {      } elsif ($cc <= 0x24FF) {
356        my $c = $cc - 0x100;        my $c = $cc - 0x100;
357        my $final = $C->{private_set}->{U96n}->[0];        my $final = $C->{option}->{private_set}->{U96n}->[0];
358        if (length $final) {        if (length $final) {
359          $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,          $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,
360            type => 'G96n', charset => $final);            type => 'G96n', charset => $final);
361        }        }
362      } elsif ($cc <= 0x33FF) {      } elsif ($cc <= 0x33FF) {
363        my $c = $cc - 0x2500;        my $c = $cc - 0x2500;
364        my $final = $C->{private_set}->{U96n}->[1];        my $final = $C->{option}->{private_set}->{U96n}->[1];
365        if (length $final) {        if (length $final) {
366          $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,          $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,
367            type => 'G96n', charset => $final);            type => 'G96n', charset => $final);
368        }        }
369      } elsif (0xE000 <= $cc && $cc <= 0xFFFF) {      } elsif (0xE000 <= $cc && $cc <= 0xFFFF) {
370        my $c = $cc - 0xE000;        my $c = $cc - 0xE000;
371        my $final = $C->{private_set}->{U96n}->[2];        my $final = $C->{option}->{private_set}->{U96n}->[2];
372        if (length $final) {        if (length $final) {
373          $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,          $t = _i2g (chr(($c / 96)+0x20).chr(($c % 96)+0x20), $C,
374            type => 'G96n', charset => $final);            type => 'G96n', charset => $final);
# Line 431  sub internal_to_iso2022 ($\%) { Line 434  sub internal_to_iso2022 ($\%) {
434      } elsif (0x70400000 <= $cc && $cc <= 0x7040FFED) {      } elsif (0x70400000 <= $cc && $cc <= 0x7040FFED) {
435        my $c = $cc - 0x70400000;        my $c = $cc - 0x70400000;
436        $t = _i2g (chr(($c % 94)+0x21), $C, charset_id => 'P'.int ($c / 94),        $t = _i2g (chr(($c % 94)+0x21), $C, charset_id => 'P'.int ($c / 94),
437            type => 'G94', charset => $C->{private_set}->{G94}->[ $c / 94 ]);            type => 'G94', charset => $C->{option}->{private_set}->{G94}->[ $c / 94 ]);
438      } elsif (0x70410000 <= $cc && $cc <= 0x7041FFBF) {      } elsif (0x70410000 <= $cc && $cc <= 0x7041FFBF) {
439        my $c = $cc - 0x70410000;        my $c = $cc - 0x70410000;
440        $t = _i2g (chr(($c % 96)+0x20), $C, charset_id => 'P'.int ($c / 96),        $t = _i2g (chr(($c % 96)+0x20), $C, charset_id => 'P'.int ($c / 96),
441            type => 'G96', charset => $C->{private_set}->{G96}->[ $c / 96 ]);            type => 'G96', charset => $C->{option}->{private_set}->{G96}->[ $c / 96 ]);
442      } elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) {      } elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) {
443        my $c = $cc % 0x10000;        my $c = $cc % 0x10000;
444        $t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C,        $t = _i2g (chr((($c % 8836) / 94)+0x21).chr(($c % 94)+0x21), $C,
445            type => 'G94n',            type => 'G94n',
446            charset_id => 'P'.int(($cc / 0x10000) - 0x7042).'_'.int($c / 8836),            charset_id => 'P'.int(($cc / 0x10000) - 0x7042).'_'.int($c / 8836),
447            charset => $C->{private_set}->{G94n}->[ ($cc / 0x10000) - 0x7042 ]            charset => $C->{option}->{private_set}->{G94n}
448                         ->[ $c / 8836 ]);                         ->[ ($cc / 0x10000) - 0x7042 ]->[ $c / 8836 ]);
449      }      }
450      if (defined $t) {      if (defined $t) {
451        $t = _i2o ($t, $C, cs_F => "\x40")        $t = _i2o ($t, $C, cs_F => "\x40")
# Line 634  sub _i2o ($\%%) { Line 637  sub _i2o ($\%%) {
637    if ($CS ne $C->{coding_system}) {    if ($CS ne $C->{coding_system}) {
638      my $e = '';      my $e = '';
639      $e .= "\x1B\x25";      $e .= "\x1B\x25";
640      $e .= $O{cs_F} || $C->{private_set}->{coding_system}->{ $O{cs_id} }      $e .= $O{cs_F} || $C->{option}->{private_set}->{coding_system}->{ $O{cs_id} }
641            || return undef;            || return undef;
642      if ($C->{coding_system} eq $CODING_SYSTEM{"\x2F\x40"}      if ($C->{coding_system} eq $CODING_SYSTEM{"\x2F\x40"}
643       || $C->{coding_system} eq $CODING_SYSTEM{"\x2F\x43"}       || $C->{coding_system} eq $CODING_SYSTEM{"\x2F\x43"}

Legend:
Removed from v.1.7  
changed lines
  Added in v.1.9

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24