/[suikacvs]/perl/lib/Encode/SJIS.pm
Suika

Diff of /perl/lib/Encode/SJIS.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.1 by wakaba, Sat Oct 12 07:27:01 2002 UTC revision 1.4 by wakaba, Thu Dec 12 08:17:16 2002 UTC
# Line 17  use 5.7.3; Line 17  use 5.7.3;
17  use strict;  use strict;
18  our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};  our $VERSION=do{my @r=(q$Revision$=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
19  require Encode::Charset;  require Encode::Charset;
20    use base qw(Encode::Encoding);
21    
22  ### --- Perl Encode module common functions  ### --- Perl Encode module common functions
23    
# Line 96  sub sjis_to_internal ($$) { Line 97  sub sjis_to_internal ($$) {
97    $s;    $s;
98  }  }
99    
100    sub internal_to_sjis ($\%) {
101      use integer;
102      my ($s, $C) = @_;
103      $C ||= &new_object;
104      
105      my $r = '';
106      for my $c (split //, $s) {
107        my $cc = ord $c;
108        my $t;
109        if ($cc <= 0x1F) {
110          $t = $c if $C->{ $C->{CL} } eq $Encode::Charset::CHARSET{C0}->{"\x40"};
111        } elsif ($cc == 0x20 || $cc == 0x7F) {
112          Encode::_utf8_off ($c);
113          $t = $c;
114        } elsif ($cc < 0x7F) {
115          Encode::_utf8_off ($c);
116          $t = $c if $C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{"\x42"};
117        } elsif ($C->{option}->{C1invoke_to_right} && $cc == 0x80) {
118          $t = "\x80"
119            if $C->{ $C->{CR} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'};
120        } elsif ($cc <= 0x9F) {
121          $t = "\x1B".pack 'C', ($cc - 0x40)
122            if $C->{ $C->{ESC_Fe} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'};
123        
124        } elsif (0xE9F6C0 <= $cc && $cc <= 0xF06F80) {
125          my $c = $cc - 0xE9F6C0;  my $F = chr (($c / 8836)+0x30);
126          if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{ $F }) {
127            my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
128            $t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1),
129                   $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
130          } elsif ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F }) {
131            my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
132            if ($C->{G3}->{Csjis_first}) {
133              $t = pack ('CC', $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 },
134                         $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
135            } else {
136              $t = pack ('CC', ($c / 188) + 0xF0,
137                         $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E))
138                   if ($c / 188) + 0xF0 < 0xFD;
139            }
140          }
141        } elsif (0xF49D7C <= $cc && $cc <= 0xF4BFFF) {
142          my $c = $cc - 0xF49D7C;
143          if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{'B@'}) {
144            my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
145            $t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1),
146                   $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
147          }
148        
149        } elsif (0xE90940 <= $cc && $cc <= 0xE92641) {
150          my $c = $cc - 0xE90940;  my $F = chr (($c / 94)+0x30);
151          if ($C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{ $F }) {
152            $t = pack 'C', (($c % 94) + 0x21);
153          } elsif ($C->{ $C->{GR} } eq $Encode::Charset::CHARSET{G94}->{ $F }) {
154            $t = pack 'C', (($c % 94) + 0xA1) if ($c % 94) < 0x3F;
155          }
156        } elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) {
157          my $c = $cc % 0x10000;
158          my $F0=$C->{option}->{private_set}->{G94n}->[($cc/0x10000)-0x7042]->[$c/8836];
159          my $F1 = 'P'.(($cc / 0x10000) - 0x7042).'_'.($c / 8836);
160          if ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F0 }
161           || $C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F1 }) {
162            my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
163            if ($C->{G3}->{Csjis_first}) {
164              $t = pack ('CC', $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 },
165                         $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
166            } else {
167              $t = pack ('CC', ($c / 188) + 0xF0,
168                         $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E))
169                   if ($c / 188) + 0xF0 < 0xFD;
170            }
171          }
172        }
173        
174        if (defined $t) {
175          $r .= $t;
176        } elsif ($C->{GsmapR}->{ $c }) {
177          $r .= $C->{GsmapR}->{ $c };
178        } else {
179          $r .= $C->{option}->{undef_char_sjis} || "\x3F";
180        }
181      }
182      $r;
183    }
184    
185  sub __clone ($) {  sub __clone ($) {
186    my $self = shift;    my $self = shift;
187    bless {%$self}, ref $self;    bless {%$self}, ref $self;
188  };  };
189    
 use base qw(Encode::Encoding);  
190  __PACKAGE__->Define (qw!shift_jisx0213 japanese-shift-jisx0213  __PACKAGE__->Define (qw!shift_jisx0213 japanese-shift-jisx0213
191  shift-jisx0213 x-shift_jisx0213 shift-jis-3  shift-jisx0213 x-shift_jisx0213 shift-jis-3 shift-jis-2000 sjisx0213
192  sjis shift-jis x-sjis x_sjis x-sjis-jp shiftjis x-shiftjis  sjis s-jis shift-jis x-sjis x_sjis x-sjis-jp shiftjis x-shiftjis
193  x-shift-jis shift.jis!);  x-shift-jis shift.jis!);
194    
195  =item sjis  =item sjis
196    
197  "Shift JIS" coding system.  (Alias: shift-jis, shiftjis,  "Shift JIS" coding system.  (Alias: shift-jis, shiftjis,
198  shift.jis, x-shiftjis, x-shift-jis, x-sjis, x_sjis,  shift.jis, x-shiftjis, x-shift-jis, s-jis, x-sjis, x_sjis,
199  x-sjis-jp)  x-sjis-jp)
200    
201  Since this name is ambiguous (it can now refer all or any  Since this name is ambiguous (it can now refer all or any
# Line 129  it to a shift JIS defined by JIS X 0208: Line 214  it to a shift JIS defined by JIS X 0208:
214  Shift_JISX0213 coded representation, defined by  Shift_JISX0213 coded representation, defined by
215  JIS X 0213:2000 Appendix 1 (implemention level 4).  JIS X 0213:2000 Appendix 1 (implemention level 4).
216  (Alias: shift-jisx0213, x-shift_jisx0213, japanese-shift-jisx0213 (emacsen),  (Alias: shift-jisx0213, x-shift_jisx0213, japanese-shift-jisx0213 (emacsen),
217  shift-jis-3 (Yudit))  shift-jis-3 (Yudit), shift-jis-2000, sjisx0213)
218    
219  =cut  =cut
220    
# Line 159  sub __decode_map ($) { Line 244  sub __decode_map ($) {
244  package Encode::SJIS::X0213ASCII;  package Encode::SJIS::X0213ASCII;
245  use vars qw/@ISA/;  use vars qw/@ISA/;
246  push @ISA, 'Encode::SJIS';  push @ISA, 'Encode::SJIS';
247  __PACKAGE__->Define (qw/shift_jisx0213-ascii sjis-ascii shift-jis-ascii/);  __PACKAGE__->Define (qw/shift_jisx0213-ascii shift-jis-2000-ascii
248    sjis-ascii shift-jis-ascii/);
249    
250  =item sjis-ascii  =item sjis-ascii
251    
# Line 177  as sjis is of shift_jisx0213. Line 263  as sjis is of shift_jisx0213.
263    
264  Same as Shift_JISX0213 but ASCII (ISO/IEC 646 IRV)  Same as Shift_JISX0213 but ASCII (ISO/IEC 646 IRV)
265  instead of JIS X 0201:1997 Latin character set.  instead of JIS X 0201:1997 Latin character set.
266    (Alias: shift-jis-2000-ascii)
267    
268    Note that this coding system does NOT comform to
269    JIS X 0213:2000 Appendix 1.
270    
271  =cut  =cut
272    
# Line 195  sub __decode_map ($) { Line 285  sub __decode_map ($) {
285  1;  1;
286  __END__  __END__
287    
288    =back
289    
290  =head1 SEE ALSO  =head1 SEE ALSO
291    
292  JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji  JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji

Legend:
Removed from v.1.1  
changed lines
  Added in v.1.4

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24