lib/Encode/SJIS.pm


=head1 NAME

Encode::SJIS --- Shift JIS coding systems encoder and decoder

=head1 ENCODINGS

This module defines only two basic version of shift JIS.
Other variants are defined in Encode::SJIS::* modules.

=over 4

=cut

package Encode::SJIS;
use 5.7.3;
use strict;
our $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
require Encode::Charset;
use base qw(Encode::Encoding);

### --- Perl Encode module common functions

sub encode ($$;$) {
  my ($obj, $str, $chk) = @_;
  $_[1] = '' if $chk;
  if (!defined $obj->{_encode_mapping} || $obj->{_encode_mapping}) {
    require Encode::Table;
    $str = Encode::Table::convert ($str, $obj->__encode_map,
      -autoload => defined $obj->{_encode_mapping_autoload} ?
                   $obj->{_encode_mapping_autoload} : 1);
  }
  $str = &internal_to_sjis ($str, $obj->__2022_encode);
  $str;
}

sub decode ($$;$) {
  my ($obj, $str, $chk) = @_;
  $_[1] = '' if $chk;
  $str = &sjis_to_internal ($str, $obj->__2022_decode);
  if (!defined $obj->{_decode_mapping} || $obj->{_decode_mapping}) {
    require Encode::Table;
    $str = Encode::Table::convert ($str, $obj->__decode_map,
      -autoload => defined $obj->{_decode_mapping_autoload} ?
                   $obj->{_decode_mapping_autoload} : 1);
  }
  $str;
}

### --- Encode::SJIS unique functions
*new_object = \&Encode::Charset::new_object_sjis;

sub sjis_to_internal ($$) {
  my ($s, $C) = @_;
  $C ||= &new_object;
  $s =~ s{
     ([\x00-\x7F\xA1-\xDF])
  #   ([\xA1-\xDF])
    |([\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])
    |\x1B([\x40-\x5F])
    |([\x80-\xFF])      ## Broken or supplemental 1-byte character
  }{
    my ($c7, $c2, $c1, $c8) = ($1, $2, $3, $4);
    if (defined $c7) {
      if ($c7 =~ /([\x21-\x7E])/) {
        chr ($C->{ $C->{GL} }->{ucs} + ord ($1) - 0x21);
      } elsif ($c7 =~ /([\x00-\x1F])/) {
        chr ($C->{ $C->{CL} }->{ucs} + ord ($1));
      } elsif ($C->{GR} && $c7 =~ /([\xA1-\xDF])/) {
        chr ($C->{ $C->{GR} }->{ucs} + ord ($1) - 0xA1);
      } else {  ## 0x20, 0x7F
        $C->{Gsmap}->{ $c7 } || $c7;
      }
    } elsif ($c2) {
      if ($c2 =~ /([\x81-\xEF])(.)/) {
        my ($f, $s) = (ord $1, ord $2);
        $f -= $f < 0xA0 ? 0x81 : 0xC1;  $s -= 0x40 + ($s > 0x7F);
        chr ($C->{G1}->{ucs} + $f * 188 + $s);
      } else {  ## [\xF0-\xFC].
        my ($f, $s) = unpack ('CC', $c2);
        if ($C->{G3}->{Csjis_kuE}) {
          $f = $s > 0x9E ? $C->{G3}->{Csjis_kuE}->{ $f }:
                           $C->{G3}->{Csjis_kuO}->{ $f };
          $s -= ($s > 0x9E ? 0x9F : $s > 0x7F ? 0x41 : 0x40);
          chr ($C->{G3}->{ucs} + $f * 94 + $s);
        } else {
          $f -= 0xF0; $s -= 0x40 + ($s > 0x7F);
          chr ($C->{G3}->{ucs} + $f * 188 + $s);
        }
      }
    } elsif ($c1) {     ## ESC Fe
      chr ($C->{ $C->{ESC_Fe} }->{ucs} + ord ($c1) - 0x40);
    } else {    # $C8
      $C->{Gsmap}->{ $c8 } || $c8;
    }
  }gex;
  $s;
}

sub internal_to_sjis ($\%) {
  use integer;
  my ($s, $C) = @_;
  $C ||= &new_object;
  
  my $r = '';
  for my $c (split //, $s) {
    my $cc = ord $c;
    my $t;
    if ($cc <= 0x1F) {
      $t = $c if $C->{ $C->{CL} } eq $Encode::Charset::CHARSET{C0}->{"\x40"};
    } elsif ($cc == 0x20 || $cc == 0x7F) {
      $t = $c;
    } elsif ($cc < 0x7F) {
      $t = $c if $C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{"\x42"};
    } elsif ($C->{option}->{C1invoke_to_right} && $cc == 0x80) {
      $t = $c if $C->{ $C->{CR} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'};
    } elsif ($cc <= 0x9F) {
      $t = "\x1B".chr ($cc - 0x40)
        if $C->{ $C->{ESC_Fe} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'};
    
    } elsif (0xE9F6C0 <= $cc && $cc <= 0xF06F80) {
      my $c = $cc - 0xE9F6C0;  my $F = chr (($c / 8836)+0x30);
      if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{ $F }) {
        my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
        $t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1),
               $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
      } elsif ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F }) {
        my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
        if ($C->{G3}->{Csjis_first}) {
          $t = pack ('CC', $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 },
                     $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
        } else {
          $t = pack ('CC', ($c / 188) + 0xF0,
                     $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E))
               if ($c / 188) + 0xF0 < 0xFD;
        }
      }
    } elsif (0xF49D7C <= $cc && $cc <= 0xF4BFFF) {
      my $c = $cc - 0xF49D7C;
      if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{'B@'}) {
        my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
        $t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1),
               $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
      }
    
    } elsif (0xE90940 <= $cc && $cc <= 0xE92641) {
      my $c = $cc - 0xE90940;  my $F = chr (($c / 94)+0x30);
      if ($C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{ $F }) {
        $t = chr (($c % 94) + 0x21);
      } elsif ($C->{ $C->{GR} } eq $Encode::Charset::CHARSET{G94}->{ $F }) {
        $t = chr (($c % 94) + 0xA1) if ($c % 94) < 0x3F;
      }
    } elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) {
      my $c = $cc % 0x10000;
      my $F0=$C->{option}->{private_set}->{G94n}->[($cc/0x10000)-0x7042]->[$c/8836];
      my $F1 = 'P'.(($cc / 0x10000) - 0x7042).'_'.($c / 8836);
      if ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F0 }
       || $C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F1 }) {
        my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
        if ($C->{G3}->{Csjis_first}) {
          $t = pack ('CC', $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 },
                     $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
        } else {
          $t = pack ('CC', ($c / 188) + 0xF0,
                     $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E))
               if ($c / 188) + 0xF0 < 0xFD;
        }
      }
    }
    
    if (defined $t) {
      $r .= $t;
    } elsif ($C->{GsmapR}->{ $c }) {
      $r .= $C->{GsmapR}->{ $c };
    } else {
      $r .= $C->{option}->{undef_char_sjis} || "\x3F";
    }
  }
  $r;
}

sub __clone ($) {
  my $self = shift;
  bless {%$self}, ref $self;
};

__PACKAGE__->Define (qw!shift_jisx0213 japanese-shift-jisx0213
shift-jisx0213 x-shift_jisx0213 shift-jis-3 shift-jis-2000
sjis shift-jis x-sjis x_sjis x-sjis-jp shiftjis x-shiftjis
x-shift-jis shift.jis!);

=item sjis

"Shift JIS" coding system.  (Alias: shift-jis, shiftjis,
shift.jis, x-shiftjis, x-shift-jis, x-sjis, x_sjis,
x-sjis-jp)

Since this name is ambiguous (it can now refer all or any
of shift JIS coding system family), this name should not
be used to address specific coding system.  In this module,
this is considered as an alias name to the shift JIS with
latest official definition, currently of JIS X 0213:2000
Appendix 1 (with implemention level 4).

Note that the name "Shift_JIS" is not associated with
this name, because IANA registry [IANAREG] assignes
it to a shift JIS defined by JIS X 0208:1997.

=item shift_jisx0213

Shift_JISX0213 coded representation, defined by
JIS X 0213:2000 Appendix 1 (implemention level 4).
(Alias: shift-jisx0213, x-shift_jisx0213, japanese-shift-jisx0213 (emacsen),
shift-jis-3 (Yudit), shift-jis-2000)

=cut

sub __2022__common ($) {
  my $C = Encode::SJIS->new_object;
  $C->{G0} = $Encode::Charset::CHARSET{G94}->{J};       ## JIS X 0201:1997 Latin
  $C->{G1} = $Encode::Charset::CHARSET{G94n}->{"\x4F"}; ## JIS X 0213:2000 plane 1
  $C->{G2} = $Encode::Charset::CHARSET{G94}->{I};       ## JIS X 0201:1997 Katakana
  $C->{G3} = $Encode::Charset::CHARSET{G94n}->{"\x50"}; ## JIS X 0213:2000 plane 2
  $C;
}
sub __2022_encode ($) {
  my $C = shift->__2022__common;
  $C;
}
sub __2022_decode ($) {
  my $C = shift->__2022__common;
  $C;
}
sub __encode_map ($) {
  [qw/ucs_to_jisx0201_latin ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
}
sub __decode_map ($) {
  [qw/jisx0201_latin_to_ucs jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
}

package Encode::SJIS::X0213ASCII;
use vars qw/@ISA/;
push @ISA, 'Encode::SJIS';
__PACKAGE__->Define (qw/shift_jisx0213-ascii shift-jis-2000-ascii
sjis-ascii shift-jis-ascii/);

=item sjis-ascii

Same as sjis but ASCII (ISO/IEC 646 IRV) instead of
JIS X 0201 Roman (or Latin) set.  (Alias: shift-jis-ascii)

In spite of the history of shift JIS, ASCII is sometimes
used instead of JIS X 0201 Roman set, because of compatibility
with ASCII world.

Note that this name is now an alias of shift_jisx0213-ascii,
as sjis is of shift_jisx0213.

=item shift_jisx0213-ascii

Same as Shift_JISX0213 but ASCII (ISO/IEC 646 IRV)
instead of JIS X 0201:1997 Latin character set.
(Alias: shift-jis-2000-ascii)

Note that this coding system does NOT comform to
JIS X 0213:2000 Appendix 1.

=cut

sub __2022__common ($) {
  my $C = shift->SUPER::__2022__common;
  $C->{G0} = $Encode::Charset::CHARSET{G94}->{B};       ## ASCII
  $C;
}
sub __encode_map ($) {
  [qw/ucs_to_ascii ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
}
sub __decode_map ($) {
  [qw/jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
}

1;
__END__

=back

=head1 SEE ALSO

JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji
set for information interchange", Japan Industrial Standards
Committee (JISC) <http://www.jisc.go.jp/>, 1997.

JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji
sets for information interchange", Japan Industrial Standards
Committee (JISC) <http://www.jisc.go.jp/>, 2000.

Encode, perlunicode

[IANAREG] "CHARACTER SETS", IANA <http://www.iana.org/>,
<http://www.iana.org/assignments/character-sets>.
The charset registry for IETF <http://www.ietf.org/> standards.
(Note that in this registry two shift JISes are registered,
"Shift_JIS" and "Windows-31j".  Former is JIS X 0208:1997's
definition and later is the Windows standard character set.)

=head1 LICENSE

Copyright 2002 Nanashi-san

This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.

=cut

# $Date: 2002/10/12 07:27:01 $
### SJIS.pm ends here
1	wakaba	1.1
2			=head1 NAME
3
4			Encode::SJIS --- Shift JIS coding systems encoder and decoder
5
6			=head1 ENCODINGS
7
8			This module defines only two basic version of shift JIS.
9			Other variants are defined in Encode::SJIS::* modules.
10
11			=over 4
12
13			=cut
14
15			package Encode::SJIS;
16			use 5.7.3;
17			use strict;
18	wakaba	1.2	our $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
19	wakaba	1.1	require Encode::Charset;
20	wakaba	1.2	use base qw(Encode::Encoding);
21	wakaba	1.1
22			### --- Perl Encode module common functions
23
24			sub encode ($$;$) {
25			my ($obj, $str, $chk) = @_;
26			$_[1] = '' if $chk;
27			if (!defined $obj->{_encode_mapping} \|\| $obj->{_encode_mapping}) {
28			require Encode::Table;
29			$str = Encode::Table::convert ($str, $obj->__encode_map,
30			-autoload => defined $obj->{_encode_mapping_autoload} ?
31			$obj->{_encode_mapping_autoload} : 1);
32			}
33			$str = &internal_to_sjis ($str, $obj->__2022_encode);
34			$str;
35			}
36
37			sub decode ($$;$) {
38			my ($obj, $str, $chk) = @_;
39			$_[1] = '' if $chk;
40			$str = &sjis_to_internal ($str, $obj->__2022_decode);
41			if (!defined $obj->{_decode_mapping} \|\| $obj->{_decode_mapping}) {
42			require Encode::Table;
43			$str = Encode::Table::convert ($str, $obj->__decode_map,
44			-autoload => defined $obj->{_decode_mapping_autoload} ?
45			$obj->{_decode_mapping_autoload} : 1);
46			}
47			$str;
48			}
49
50			### --- Encode::SJIS unique functions
51			*new_object = \&Encode::Charset::new_object_sjis;
52
53			sub sjis_to_internal ($$) {
54			my ($s, $C) = @_;
55			$C \|\|= &new_object;
56			$s =~ s{
57			([\x00-\x7F\xA1-\xDF])
58			# ([\xA1-\xDF])
59			\|([\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])
60			\|\x1B([\x40-\x5F])
61			\|([\x80-\xFF]) ## Broken or supplemental 1-byte character
62			}{
63			my ($c7, $c2, $c1, $c8) = ($1, $2, $3, $4);
64			if (defined $c7) {
65			if ($c7 =~ /([\x21-\x7E])/) {
66			chr ($C->{ $C->{GL} }->{ucs} + ord ($1) - 0x21);
67			} elsif ($c7 =~ /([\x00-\x1F])/) {
68			chr ($C->{ $C->{CL} }->{ucs} + ord ($1));
69			} elsif ($C->{GR} && $c7 =~ /([\xA1-\xDF])/) {
70			chr ($C->{ $C->{GR} }->{ucs} + ord ($1) - 0xA1);
71			} else { ## 0x20, 0x7F
72			$C->{Gsmap}->{ $c7 } \|\| $c7;
73			}
74			} elsif ($c2) {
75			if ($c2 =~ /([\x81-\xEF])(.)/) {
76			my ($f, $s) = (ord $1, ord $2);
77			$f -= $f < 0xA0 ? 0x81 : 0xC1; $s -= 0x40 + ($s > 0x7F);
78			chr ($C->{G1}->{ucs} + $f * 188 + $s);
79			} else { ## [\xF0-\xFC].
80			my ($f, $s) = unpack ('CC', $c2);
81			if ($C->{G3}->{Csjis_kuE}) {
82			$f = $s > 0x9E ? $C->{G3}->{Csjis_kuE}->{ $f }:
83			$C->{G3}->{Csjis_kuO}->{ $f };
84			$s -= ($s > 0x9E ? 0x9F : $s > 0x7F ? 0x41 : 0x40);
85			chr ($C->{G3}->{ucs} + $f * 94 + $s);
86			} else {
87			$f -= 0xF0; $s -= 0x40 + ($s > 0x7F);
88			chr ($C->{G3}->{ucs} + $f * 188 + $s);
89			}
90			}
91			} elsif ($c1) { ## ESC Fe
92			chr ($C->{ $C->{ESC_Fe} }->{ucs} + ord ($c1) - 0x40);
93			} else { # $C8
94			$C->{Gsmap}->{ $c8 } \|\| $c8;
95			}
96			}gex;
97			$s;
98			}
99
100	wakaba	1.2	sub internal_to_sjis ($\%) {
101			use integer;
102			my ($s, $C) = @_;
103			$C \|\|= &new_object;
104
105			my $r = '';
106			for my $c (split //, $s) {
107			my $cc = ord $c;
108			my $t;
109			if ($cc <= 0x1F) {
110			$t = $c if $C->{ $C->{CL} } eq $Encode::Charset::CHARSET{C0}->{"\x40"};
111			} elsif ($cc == 0x20 \|\| $cc == 0x7F) {
112			$t = $c;
113			} elsif ($cc < 0x7F) {
114			$t = $c if $C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{"\x42"};
115			} elsif ($C->{option}->{C1invoke_to_right} && $cc == 0x80) {
116			$t = $c if $C->{ $C->{CR} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'};
117			} elsif ($cc <= 0x9F) {
118			$t = "\x1B".chr ($cc - 0x40)
119			if $C->{ $C->{ESC_Fe} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'};
120
121			} elsif (0xE9F6C0 <= $cc && $cc <= 0xF06F80) {
122			my $c = $cc - 0xE9F6C0; my $F = chr (($c / 8836)+0x30);
123			if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{ $F }) {
124			my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
125			$t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1),
126			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
127			} elsif ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F }) {
128			my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
129			if ($C->{G3}->{Csjis_first}) {
130			$t = pack ('CC', $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 },
131			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
132			} else {
133			$t = pack ('CC', ($c / 188) + 0xF0,
134			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E))
135			if ($c / 188) + 0xF0 < 0xFD;
136			}
137			}
138			} elsif (0xF49D7C <= $cc && $cc <= 0xF4BFFF) {
139			my $c = $cc - 0xF49D7C;
140			if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{'B@'}) {
141			my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
142			$t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1),
143			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
144			}
145
146			} elsif (0xE90940 <= $cc && $cc <= 0xE92641) {
147			my $c = $cc - 0xE90940; my $F = chr (($c / 94)+0x30);
148			if ($C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{ $F }) {
149			$t = chr (($c % 94) + 0x21);
150			} elsif ($C->{ $C->{GR} } eq $Encode::Charset::CHARSET{G94}->{ $F }) {
151			$t = chr (($c % 94) + 0xA1) if ($c % 94) < 0x3F;
152			}
153			} elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) {
154			my $c = $cc % 0x10000;
155			my $F0=$C->{option}->{private_set}->{G94n}->[($cc/0x10000)-0x7042]->[$c/8836];
156			my $F1 = 'P'.(($cc / 0x10000) - 0x7042).'_'.($c / 8836);
157			if ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F0 }
158			\|\| $C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F1 }) {
159			my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
160			if ($C->{G3}->{Csjis_first}) {
161			$t = pack ('CC', $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 },
162			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
163			} else {
164			$t = pack ('CC', ($c / 188) + 0xF0,
165			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E))
166			if ($c / 188) + 0xF0 < 0xFD;
167			}
168			}
169			}
170
171			if (defined $t) {
172			$r .= $t;
173			} elsif ($C->{GsmapR}->{ $c }) {
174			$r .= $C->{GsmapR}->{ $c };
175			} else {
176			$r .= $C->{option}->{undef_char_sjis} \|\| "\x3F";
177			}
178			}
179			$r;
180			}
181
182	wakaba	1.1	sub __clone ($) {
183			my $self = shift;
184			bless {%$self}, ref $self;
185			};
186
187			__PACKAGE__->Define (qw!shift_jisx0213 japanese-shift-jisx0213
188	wakaba	1.2	shift-jisx0213 x-shift_jisx0213 shift-jis-3 shift-jis-2000
189	wakaba	1.1	sjis shift-jis x-sjis x_sjis x-sjis-jp shiftjis x-shiftjis
190			x-shift-jis shift.jis!);
191
192			=item sjis
193
194			"Shift JIS" coding system. (Alias: shift-jis, shiftjis,
195			shift.jis, x-shiftjis, x-shift-jis, x-sjis, x_sjis,
196			x-sjis-jp)
197
198			Since this name is ambiguous (it can now refer all or any
199			of shift JIS coding system family), this name should not
200			be used to address specific coding system. In this module,
201			this is considered as an alias name to the shift JIS with
202			latest official definition, currently of JIS X 0213:2000
203			Appendix 1 (with implemention level 4).
204
205			Note that the name "Shift_JIS" is not associated with
206			this name, because IANA registry [IANAREG] assignes
207			it to a shift JIS defined by JIS X 0208:1997.
208
209			=item shift_jisx0213
210
211			Shift_JISX0213 coded representation, defined by
212			JIS X 0213:2000 Appendix 1 (implemention level 4).
213			(Alias: shift-jisx0213, x-shift_jisx0213, japanese-shift-jisx0213 (emacsen),
214	wakaba	1.2	shift-jis-3 (Yudit), shift-jis-2000)
215	wakaba	1.1
216			=cut
217
218			sub __2022__common ($) {
219			my $C = Encode::SJIS->new_object;
220			$C->{G0} = $Encode::Charset::CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin
221			$C->{G1} = $Encode::Charset::CHARSET{G94n}->{"\x4F"}; ## JIS X 0213:2000 plane 1
222			$C->{G2} = $Encode::Charset::CHARSET{G94}->{I}; ## JIS X 0201:1997 Katakana
223			$C->{G3} = $Encode::Charset::CHARSET{G94n}->{"\x50"}; ## JIS X 0213:2000 plane 2
224			$C;
225			}
226			sub __2022_encode ($) {
227			my $C = shift->__2022__common;
228			$C;
229			}
230			sub __2022_decode ($) {
231			my $C = shift->__2022__common;
232			$C;
233			}
234			sub __encode_map ($) {
235			[qw/ucs_to_jisx0201_latin ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
236			}
237			sub __decode_map ($) {
238			[qw/jisx0201_latin_to_ucs jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
239			}
240
241			package Encode::SJIS::X0213ASCII;
242			use vars qw/@ISA/;
243			push @ISA, 'Encode::SJIS';
244	wakaba	1.2	__PACKAGE__->Define (qw/shift_jisx0213-ascii shift-jis-2000-ascii
245			sjis-ascii shift-jis-ascii/);
246	wakaba	1.1
247			=item sjis-ascii
248
249			Same as sjis but ASCII (ISO/IEC 646 IRV) instead of
250			JIS X 0201 Roman (or Latin) set. (Alias: shift-jis-ascii)
251
252			In spite of the history of shift JIS, ASCII is sometimes
253			used instead of JIS X 0201 Roman set, because of compatibility
254			with ASCII world.
255
256			Note that this name is now an alias of shift_jisx0213-ascii,
257			as sjis is of shift_jisx0213.
258
259			=item shift_jisx0213-ascii
260
261			Same as Shift_JISX0213 but ASCII (ISO/IEC 646 IRV)
262			instead of JIS X 0201:1997 Latin character set.
263	wakaba	1.2	(Alias: shift-jis-2000-ascii)
264
265			Note that this coding system does NOT comform to
266			JIS X 0213:2000 Appendix 1.
267	wakaba	1.1
268			=cut
269
270			sub __2022__common ($) {
271			my $C = shift->SUPER::__2022__common;
272			$C->{G0} = $Encode::Charset::CHARSET{G94}->{B}; ## ASCII
273			$C;
274			}
275			sub __encode_map ($) {
276			[qw/ucs_to_ascii ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
277			}
278			sub __decode_map ($) {
279			[qw/jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
280			}
281
282			1;
283			__END__
284
285	wakaba	1.2	=back
286
287	wakaba	1.1	=head1 SEE ALSO
288
289			JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji
290			set for information interchange", Japan Industrial Standards
291			Committee (JISC) <http://www.jisc.go.jp/>, 1997.
292
293			JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji
294			sets for information interchange", Japan Industrial Standards
295			Committee (JISC) <http://www.jisc.go.jp/>, 2000.
296
297			Encode, perlunicode
298
299			[IANAREG] "CHARACTER SETS", IANA <http://www.iana.org/>,
300			<http://www.iana.org/assignments/character-sets>.
301			The charset registry for IETF <http://www.ietf.org/> standards.
302			(Note that in this registry two shift JISes are registered,
303			"Shift_JIS" and "Windows-31j". Former is JIS X 0208:1997's
304			definition and later is the Windows standard character set.)
305
306			=head1 LICENSE
307
308			Copyright 2002 Nanashi-san
309
310			This library is free software; you can redistribute it
311			and/or modify it under the same terms as Perl itself.
312
313			=cut
314
315	wakaba	1.2	# $Date: 2002/10/12 07:27:01 $
316	wakaba	1.1	### SJIS.pm ends here