lib/Encode/SJIS.pm


=head1 NAME

Encode::SJIS --- Shift JIS coding systems encoder and decoder

=head1 ENCODINGS

This module defines only two basic version of shift JIS.
Other variants are defined in Encode::SJIS::* modules.

=over 4

=cut

package Encode::SJIS;
use 5.7.3;
use strict;
our $VERSION=do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
require Encode::Charset;
use base qw(Encode::Encoding);

### --- Perl Encode module common functions

sub encode ($$;$) {
  my ($obj, $str, $chk) = @_;
  $_[1] = '' if $chk;
  if (!defined $obj->{_encode_mapping} || $obj->{_encode_mapping}) {
    require Encode::Table;
    $str = Encode::Table::convert ($str, $obj->__encode_map,
      -autoload => defined $obj->{_encode_mapping_autoload} ?
                   $obj->{_encode_mapping_autoload} : 1);
  }
  $str = &internal_to_sjis ($str, $obj->__2022_encode);
  $str;
}

sub decode ($$;$) {
  my ($obj, $str, $chk) = @_;
  $_[1] = '' if $chk;
  $str = &sjis_to_internal ($str, $obj->__2022_decode);
  if (!defined $obj->{_decode_mapping} || $obj->{_decode_mapping}) {
    require Encode::Table;
    $str = Encode::Table::convert ($str, $obj->__decode_map,
      -autoload => defined $obj->{_decode_mapping_autoload} ?
                   $obj->{_decode_mapping_autoload} : 1);
  }
  $str;
}

### --- Encode::SJIS unique functions
*new_object = \&Encode::Charset::new_object_sjis;

sub sjis_to_internal ($$) {
  my ($s, $C) = @_;
  $C ||= &new_object;
  $s =~ s{
     ([\x00-\x7F\xA1-\xDF])
  #   ([\xA1-\xDF])
    |([\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])
    |\x1B([\x40-\x5F])
    |([\x80-\xFF])      ## Broken or supplemental 1-byte character
  }{
    my ($c7, $c2, $c1, $c8) = ($1, $2, $3, $4);
    if (defined $c7) {
      if ($c7 =~ /([\x21-\x7E])/) {
        chr ($C->{ $C->{GL} }->{ucs} + ord ($1) - 0x21);
      } elsif ($c7 =~ /([\x00-\x1F])/) {
        chr ($C->{ $C->{CL} }->{ucs} + ord ($1));
      } elsif ($C->{GR} && $c7 =~ /([\xA1-\xDF])/) {
        chr ($C->{ $C->{GR} }->{ucs} + ord ($1) - 0xA1);
      } else {  ## 0x20, 0x7F
        $C->{Gsmap}->{ $c7 } || $c7;
      }
    } elsif ($c2) {
      if ($c2 =~ /([\x81-\xEF])(.)/) {
        my ($f, $s) = (ord $1, ord $2);
        $f -= $f < 0xA0 ? 0x81 : 0xC1;  $s -= 0x40 + ($s > 0x7F);
        chr ($C->{G1}->{ucs} + $f * 188 + $s);
      } else {  ## [\xF0-\xFC].
        my ($f, $s) = unpack ('CC', $c2);
        if ($C->{G3}->{Csjis_kuE}) {
          $f = $s > 0x9E ? $C->{G3}->{Csjis_kuE}->{ $f }:
                           $C->{G3}->{Csjis_kuO}->{ $f };
          $s -= ($s > 0x9E ? 0x9F : $s > 0x7F ? 0x41 : 0x40);
          chr ($C->{G3}->{ucs} + $f * 94 + $s);
        } else {
          $f -= 0xF0; $s -= 0x40 + ($s > 0x7F);
          chr ($C->{G3}->{ucs} + $f * 188 + $s);
        }
      }
    } elsif ($c1) {     ## ESC Fe
      chr ($C->{ $C->{ESC_Fe} }->{ucs} + ord ($c1) - 0x40);
    } else {    # $C8
      $C->{Gsmap}->{ $c8 } || $c8;
    }
  }gex;
  $s;
}

sub internal_to_sjis ($\%) {
  use integer;
  my ($s, $C) = @_;
  $C ||= &new_object;
  
  my $r = '';
  for my $c (split //, $s) {
    my $cc = ord $c;
    my $t;
    if ($cc <= 0x1F) {
      $t = $c if $C->{ $C->{CL} } eq $Encode::Charset::CHARSET{C0}->{"\x40"};
    } elsif ($cc == 0x20 || $cc == 0x7F) {
      Encode::_utf8_off ($c);
      $t = $c;
    } elsif ($cc < 0x7F) {
      Encode::_utf8_off ($c);
      $t = $c if $C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{"\x42"};
    } elsif ($C->{option}->{C1invoke_to_right} && $cc == 0x80) {
      $t = "\x80"
        if $C->{ $C->{CR} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'};
    } elsif ($cc <= 0x9F) {
      $t = "\x1B".pack 'C', ($cc - 0x40)
        if $C->{ $C->{ESC_Fe} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'};
    
    } elsif (0xE9F6C0 <= $cc && $cc <= 0xF06F80) {
      my $c = $cc - 0xE9F6C0;  my $F = chr (($c / 8836)+0x30);
      if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{ $F }) {
        my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
        $t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1),
               $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
      } elsif ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F }) {
        my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
        if ($C->{G3}->{Csjis_first}) {
          $t = pack ('CC', $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 },
                     $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
        } else {
          $t = pack ('CC', ($c / 188) + 0xF0,
                     $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E))
               if ($c / 188) + 0xF0 < 0xFD;
        }
      }
    } elsif (0xF49D7C <= $cc && $cc <= 0xF4BFFF) {
      my $c = $cc - 0xF49D7C;
      if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{'B@'}) {
        my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
        $t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1),
               $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
      }
    
    } elsif (0xE90940 <= $cc && $cc <= 0xE92641) {
      my $c = $cc - 0xE90940;  my $F = chr (($c / 94)+0x30);
      if ($C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{ $F }) {
        $t = pack 'C', (($c % 94) + 0x21);
      } elsif ($C->{ $C->{GR} } eq $Encode::Charset::CHARSET{G94}->{ $F }) {
        $t = pack 'C', (($c % 94) + 0xA1) if ($c % 94) < 0x3F;
      }
    } elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) {
      my $c = $cc % 0x10000;
      my $F0=$C->{option}->{private_set}->{G94n}->[($cc/0x10000)-0x7042]->[$c/8836];
      my $F1 = 'P'.(($cc / 0x10000) - 0x7042).'_'.($c / 8836);
      if ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F0 }
       || $C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F1 }) {
        my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
        if ($C->{G3}->{Csjis_first}) {
          $t = pack ('CC', $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 },
                     $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
        } else {
          $t = pack ('CC', ($c / 188) + 0xF0,
                     $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E))
               if ($c / 188) + 0xF0 < 0xFD;
        }
      }
    }
    
    if (defined $t) {
      $r .= $t;
    } elsif ($C->{GsmapR}->{ $c }) {
      $r .= $C->{GsmapR}->{ $c };
    } else {
      $r .= $C->{option}->{undef_char_sjis} || "\x3F";
    }
  }
  $r;
}

sub __clone ($) {
  my $self = shift;
  bless {%$self}, ref $self;
};

__PACKAGE__->Define (qw!shift_jisx0213 japanese-shift-jisx0213
shift-jisx0213 x-shift_jisx0213 shift-jis-3 shift-jis-2000
sjis s-jis shift-jis x-sjis x_sjis x-sjis-jp shiftjis x-shiftjis
x-shift-jis shift.jis!);

=item sjis

"Shift JIS" coding system.  (Alias: shift-jis, shiftjis,
shift.jis, x-shiftjis, x-shift-jis, s-jis, x-sjis, x_sjis,
x-sjis-jp)

Since this name is ambiguous (it can now refer all or any
of shift JIS coding system family), this name should not
be used to address specific coding system.  In this module,
this is considered as an alias name to the shift JIS with
latest official definition, currently of JIS X 0213:2000
Appendix 1 (with implemention level 4).

Note that the name "Shift_JIS" is not associated with
this name, because IANA registry [IANAREG] assignes
it to a shift JIS defined by JIS X 0208:1997.

=item shift_jisx0213

Shift_JISX0213 coded representation, defined by
JIS X 0213:2000 Appendix 1 (implemention level 4).
(Alias: shift-jisx0213, x-shift_jisx0213, japanese-shift-jisx0213 (emacsen),
shift-jis-3 (Yudit), shift-jis-2000)

=cut

sub __2022__common ($) {
  my $C = Encode::SJIS->new_object;
  $C->{G0} = $Encode::Charset::CHARSET{G94}->{J};       ## JIS X 0201:1997 Latin
  $C->{G1} = $Encode::Charset::CHARSET{G94n}->{"\x4F"}; ## JIS X 0213:2000 plane 1
  $C->{G2} = $Encode::Charset::CHARSET{G94}->{I};       ## JIS X 0201:1997 Katakana
  $C->{G3} = $Encode::Charset::CHARSET{G94n}->{"\x50"}; ## JIS X 0213:2000 plane 2
  $C;
}
sub __2022_encode ($) {
  my $C = shift->__2022__common;
  $C;
}
sub __2022_decode ($) {
  my $C = shift->__2022__common;
  $C;
}
sub __encode_map ($) {
  [qw/ucs_to_jisx0201_latin ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
}
sub __decode_map ($) {
  [qw/jisx0201_latin_to_ucs jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
}

package Encode::SJIS::X0213ASCII;
use vars qw/@ISA/;
push @ISA, 'Encode::SJIS';
__PACKAGE__->Define (qw/shift_jisx0213-ascii shift-jis-2000-ascii
sjis-ascii shift-jis-ascii/);

=item sjis-ascii

Same as sjis but ASCII (ISO/IEC 646 IRV) instead of
JIS X 0201 Roman (or Latin) set.  (Alias: shift-jis-ascii)

In spite of the history of shift JIS, ASCII is sometimes
used instead of JIS X 0201 Roman set, because of compatibility
with ASCII world.

Note that this name is now an alias of shift_jisx0213-ascii,
as sjis is of shift_jisx0213.

=item shift_jisx0213-ascii

Same as Shift_JISX0213 but ASCII (ISO/IEC 646 IRV)
instead of JIS X 0201:1997 Latin character set.
(Alias: shift-jis-2000-ascii)

Note that this coding system does NOT comform to
JIS X 0213:2000 Appendix 1.

=cut

sub __2022__common ($) {
  my $C = shift->SUPER::__2022__common;
  $C->{G0} = $Encode::Charset::CHARSET{G94}->{B};       ## ASCII
  $C;
}
sub __encode_map ($) {
  [qw/ucs_to_ascii ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
}
sub __decode_map ($) {
  [qw/jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
}

1;
__END__

=back

=head1 SEE ALSO

JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji
set for information interchange", Japan Industrial Standards
Committee (JISC) <http://www.jisc.go.jp/>, 1997.

JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji
sets for information interchange", Japan Industrial Standards
Committee (JISC) <http://www.jisc.go.jp/>, 2000.

Encode, perlunicode

[IANAREG] "CHARACTER SETS", IANA <http://www.iana.org/>,
<http://www.iana.org/assignments/character-sets>.
The charset registry for IETF <http://www.ietf.org/> standards.
(Note that in this registry two shift JISes are registered,
"Shift_JIS" and "Windows-31j".  Former is JIS X 0208:1997's
definition and later is the Windows standard character set.)

=head1 LICENSE

Copyright 2002 Nanashi-san

This library is free software; you can redistribute it
and/or modify it under the same terms as Perl itself.

=cut

# $Date: 2002/10/12 11:03:00 $
### SJIS.pm ends here
1	wakaba	1.1
2			=head1 NAME
3
4			Encode::SJIS --- Shift JIS coding systems encoder and decoder
5
6			=head1 ENCODINGS
7
8			This module defines only two basic version of shift JIS.
9			Other variants are defined in Encode::SJIS::* modules.
10
11			=over 4
12
13			=cut
14
15			package Encode::SJIS;
16			use 5.7.3;
17			use strict;
18	wakaba	1.3	our $VERSION=do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
19	wakaba	1.1	require Encode::Charset;
20	wakaba	1.2	use base qw(Encode::Encoding);
21	wakaba	1.1
22			### --- Perl Encode module common functions
23
24			sub encode ($$;$) {
25			my ($obj, $str, $chk) = @_;
26			$_[1] = '' if $chk;
27			if (!defined $obj->{_encode_mapping} \|\| $obj->{_encode_mapping}) {
28			require Encode::Table;
29			$str = Encode::Table::convert ($str, $obj->__encode_map,
30			-autoload => defined $obj->{_encode_mapping_autoload} ?
31			$obj->{_encode_mapping_autoload} : 1);
32			}
33			$str = &internal_to_sjis ($str, $obj->__2022_encode);
34			$str;
35			}
36
37			sub decode ($$;$) {
38			my ($obj, $str, $chk) = @_;
39			$_[1] = '' if $chk;
40			$str = &sjis_to_internal ($str, $obj->__2022_decode);
41			if (!defined $obj->{_decode_mapping} \|\| $obj->{_decode_mapping}) {
42			require Encode::Table;
43			$str = Encode::Table::convert ($str, $obj->__decode_map,
44			-autoload => defined $obj->{_decode_mapping_autoload} ?
45			$obj->{_decode_mapping_autoload} : 1);
46			}
47			$str;
48			}
49
50			### --- Encode::SJIS unique functions
51			*new_object = \&Encode::Charset::new_object_sjis;
52
53			sub sjis_to_internal ($$) {
54			my ($s, $C) = @_;
55			$C \|\|= &new_object;
56			$s =~ s{
57			([\x00-\x7F\xA1-\xDF])
58			# ([\xA1-\xDF])
59			\|([\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])
60			\|\x1B([\x40-\x5F])
61			\|([\x80-\xFF]) ## Broken or supplemental 1-byte character
62			}{
63			my ($c7, $c2, $c1, $c8) = ($1, $2, $3, $4);
64			if (defined $c7) {
65			if ($c7 =~ /([\x21-\x7E])/) {
66			chr ($C->{ $C->{GL} }->{ucs} + ord ($1) - 0x21);
67			} elsif ($c7 =~ /([\x00-\x1F])/) {
68			chr ($C->{ $C->{CL} }->{ucs} + ord ($1));
69			} elsif ($C->{GR} && $c7 =~ /([\xA1-\xDF])/) {
70			chr ($C->{ $C->{GR} }->{ucs} + ord ($1) - 0xA1);
71			} else { ## 0x20, 0x7F
72			$C->{Gsmap}->{ $c7 } \|\| $c7;
73			}
74			} elsif ($c2) {
75			if ($c2 =~ /([\x81-\xEF])(.)/) {
76			my ($f, $s) = (ord $1, ord $2);
77			$f -= $f < 0xA0 ? 0x81 : 0xC1; $s -= 0x40 + ($s > 0x7F);
78			chr ($C->{G1}->{ucs} + $f * 188 + $s);
79			} else { ## [\xF0-\xFC].
80			my ($f, $s) = unpack ('CC', $c2);
81			if ($C->{G3}->{Csjis_kuE}) {
82			$f = $s > 0x9E ? $C->{G3}->{Csjis_kuE}->{ $f }:
83			$C->{G3}->{Csjis_kuO}->{ $f };
84			$s -= ($s > 0x9E ? 0x9F : $s > 0x7F ? 0x41 : 0x40);
85			chr ($C->{G3}->{ucs} + $f * 94 + $s);
86			} else {
87			$f -= 0xF0; $s -= 0x40 + ($s > 0x7F);
88			chr ($C->{G3}->{ucs} + $f * 188 + $s);
89			}
90			}
91			} elsif ($c1) { ## ESC Fe
92			chr ($C->{ $C->{ESC_Fe} }->{ucs} + ord ($c1) - 0x40);
93			} else { # $C8
94			$C->{Gsmap}->{ $c8 } \|\| $c8;
95			}
96			}gex;
97			$s;
98			}
99
100	wakaba	1.2	sub internal_to_sjis ($\%) {
101			use integer;
102			my ($s, $C) = @_;
103			$C \|\|= &new_object;
104
105			my $r = '';
106			for my $c (split //, $s) {
107			my $cc = ord $c;
108			my $t;
109			if ($cc <= 0x1F) {
110			$t = $c if $C->{ $C->{CL} } eq $Encode::Charset::CHARSET{C0}->{"\x40"};
111			} elsif ($cc == 0x20 \|\| $cc == 0x7F) {
112	wakaba	1.3	Encode::_utf8_off ($c);
113	wakaba	1.2	$t = $c;
114			} elsif ($cc < 0x7F) {
115	wakaba	1.3	Encode::_utf8_off ($c);
116	wakaba	1.2	$t = $c if $C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{"\x42"};
117			} elsif ($C->{option}->{C1invoke_to_right} && $cc == 0x80) {
118	wakaba	1.3	$t = "\x80"
119			if $C->{ $C->{CR} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'};
120	wakaba	1.2	} elsif ($cc <= 0x9F) {
121	wakaba	1.3	$t = "\x1B".pack 'C', ($cc - 0x40)
122	wakaba	1.2	if $C->{ $C->{ESC_Fe} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'};
123
124			} elsif (0xE9F6C0 <= $cc && $cc <= 0xF06F80) {
125			my $c = $cc - 0xE9F6C0; my $F = chr (($c / 8836)+0x30);
126			if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{ $F }) {
127			my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
128			$t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1),
129			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
130			} elsif ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F }) {
131			my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
132			if ($C->{G3}->{Csjis_first}) {
133			$t = pack ('CC', $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 },
134			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
135			} else {
136			$t = pack ('CC', ($c / 188) + 0xF0,
137			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E))
138			if ($c / 188) + 0xF0 < 0xFD;
139			}
140			}
141			} elsif (0xF49D7C <= $cc && $cc <= 0xF4BFFF) {
142			my $c = $cc - 0xF49D7C;
143			if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{'B@'}) {
144			my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
145			$t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1),
146			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
147			}
148
149			} elsif (0xE90940 <= $cc && $cc <= 0xE92641) {
150			my $c = $cc - 0xE90940; my $F = chr (($c / 94)+0x30);
151			if ($C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{ $F }) {
152	wakaba	1.3	$t = pack 'C', (($c % 94) + 0x21);
153	wakaba	1.2	} elsif ($C->{ $C->{GR} } eq $Encode::Charset::CHARSET{G94}->{ $F }) {
154	wakaba	1.3	$t = pack 'C', (($c % 94) + 0xA1) if ($c % 94) < 0x3F;
155	wakaba	1.2	}
156			} elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) {
157			my $c = $cc % 0x10000;
158			my $F0=$C->{option}->{private_set}->{G94n}->[($cc/0x10000)-0x7042]->[$c/8836];
159			my $F1 = 'P'.(($cc / 0x10000) - 0x7042).'_'.($c / 8836);
160			if ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F0 }
161			\|\| $C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F1 }) {
162			my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21);
163			if ($C->{G3}->{Csjis_first}) {
164			$t = pack ('CC', $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 },
165			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E));
166			} else {
167			$t = pack ('CC', ($c / 188) + 0xF0,
168			$c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E))
169			if ($c / 188) + 0xF0 < 0xFD;
170			}
171			}
172			}
173
174			if (defined $t) {
175			$r .= $t;
176			} elsif ($C->{GsmapR}->{ $c }) {
177			$r .= $C->{GsmapR}->{ $c };
178			} else {
179			$r .= $C->{option}->{undef_char_sjis} \|\| "\x3F";
180			}
181			}
182			$r;
183			}
184
185	wakaba	1.1	sub __clone ($) {
186			my $self = shift;
187			bless {%$self}, ref $self;
188			};
189
190			__PACKAGE__->Define (qw!shift_jisx0213 japanese-shift-jisx0213
191	wakaba	1.2	shift-jisx0213 x-shift_jisx0213 shift-jis-3 shift-jis-2000
192	wakaba	1.3	sjis s-jis shift-jis x-sjis x_sjis x-sjis-jp shiftjis x-shiftjis
193	wakaba	1.1	x-shift-jis shift.jis!);
194
195			=item sjis
196
197			"Shift JIS" coding system. (Alias: shift-jis, shiftjis,
198	wakaba	1.3	shift.jis, x-shiftjis, x-shift-jis, s-jis, x-sjis, x_sjis,
199	wakaba	1.1	x-sjis-jp)
200
201			Since this name is ambiguous (it can now refer all or any
202			of shift JIS coding system family), this name should not
203			be used to address specific coding system. In this module,
204			this is considered as an alias name to the shift JIS with
205			latest official definition, currently of JIS X 0213:2000
206			Appendix 1 (with implemention level 4).
207
208			Note that the name "Shift_JIS" is not associated with
209			this name, because IANA registry [IANAREG] assignes
210			it to a shift JIS defined by JIS X 0208:1997.
211
212			=item shift_jisx0213
213
214			Shift_JISX0213 coded representation, defined by
215			JIS X 0213:2000 Appendix 1 (implemention level 4).
216			(Alias: shift-jisx0213, x-shift_jisx0213, japanese-shift-jisx0213 (emacsen),
217	wakaba	1.2	shift-jis-3 (Yudit), shift-jis-2000)
218	wakaba	1.1
219			=cut
220
221			sub __2022__common ($) {
222			my $C = Encode::SJIS->new_object;
223			$C->{G0} = $Encode::Charset::CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin
224			$C->{G1} = $Encode::Charset::CHARSET{G94n}->{"\x4F"}; ## JIS X 0213:2000 plane 1
225			$C->{G2} = $Encode::Charset::CHARSET{G94}->{I}; ## JIS X 0201:1997 Katakana
226			$C->{G3} = $Encode::Charset::CHARSET{G94n}->{"\x50"}; ## JIS X 0213:2000 plane 2
227			$C;
228			}
229			sub __2022_encode ($) {
230			my $C = shift->__2022__common;
231			$C;
232			}
233			sub __2022_decode ($) {
234			my $C = shift->__2022__common;
235			$C;
236			}
237			sub __encode_map ($) {
238			[qw/ucs_to_jisx0201_latin ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
239			}
240			sub __decode_map ($) {
241			[qw/jisx0201_latin_to_ucs jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
242			}
243
244			package Encode::SJIS::X0213ASCII;
245			use vars qw/@ISA/;
246			push @ISA, 'Encode::SJIS';
247	wakaba	1.2	__PACKAGE__->Define (qw/shift_jisx0213-ascii shift-jis-2000-ascii
248			sjis-ascii shift-jis-ascii/);
249	wakaba	1.1
250			=item sjis-ascii
251
252			Same as sjis but ASCII (ISO/IEC 646 IRV) instead of
253			JIS X 0201 Roman (or Latin) set. (Alias: shift-jis-ascii)
254
255			In spite of the history of shift JIS, ASCII is sometimes
256			used instead of JIS X 0201 Roman set, because of compatibility
257			with ASCII world.
258
259			Note that this name is now an alias of shift_jisx0213-ascii,
260			as sjis is of shift_jisx0213.
261
262			=item shift_jisx0213-ascii
263
264			Same as Shift_JISX0213 but ASCII (ISO/IEC 646 IRV)
265			instead of JIS X 0201:1997 Latin character set.
266	wakaba	1.2	(Alias: shift-jis-2000-ascii)
267
268			Note that this coding system does NOT comform to
269			JIS X 0213:2000 Appendix 1.
270	wakaba	1.1
271			=cut
272
273			sub __2022__common ($) {
274			my $C = shift->SUPER::__2022__common;
275			$C->{G0} = $Encode::Charset::CHARSET{G94}->{B}; ## ASCII
276			$C;
277			}
278			sub __encode_map ($) {
279			[qw/ucs_to_ascii ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
280			}
281			sub __decode_map ($) {
282			[qw/jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
283			}
284
285			1;
286			__END__
287
288	wakaba	1.2	=back
289
290	wakaba	1.1	=head1 SEE ALSO
291
292			JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji
293			set for information interchange", Japan Industrial Standards
294			Committee (JISC) <http://www.jisc.go.jp/>, 1997.
295
296			JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji
297			sets for information interchange", Japan Industrial Standards
298			Committee (JISC) <http://www.jisc.go.jp/>, 2000.
299
300			Encode, perlunicode
301
302			[IANAREG] "CHARACTER SETS", IANA <http://www.iana.org/>,
303			<http://www.iana.org/assignments/character-sets>.
304			The charset registry for IETF <http://www.ietf.org/> standards.
305			(Note that in this registry two shift JISes are registered,
306			"Shift_JIS" and "Windows-31j". Former is JIS X 0208:1997's
307			definition and later is the Windows standard character set.)
308
309			=head1 LICENSE
310
311			Copyright 2002 Nanashi-san
312
313			This library is free software; you can redistribute it
314			and/or modify it under the same terms as Perl itself.
315
316			=cut
317
318	wakaba	1.3	# $Date: 2002/10/12 11:03:00 $
319	wakaba	1.1	### SJIS.pm ends here