=head1 NAME Encode::SJIS --- Shift JIS coding systems encoder and decoder =head1 ENCODINGS This module defines encoding engine for Shift JIS coding systems. This module only provides general en/decoding parts. Actual profiles for Shift JISes are included in Encode::SJIS::*. =over 4 =cut package Encode::SJIS; use 5.7.3; use strict; our $VERSION=do{my @r=(q$Revision: 1.5 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; require Encode::Charset; use base qw(Encode::Encoding); *new_object = \&Encode::Charset::new_object_sjis; ## Code extention escape sequence defined by ISO/IEC 2022 is ## not supported in this version of this module. sub sjis_to_internal ($$) { my ($s, $C) = @_; $C ||= &new_object; $s =~ s{ ([\x00-\x7F\xA1-\xDF]) # ([\xA1-\xDF]) |([\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC]) |\x1B([\x40-\x5F]) |([\x80-\xFF]) ## Broken or supplemental 1-byte character }{ my ($c7, $c2, $c1, $c8) = ($1, $2, $3, $4); if (defined $c7) { if ($c7 =~ /([\x21-\x7E])/) { chr ($C->{ $C->{GL} }->{ucs} + ord ($1) - 0x21); } elsif ($c7 =~ /([\x00-\x1F])/) { chr ($C->{ $C->{CL} }->{ucs} + ord ($1)); } elsif ($C->{GR} && $c7 =~ /([\xA1-\xDF])/) { chr ($C->{ $C->{GR} }->{ucs} + ord ($1) - 0xA1); } else { ## 0x20, 0x7F $C->{Gsmap}->{ $c7 } || $c7; } } elsif ($c2) { if ($c2 =~ /([\x81-\xEF])(.)/) { my ($f, $s) = (ord $1, ord $2); $f -= $f < 0xA0 ? 0x81 : 0xC1; $s -= 0x40 + ($s > 0x7F); chr ($C->{G1}->{ucs} + $f * 188 + $s); } else { ## [\xF0-\xFC]. my ($f, $s) = unpack ('CC', $c2); if ($C->{G3}->{Csjis_kuE}) { $f = $s > 0x9E ? $C->{G3}->{Csjis_kuE}->{ $f }: $C->{G3}->{Csjis_kuO}->{ $f }; $s -= ($s > 0x9E ? 0x9F : $s > 0x7F ? 0x41 : 0x40); chr ($C->{G3}->{ucs} + $f * 94 + $s); } else { $f -= 0xF0; $s -= 0x40 + ($s > 0x7F); chr ($C->{G3}->{ucs} + $f * 188 + $s); } } } elsif ($c1) { ## ESC Fe chr ($C->{ $C->{ESC_Fe} }->{ucs} + ord ($c1) - 0x40); } else { # $C8 $C->{Gsmap}->{ $c8 } || $c8; } }gex; $s; } sub internal_to_sjis ($\%) { use integer; my ($s, $C) = @_; $C ||= &new_object; my $r = ''; my @c = split //, $s; for my $i (0..$#c) { my $c = $c[$i]; my $cc = ord $c; Encode::_utf8_off ($c); my $t; ## CL = C0 control characters if ($cc <= 0x1F) { $t = $c if $C->{ $C->{CL} } eq $Encode::Charset::CHARSET{C0}->{"\x40"}; ## 0x20 == SP and 0x7E == DEL } elsif ($cc == 0x20 || $cc == 0x7F) { $t = $c; ## GL = G0 = ISO/IEC 646 graphic character set } elsif ($cc < 0x7F) { $t = $c if $C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{"\x42"}; ## 0x80 } elsif ($C->{option}->{C1invoke_to_right} && $cc == 0x80) { $t = "\x80" if $C->{ $C->{CR} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'}; ## ESC Fe = C1 control characters } elsif ($cc <= 0x9F) { $t = "\x1B".pack 'C', ($cc - 0x40) if $C->{ $C->{ESC_Fe} } eq $Encode::Charset::CHARSET{C1}->{'64291991C1'}; ## G1 or G3 = 94^2 graphic character set from ISO-IR } elsif (0xE9F6C0 <= $cc && $cc <= 0xF06F80) { my $c = $cc - 0xE9F6C0; my $F = chr (($c / 8836)+0x30); if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{ $F }) { my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21); $t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1), $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E)); } elsif ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F }) { my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21); if ($C->{G3}->{Csjis_first}) { my $fb = $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 }; $t = pack ('CC', $fb, $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E)) if $fb; } else { $t = pack ('CC', ($c / 188) + 0xF0, $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E)) if ($c / 188) + 0xF0 < 0xFD; } } ## G1 = JIS X 0208-1990/:1997 } elsif (0xF49D7C <= $cc && $cc <= 0xF4BFFF) { my $c = $cc - 0xF49D7C; if ($C->{G1} eq $Encode::Charset::CHARSET{G94n}->{'B@'}) { my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21); $t = pack ('CC', (($c1 - 1) >> 1) + ($c1 < 0x5F ? 0x71 : 0xB1), $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E)); } ## GL = G0 = ISO/IEC 646 graphic character set / GR = G2 = JIS X 0201 Katakana set } elsif (0xE90940 <= $cc && $cc <= 0xE92641) { my $c = $cc - 0xE90940; my $F = chr (($c / 94)+0x30); if ($C->{ $C->{GL} } eq $Encode::Charset::CHARSET{G94}->{ $F }) { $t = pack 'C', (($c % 94) + 0x21); } elsif ($C->{ $C->{GR} } eq $Encode::Charset::CHARSET{G94}->{ $F }) { $t = pack 'C', (($c % 94) + 0xA1) if ($c % 94) < 0x3F; } ## G1 / G3 = 94^2 graphic character set } elsif (0x70420000 <= $cc && $cc <= 0x7046F19B) { my $c = $cc % 0x10000; my $F0=$C->{option}->{private_set}->{G94n}->[($cc/0x10000)-0x7042]->[$c/8836]; my $F1 = 'P'.(($cc / 0x10000) - 0x7042).'_'.($c / 8836); if ($C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F0 } || $C->{G3} eq $Encode::Charset::CHARSET{G94n}->{ $F1 }) { my ($c1, $c2) = ((($c % 8836) / 94)+0x21, ($c % 94)+0x21); if ($C->{G3}->{Csjis_first}) { my $fb = $C->{G3}->{Csjis_first}->{ ($c % 8836) / 94 }; $t = pack ('CC', $fb, $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E)) if $fb; } else { $t = pack ('CC', ($c / 188) + 0xF0, $c2 + (($c1 & 1) ? ($c2 < 0x60 ? 0x1F : 0x20) : 0x7E)) if ($c / 188) + 0xF0 < 0xFD; } } ## Other character sets are not supported now (and there is no plan to implement them). } ## Output the character itself if (defined $t) { $r .= $t; ## Output the character itself with mapping table of special code positions } elsif ($C->{GsmapR}->{ $c }) { $r .= $C->{GsmapR}->{ $c }; } elsif ($C->{option}->{fallback_from_ucs} =~ /quiet/) { return ($r, halfway => 1, converted_length => $i, warn => $C->{option}->{fallback_from_ucs} =~ /warn/ ? 1 : 0, reason => sprintf (q(U+%04X: There is no character mapped to), $cc)); } elsif ($C->{option}->{fallback_from_ucs} eq 'croak') { return ($r, halfway => 1, die => 1, reason => sprintf (q(U+%04X: There is no character mapped to), $cc)); ## } else { ## Try to output with fallback escape sequence (if specified) my $t = Encode::Charset::fallback_escape ($C, $c); if (defined $t) { my %D = (fallback => $C->{option}->{fallback_from_ucs}, reset => $C->{option}->{reset}); $C->{option}->{fallback_from_ucs} = 'croak'; $C->{option}->{reset} = {Gdesignation => 0, Ginvoke => 0}; eval q{$t = $C->{_encoder}->_encode_internal ($t, $C)} or undef $t; $C->{option}->{fallback_from_ucs} = $D{fallback}; $C->{option}->{reset} = $D{reset}; } if (defined $t) { $r .= $t; } else { ## Replacement character specified in charset definition $r .= $C->{option}->{undef_char_sjis} || "\x3F"; } } } $r; } =back =head1 SEE ALSO JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji set for information interchange", Japan Industrial Standards Committee (JISC) , 1997. JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji sets for information interchange", Japan Industrial Standards Committee (JISC) , 2000. L L, L L, L =head1 LICENSE Copyright 2002 Nanashi-san This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; # $Date: 2002/12/16 10:25:01 $