/[suikacvs]/perl/lib/Encode/SJIS.pm
Suika

Contents of /perl/lib/Encode/SJIS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Sat Oct 12 07:27:01 2002 UTC (22 years, 1 month ago) by wakaba
Branch: MAIN
2002-10-12  Nanashi-san

	* SJIS.pm: New module.
	* SJIS/: New directory.
	(Commited by Wakaba <w@suika.fam.cx>)

1 wakaba 1.1
2     =head1 NAME
3    
4     Encode::SJIS --- Shift JIS coding systems encoder and decoder
5    
6     =head1 ENCODINGS
7    
8     This module defines only two basic version of shift JIS.
9     Other variants are defined in Encode::SJIS::* modules.
10    
11     =over 4
12    
13     =cut
14    
15     package Encode::SJIS;
16     use 5.7.3;
17     use strict;
18     our $VERSION=do{my @r=(q$Revision: 1.6 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
19     require Encode::Charset;
20    
21     ### --- Perl Encode module common functions
22    
23     sub encode ($$;$) {
24     my ($obj, $str, $chk) = @_;
25     $_[1] = '' if $chk;
26     if (!defined $obj->{_encode_mapping} || $obj->{_encode_mapping}) {
27     require Encode::Table;
28     $str = Encode::Table::convert ($str, $obj->__encode_map,
29     -autoload => defined $obj->{_encode_mapping_autoload} ?
30     $obj->{_encode_mapping_autoload} : 1);
31     }
32     $str = &internal_to_sjis ($str, $obj->__2022_encode);
33     $str;
34     }
35    
36     sub decode ($$;$) {
37     my ($obj, $str, $chk) = @_;
38     $_[1] = '' if $chk;
39     $str = &sjis_to_internal ($str, $obj->__2022_decode);
40     if (!defined $obj->{_decode_mapping} || $obj->{_decode_mapping}) {
41     require Encode::Table;
42     $str = Encode::Table::convert ($str, $obj->__decode_map,
43     -autoload => defined $obj->{_decode_mapping_autoload} ?
44     $obj->{_decode_mapping_autoload} : 1);
45     }
46     $str;
47     }
48    
49     ### --- Encode::SJIS unique functions
50     *new_object = \&Encode::Charset::new_object_sjis;
51    
52     sub sjis_to_internal ($$) {
53     my ($s, $C) = @_;
54     $C ||= &new_object;
55     $s =~ s{
56     ([\x00-\x7F\xA1-\xDF])
57     # ([\xA1-\xDF])
58     |([\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])
59     |\x1B([\x40-\x5F])
60     |([\x80-\xFF]) ## Broken or supplemental 1-byte character
61     }{
62     my ($c7, $c2, $c1, $c8) = ($1, $2, $3, $4);
63     if (defined $c7) {
64     if ($c7 =~ /([\x21-\x7E])/) {
65     chr ($C->{ $C->{GL} }->{ucs} + ord ($1) - 0x21);
66     } elsif ($c7 =~ /([\x00-\x1F])/) {
67     chr ($C->{ $C->{CL} }->{ucs} + ord ($1));
68     } elsif ($C->{GR} && $c7 =~ /([\xA1-\xDF])/) {
69     chr ($C->{ $C->{GR} }->{ucs} + ord ($1) - 0xA1);
70     } else { ## 0x20, 0x7F
71     $C->{Gsmap}->{ $c7 } || $c7;
72     }
73     } elsif ($c2) {
74     if ($c2 =~ /([\x81-\xEF])(.)/) {
75     my ($f, $s) = (ord $1, ord $2);
76     $f -= $f < 0xA0 ? 0x81 : 0xC1; $s -= 0x40 + ($s > 0x7F);
77     chr ($C->{G1}->{ucs} + $f * 188 + $s);
78     } else { ## [\xF0-\xFC].
79     my ($f, $s) = unpack ('CC', $c2);
80     if ($C->{G3}->{Csjis_kuE}) {
81     $f = $s > 0x9E ? $C->{G3}->{Csjis_kuE}->{ $f }:
82     $C->{G3}->{Csjis_kuO}->{ $f };
83     $s -= ($s > 0x9E ? 0x9F : $s > 0x7F ? 0x41 : 0x40);
84     chr ($C->{G3}->{ucs} + $f * 94 + $s);
85     } else {
86     $f -= 0xF0; $s -= 0x40 + ($s > 0x7F);
87     chr ($C->{G3}->{ucs} + $f * 188 + $s);
88     }
89     }
90     } elsif ($c1) { ## ESC Fe
91     chr ($C->{ $C->{ESC_Fe} }->{ucs} + ord ($c1) - 0x40);
92     } else { # $C8
93     $C->{Gsmap}->{ $c8 } || $c8;
94     }
95     }gex;
96     $s;
97     }
98    
99     sub __clone ($) {
100     my $self = shift;
101     bless {%$self}, ref $self;
102     };
103    
104     use base qw(Encode::Encoding);
105     __PACKAGE__->Define (qw!shift_jisx0213 japanese-shift-jisx0213
106     shift-jisx0213 x-shift_jisx0213 shift-jis-3
107     sjis shift-jis x-sjis x_sjis x-sjis-jp shiftjis x-shiftjis
108     x-shift-jis shift.jis!);
109    
110     =item sjis
111    
112     "Shift JIS" coding system. (Alias: shift-jis, shiftjis,
113     shift.jis, x-shiftjis, x-shift-jis, x-sjis, x_sjis,
114     x-sjis-jp)
115    
116     Since this name is ambiguous (it can now refer all or any
117     of shift JIS coding system family), this name should not
118     be used to address specific coding system. In this module,
119     this is considered as an alias name to the shift JIS with
120     latest official definition, currently of JIS X 0213:2000
121     Appendix 1 (with implemention level 4).
122    
123     Note that the name "Shift_JIS" is not associated with
124     this name, because IANA registry [IANAREG] assignes
125     it to a shift JIS defined by JIS X 0208:1997.
126    
127     =item shift_jisx0213
128    
129     Shift_JISX0213 coded representation, defined by
130     JIS X 0213:2000 Appendix 1 (implemention level 4).
131     (Alias: shift-jisx0213, x-shift_jisx0213, japanese-shift-jisx0213 (emacsen),
132     shift-jis-3 (Yudit))
133    
134     =cut
135    
136     sub __2022__common ($) {
137     my $C = Encode::SJIS->new_object;
138     $C->{G0} = $Encode::Charset::CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin
139     $C->{G1} = $Encode::Charset::CHARSET{G94n}->{"\x4F"}; ## JIS X 0213:2000 plane 1
140     $C->{G2} = $Encode::Charset::CHARSET{G94}->{I}; ## JIS X 0201:1997 Katakana
141     $C->{G3} = $Encode::Charset::CHARSET{G94n}->{"\x50"}; ## JIS X 0213:2000 plane 2
142     $C;
143     }
144     sub __2022_encode ($) {
145     my $C = shift->__2022__common;
146     $C;
147     }
148     sub __2022_decode ($) {
149     my $C = shift->__2022__common;
150     $C;
151     }
152     sub __encode_map ($) {
153     [qw/ucs_to_jisx0201_latin ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
154     }
155     sub __decode_map ($) {
156     [qw/jisx0201_latin_to_ucs jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
157     }
158    
159     package Encode::SJIS::X0213ASCII;
160     use vars qw/@ISA/;
161     push @ISA, 'Encode::SJIS';
162     __PACKAGE__->Define (qw/shift_jisx0213-ascii sjis-ascii shift-jis-ascii/);
163    
164     =item sjis-ascii
165    
166     Same as sjis but ASCII (ISO/IEC 646 IRV) instead of
167     JIS X 0201 Roman (or Latin) set. (Alias: shift-jis-ascii)
168    
169     In spite of the history of shift JIS, ASCII is sometimes
170     used instead of JIS X 0201 Roman set, because of compatibility
171     with ASCII world.
172    
173     Note that this name is now an alias of shift_jisx0213-ascii,
174     as sjis is of shift_jisx0213.
175    
176     =item shift_jisx0213-ascii
177    
178     Same as Shift_JISX0213 but ASCII (ISO/IEC 646 IRV)
179     instead of JIS X 0201:1997 Latin character set.
180    
181     =cut
182    
183     sub __2022__common ($) {
184     my $C = shift->SUPER::__2022__common;
185     $C->{G0} = $Encode::Charset::CHARSET{G94}->{B}; ## ASCII
186     $C;
187     }
188     sub __encode_map ($) {
189     [qw/ucs_to_ascii ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
190     }
191     sub __decode_map ($) {
192     [qw/jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
193     }
194    
195     1;
196     __END__
197    
198     =head1 SEE ALSO
199    
200     JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji
201     set for information interchange", Japan Industrial Standards
202     Committee (JISC) <http://www.jisc.go.jp/>, 1997.
203    
204     JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji
205     sets for information interchange", Japan Industrial Standards
206     Committee (JISC) <http://www.jisc.go.jp/>, 2000.
207    
208     Encode, perlunicode
209    
210     [IANAREG] "CHARACTER SETS", IANA <http://www.iana.org/>,
211     <http://www.iana.org/assignments/character-sets>.
212     The charset registry for IETF <http://www.ietf.org/> standards.
213     (Note that in this registry two shift JISes are registered,
214     "Shift_JIS" and "Windows-31j". Former is JIS X 0208:1997's
215     definition and later is the Windows standard character set.)
216    
217     =head1 LICENSE
218    
219     Copyright 2002 Nanashi-san
220    
221     This library is free software; you can redistribute it
222     and/or modify it under the same terms as Perl itself.
223    
224     =cut
225    
226     # $Date: 2002/09/21 01:34:08 $
227     ### SJIS.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24