/[suikacvs]/perl/lib/Encode/SJIS.pm
Suika

Contents of /perl/lib/Encode/SJIS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (show annotations) (download)
Sat Oct 12 07:27:01 2002 UTC (22 years, 1 month ago) by wakaba
Branch: MAIN
2002-10-12  Nanashi-san

	* SJIS.pm: New module.
	* SJIS/: New directory.
	(Commited by Wakaba <w@suika.fam.cx>)

1
2 =head1 NAME
3
4 Encode::SJIS --- Shift JIS coding systems encoder and decoder
5
6 =head1 ENCODINGS
7
8 This module defines only two basic version of shift JIS.
9 Other variants are defined in Encode::SJIS::* modules.
10
11 =over 4
12
13 =cut
14
15 package Encode::SJIS;
16 use 5.7.3;
17 use strict;
18 our $VERSION=do{my @r=(q$Revision: 1.6 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
19 require Encode::Charset;
20
21 ### --- Perl Encode module common functions
22
23 sub encode ($$;$) {
24 my ($obj, $str, $chk) = @_;
25 $_[1] = '' if $chk;
26 if (!defined $obj->{_encode_mapping} || $obj->{_encode_mapping}) {
27 require Encode::Table;
28 $str = Encode::Table::convert ($str, $obj->__encode_map,
29 -autoload => defined $obj->{_encode_mapping_autoload} ?
30 $obj->{_encode_mapping_autoload} : 1);
31 }
32 $str = &internal_to_sjis ($str, $obj->__2022_encode);
33 $str;
34 }
35
36 sub decode ($$;$) {
37 my ($obj, $str, $chk) = @_;
38 $_[1] = '' if $chk;
39 $str = &sjis_to_internal ($str, $obj->__2022_decode);
40 if (!defined $obj->{_decode_mapping} || $obj->{_decode_mapping}) {
41 require Encode::Table;
42 $str = Encode::Table::convert ($str, $obj->__decode_map,
43 -autoload => defined $obj->{_decode_mapping_autoload} ?
44 $obj->{_decode_mapping_autoload} : 1);
45 }
46 $str;
47 }
48
49 ### --- Encode::SJIS unique functions
50 *new_object = \&Encode::Charset::new_object_sjis;
51
52 sub sjis_to_internal ($$) {
53 my ($s, $C) = @_;
54 $C ||= &new_object;
55 $s =~ s{
56 ([\x00-\x7F\xA1-\xDF])
57 # ([\xA1-\xDF])
58 |([\x81-\x9F\xE0-\xFC][\x40-\x7E\x80-\xFC])
59 |\x1B([\x40-\x5F])
60 |([\x80-\xFF]) ## Broken or supplemental 1-byte character
61 }{
62 my ($c7, $c2, $c1, $c8) = ($1, $2, $3, $4);
63 if (defined $c7) {
64 if ($c7 =~ /([\x21-\x7E])/) {
65 chr ($C->{ $C->{GL} }->{ucs} + ord ($1) - 0x21);
66 } elsif ($c7 =~ /([\x00-\x1F])/) {
67 chr ($C->{ $C->{CL} }->{ucs} + ord ($1));
68 } elsif ($C->{GR} && $c7 =~ /([\xA1-\xDF])/) {
69 chr ($C->{ $C->{GR} }->{ucs} + ord ($1) - 0xA1);
70 } else { ## 0x20, 0x7F
71 $C->{Gsmap}->{ $c7 } || $c7;
72 }
73 } elsif ($c2) {
74 if ($c2 =~ /([\x81-\xEF])(.)/) {
75 my ($f, $s) = (ord $1, ord $2);
76 $f -= $f < 0xA0 ? 0x81 : 0xC1; $s -= 0x40 + ($s > 0x7F);
77 chr ($C->{G1}->{ucs} + $f * 188 + $s);
78 } else { ## [\xF0-\xFC].
79 my ($f, $s) = unpack ('CC', $c2);
80 if ($C->{G3}->{Csjis_kuE}) {
81 $f = $s > 0x9E ? $C->{G3}->{Csjis_kuE}->{ $f }:
82 $C->{G3}->{Csjis_kuO}->{ $f };
83 $s -= ($s > 0x9E ? 0x9F : $s > 0x7F ? 0x41 : 0x40);
84 chr ($C->{G3}->{ucs} + $f * 94 + $s);
85 } else {
86 $f -= 0xF0; $s -= 0x40 + ($s > 0x7F);
87 chr ($C->{G3}->{ucs} + $f * 188 + $s);
88 }
89 }
90 } elsif ($c1) { ## ESC Fe
91 chr ($C->{ $C->{ESC_Fe} }->{ucs} + ord ($c1) - 0x40);
92 } else { # $C8
93 $C->{Gsmap}->{ $c8 } || $c8;
94 }
95 }gex;
96 $s;
97 }
98
99 sub __clone ($) {
100 my $self = shift;
101 bless {%$self}, ref $self;
102 };
103
104 use base qw(Encode::Encoding);
105 __PACKAGE__->Define (qw!shift_jisx0213 japanese-shift-jisx0213
106 shift-jisx0213 x-shift_jisx0213 shift-jis-3
107 sjis shift-jis x-sjis x_sjis x-sjis-jp shiftjis x-shiftjis
108 x-shift-jis shift.jis!);
109
110 =item sjis
111
112 "Shift JIS" coding system. (Alias: shift-jis, shiftjis,
113 shift.jis, x-shiftjis, x-shift-jis, x-sjis, x_sjis,
114 x-sjis-jp)
115
116 Since this name is ambiguous (it can now refer all or any
117 of shift JIS coding system family), this name should not
118 be used to address specific coding system. In this module,
119 this is considered as an alias name to the shift JIS with
120 latest official definition, currently of JIS X 0213:2000
121 Appendix 1 (with implemention level 4).
122
123 Note that the name "Shift_JIS" is not associated with
124 this name, because IANA registry [IANAREG] assignes
125 it to a shift JIS defined by JIS X 0208:1997.
126
127 =item shift_jisx0213
128
129 Shift_JISX0213 coded representation, defined by
130 JIS X 0213:2000 Appendix 1 (implemention level 4).
131 (Alias: shift-jisx0213, x-shift_jisx0213, japanese-shift-jisx0213 (emacsen),
132 shift-jis-3 (Yudit))
133
134 =cut
135
136 sub __2022__common ($) {
137 my $C = Encode::SJIS->new_object;
138 $C->{G0} = $Encode::Charset::CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin
139 $C->{G1} = $Encode::Charset::CHARSET{G94n}->{"\x4F"}; ## JIS X 0213:2000 plane 1
140 $C->{G2} = $Encode::Charset::CHARSET{G94}->{I}; ## JIS X 0201:1997 Katakana
141 $C->{G3} = $Encode::Charset::CHARSET{G94n}->{"\x50"}; ## JIS X 0213:2000 plane 2
142 $C;
143 }
144 sub __2022_encode ($) {
145 my $C = shift->__2022__common;
146 $C;
147 }
148 sub __2022_decode ($) {
149 my $C = shift->__2022__common;
150 $C;
151 }
152 sub __encode_map ($) {
153 [qw/ucs_to_jisx0201_latin ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
154 }
155 sub __decode_map ($) {
156 [qw/jisx0201_latin_to_ucs jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
157 }
158
159 package Encode::SJIS::X0213ASCII;
160 use vars qw/@ISA/;
161 push @ISA, 'Encode::SJIS';
162 __PACKAGE__->Define (qw/shift_jisx0213-ascii sjis-ascii shift-jis-ascii/);
163
164 =item sjis-ascii
165
166 Same as sjis but ASCII (ISO/IEC 646 IRV) instead of
167 JIS X 0201 Roman (or Latin) set. (Alias: shift-jis-ascii)
168
169 In spite of the history of shift JIS, ASCII is sometimes
170 used instead of JIS X 0201 Roman set, because of compatibility
171 with ASCII world.
172
173 Note that this name is now an alias of shift_jisx0213-ascii,
174 as sjis is of shift_jisx0213.
175
176 =item shift_jisx0213-ascii
177
178 Same as Shift_JISX0213 but ASCII (ISO/IEC 646 IRV)
179 instead of JIS X 0201:1997 Latin character set.
180
181 =cut
182
183 sub __2022__common ($) {
184 my $C = shift->SUPER::__2022__common;
185 $C->{G0} = $Encode::Charset::CHARSET{G94}->{B}; ## ASCII
186 $C;
187 }
188 sub __encode_map ($) {
189 [qw/ucs_to_ascii ucs_to_jisx0213_2000_1 ucs_to_jisx0213_2000_2 ucs_to_jisx0201_katakana/];
190 }
191 sub __decode_map ($) {
192 [qw/jisx0213_2000_1_to_ucs jisx0213_2000_2_to_ucs jisx0201_katakana_to_ucs/];
193 }
194
195 1;
196 __END__
197
198 =head1 SEE ALSO
199
200 JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji
201 set for information interchange", Japan Industrial Standards
202 Committee (JISC) <http://www.jisc.go.jp/>, 1997.
203
204 JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji
205 sets for information interchange", Japan Industrial Standards
206 Committee (JISC) <http://www.jisc.go.jp/>, 2000.
207
208 Encode, perlunicode
209
210 [IANAREG] "CHARACTER SETS", IANA <http://www.iana.org/>,
211 <http://www.iana.org/assignments/character-sets>.
212 The charset registry for IETF <http://www.ietf.org/> standards.
213 (Note that in this registry two shift JISes are registered,
214 "Shift_JIS" and "Windows-31j". Former is JIS X 0208:1997's
215 definition and later is the Windows standard character set.)
216
217 =head1 LICENSE
218
219 Copyright 2002 Nanashi-san
220
221 This library is free software; you can redistribute it
222 and/or modify it under the same terms as Perl itself.
223
224 =cut
225
226 # $Date: 2002/09/21 01:34:08 $
227 ### SJIS.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24