/[suikacvs]/perl/lib/Encode/Charset.pm
Suika

Contents of /perl/lib/Encode/Charset.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (show annotations) (download)
Sat Oct 12 11:03:00 2002 UTC (22 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.3: +12 -9 lines
2002-10-12  Nanashi-san

	* SJIS.pm: New module.
	* SJIS/: New directory.
	(Commited by Wakaba <w@suika.fam.cx>)

1
2 =head1 NAME
3
4 Encode::Charset --- Coded Character Sets objects,
5 used by Encode::ISO2022, Encode::SJIS, and other modules.
6
7 =cut
8
9 package Encode::Charset;
10 use strict;
11 use vars qw(%CHARSET %CODING_SYSTEM $VERSION);
12 $VERSION=do{my @r=(q$Revision: 1.3 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
13
14 ## --- Make initial charset definitions
15 &_make_initial_charsets;
16 sub _make_initial_charsets () {
17 for my $f (0x30..0x7E) {
18 my $F = pack 'C', $f;
19 for ('', '!', '"', '#') {
20 $CHARSET{G94}->{ $_.$F }->{dimension} = 1;
21 $CHARSET{G94}->{ $_.$F }->{chars} = 94;
22 $CHARSET{G94}->{ $_.$F }->{ucs} =
23 {'' => 0xE90940, '!' => 0xE944A0, '"' => 0xE98000, '#' => 0xE9BB60}->{ $_ }
24 + 94 * ($f-0x30);
25
26 $CHARSET{G96}->{ $_.$F }->{dimension} = 1;
27 $CHARSET{G96}->{ $_.$F }->{chars} = 96;
28 $CHARSET{G96}->{ $_.$F }->{ucs} =
29 {'' => 0xE926A0, '!' => 0xE96200, '"' => 0xE99D60, '#' => 0xE9D8C0}->{ $_ }
30 + 96 * ($f-0x30);
31
32 $CHARSET{C0}->{ $_.$F }->{dimension} = 1;
33 $CHARSET{C0}->{ $_.$F }->{chars} = 32;
34 $CHARSET{C0}->{ $_.$F }->{ucs} =
35 {'' => 0x70000000, '!' => 0x70001400,
36 '"' => 0x70002800, '#' => 0x70003C00}->{ $_ } + 32 * ($f-0x30);
37
38 $CHARSET{C1}->{ $_.$F }->{dimension} = 1;
39 $CHARSET{C1}->{ $_.$F }->{chars} = 32;
40 $CHARSET{C1}->{ $_.$F }->{ucs} =
41 {'' => 0x70000A00, '!' => 0x70001E00,
42 '"' => 0x70003200, '#' => 0x70004600}->{ $_ } + 32 * ($f-0x30);
43
44 $CHARSET{G94}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS
45 $CHARSET{G94}->{ ' '.$_.$F }->{chars} = 94;
46 $CHARSET{G94}->{ ' '.$_.$F }->{ucs} =
47 {'' => 0x70090940, '!' => 0x700944A0,
48 '"' => 0x70098000, '#' => 0x7009BB60}->{ $_ } + 94 * ($f-0x30);
49
50 $CHARSET{G96}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS
51 $CHARSET{G96}->{ ' '.$_.$F }->{chars} = 96;
52 $CHARSET{G96}->{ ' '.$_.$F }->{ucs} =
53 {'' => 0x700926A0, '!' => 0x70096200,
54 '"' => 0x70099D60, '#' => 0x7009D8C0}->{ $_ } + 96 * ($f-0x30);
55 }
56 }
57 for my $f (0x30..0x5F, 0x7E) {
58 my $F = pack 'C', $f;
59 for ('', '!', '"', '#', ' ') {
60 $CHARSET{G94n}->{ $_.$F }->{dimension} = 2;
61 $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
62 $CHARSET{G94n}->{ $_.$F }->{ucs} =
63 ({'' => 0xE9F6C0}->{ $_ }||0) + 94*94 * ($f-0x30);
64 ## BUG: 94^n sets with I byte have no mapping area
65
66 $CHARSET{G96n}->{ $_.$F }->{dimension} = 2;
67 $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
68 $CHARSET{G96n}->{ $_.$F }->{ucs} =
69 ({'' => 0xF4C000}->{ $_ }||0) + 96*96 * ($f-0x30);
70 ## BUG: 96^n DRCSes with I byte have no mapping area
71 }
72 }
73 $CHARSET{G94n}->{"\x20\x40"}->{ucs} = 0x70460000; ## DRCS 94^2 04/00
74
75 for (0x60..0x6F) {
76 my $F = pack 'C', $_;
77 ## BUG: 9x^3 sets have no mapping area
78 for ('', '!', '"', '#', ' ') {
79 $CHARSET{G94n}->{ $_.$F }->{dimension} = 3;
80 $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
81
82 $CHARSET{G96n}->{ $_.$F }->{dimension} = 3;
83 $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
84 }
85 }
86 for (0x70..0x7D) {
87 my $F = pack 'C', $_;
88 ## BUG: 9x^4 sets have no mapping area
89 for ('', '!', '"', '#', ' ') {
90 $CHARSET{G94n}->{ $_.$F }->{dimension} = 4;
91 $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
92
93 $CHARSET{G96n}->{ $_.$F }->{dimension} = 4;
94 $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
95 }
96 }
97 for my $f (0x40..0x4E) {
98 my $F = pack 'C', $f;
99 $CHARSET{G96n}->{ ' '.$F }->{dimension} = 2;
100 $CHARSET{G96n}->{ ' '.$F }->{chars} = 96;
101 $CHARSET{G96n}->{ ' '.$F }->{ucs} = 0xF0000 + 96*96*($f-0x40);
102 ## U+F0000-U+10F7FF (private) -> ESC 02/04 02/00 <I> (04/00-04/14) (DRCS)
103 }
104
105 $CHARSET{G94}->{B}->{ucs} = 0x21; ## ASCII
106 $CHARSET{G96}->{A}->{ucs} = 0xA0; ## ISO 8859-1
107
108 $CHARSET{G94n}->{'B@'}->{dimension} = 2; ## JIS X 0208-1990
109 $CHARSET{G94n}->{'B@'}->{chars} = 94;
110 $CHARSET{G94n}->{'B@'}->{ucs} = 0xE9F6C0 + 94*94*79;
111
112 ## SJIS G3 mapping (JIS X 0213:2000 plane 2)
113 $CHARSET{G94n}->{"\x50"}->{Csjis_kuE} = { # ku - 1
114 0xF0 => 7, 0xF1 => 3, 0xF2 => 11, 0xF3 => 13, 0xF4 => 77,
115 0xF5 => 79, 0xF6 => 81, 0xF7 => 83, 0xF8 => 85, 0xF9 => 87,
116 0xFA => 89, 0xFB => 91, 0xFC => 93,
117 };
118 $CHARSET{G94n}->{"\x50"}->{Csjis_kuO} = { # ku - 1
119 0xF0 => 0, 0xF1 => 2, 0xF2 => 4, 0xF3 => 12, 0xF4 => 14,
120 0xF5 => 78, 0xF6 => 80, 0xF7 => 82, 0xF8 => 84, 0xF9 => 86,
121 0xFA => 88, 0xFB => 90, 0xFC => 92,
122 };
123 $CHARSET{G94n}->{"\x50"}->{Csjis_first} = { reverse (
124 %{ $CHARSET{G94n}->{"\x50"}->{Csjis_kuE} },
125 %{ $CHARSET{G94n}->{"\x50"}->{Csjis_kuO} },
126 )};
127
128 ## -- Control character sets
129 $CHARSET{C0}->{'@'}->{ucs} = 0x00; ## ISO/IEC 6429 C0
130 for ("\x40", "\x43", "\x44", "\x45", "\x46", "\x49", "\x4A", "\x4B", "\x4C") {
131 $CHARSET{C0}->{$_}->{C_LS0} = "\x0F";
132 $CHARSET{C0}->{$_}->{C_LS1} = "\x0E";
133 $CHARSET{C0}->{$_}->{r_LS0} = '\x0F';
134 $CHARSET{C0}->{$_}->{r_LS1} = '\x0E';
135 }
136 for ("\x40", "\x44", "\x45", "\x46", "\x48", "\x4C") {
137 $CHARSET{C0}->{$_}->{reset_all} = {"\x0A" => 1, "\x0B" => 1,
138 "\x0C" => 1, "\x0D" => 1};
139 }
140 $CHARSET{C0}->{"\x43"}->{reset_all} = {"\x0A" => 1};
141 $CHARSET{C0}->{"\x44"}->{C_SS2} = "\x1C";
142 $CHARSET{C0}->{"\x44"}->{r_SS2} = '\x1C';
143 for ("\x45", "\x49", "\x4A", "\x4B") {
144 $CHARSET{C0}->{$_}->{C_SS2} = "\x19";
145 $CHARSET{C0}->{$_}->{C_SS3} = "\x1D";
146 $CHARSET{C0}->{$_}->{r_SS2} = '\x19';
147 $CHARSET{C0}->{$_}->{r_SS3} = '\x1D';
148 }
149 $CHARSET{C0}->{"\x4C"}->{C_SS2} = "\x19";
150 $CHARSET{C0}->{"\x4C"}->{r_SS2} = '\x19';
151
152 $CHARSET{C1}->{'64291991C1'}->{dimension} = 1; ## ISO/IEC 6429:1991 C1
153 $CHARSET{C1}->{'64291991C1'}->{chars} = 32;
154 $CHARSET{C1}->{'64291991C1'}->{ucs} = 0x80;
155 for ("\x43", "\x45", "\x47", '64291991C1') {
156 $CHARSET{C1}->{$_}->{C_SS2} = "\x8E";
157 $CHARSET{C1}->{$_}->{C_SS3} = "\x8F";
158 $CHARSET{C1}->{$_}->{r_SS2} = '\x8E';
159 $CHARSET{C1}->{$_}->{r_SS3} = '\x8F';
160 $CHARSET{C1}->{$_}->{r_SS2_ESC} = '\x1B\x4E';
161 $CHARSET{C1}->{$_}->{r_SS3_ESC} = '\x1B\x4F';
162 }
163 for ("\x43", '64291991C1') {
164 $CHARSET{C1}->{$_}->{r_CSI} = '\x9B';
165 $CHARSET{C1}->{$_}->{r_CSI_ESC} = '\x1B\x5B';
166 $CHARSET{C1}->{$_}->{r_DCS} = '\x90';
167 $CHARSET{C1}->{$_}->{r_ST} = '\x9C';
168 $CHARSET{C1}->{$_}->{r_OSC} = '\x9D';
169 $CHARSET{C1}->{$_}->{r_PM} = '\x9E';
170 $CHARSET{C1}->{$_}->{r_APC} = '\x9F';
171 $CHARSET{C1}->{$_}->{reset_all} = {"\x85"=>1, "\x90"=>1,
172 "\x9C"=>1, "\x9D"=>1, "\x9E"=>1, "\x9F"=>1};
173 }
174 $CHARSET{C1}->{'64291991C1'}->{r_SCI} = '\x9A';
175
176 $CHARSET{single_control}->{Fs} ={ucs => 0x70005000, chars => 32, dimension => 1};
177 $CHARSET{single_control}->{'3F'} ={ucs => 0x70005020, chars => 80, dimension => 1};
178 $CHARSET{single_control}->{'3F!'}={ucs => 0x70005070, chars => 80, dimension => 1};
179 $CHARSET{single_control}->{'3F"'}={ucs => 0x700050C0, chars => 80, dimension => 1};
180 $CHARSET{single_control}->{'3F#'}={ucs => 0x70005110, chars => 80, dimension => 1};
181 }
182
183 &make_initial_coding_system;
184 sub make_initial_coding_system {
185 for (0x30..0x7E) {
186 my $F = chr $_;
187 $CODING_SYSTEM{$F} = {};
188 $CODING_SYSTEM{"\x2F".$F} = {reset_state => 1};
189 }
190 $CODING_SYSTEM{Csjis} = {perl_name => 'shiftjis'};
191 }
192
193 sub make_charset (%) {
194 my %set = @_;
195 my $setid = qq($set{I}$set{F}$set{revision});
196 my $settype = $set{type} || 'G94';
197 delete $set{type}, $set{I}, $set{F}, $set{revision};
198 $CHARSET{ $settype }->{ $setid } = \%set;
199 }
200
201 ## Make a new ISO/IEC 2022-buffers object with default status
202 sub new_object {
203 my %C;
204 $C{bit} = 8;
205 $C{coding_system} = $CODING_SYSTEM{"\x40"}; ## ISO/IEC 2022
206 $C{CL} = 'C0'; $C{CR} = 'C1'; $C{ESC_Fe} = 'C1';
207 $C{C0} = $CHARSET{C0}->{"\x40"}; ## ISO/IEC 6429:1991 C0
208 $C{C1} = $CHARSET{C1}->{'64291991C1'}; ## ISO/IEC 6429:1991 C1
209 $C{GL} = 'G0'; $C{GR} = 'G1';
210 $C{G0} = $CHARSET{G94}->{"\x42"}; ## ISO/IEC 646:1991 IRV
211 #$C{G1} = $CHARSET{G96}->{"\x41"}; ## ISO/IEC 8859-1 GR
212 $C{G1} = $CHARSET{G94}->{"\x7E"}; ## empty set
213 $C{G2} = $CHARSET{G94}->{"\x7E"}; ## empty set
214 $C{G3} = $CHARSET{G94}->{"\x7E"}; ## empty set
215 $C{option} = {
216 C1invoke_to_right => 0, ## C1 invoked to: (0: ESC Fe, 1: CR)
217 G94n_designate_long => 0, ## (1: ESC 02/04 02/08 04/00..02)
218 designate_to => { ## Designated G buffer (-1: not be outputed)
219 C0 => {
220 default => 0,
221 },
222 C1 => {
223 default => 1,
224 },
225 G94 => {
226 "\x42" => 0,
227 default => 0,
228 },
229 G96 => {
230 default => 1,
231 },
232 G94n => {
233 default => 0,
234 },
235 G96n => {
236 default => 1,
237 },
238 coding_system => {
239 default => -1,
240 },
241 },
242 Ginvoke_by_single_shift => [0,0,0,0], ## Invoked by SS
243 Ginvoke_to_left => [1,1,1,1], ## Which invoked to? (1: L, 0: R)
244 private_set => { ## Private set vs Final byte
245 C0 => [],
246 C1 => [],
247 G94 => [],
248 G94n => [[],[],[],[],["\x20\x40"]],
249 G96 => [],
250 #G96n => [], ## (not implemented)
251 U96n => [], ## mule-unicode sets
252 XC1 => {
253 '64291991C1' => undef, ## ISO/IEC 6429:1991 C1
254 },
255 },
256 reset => { ## Reset status at top of line
257 Gdesignation => "\x42", ## F of designation or 0
258 Ginvoke => 1,
259 },
260 undef_char => ["\x3F", {type => 'G94', charset => 'B'}],
261 use_revision => 1, ## Output IRR
262 };
263 \%C;
264 }
265
266 sub new_object_sjis {
267 my $C = new_object;
268 $C->{coding_system} = $CODING_SYSTEM{Csjis};
269 $C->{CR} = undef;
270 $C->{GR} = 'G2'; ## 0xA1-0xDF
271 $C->{G0} = $CHARSET{G94}->{J}; ## JIS X 0201:1997 Latin
272 $C->{G1} = $CHARSET{G94n}->{"\x4F"}; ## JIS X 0213:2000
273 $C->{G2} = $CHARSET{G94}->{I}; ## JIS X 0201:1997 Katakana
274 $C->{G3} = $CHARSET{G94n}->{"\x50"}; ## JIS X 0213:2000 plane 2
275 ## Special code area (0xFD-0xFF)
276 $C->{Gsmap} = {"\xA0" => "\x{F8F0}", "\xFD" => "\x{F8F1}", "\xFE" => "\x{F8F2}", "\xFF" => "\x{F8F3}"};
277 $C->{GsmapR} = {}; ## Reversed table
278 $C->{option}->{undef_char_sjis} = "\x81\xAC";
279 $C;
280 }
281
282 1;
283 __END__
284
285 =head1 AUTHORS
286
287 Nanashi-san
288
289 Wakaba <w@suika.fam.cx>
290
291 =head1 LICENSE
292
293 Copyright 2002 AUTHORS
294
295 This library is free software; you can redistribute it
296 and/or modify it under the same terms as Perl itself.
297
298 =cut
299
300 # $Date: 2002/10/12 07:27:01 $
301 ### Charset.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24