/[suikacvs]/perl/lib/Encode/Charset.pm
Suika

Contents of /perl/lib/Encode/Charset.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Fri Sep 20 14:01:45 2002 UTC (22 years, 1 month ago) by wakaba
Branch: MAIN
2002-09-20  Wakaba <w@suika.fam.cx>

	* ISO2022.pm:
	- (iso2022_to_internal): New function.
	- (_iso2022_to_internal): Renamed from iso2022_to_internal.
	- (iso2022_to_internal): Experimental support of DOCS.
	- (internal_to_iso2022): Output in UCS coding systems
	if the character is unable to be encoded in ISO/IEC 2022
	coded character sets.
	- (_i2o): New procedure.
	- ($C->{option}->{designate_to}->{coding_system}): New option
	property object.
	- ($C->{coding_system}): New property.
	- (%CODING_SYSTEM): New hash.  (Alias to Encode::Charset's one.)
	* Charset.pm (make_initial_coding_system): Set 'reset_state'
	property with 1 value to coding systems of DOCS with 02/14 I byte.

1 wakaba 1.1
2     =head1 NAME
3    
4     Encode::Charset --- Coded Character Sets objects,
5     used by Encode::ISO2022, Encode::SJIS, and other modules.
6    
7     =cut
8    
9     package Encode::Charset;
10     use strict;
11     use vars qw(%CHARSET %CODING_SYSTEM $VERSION);
12     $VERSION=do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
13    
14     ## --- Make initial charset definitions
15     &_make_initial_charsets;
16     sub _make_initial_charsets () {
17     for my $f (0x30..0x7E) {
18     my $F = pack 'C', $f;
19     for ('', '!', '"', '#') {
20     $CHARSET{G94}->{ $_.$F }->{dimension} = 1;
21     $CHARSET{G94}->{ $_.$F }->{chars} = 94;
22     $CHARSET{G94}->{ $_.$F }->{ucs} =
23     {'' => 0xE90940, '!' => 0xE944A0, '"' => 0xE98000, '#' => 0xE9BB60}->{ $_ }
24     + 94 * ($f-0x30);
25    
26     $CHARSET{G96}->{ $_.$F }->{dimension} = 1;
27     $CHARSET{G96}->{ $_.$F }->{chars} = 96;
28     $CHARSET{G96}->{ $_.$F }->{ucs} =
29     {'' => 0xE926A0, '!' => 0xE96200, '"' => 0xE99D60, '#' => 0xE9D8C0}->{ $_ }
30     + 96 * ($f-0x30);
31    
32     $CHARSET{C0}->{ $_.$F }->{dimension} = 1;
33     $CHARSET{C0}->{ $_.$F }->{chars} = 32;
34     $CHARSET{C0}->{ $_.$F }->{ucs} =
35     {'' => 0x70000000, '!' => 0x70001400,
36     '"' => 0x70002800, '#' => 0x70003C00}->{ $_ } + 32 * ($f-0x30);
37    
38     $CHARSET{C1}->{ $_.$F }->{dimension} = 1;
39     $CHARSET{C1}->{ $_.$F }->{chars} = 32;
40     $CHARSET{C1}->{ $_.$F }->{ucs} =
41     {'' => 0x70000A00, '!' => 0x70001E00,
42     '"' => 0x70003200, '#' => 0x70004600}->{ $_ } + 32 * ($f-0x30);
43    
44     $CHARSET{G94}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS
45     $CHARSET{G94}->{ ' '.$_.$F }->{chars} = 94;
46     $CHARSET{G94}->{ ' '.$_.$F }->{ucs} =
47     {'' => 0x70090940, '!' => 0x700944A0,
48     '"' => 0x70098000, '#' => 0x7009BB60}->{ $_ } + 94 * ($f-0x30);
49    
50     $CHARSET{G96}->{ ' '.$_.$F }->{dimension} = 1; ## DRCS
51     $CHARSET{G96}->{ ' '.$_.$F }->{chars} = 96;
52     $CHARSET{G96}->{ ' '.$_.$F }->{ucs} =
53     {'' => 0x700926A0, '!' => 0x70096200,
54     '"' => 0x70099D60, '#' => 0x7009D8C0}->{ $_ } + 96 * ($f-0x30);
55     }
56     }
57     for my $f (0x30..0x5F, 0x7E) {
58     my $F = pack 'C', $f;
59     for ('', '!', '"', '#', ' ') {
60     $CHARSET{G94n}->{ $_.$F }->{dimension} = 2;
61     $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
62     $CHARSET{G94n}->{ $_.$F }->{ucs} =
63     ({'' => 0xE9F6C0}->{ $_ }||0) + 94*94 * ($f-0x30);
64     ## BUG: 94^n sets with I byte have no mapping area
65    
66     $CHARSET{G96n}->{ $_.$F }->{dimension} = 2;
67     $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
68     $CHARSET{G96n}->{ $_.$F }->{ucs} =
69     ({'' => 0xF4C000}->{ $_ }||0) + 96*96 * ($f-0x30);
70     ## BUG: 94^n DRCSes with I byte have no mapping area
71     }
72     }
73     for (0x60..0x6F) {
74     my $F = pack 'C', $_;
75     ## BUG: 9x^3 sets have no mapping area
76     for ('', '!', '"', '#', ' ') {
77     $CHARSET{G94n}->{ $_.$F }->{dimension} = 3;
78     $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
79    
80     $CHARSET{G96n}->{ $_.$F }->{dimension} = 3;
81     $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
82     }
83     }
84     for (0x70..0x7D) {
85     my $F = pack 'C', $_;
86     ## BUG: 9x^4 sets have no mapping area
87     for ('', '!', '"', '#', ' ') {
88     $CHARSET{G94n}->{ $_.$F }->{dimension} = 4;
89     $CHARSET{G94n}->{ $_.$F }->{chars} = 94;
90    
91     $CHARSET{G96n}->{ $_.$F }->{dimension} = 4;
92     $CHARSET{G96n}->{ $_.$F }->{chars} = 96;
93     }
94     }
95     for my $f (0x40..0x4E) {
96     my $F = pack 'C', $f;
97     $CHARSET{G96n}->{ ' '.$F }->{dimension} = 2;
98     $CHARSET{G96n}->{ ' '.$F }->{chars} = 96;
99     $CHARSET{G96n}->{ ' '.$F }->{ucs} = 0xF0000 + 96*96*($f-0x40);
100     ## U+F0000-U+10F7FF (private) -> ESC 02/04 02/00 <I> (04/00-04/14) (DRCS)
101     }
102    
103     $CHARSET{G94}->{B}->{ucs} = 0x21; ## ASCII
104     $CHARSET{G96}->{A}->{ucs} = 0xA0; ## ISO 8859-1
105    
106     $CHARSET{G94n}->{'B@'}->{dimension} = 2; ## JIS X 0208-1990
107     $CHARSET{G94n}->{'B@'}->{chars} = 94;
108     $CHARSET{G94n}->{'B@'}->{ucs} = 0xE9F6C0 + 94*94*79;
109    
110     ## -- Control character sets
111     $CHARSET{C0}->{'@'}->{ucs} = 0x00; ## ISO/IEC 6429 C0
112     for ("\x40", "\x43", "\x44", "\x45", "\x46", "\x49", "\x4A", "\x4B", "\x4C") {
113     $CHARSET{C0}->{$_}->{C_LS0} = "\x0F";
114     $CHARSET{C0}->{$_}->{C_LS1} = "\x0E";
115     $CHARSET{C0}->{$_}->{r_LS0} = '\x0F';
116     $CHARSET{C0}->{$_}->{r_LS1} = '\x0E';
117     }
118     for ("\x40", "\x44", "\x45", "\x46", "\x48", "\x4C") {
119     $CHARSET{C0}->{$_}->{reset_all} = {"\x0A" => 1, "\x0B" => 1,
120     "\x0C" => 1, "\x0D" => 1};
121     }
122     $CHARSET{C0}->{"\x43"}->{reset_all} = {"\x0A" => 1};
123     $CHARSET{C0}->{"\x44"}->{C_SS2} = "\x1C";
124     $CHARSET{C0}->{"\x44"}->{r_SS2} = '\x1C';
125     for ("\x45", "\x49", "\x4A", "\x4B") {
126     $CHARSET{C0}->{$_}->{C_SS2} = "\x19";
127     $CHARSET{C0}->{$_}->{C_SS3} = "\x1D";
128     $CHARSET{C0}->{$_}->{r_SS2} = '\x19';
129     $CHARSET{C0}->{$_}->{r_SS3} = '\x1D';
130     }
131     $CHARSET{C0}->{"\x4C"}->{C_SS2} = "\x19";
132     $CHARSET{C0}->{"\x4C"}->{r_SS2} = '\x19';
133    
134     $CHARSET{C1}->{'64291991C1'}->{dimension} = 1; ## ISO/IEC 6429:1991 C1
135     $CHARSET{C1}->{'64291991C1'}->{chars} = 32;
136     $CHARSET{C1}->{'64291991C1'}->{ucs} = 0x80;
137     for ("\x43", "\x45", "\x47", '64291991C1') {
138     $CHARSET{C1}->{$_}->{C_SS2} = "\x8E";
139     $CHARSET{C1}->{$_}->{C_SS3} = "\x8F";
140     $CHARSET{C1}->{$_}->{r_SS2} = '\x8E';
141     $CHARSET{C1}->{$_}->{r_SS3} = '\x8F';
142     $CHARSET{C1}->{$_}->{r_SS2_ESC} = '\x1B\x4E';
143     $CHARSET{C1}->{$_}->{r_SS3_ESC} = '\x1B\x4F';
144     }
145     for ("\x43", '64291991C1') {
146     $CHARSET{C1}->{$_}->{r_CSI} = '\x9B';
147     $CHARSET{C1}->{$_}->{r_CSI_ESC} = '\x1B\x5B';
148     $CHARSET{C1}->{$_}->{r_DCS} = '\x90';
149     $CHARSET{C1}->{$_}->{r_ST} = '\x9C';
150     $CHARSET{C1}->{$_}->{r_OSC} = '\x9D';
151     $CHARSET{C1}->{$_}->{r_PM} = '\x9E';
152     $CHARSET{C1}->{$_}->{r_APC} = '\x9F';
153     $CHARSET{C1}->{$_}->{reset_all} = {"\x85"=>1, "\x90"=>1,
154     "\x9C"=>1, "\x9D"=>1, "\x9E"=>1, "\x9F"=>1};
155     }
156     $CHARSET{C1}->{'64291991C1'}->{r_SCI} = '\x9A';
157    
158     $CHARSET{single_control}->{Fs} ={ucs => 0x70005000, chars => 32, dimension => 1};
159     $CHARSET{single_control}->{'3F'} ={ucs => 0x70005020, chars => 80, dimension => 1};
160     $CHARSET{single_control}->{'3F!'}={ucs => 0x70005070, chars => 80, dimension => 1};
161     $CHARSET{single_control}->{'3F"'}={ucs => 0x700050C0, chars => 80, dimension => 1};
162     $CHARSET{single_control}->{'3F#'}={ucs => 0x70005110, chars => 80, dimension => 1};
163     }
164    
165     &make_initial_coding_system;
166     sub make_initial_coding_system {
167     for (0x30..0x7E) {
168     my $F = chr $_;
169     $CODING_SYSTEM{$F} = {};
170     $CODING_SYSTEM{"\x2F".$F} = {reset_state => 1};
171     }
172     }
173    
174     sub make_charset (%) {
175     my %set = @_;
176     my $setid = qq($set{I}$set{F}$set{revision});
177     my $settype = $set{type} || 'G94';
178     delete $set{type}, $set{I}, $set{F}, $set{revision};
179     $CHARSET{ $settype }->{ $setid } = \%set;
180     }
181    
182     1;
183     __END__
184    
185     =head1 AUTHORS
186    
187     Nanashi-san
188    
189     Wakaba <w@suika.fam.cx>
190    
191     =head1 LICENSE
192    
193     Copyright 2002 AUTHORS
194    
195     This library is free software; you can redistribute it
196     and/or modify it under the same terms as Perl itself.
197    
198     =cut
199    
200     # $Date: 2002/09/16 06:35:16 $
201     ### Charset.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24