/[suikacvs]/perl/lib/Encode/Table/tool/esr2pm.pl
Suika

Contents of /perl/lib/Encode/Table/tool/esr2pm.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (show annotations) (download)
Wed Dec 18 12:57:40 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.6: +2 -2 lines
File MIME type: text/plain
Pictogram charsets support

1 #!/usr/bin/perl
2 use strict;
3
4 =head1 NEMA
5
6 esr2pm.pl --- Simple Encode module generator
7
8 =cut
9
10 my %ReplaceText = &ReplaceText;
11 my %Info;
12 my $mode = 0;
13 my $section = '';
14 my %item;
15 while (<>) {
16 if ($mode == 1) { ## In a block
17 if (/^\}$/) {
18 $item{ModuleName} ||= $item{Name};
19 $item{ModuleName} =~ s/[^0-9A-Za-z_]/_/g;
20 push @{ $Info{encoding} }, {%item};
21 $mode = 0; %item = ();
22 } elsif (/^(.+):$/) {
23 $section = $1;
24 } elsif (/^\t(.*)$/) {
25 my $l = $1;
26 if ($section =~ /^(?:En|De)code/ || $section == 'Cversion') {
27 if ($l =~ /^->(.+):C$/) {
28 my $name = $1;
29 if ($name eq 'iso2022') {
30 $l = q(($s, %s) = Encode::ISO2022::internal_to_iso2022 ($s, $C););
31 } elsif ($name eq 'sjis') {
32 $l = q(($s, %s) = Encode::SJIS::internal_to_sjis ($s, $C););
33 }
34 } elsif ($l =~ /^->(.+)$/) {
35 $l = qq(my \$e = Encode::find_encoding (q($1))->__clone;\n\$e->{_encode_mapping} = 0;\n\$s = \$e->encode (\$s););
36 $Info{__use_clone} = 1;
37 } elsif ($l =~ /^<-(.+):C$/) {
38 my $name = $1;
39 if ($name eq 'iso2022') {
40 $l = q($s = Encode::ISO2022::iso2022_to_internal ($s, $C););
41 } elsif ($name eq 'sjis') {
42 $l = q($s = Encode::SJIS::sjis_to_internal ($s, $C););
43 }
44 } elsif ($l =~ /^<-(.+)$/) {
45 $l = qq(my \$e = Encode::find_encoding (q($1))->__clone;\n\$e->{_decode_mapping} = 0;\n\$s = \$e->decode (\$s););
46 } elsif ($l =~ /^(?:<=|=>)(.+)$/) {
47 $l = qq(\$s = Encode::Table::convert (\$s, [qw($1)], \%tblopt) if \$tbl;);
48 } elsif ($l =~ /^utf8:o(n|ff)$/) {
49 $l = qq(Encode::_utf8_o$1 (\$s););
50 } elsif ($l =~ /^C:([GC][^=:]+)=([^:]+):([^\t]+)(\t\s*\#\#.+)?$/) {
51 $l = qq(\$C->{$1} = \$Encode::Charset::CHARSET{$2}->{'$3'};$4);
52 } elsif ($l =~ /^C:option:([^=]+)=([^\t]+?)(\t\s*\#\#.+)?$/) {
53 $l = qq(\$C->{option}->$1 = $2;$3);
54 } elsif ($l =~ /^C:designate:\*:default=(-?[0-3]+)$/) {
55 $l = qq(\$C->{option}->{designate_to}->{G94}->{default} = $1;\n).
56 qq(\$C->{option}->{designate_to}->{G96}->{default} = $1;\n).
57 qq(\$C->{option}->{designate_to}->{G94n}->{default} = $1;\n).
58 qq(\$C->{option}->{designate_to}->{G96n}->{default} = $1;\n).
59 qq(\$C->{option}->{designate_to}->{C0}->{default} = $1;\n).
60 qq(\$C->{option}->{designate_to}->{C1}->{default} = $1;);
61 } elsif ($l =~ /^C:designate:\*drcs:default=(-?[0-3]+)$/) {
62 $l = qq(for (0x30..0x7E) {\n).
63 qq(my \$F = chr \$_;\n).
64 qq( \$C->{option}->{designate_to}->{G94}->{\$F} = $1;\n).
65 qq( \$C->{option}->{designate_to}->{G96}->{\$F} = $1;\n).
66 qq( \$C->{option}->{designate_to}->{G94n}->{\$F} = $1;\n).
67 qq( \$C->{option}->{designate_to}->{G96n}->{\$F} = $1;\n).
68 qq(});
69 } elsif ($l =~ /^C:designate:\*private:default=(-?[0-3]+)$/) {
70 $l = qq(for (0x30..0x3F) {\n).
71 qq( my \$F = chr \$_;\n).
72 qq( for my \$c (qw/G94 G96 G94n G96n C0 C1/) {\n).
73 qq( \$C->{option}->{designate_to}->{\$c}->{\$F} = $1;\n).
74 qq( \$C->{option}->{designate_to}->{\$c}->{'\x21'.\$F} = $1;\n).
75 qq( \$C->{option}->{designate_to}->{\$c}->{'\x22'.\$F} = $1;\n).
76 qq( \$C->{option}->{designate_to}->{\$c}->{'\x23'.\$F} = $1;\n).
77 qq( }\n).
78 qq(});
79 } elsif ($l =~ /^C:designate:([^:=]+):([^=]+)=(-?[0-3]+)(\t\s*\#\#.+)?$/) {
80 $l = qq(\$C->{option}->{designate_to}->{$1}->{'$2'} = $3;$4);
81 } elsif ($l =~ /^C:([GC][LR])=undef(\t\s*\#\#.+)?$/) {
82 $l = qq(\$C->{$1} = undef;$3);
83 } elsif ($l =~ /^C:([GC][LR])=(..)(\t\s*\#\#.+)?$/) {
84 $l = qq(\$C->{$1} = '$2';$3);
85 } elsif ($l =~ /^C:bit=([78])(\t\s*\#\#.+)?$/) {
86 $l = qq(\$C->{bit} = $1;$2);
87 } elsif ($l =~ /^use:table:(.+)$/) {
88 $l = qq(eval q(use Encode::Table::$1) unless \$Encode::Table::${1}::VERSION;);
89 } elsif ($l =~ /^require:private:(.+)$/) {
90 $l = qq(eval q(use Encode::Charset::Private q(:$1)) or die \$\@;);
91 } elsif ($l =~ /^use:private:(.+)$/) {
92 $l = qq(eval q(use Encode::Charset::Private q(:$1)) or die \$\@;\neval q(Encode::Charset::Private::designate_$1 (\$C)););
93 } elsif ($l =~ /^use:charset:(\S+)(?:\s+(.+))?$/) {
94 $l = qq(eval q(require Encode::Charset::$1;\neval q(Encode::Charset::$1->import (qw(@{[$2?$2:':all']})));\nEncode::Charset::${1}::designate (\$C)););
95 } elsif ($l =~ /^use:(.+)$/) {
96 $l = qq(eval q(use $1) unless \$${1}::VERSION;);
97 } elsif ($l =~ /^!\&\s+(\S+)/) {
98 $l = qq(\$s = $1 (\$C, \$s););
99 } elsif ($l =~ /^\#;/) {
100 $l = undef;
101 }
102 }
103 if ($item{$section}) {
104 $item{$section} .= "\n".$l if defined $l;
105 } else {
106 $item{$section} = $l;
107 }
108 }
109 } else { ## Out of blocks
110 if (/^\{$/) {
111 $mode = 1;
112 } elsif (/^(.+):$/) {
113 $section = $1;
114 } elsif (/^\t(.*)$/) {
115 my $t = $1;
116 if ($Info{$section}) {
117 $Info{$section} .= "\n".$t;
118 } else {
119 $Info{$section} = $t;
120 }
121 }
122 }
123 }
124 $ReplaceText{MYSELF} = qq(Encode::$Info{Name});
125
126 print <<EOH;
127 ## This file is auto-generated (at @{[ sprintf '%04d-%02d-%02dT%02d:%02d:%02dZ', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3,2,1,0] ]}).
128 ## Do not edit by hand!
129
130 =head1 NAME
131
132 $ReplaceText{MYSELF} --- $Info{ShortDescription}
133 @{[ $Info{'POD:DESCRIPTION'} ? qq{
134 =head1 DESCRIPTION
135
136 $Info{'POD:DESCRIPTION'}} : '']}
137
138 =cut
139
140 package $ReplaceText{MYSELF};
141 use 5.7.3;
142 use strict;
143 our \$VERSION = q(@{[sprintf '%04d.%02d%02d', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3]]});
144
145 =head1 ENCODINGS
146 @{[ $Info{'POD:ENCODING:PREAMBLE'} ? qq(
147 $Info{'POD:ENCODING:PREAMBLE'}
148 ) : '']}
149 =over 8
150
151 =cut
152
153 EOH
154
155 my @alias_def;
156
157 for my $encode (@{ $Info{encoding} }) {
158 $encode->{EncodeFull} = $encode->{'Encode:Prepare'}."\n".$encode->{Encode};
159 $encode->{DecodeFull} = $encode->{'Decode:Prepare'}."\n".$encode->{Decode};
160 for my $ED (qw/Encode Decode EncodeFull DecodeFull Cversion/) {
161 my $ed = $ED =~ /Encode/ ? 'encode' : 'decode';
162 if ($encode->{$ED} =~ /\$C->{option}->{fallback_from_ucs} = /) {
163 $encode->{$ED} =~ s((?=\$C->{option}->{fallback_from_ucs} = ))
164 (\$C->{option}->{fallback_from_ucs_2} = \$C->{option}->{fallback_from_ucs};\n);
165 }
166 if ($encode->{$ED} =~ /Encode::Table/) {
167 $encode->{$ED} = q/require Encode::Table;
168 my $tbl = defined $obj->{_/.$ed.q/_mapping} ? $obj->{_/.$ed.q/_mapping} : 1;
169 my %tblopt = (-autoload => defined $obj->{_/.$ed.q/_mapping_autoload} ? $obj->{_/.$ed.q/_mapping_autoload} : 1);
170 /.$encode->{$ED};
171 }
172 if ($encode->{$ED} =~ /\$C/) {
173 if ($ED ne 'Cversion' && $encode->{Cversion}) {
174 $encode->{$ED} = ($ED =~ /Full/ ? qq(my \$C = \$obj->__code_version;\n) : '')
175 .qq(\$C->{_encoder} = \$obj;\n)
176 .($ED eq 'EncodeFull' ? qq(\$C->{option}->{fallback_from_ucs} = \$obj->{_encode_fallback} ? \$obj->{_encode_fallback} :
177 \$chk & Encode::DIE_ON_ERR ? 'croak' :
178 \$chk & Encode::RETURN_ON_ERR ? (\$chk & Encode::WARN_ON_ERR ? 'quiet+warn' : 'quiet') :
179 \$chk & Encode::PERLQQ ? 'perl' : \$chk & Encode::HTMLCREF ? 'sgml' :
180 \$chk & Encode::XMLCREF ? 'sgml-hex' : 'replacement';
181 ) : '')
182 .$encode->{$ED};
183 } elsif ($encode->{Encode} =~ /internal_to_sjis/i || $encode->{Decode} =~ /sjis_to_internal/i) {
184 $encode->{$ED} = qq(require Encode::Charset;\nmy \$C = &Encode::Charset::new_object_sjis;\n).$encode->{$ED};
185 } else {
186 $encode->{$ED} = qq(require Encode::Charset;\nmy \$C = &Encode::Charset::new_object;\n).$encode->{$ED};
187 }
188 }
189 for (qw/ISO2022 SJIS/) {
190 if ($encode->{$ED} =~ /Encode::$_/) {
191 $encode->{$ED} = qq(require Encode::$_;\n).$encode->{$ED};
192 }
193 }
194 $encode->{$ED} =~ s/\n/\n /g;
195 }
196
197 ## Define a new encoding
198 if ($encode->{Name}) {
199 print <<EOH;
200
201 package Encode::$Info{Name}::$encode->{ModuleName};
202 our \$VERSION = \$Encode::$Info{Name}::VERSION;
203 use base qw(Encode::Encoding);
204 __PACKAGE__->Define (qw/$encode->{Name} $encode->{Alias}/);
205
206 =item $encode->{Name}
207
208 $encode->{Description}@{[ $encode->{Alias} ? '
209 (Alias: ' . join (', ', split /\s+/, $encode->{Alias}) . ')' : '' ]}
210
211 =cut
212
213 sub encode (\$\$;\$) {
214 my (\$obj, \$s, \$chk) = \@_;
215 my \%s;
216 $encode->{EncodeFull}
217 if (\$s{die}) { ## FB_CROAK
218 if (\$Carp::VERSION) { Carp::croak ('encode: '.\$s{reason}) }
219 else { die ('encode: '.\$s{reason}) }
220 } elsif (\$s{halfway}) { ## FB_QUIET, FB_WARNING
221 \$_[1] = substr (\$_[1], \$s{converted_length});
222 if (\$s{warn}) {
223 if (\$Carp::VERSION) { Carp::carp ('encode: '.\$s{reason}) }
224 else { warn ('encode: '.\$s{reason}) }
225 }
226 } else {
227 \$_[1] = '' if \$chk;
228 }
229 return \$s;
230 }
231
232 sub _encode_internal (\$\$\$) {
233 my (\$obj, \$s, \$C) = \@_;
234 my \%s;
235 $encode->{Encode}
236 if (\$s{die}) {
237 if (\$Carp::VERSION) { Carp::croak ('encode: '.\$s{reason}) }
238 else { die ('encode: '.\$s{reason}) }
239 }
240 return \$s;
241 }
242
243 sub decode (\$\$;\$) {
244 my (\$obj, \$s, \$chk) = \@_;
245 $encode->{DecodeFull}
246 \$_[1] = '' if \$chk;
247 return \$s;
248 }
249 @{[ $encode->{Cversion} ? qq(
250 sub __code_version (\$) {
251 $encode->{Cversion}
252 \$C;
253 }):'']}
254 EOH
255 ## Define a set of aliases
256 } elsif ($encode->{AliasOf}) {
257 my @alias = split /\s+/, $encode->{Alias};
258 my $alias1 = shift @alias;
259 print <<EOH;
260
261 =item $alias1
262
263 $encode->{Description}@{[ @alias ? '
264 (Alias: ' . join (', ', @alias) . ')' : '' ]}
265
266 This name is an alias of $encode->{AliasOf}.
267
268 =cut
269
270 EOH
271 ## Encode::Alias is already 'use'ed by Encode, so we don't have to 'require' it.
272 for ($alias1, @alias) {
273 ## Aliases is not defined here, because 'parent' of aliases can be defined
274 ## in later part of THIS module.
275 push @alias_def, "Encode::Alias::define_alias ('$_' => '$encode->{AliasOf}');\n";
276 }
277 }
278 }
279
280 print @alias_def;
281 print <<EOH;
282
283 =back
284 @{[ $Info{'POD:ENCODING:POSTAMBLE'} ? qq(
285 $Info{'POD:ENCODING:POSTAMBLE'}
286 ) : '']}
287 =cut
288 @{[$Info{__use_clone} ? q(
289 sub Encode::Encoding::__clone ($) {
290 my $self = shift;
291 bless {%$self}, ref $self;
292 }):'']}
293
294 EOH
295
296 for my $name (qw/EXAMPLE/, 'SEE ALSO', 'TO DO', qw/AUTHORS LICENSE/) {
297 if ($Info{qq(POD:$name)}) {
298 $Info{qq(POD:$name)} =~ s/%%([A-Za-z0-9_]+)%%/$ReplaceText{$1}/g;
299 print <<EOH;
300 =head1 $name
301
302 $Info{qq(POD:$name)}
303
304 EOH
305 }
306 }
307
308 print <<EOH;
309 =cut
310
311 1;
312 EOH
313
314 sub ReplaceText () {
315 my %RT = (
316 GNUGPL2 => q{This program is free software; you can redistribute it and/or modify
317 it under the terms of the GNU General Public License as published by
318 the Free Software Foundation; either version 2 of the License, or
319 (at your option) any later version.
320
321 This program is distributed in the hope that it will be useful,
322 but WITHOUT ANY WARRANTY; without even the implied warranty of
323 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
324 GNU General Public License for more details.
325
326 You should have received a copy of the GNU General Public License
327 along with this program; see the file COPYING. If not, write to
328 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
329 Boston, MA 02111-1307, USA.},
330 PerlLicense => q{This library is free software; you can redistribute it
331 and/or modify it under the same terms as Perl itself.},
332 ReferenceIANAREG => q([IANAREG] "CHARACTER SETS", IANA <http://www.iana.org/>,
333 <http://www.iana.org/assignments/character-sets>.
334 The charset registry for IETF <http://www.ietf.org/> standards.),
335 ReferenceJISX0208_1978 => q(JIS C 6226 (JIS X 0208)-1978, "Code of Japanese graphic
336 character set for information interchange", Japan Industrial Standards
337 Committee (JISC) <http://www.jisc.go.jp/>, 1978.),
338 ReferenceJISX0208_1983 => q(JIS C 6226 (JIS X 0208)-1983, "Code of Japanese graphic
339 character set for information interchange", Japan Industrial Standards
340 Committee (JISC) <http://www.jisc.go.jp/>, 1983.),
341 ReferenceJISX0208_1990 => q(JIS X 0208-1990, "Code of Japanese graphic character
342 set for information interchange", Japan Industrial Standards
343 Committee (JISC) <http://www.jisc.go.jp/>, 1990.),
344 ReferenceJISX0212_1990 => q(JIS X 0212-1990, "Code of supplementary Japanese graphic
345 character set for information interchange", Japan Industrial Standards
346 Committee (JISC) <http://www.jisc.go.jp/>, 1990.),
347 ReferenceJISX0221_1995 => q(JIS X 0221-1995, "Universal multi-octet coded character
348 set (UCS)", Japan Industrial Standards Committee
349 <http://www.jisc.go.jp/>, 1995. IDT with ISO/IEC 10646-1:1993
350 but three additional appendixes.),
351 ReferenceJISX0201_1997 => q(JIS X 0201:1997, "7-bit and 8-bit coded character
352 set for information interchange", Japan Industrial Standards
353 Committee (JISC) <http://www.jisc.go.jp/>, 1997.),
354 ReferenceJISX0208_1997 => q(JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji
355 set for information interchange", Japan Industrial Standards
356 Committee (JISC) <http://www.jisc.go.jp/>, 1997.),
357 ReferenceJISX0213_2000 => q(JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji
358 sets for information interchange", Japan Industrial Standards
359 Committee (JISC) <http://www.jisc.go.jp/>, 2000.),
360 ReferenceRFC1468 => q(RFC 1468, "Japanese Character Encoding for Internet Messages",
361 J. Murai, et al, IETF <http://www.ietf.org/>, June 1993.
362 <urn:ietf:rfc:1468>.),
363 YEAR => (gmtime)[5]+1900,
364 );
365 %RT;
366 }
367
368 =head1 SEE ALSO
369
370 L<Encode>, L<Encode::Table>,
371 SuikaWiki:esr2pm <http://suika.fam.cx/~wakaba/-temp/wiki/wiki?esr2pm>
372
373 =head1 LICENSE
374
375 Copyright 2002 Wakaba <w@suika.fam.cx>
376
377 This library is free software; you can redistribute it
378 and/or modify it under the same terms as Perl itself.
379
380 Note that modules generated by this script should be
381 licensed by the licenser of source file so that copyright
382 holder of this script does not claim any right to them.
383
384 =cut
385
386 # $Date: 2002/12/18 10:21:09 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24