1 |
#!/usr/bin/perl |
2 |
use strict; |
3 |
|
4 |
=head1 NEMA |
5 |
|
6 |
esr2pm.pl --- Simple Encode module generator |
7 |
|
8 |
=cut |
9 |
|
10 |
my %ReplaceText = &ReplaceText; |
11 |
my %Info; |
12 |
my $mode = 0; |
13 |
my $section = ''; |
14 |
my %item; |
15 |
while (<>) { |
16 |
if ($mode == 1) { ## In a block |
17 |
if (/^\}$/) { |
18 |
$item{ModuleName} ||= $item{Name}; |
19 |
$item{ModuleName} =~ s/[^0-9A-Za-z_]/_/g; |
20 |
push @{ $Info{encoding} }, {%item}; |
21 |
$mode = 0; %item = (); |
22 |
} elsif (/^(.+):$/) { |
23 |
$section = $1; |
24 |
} elsif (/^\t(.*)$/) { |
25 |
my $l = $1; |
26 |
if ($section =~ /^(?:En|De)code/ || $section == 'Cversion') { |
27 |
if ($l =~ /^->(.+):C$/) { |
28 |
my $name = $1; |
29 |
if ($name eq 'iso2022') { |
30 |
$l = q(($s, %s) = Encode::ISO2022::internal_to_iso2022 ($s, $C);); |
31 |
} elsif ($name eq 'sjis') { |
32 |
$l = q(($s, %s) = Encode::SJIS::internal_to_sjis ($s, $C);); |
33 |
} |
34 |
} elsif ($l =~ /^->(.+)$/) { |
35 |
$l = qq(my \$e = Encode::find_encoding (q($1))->__clone;\n\$e->{_encode_mapping} = 0;\n\$s = \$e->encode (\$s);); |
36 |
$Info{__use_clone} = 1; |
37 |
} elsif ($l =~ /^<-(.+):C$/) { |
38 |
my $name = $1; |
39 |
if ($name eq 'iso2022') { |
40 |
$l = q($s = Encode::ISO2022::iso2022_to_internal ($s, $C);); |
41 |
} elsif ($name eq 'sjis') { |
42 |
$l = q($s = Encode::SJIS::sjis_to_internal ($s, $C);); |
43 |
} |
44 |
} elsif ($l =~ /^<-(.+)$/) { |
45 |
$l = qq(my \$e = Encode::find_encoding (q($1))->__clone;\n\$e->{_decode_mapping} = 0;\n\$s = \$e->decode (\$s);); |
46 |
} elsif ($l =~ /^(?:<=|=>)(.+)$/) { |
47 |
$l = qq(\$s = Encode::Table::convert (\$s, [qw($1)], \%tblopt) if \$tbl;); |
48 |
} elsif ($l =~ /^utf8:o(n|ff)$/) { |
49 |
$l = qq(Encode::_utf8_o$1 (\$s);); |
50 |
} elsif ($l =~ /^C:([GC][^=:]+)=([^:]+):([^\t]+)(\t\s*\#\#.+)?$/) { |
51 |
$l = qq(\$C->{$1} = \$Encode::Charset::CHARSET{$2}->{'$3'};$4); |
52 |
} elsif ($l =~ /^C:option:([^=]+)=([^\t]+?)(\t\s*\#\#.+)?$/) { |
53 |
$l = qq(\$C->{option}->$1 = $2;$3); |
54 |
} elsif ($l =~ /^C:designate:\*:default=(-?[0-3]+)$/) { |
55 |
$l = qq(\$C->{option}->{designate_to}->{G94}->{default} = $1;\n). |
56 |
qq(\$C->{option}->{designate_to}->{G96}->{default} = $1;\n). |
57 |
qq(\$C->{option}->{designate_to}->{G94n}->{default} = $1;\n). |
58 |
qq(\$C->{option}->{designate_to}->{G96n}->{default} = $1;\n). |
59 |
qq(\$C->{option}->{designate_to}->{C0}->{default} = $1;\n). |
60 |
qq(\$C->{option}->{designate_to}->{C1}->{default} = $1;); |
61 |
} elsif ($l =~ /^C:designate:\*drcs:default=(-?[0-3]+)$/) { |
62 |
$l = qq(for (0x30..0x7E) {\n). |
63 |
qq(my \$F = chr \$_;\n). |
64 |
qq( \$C->{option}->{designate_to}->{G94}->{\$F} = $1;\n). |
65 |
qq( \$C->{option}->{designate_to}->{G96}->{\$F} = $1;\n). |
66 |
qq( \$C->{option}->{designate_to}->{G94n}->{\$F} = $1;\n). |
67 |
qq( \$C->{option}->{designate_to}->{G96n}->{\$F} = $1;\n). |
68 |
qq(}); |
69 |
} elsif ($l =~ /^C:designate:\*private:default=(-?[0-3]+)$/) { |
70 |
$l = qq(for (0x30..0x3F) {\n). |
71 |
qq( my \$F = chr \$_;\n). |
72 |
qq( for my \$c (qw/G94 G96 G94n G96n C0 C1/) {\n). |
73 |
qq( \$C->{option}->{designate_to}->{\$c}->{\$F} = $1;\n). |
74 |
qq( \$C->{option}->{designate_to}->{\$c}->{'\x21'.\$F} = $1;\n). |
75 |
qq( \$C->{option}->{designate_to}->{\$c}->{'\x22'.\$F} = $1;\n). |
76 |
qq( \$C->{option}->{designate_to}->{\$c}->{'\x23'.\$F} = $1;\n). |
77 |
qq( }\n). |
78 |
qq(}); |
79 |
} elsif ($l =~ /^C:designate:([^:=]+):([^=]+)=(-?[0-3]+)(\t\s*\#\#.+)?$/) { |
80 |
$l = qq(\$C->{option}->{designate_to}->{$1}->{'$2'} = $3;$4); |
81 |
} elsif ($l =~ /^C:([GC][LR])=undef(\t\s*\#\#.+)?$/) { |
82 |
$l = qq(\$C->{$1} = undef;$3); |
83 |
} elsif ($l =~ /^C:([GC][LR])=(..)(\t\s*\#\#.+)?$/) { |
84 |
$l = qq(\$C->{$1} = '$2';$3); |
85 |
} elsif ($l =~ /^C:bit=([78])(\t\s*\#\#.+)?$/) { |
86 |
$l = qq(\$C->{bit} = $1;$2); |
87 |
} elsif ($l =~ /^use:table:(.+)$/) { |
88 |
$l = qq(eval q(use Encode::Table::$1) unless \$Encode::Table::${1}::VERSION;); |
89 |
} elsif ($l =~ /^require:private:(.+)$/) { |
90 |
$l = qq(eval q(use Encode::Charset::Private q(:$1)) or die \$\@;); |
91 |
} elsif ($l =~ /^use:private:(.+)$/) { |
92 |
$l = qq(eval q(use Encode::Charset::Private q(:$1)) or die \$\@;\neval q(Encode::Charset::Private::designate_$1 (\$C));); |
93 |
} elsif ($l =~ /^use:charset:(\S+)(?:\s+(.+))?$/) { |
94 |
$l = qq(eval q(require Encode::Charset::$1;\neval q(Encode::Charset::$1->import (qw(@{[$2?$2:':all']})));\nEncode::Charset::${1}::designate (\$C));); |
95 |
} elsif ($l =~ /^use:(.+)$/) { |
96 |
$l = qq(eval q(use $1) unless \$${1}::VERSION;); |
97 |
} elsif ($l =~ /^!\&\s+(\S+)/) { |
98 |
$l = qq(\$s = $1 (\$C, \$s);); |
99 |
} elsif ($l =~ /^\#;/) { |
100 |
$l = undef; |
101 |
} |
102 |
} |
103 |
if ($item{$section}) { |
104 |
$item{$section} .= "\n".$l if defined $l; |
105 |
} else { |
106 |
$item{$section} = $l; |
107 |
} |
108 |
} |
109 |
} else { ## Out of blocks |
110 |
if (/^\{$/) { |
111 |
$mode = 1; |
112 |
} elsif (/^(.+):$/) { |
113 |
$section = $1; |
114 |
} elsif (/^\t(.*)$/) { |
115 |
my $t = $1; |
116 |
if ($Info{$section}) { |
117 |
$Info{$section} .= "\n".$t; |
118 |
} else { |
119 |
$Info{$section} = $t; |
120 |
} |
121 |
} |
122 |
} |
123 |
} |
124 |
$ReplaceText{MYSELF} = qq(Encode::$Info{Name}); |
125 |
|
126 |
print <<EOH; |
127 |
## This file is auto-generated (at @{[ sprintf '%04d-%02d-%02dT%02d:%02d:%02dZ', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3,2,1,0] ]}). |
128 |
## Do not edit by hand! |
129 |
|
130 |
=head1 NAME |
131 |
|
132 |
$ReplaceText{MYSELF} --- $Info{ShortDescription} |
133 |
@{[ $Info{'POD:DESCRIPTION'} ? qq{ |
134 |
=head1 DESCRIPTION |
135 |
|
136 |
$Info{'POD:DESCRIPTION'}} : '']} |
137 |
|
138 |
=cut |
139 |
|
140 |
package $ReplaceText{MYSELF}; |
141 |
use 5.7.3; |
142 |
use strict; |
143 |
our \$VERSION = q(@{[sprintf '%04d.%02d%02d', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3]]}); |
144 |
|
145 |
=head1 ENCODINGS |
146 |
@{[ $Info{'POD:ENCODING:PREAMBLE'} ? qq( |
147 |
$Info{'POD:ENCODING:PREAMBLE'} |
148 |
) : '']} |
149 |
=over 8 |
150 |
|
151 |
=cut |
152 |
|
153 |
EOH |
154 |
|
155 |
my @alias_def; |
156 |
|
157 |
for my $encode (@{ $Info{encoding} }) { |
158 |
$encode->{EncodeFull} = $encode->{'Encode:Prepare'}."\n".$encode->{Encode}; |
159 |
$encode->{DecodeFull} = $encode->{'Decode:Prepare'}."\n".$encode->{Decode}; |
160 |
for my $ED (qw/Encode Decode EncodeFull DecodeFull Cversion/) { |
161 |
my $ed = $ED =~ /Encode/ ? 'encode' : 'decode'; |
162 |
if ($encode->{$ED} =~ /\$C->{option}->{fallback_from_ucs} = /) { |
163 |
$encode->{$ED} =~ s((?=\$C->{option}->{fallback_from_ucs} = )) |
164 |
(\$C->{option}->{fallback_from_ucs_2} = \$C->{option}->{fallback_from_ucs};\n); |
165 |
} |
166 |
if ($encode->{$ED} =~ /Encode::Table/) { |
167 |
$encode->{$ED} = q/require Encode::Table; |
168 |
my $tbl = defined $obj->{_/.$ed.q/_mapping} ? $obj->{_/.$ed.q/_mapping} : 1; |
169 |
my %tblopt = (-autoload => defined $obj->{_/.$ed.q/_mapping_autoload} ? $obj->{_/.$ed.q/_mapping_autoload} : 1); |
170 |
/.$encode->{$ED}; |
171 |
} |
172 |
if ($encode->{$ED} =~ /\$C/) { |
173 |
if ($ED ne 'Cversion' && $encode->{Cversion}) { |
174 |
$encode->{$ED} = ($ED =~ /Full/ ? qq(my \$C = \$obj->__code_version;\n) : '') |
175 |
.qq(\$C->{_encoder} = \$obj;\n) |
176 |
.($ED eq 'EncodeFull' ? qq(\$C->{option}->{fallback_from_ucs} = \$obj->{_encode_fallback} ? \$obj->{_encode_fallback} : |
177 |
\$chk & Encode::DIE_ON_ERR ? 'croak' : |
178 |
\$chk & Encode::RETURN_ON_ERR ? (\$chk & Encode::WARN_ON_ERR ? 'quiet+warn' : 'quiet') : |
179 |
\$chk & Encode::PERLQQ ? 'perl' : \$chk & Encode::HTMLCREF ? 'sgml' : |
180 |
\$chk & Encode::XMLCREF ? 'sgml-hex' : 'replacement'; |
181 |
) : '') |
182 |
.$encode->{$ED}; |
183 |
} elsif ($encode->{Encode} =~ /internal_to_sjis/i || $encode->{Decode} =~ /sjis_to_internal/i) { |
184 |
$encode->{$ED} = qq(require Encode::Charset;\nmy \$C = &Encode::Charset::new_object_sjis;\n).$encode->{$ED}; |
185 |
} else { |
186 |
$encode->{$ED} = qq(require Encode::Charset;\nmy \$C = &Encode::Charset::new_object;\n).$encode->{$ED}; |
187 |
} |
188 |
} |
189 |
for (qw/ISO2022 SJIS/) { |
190 |
if ($encode->{$ED} =~ /Encode::$_/) { |
191 |
$encode->{$ED} = qq(require Encode::$_;\n).$encode->{$ED}; |
192 |
} |
193 |
} |
194 |
$encode->{$ED} =~ s/\n/\n /g; |
195 |
} |
196 |
|
197 |
## Define a new encoding |
198 |
if ($encode->{Name}) { |
199 |
print <<EOH; |
200 |
|
201 |
package Encode::$Info{Name}::$encode->{ModuleName}; |
202 |
our \$VERSION = \$Encode::$Info{Name}::VERSION; |
203 |
use base qw(Encode::Encoding); |
204 |
__PACKAGE__->Define (qw/$encode->{Name} $encode->{Alias}/); |
205 |
|
206 |
=item $encode->{Name} |
207 |
|
208 |
$encode->{Description}@{[ $encode->{Alias} ? ' |
209 |
(Alias: ' . join (', ', split /\s+/, $encode->{Alias}) . ')' : '' ]} |
210 |
|
211 |
=cut |
212 |
|
213 |
sub encode (\$\$;\$) { |
214 |
my (\$obj, \$s, \$chk) = \@_; |
215 |
my \%s; |
216 |
$encode->{EncodeFull} |
217 |
if (\$s{die}) { ## FB_CROAK |
218 |
if (\$Carp::VERSION) { Carp::croak ('encode: '.\$s{reason}) } |
219 |
else { die ('encode: '.\$s{reason}) } |
220 |
} elsif (\$s{halfway}) { ## FB_QUIET, FB_WARNING |
221 |
\$_[1] = substr (\$_[1], \$s{converted_length}); |
222 |
if (\$s{warn}) { |
223 |
if (\$Carp::VERSION) { Carp::carp ('encode: '.\$s{reason}) } |
224 |
else { warn ('encode: '.\$s{reason}) } |
225 |
} |
226 |
} else { |
227 |
\$_[1] = '' if \$chk; |
228 |
} |
229 |
return \$s; |
230 |
} |
231 |
|
232 |
sub _encode_internal (\$\$\$) { |
233 |
my (\$obj, \$s, \$C) = \@_; |
234 |
my \%s; |
235 |
$encode->{Encode} |
236 |
if (\$s{die}) { |
237 |
if (\$Carp::VERSION) { Carp::croak ('encode: '.\$s{reason}) } |
238 |
else { die ('encode: '.\$s{reason}) } |
239 |
} |
240 |
return \$s; |
241 |
} |
242 |
|
243 |
sub decode (\$\$;\$) { |
244 |
my (\$obj, \$s, \$chk) = \@_; |
245 |
$encode->{DecodeFull} |
246 |
\$_[1] = '' if \$chk; |
247 |
return \$s; |
248 |
} |
249 |
@{[ $encode->{Cversion} ? qq( |
250 |
sub __code_version (\$) { |
251 |
$encode->{Cversion} |
252 |
\$C; |
253 |
}):'']} |
254 |
EOH |
255 |
## Define a set of aliases |
256 |
} elsif ($encode->{AliasOf}) { |
257 |
my @alias = split /\s+/, $encode->{Alias}; |
258 |
my $alias1 = shift @alias; |
259 |
print <<EOH; |
260 |
|
261 |
=item $alias1 |
262 |
|
263 |
$encode->{Description}@{[ @alias ? ' |
264 |
(Alias: ' . join (', ', @alias) . ')' : '' ]} |
265 |
|
266 |
This name is an alias of $encode->{AliasOf}. |
267 |
|
268 |
=cut |
269 |
|
270 |
EOH |
271 |
## Encode::Alias is already 'use'ed by Encode, so we don't have to 'require' it. |
272 |
for ($alias1, @alias) { |
273 |
## Aliases is not defined here, because 'parent' of aliases can be defined |
274 |
## in later part of THIS module. |
275 |
push @alias_def, "Encode::Alias::define_alias ('$_' => '$encode->{AliasOf}');\n"; |
276 |
} |
277 |
} |
278 |
} |
279 |
|
280 |
print @alias_def; |
281 |
print <<EOH; |
282 |
|
283 |
=back |
284 |
@{[ $Info{'POD:ENCODING:POSTAMBLE'} ? qq( |
285 |
$Info{'POD:ENCODING:POSTAMBLE'} |
286 |
) : '']} |
287 |
=cut |
288 |
@{[$Info{__use_clone} ? q( |
289 |
sub Encode::Encoding::__clone ($) { |
290 |
my $self = shift; |
291 |
bless {%$self}, ref $self; |
292 |
}):'']} |
293 |
|
294 |
EOH |
295 |
|
296 |
for my $name (qw/EXAMPLE/, 'SEE ALSO', 'TO DO', qw/AUTHORS LICENSE/) { |
297 |
if ($Info{qq(POD:$name)}) { |
298 |
$Info{qq(POD:$name)} =~ s/%%([A-Za-z0-9_]+)%%/$ReplaceText{$1}/g; |
299 |
print <<EOH; |
300 |
=head1 $name |
301 |
|
302 |
$Info{qq(POD:$name)} |
303 |
|
304 |
EOH |
305 |
} |
306 |
} |
307 |
|
308 |
print <<EOH; |
309 |
=cut |
310 |
|
311 |
1; |
312 |
EOH |
313 |
|
314 |
sub ReplaceText () { |
315 |
my %RT = ( |
316 |
GNUGPL2 => q{This program is free software; you can redistribute it and/or modify |
317 |
it under the terms of the GNU General Public License as published by |
318 |
the Free Software Foundation; either version 2 of the License, or |
319 |
(at your option) any later version. |
320 |
|
321 |
This program is distributed in the hope that it will be useful, |
322 |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
323 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
324 |
GNU General Public License for more details. |
325 |
|
326 |
You should have received a copy of the GNU General Public License |
327 |
along with this program; see the file COPYING. If not, write to |
328 |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
329 |
Boston, MA 02111-1307, USA.}, |
330 |
PerlLicense => q{This library is free software; you can redistribute it |
331 |
and/or modify it under the same terms as Perl itself.}, |
332 |
ReferenceIANAREG => q([IANAREG] "CHARACTER SETS", IANA <http://www.iana.org/>, |
333 |
<http://www.iana.org/assignments/character-sets>. |
334 |
The charset registry for IETF <http://www.ietf.org/> standards.), |
335 |
ReferenceJISX0208_1978 => q(JIS C 6226 (JIS X 0208)-1978, "Code of Japanese graphic |
336 |
character set for information interchange", Japan Industrial Standards |
337 |
Committee (JISC) <http://www.jisc.go.jp/>, 1978.), |
338 |
ReferenceJISX0208_1983 => q(JIS C 6226 (JIS X 0208)-1983, "Code of Japanese graphic |
339 |
character set for information interchange", Japan Industrial Standards |
340 |
Committee (JISC) <http://www.jisc.go.jp/>, 1983.), |
341 |
ReferenceJISX0208_1990 => q(JIS X 0208-1990, "Code of Japanese graphic character |
342 |
set for information interchange", Japan Industrial Standards |
343 |
Committee (JISC) <http://www.jisc.go.jp/>, 1990.), |
344 |
ReferenceJISX0212_1990 => q(JIS X 0212-1990, "Code of supplementary Japanese graphic |
345 |
character set for information interchange", Japan Industrial Standards |
346 |
Committee (JISC) <http://www.jisc.go.jp/>, 1990.), |
347 |
ReferenceJISX0221_1995 => q(JIS X 0221-1995, "Universal multi-octet coded character |
348 |
set (UCS)", Japan Industrial Standards Committee |
349 |
<http://www.jisc.go.jp/>, 1995. IDT with ISO/IEC 10646-1:1993 |
350 |
but three additional appendixes.), |
351 |
ReferenceJISX0201_1997 => q(JIS X 0201:1997, "7-bit and 8-bit coded character |
352 |
set for information interchange", Japan Industrial Standards |
353 |
Committee (JISC) <http://www.jisc.go.jp/>, 1997.), |
354 |
ReferenceJISX0208_1997 => q(JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji |
355 |
set for information interchange", Japan Industrial Standards |
356 |
Committee (JISC) <http://www.jisc.go.jp/>, 1997.), |
357 |
ReferenceJISX0213_2000 => q(JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji |
358 |
sets for information interchange", Japan Industrial Standards |
359 |
Committee (JISC) <http://www.jisc.go.jp/>, 2000.), |
360 |
ReferenceRFC1468 => q(RFC 1468, "Japanese Character Encoding for Internet Messages", |
361 |
J. Murai, et al, IETF <http://www.ietf.org/>, June 1993. |
362 |
<urn:ietf:rfc:1468>.), |
363 |
YEAR => (gmtime)[5]+1900, |
364 |
); |
365 |
%RT; |
366 |
} |
367 |
|
368 |
=head1 SEE ALSO |
369 |
|
370 |
L<Encode>, L<Encode::Table>, |
371 |
SuikaWiki:esr2pm <http://suika.fam.cx/~wakaba/-temp/wiki/wiki?esr2pm> |
372 |
|
373 |
=head1 LICENSE |
374 |
|
375 |
Copyright 2002 Wakaba <w@suika.fam.cx> |
376 |
|
377 |
This library is free software; you can redistribute it |
378 |
and/or modify it under the same terms as Perl itself. |
379 |
|
380 |
Note that modules generated by this script should be |
381 |
licensed by the licenser of source file so that copyright |
382 |
holder of this script does not claim any right to them. |
383 |
|
384 |
=cut |
385 |
|
386 |
# $Date: 2002/12/18 10:21:09 $ |