1 |
wakaba |
1.1 |
#!/usr/bin/perl |
2 |
|
|
use strict; |
3 |
|
|
|
4 |
|
|
=head1 NEMA |
5 |
|
|
|
6 |
|
|
esr2pm.pl --- Simple Encode module generator |
7 |
|
|
|
8 |
|
|
=cut |
9 |
|
|
|
10 |
|
|
my %ReplaceText = &ReplaceText; |
11 |
|
|
my %Info; |
12 |
|
|
my $mode = 0; |
13 |
|
|
my $section = ''; |
14 |
|
|
my %item; |
15 |
|
|
while (<>) { |
16 |
|
|
if ($mode == 1) { ## In a block |
17 |
|
|
if (/^\}$/) { |
18 |
|
|
$item{ModuleName} ||= $item{Name}; |
19 |
|
|
$item{ModuleName} =~ s/[^0-9A-Za-z_]/_/g; |
20 |
|
|
push @{ $Info{encoding} }, {%item}; |
21 |
|
|
$mode = 0; %item = (); |
22 |
|
|
} elsif (/^(.+):$/) { |
23 |
|
|
$section = $1; |
24 |
wakaba |
1.2 |
} elsif (/^\t(.*)$/) { |
25 |
wakaba |
1.1 |
my $l = $1; |
26 |
wakaba |
1.3 |
if ($section == 'Encode' || $section == 'Decode' || $section == 'Cversion') { |
27 |
|
|
if ($l =~ /^->(.+):C$/) { |
28 |
|
|
my $name = $1; |
29 |
|
|
if ($name eq 'iso2022') { |
30 |
|
|
$l = q($s = Encode::ISO2022::internal_to_iso2022 ($s, $C);); |
31 |
|
|
} elsif ($name eq 'sjis') { |
32 |
|
|
$l = q($s = Encode::SJIS::internal_to_sjis ($s, $C);); |
33 |
|
|
} |
34 |
|
|
} elsif ($l =~ /^->(.+)$/) { |
35 |
wakaba |
1.1 |
$l = qq(my \$e = Encode::find_encoding (q($1))->__clone;\n\$e->{_encode_mapping} = 0;\n\$s = \$e->encode (\$s);); |
36 |
wakaba |
1.3 |
$Info{__use_clone} = 1; |
37 |
|
|
} elsif ($l =~ /^<-(.+):C$/) { |
38 |
|
|
my $name = $1; |
39 |
|
|
if ($name eq 'iso2022') { |
40 |
|
|
$l = q($s = Encode::ISO2022::iso2022_to_internal ($s, $C);); |
41 |
|
|
} elsif ($name eq 'sjis') { |
42 |
|
|
$l = q($s = Encode::SJIS::sjis_to_internal ($s, $C);); |
43 |
|
|
} |
44 |
wakaba |
1.1 |
} elsif ($l =~ /^<-(.+)$/) { |
45 |
|
|
$l = qq(my \$e = Encode::find_encoding (q($1))->__clone;\n\$e->{_decode_mapping} = 0;\n\$s = \$e->decode (\$s);); |
46 |
|
|
} elsif ($l =~ /^(?:<=|=>)(.+)$/) { |
47 |
|
|
$l = qq(\$s = Encode::Table::convert (\$s, [qw($1)], \%tblopt) if \$tbl;); |
48 |
|
|
} elsif ($l =~ /^utf8:o(n|ff)$/) { |
49 |
|
|
$l = qq(Encode::_utf8_o$1 (\$s);); |
50 |
wakaba |
1.3 |
} elsif ($l =~ /^C:([GC][^=:]+)=([^:]+):([^\t]+)(\t\s*\#\#.+)?$/) { |
51 |
|
|
$l = qq(\$C->{$1} = \$Encode::Charset::CHARSET{$2}->{'$3'};$4); |
52 |
|
|
} elsif ($l =~ /^C:option:([^=]+)=(.+)(\s+\#\#.+)?$/) { |
53 |
|
|
$l = qq(\$C->{option}->$1 = $2;$3); |
54 |
|
|
} elsif ($l =~ /^C:designate:\*:default=(-?[0-3]+)$/) { |
55 |
|
|
$l = qq(\$C->{option}->{designate_to}->{G94}->{default} = $1;\n). |
56 |
|
|
qq(\$C->{option}->{designate_to}->{G96}->{default} = $1;\n). |
57 |
|
|
qq(\$C->{option}->{designate_to}->{G94n}->{default} = $1;\n). |
58 |
|
|
qq(\$C->{option}->{designate_to}->{G96n}->{default} = $1;\n). |
59 |
|
|
qq(\$C->{option}->{designate_to}->{C0}->{default} = $1;\n). |
60 |
|
|
qq(\$C->{option}->{designate_to}->{C1}->{default} = $1;); |
61 |
|
|
} elsif ($l =~ /^C:designate:\*drcs:default=(-?[0-3]+)$/) { |
62 |
|
|
$l = qq(for (0x30..0x7E) {\n). |
63 |
|
|
qq(my \$F = chr \$_;\n). |
64 |
|
|
qq( \$C->{option}->{designate_to}->{G94}->{\$F} = $1;\n). |
65 |
|
|
qq( \$C->{option}->{designate_to}->{G96}->{\$F} = $1;\n). |
66 |
|
|
qq( \$C->{option}->{designate_to}->{G94n}->{\$F} = $1;\n). |
67 |
|
|
qq( \$C->{option}->{designate_to}->{G96n}->{\$F} = $1;\n). |
68 |
|
|
qq(}); |
69 |
|
|
} elsif ($l =~ /^C:designate:\*private:default=(-?[0-3]+)$/) { |
70 |
|
|
$l = qq(for (0x30..0x3F) {\n). |
71 |
|
|
qq( my \$F = chr \$_;\n). |
72 |
|
|
qq( for my \$c (qw/G94 G96 G94n G96n C0 C1/) {\n). |
73 |
|
|
qq( \$C->{option}->{designate_to}->{\$c}->{\$F} = $1;\n). |
74 |
|
|
qq( \$C->{option}->{designate_to}->{\$c}->{'\x21'.\$F} = $1;\n). |
75 |
|
|
qq( \$C->{option}->{designate_to}->{\$c}->{'\x22'.\$F} = $1;\n). |
76 |
|
|
qq( \$C->{option}->{designate_to}->{\$c}->{'\x23'.\$F} = $1;\n). |
77 |
|
|
qq( }\n); |
78 |
|
|
qq(}); |
79 |
|
|
} elsif ($l =~ /^C:designate:([^:=]+):([^=]+)=(-?[0-3]+)(\s+\#\#.+)?$/) { |
80 |
|
|
$l = qq(\$C->{option}->{designate_to}->{$1}->{'$2'} = $3;$4); |
81 |
|
|
} elsif ($l =~ /^C:([GC][LR])=undef$/) { |
82 |
|
|
$l = qq(\$C->{$1} = undef;); |
83 |
|
|
} elsif ($l =~ /^C:([GC][LR])=(..)$/) { |
84 |
|
|
$l = qq(\$C->{$1} = '$2';); |
85 |
|
|
} elsif ($l =~ /^C:bit=([78])$/) { |
86 |
|
|
$l = qq(\$C->{bit} = $1;); |
87 |
wakaba |
1.1 |
} elsif ($l =~ /^use:table:(.+)$/) { |
88 |
|
|
$l = qq(eval q(use Encode::Table::$1) unless \$Encode::Table::$1::VERSION;); |
89 |
wakaba |
1.3 |
} elsif ($l =~ /^require:private:(.+)$/) { |
90 |
|
|
$l = qq(eval q(use Encode::Charset::Private q(:$1)) or die \$\@;); |
91 |
|
|
} elsif ($l =~ /^use:private:(.+)$/) { |
92 |
|
|
$l = qq(eval q(use Encode::Charset::Private q(:$1)) or die \$\@;\neval q(Encode::Charset::Private::designate_$1 (\$C));); |
93 |
wakaba |
1.1 |
} elsif ($l =~ /^use:(.+)$/) { |
94 |
|
|
$l = qq(eval q(use $1) unless \$$1::VERSION;); |
95 |
wakaba |
1.3 |
} elsif ($l =~ /^\#;/) { |
96 |
|
|
$l = undef; |
97 |
wakaba |
1.1 |
} |
98 |
|
|
} |
99 |
|
|
if ($item{$section}) { |
100 |
wakaba |
1.3 |
$item{$section} .= "\n".$l if defined $l; |
101 |
wakaba |
1.1 |
} else { |
102 |
|
|
$item{$section} = $l; |
103 |
|
|
} |
104 |
|
|
} |
105 |
|
|
} else { ## Out of blocks |
106 |
|
|
if (/^\{$/) { |
107 |
|
|
$mode = 1; |
108 |
|
|
} elsif (/^(.+):$/) { |
109 |
|
|
$section = $1; |
110 |
|
|
} elsif (/^\t(.*)$/) { |
111 |
|
|
my $t = $1; |
112 |
|
|
if ($Info{$section}) { |
113 |
|
|
$Info{$section} .= "\n".$t; |
114 |
|
|
} else { |
115 |
|
|
$Info{$section} = $t; |
116 |
|
|
} |
117 |
|
|
} |
118 |
|
|
} |
119 |
|
|
} |
120 |
|
|
$ReplaceText{MYSELF} = qq(Encode::$Info{Name}); |
121 |
|
|
|
122 |
|
|
print <<EOH; |
123 |
|
|
## This file is auto-generated (at @{[ sprintf '%04d-%02d-%02dT%02d:%02d:%02dZ', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3,2,1,0] ]}). |
124 |
|
|
## Do not edit by hand! |
125 |
|
|
|
126 |
|
|
=head1 NAME |
127 |
|
|
|
128 |
|
|
$ReplaceText{MYSELF} --- $Info{ShortDescription} |
129 |
wakaba |
1.2 |
@{[ $Info{'POD:DESCRIPTION'} ? qq{ |
130 |
|
|
=head1 DESCRIPTION |
131 |
wakaba |
1.1 |
|
132 |
|
|
$Info{'POD:DESCRIPTION'}} : '']} |
133 |
wakaba |
1.2 |
|
134 |
wakaba |
1.1 |
=cut |
135 |
|
|
|
136 |
|
|
package $ReplaceText{MYSELF}; |
137 |
|
|
use 5.7.3; |
138 |
|
|
use strict; |
139 |
|
|
our \$VERSION = q(@{[sprintf '%04d.%02d%02d', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3]]}); |
140 |
|
|
|
141 |
|
|
=head1 ENCODINGS |
142 |
|
|
@{[ $Info{'POD:ENCODING:PREAMBLE'} ? qq( |
143 |
|
|
$Info{'POD:ENCODING:PREAMBLE'} |
144 |
|
|
) : '']} |
145 |
|
|
=over 8 |
146 |
|
|
|
147 |
|
|
=cut |
148 |
|
|
|
149 |
|
|
EOH |
150 |
|
|
|
151 |
|
|
for my $encode (@{ $Info{encoding} }) { |
152 |
wakaba |
1.3 |
for my $ED (qw/Encode Decode Cversion/) { |
153 |
wakaba |
1.1 |
my $ed = lc $ED; |
154 |
|
|
if ($encode->{$ED} =~ /Encode::Table/) { |
155 |
|
|
$encode->{$ED} = q/require Encode::Table; |
156 |
|
|
my $tbl = defined $obj->{_/.$ed.q/_mapping} ? $obj->{_/.$ed.q/_mapping} : 1; |
157 |
|
|
my %tblopt = (-autoload => defined $obj->{_/.$ed.q/_mapping_autoload} ? $obj->{_/.$ed.q/_mapping_autoload} : 1); |
158 |
|
|
/.$encode->{$ED}; |
159 |
|
|
} |
160 |
wakaba |
1.3 |
if ($encode->{$ED} =~ /\$C/) { |
161 |
|
|
if ($ED ne 'Cversion' && $encode->{Cversion}) { |
162 |
|
|
$encode->{$ED} = qq(my \$C = \$obj->__code_version;\n).$encode->{$ED}; |
163 |
|
|
} elsif ($encode->{$ED} =~ /SJIS/i) { |
164 |
|
|
$encode->{$ED} = qq(require Encode::Charset;\nmy \$C = &Encode::Charset::new_object_sjis;\n).$encode->{$ED}; |
165 |
|
|
} else { |
166 |
|
|
$encode->{$ED} = qq(require Encode::Charset;\nmy \$C = &Encode::Charset::new_object;\n).$encode->{$ED}; |
167 |
|
|
} |
168 |
|
|
} |
169 |
|
|
for (qw/ISO2022 SJIS/) { |
170 |
|
|
if ($encode->{$ED} =~ /Encode::$_/) { |
171 |
|
|
$encode->{$ED} = qq(require Encode::$_;\n).$encode->{$ED}; |
172 |
|
|
} |
173 |
|
|
} |
174 |
wakaba |
1.1 |
$encode->{$ED} =~ s/\n/\n /g; |
175 |
|
|
} |
176 |
|
|
print <<EOH; |
177 |
|
|
|
178 |
|
|
package Encode::$Info{Name}::$encode->{ModuleName}; |
179 |
|
|
our \$VERSION = \$Encode::$Info{Name}::VERSION; |
180 |
|
|
use base qw(Encode::Encoding); |
181 |
|
|
__PACKAGE__->Define (qw/$encode->{Name} $encode->{Alias}/); |
182 |
|
|
|
183 |
|
|
=item $encode->{Name} |
184 |
|
|
|
185 |
|
|
$encode->{Description}@{[ $encode->{Alias} ? ' |
186 |
|
|
(Alias: ' . join (', ', split /\s+/, $encode->{Alias}) . ')' : '' ]} |
187 |
|
|
|
188 |
|
|
=cut |
189 |
|
|
|
190 |
|
|
sub encode (\$\$;\$) { |
191 |
|
|
my (\$obj, \$s, \$chk) = \@_; |
192 |
|
|
$encode->{Encode} |
193 |
|
|
\$_[1] = '' if \$chk; |
194 |
|
|
return \$s; |
195 |
|
|
} |
196 |
|
|
|
197 |
|
|
sub decode (\$\$;\$) { |
198 |
|
|
my (\$obj, \$s, \$chk) = \@_; |
199 |
|
|
$encode->{Decode} |
200 |
|
|
\$_[1] = '' if \$chk; |
201 |
|
|
return \$s; |
202 |
|
|
} |
203 |
wakaba |
1.3 |
@{[ $encode->{Cversion} ? qq( |
204 |
|
|
sub __code_version (\$) { |
205 |
|
|
$encode->{Cversion} |
206 |
|
|
\$C; |
207 |
|
|
}):'']} |
208 |
wakaba |
1.1 |
EOH |
209 |
|
|
} |
210 |
|
|
|
211 |
|
|
print <<EOH; |
212 |
|
|
|
213 |
|
|
=back |
214 |
|
|
@{[ $Info{'POD:ENCODING:POSTAMBLE'} ? qq( |
215 |
|
|
$Info{'POD:ENCODING:POSTAMBLE'} |
216 |
|
|
) : '']} |
217 |
|
|
=cut |
218 |
wakaba |
1.3 |
@{[$Info{__use_clone} ? q( |
219 |
|
|
sub Encode::Encoding::__clone ($) { |
220 |
|
|
my $self = shift; |
221 |
|
|
bless {%$self}, ref $self; |
222 |
|
|
}):'']} |
223 |
wakaba |
1.1 |
|
224 |
|
|
EOH |
225 |
|
|
|
226 |
|
|
for my $name (qw/EXAMPLE/, 'SEE ALSO', 'TO DO', qw/AUTHORS LICENSE/) { |
227 |
|
|
if ($Info{qq(POD:$name)}) { |
228 |
|
|
$Info{qq(POD:$name)} =~ s/%%([A-Za-z0-9_]+)%%/$ReplaceText{$1}/g; |
229 |
|
|
print <<EOH; |
230 |
|
|
=head1 $name |
231 |
|
|
|
232 |
|
|
$Info{qq(POD:$name)} |
233 |
|
|
|
234 |
|
|
EOH |
235 |
|
|
} |
236 |
|
|
} |
237 |
|
|
|
238 |
|
|
print <<EOH; |
239 |
|
|
=cut |
240 |
|
|
|
241 |
|
|
1; |
242 |
|
|
EOH |
243 |
|
|
|
244 |
|
|
sub ReplaceText () { |
245 |
|
|
my %RT = ( |
246 |
|
|
GNUGPL2 => q{This program is free software; you can redistribute it and/or modify |
247 |
|
|
it under the terms of the GNU General Public License as published by |
248 |
|
|
the Free Software Foundation; either version 2 of the License, or |
249 |
|
|
(at your option) any later version. |
250 |
|
|
|
251 |
|
|
This program is distributed in the hope that it will be useful, |
252 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
253 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
254 |
|
|
GNU General Public License for more details. |
255 |
|
|
|
256 |
|
|
You should have received a copy of the GNU General Public License |
257 |
|
|
along with this program; see the file COPYING. If not, write to |
258 |
|
|
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
259 |
|
|
Boston, MA 02111-1307, USA.}, |
260 |
|
|
PerlLicense => q{This library is free software; you can redistribute it |
261 |
|
|
and/or modify it under the same terms as Perl itself.}, |
262 |
|
|
ReferenceIANAREG => q([IANAREG] "CHARACTER SETS", IANA <http://www.iana.org/>, |
263 |
|
|
<http://www.iana.org/assignments/character-sets>. |
264 |
|
|
The charset registry for IETF <http://www.ietf.org/> standards.), |
265 |
wakaba |
1.3 |
ReferenceJISX0208_1978 => q(JIS C 6226 (JIS X 0208)-1978, "Code of Japanese graphic |
266 |
|
|
character set for information interchange", Japan Industrial Standards |
267 |
|
|
Committee (JISC) <http://www.jisc.go.jp/>, 1978.), |
268 |
|
|
ReferenceJISX0208_1983 => q(JIS C 6226 (JIS X 0208)-1983, "Code of Japanese graphic |
269 |
|
|
character set for information interchange", Japan Industrial Standards |
270 |
|
|
Committee (JISC) <http://www.jisc.go.jp/>, 1983.), |
271 |
|
|
ReferenceJISX0208_1990 => q(JIS X 0208-1990, "Code of Japanese graphic character |
272 |
|
|
set for information interchange", Japan Industrial Standards |
273 |
|
|
Committee (JISC) <http://www.jisc.go.jp/>, 1990.), |
274 |
|
|
ReferenceJISX0212_1990 => q(JIS X 0212-1990, "Code of supplementary Japanese graphic |
275 |
|
|
character set for information interchange", Japan Industrial Standards |
276 |
|
|
Committee (JISC) <http://www.jisc.go.jp/>, 1990.), |
277 |
|
|
ReferenceJISX0221_1995 => q(JIS X 0221-1995, "Universal multi-octet coded character |
278 |
wakaba |
1.2 |
set (UCS)", Japan Industrial Standards Committee |
279 |
|
|
<http://www.jisc.go.jp/>, 1995. IDT with ISO/IEC 10646-1:1993 |
280 |
|
|
but three additional appendixes.), |
281 |
wakaba |
1.3 |
ReferenceJISX0201_1997 => q(JIS X 0201:1997, "7-bit and 8-bit coded character |
282 |
|
|
set for information interchange", Japan Industrial Standards |
283 |
|
|
Committee (JISC) <http://www.jisc.go.jp/>, 1997.), |
284 |
wakaba |
1.1 |
ReferenceJISX0208_1997 => q(JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji |
285 |
|
|
set for information interchange", Japan Industrial Standards |
286 |
|
|
Committee (JISC) <http://www.jisc.go.jp/>, 1997.), |
287 |
|
|
ReferenceJISX0213_2000 => q(JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji |
288 |
|
|
sets for information interchange", Japan Industrial Standards |
289 |
|
|
Committee (JISC) <http://www.jisc.go.jp/>, 2000.), |
290 |
wakaba |
1.2 |
ReferenceRFC1468 => q(RFC 1468, "Japanese Character Encoding for Internet Messages", |
291 |
|
|
J. Murai, et al, IETF <http://www.ietf.org/>, June 1993. |
292 |
|
|
<urn:ietf:rfc:1468>.), |
293 |
wakaba |
1.1 |
YEAR => (gmtime)[5]+1900, |
294 |
|
|
); |
295 |
|
|
%RT; |
296 |
|
|
} |
297 |
|
|
|
298 |
|
|
=head1 SEE ALSO |
299 |
|
|
|
300 |
wakaba |
1.3 |
L<Encode>, L<Encode::Table>, |
301 |
|
|
SuikaWiki:esr2pm <http://suika.fam.cx/~wakaba/-temp/wiki/wiki?esr2pm> |
302 |
wakaba |
1.1 |
|
303 |
|
|
=head1 LICENSE |
304 |
|
|
|
305 |
|
|
Copyright 2002 Wakaba <w@suika.fam.cx> |
306 |
|
|
|
307 |
|
|
This library is free software; you can redistribute it |
308 |
|
|
and/or modify it under the same terms as Perl itself. |
309 |
|
|
|
310 |
|
|
Note that modules generated by this script should be |
311 |
|
|
licensed by the licenser of source file so that copyright |
312 |
|
|
holder of this script does not claim any right to them. |
313 |
|
|
|
314 |
|
|
=cut |
315 |
|
|
|
316 |
wakaba |
1.3 |
# $Date: 2002/10/14 06:56:53 $ |