/[suikacvs]/perl/lib/Encode/Table/tool/esr2pm.pl
Suika

Contents of /perl/lib/Encode/Table/tool/esr2pm.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (hide annotations) (download)
Mon Dec 16 10:25:01 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.4: +35 -6 lines
File MIME type: text/plain
*** empty log message ***

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4     =head1 NEMA
5    
6     esr2pm.pl --- Simple Encode module generator
7    
8     =cut
9    
10     my %ReplaceText = &ReplaceText;
11     my %Info;
12     my $mode = 0;
13     my $section = '';
14     my %item;
15     while (<>) {
16     if ($mode == 1) { ## In a block
17     if (/^\}$/) {
18     $item{ModuleName} ||= $item{Name};
19     $item{ModuleName} =~ s/[^0-9A-Za-z_]/_/g;
20     push @{ $Info{encoding} }, {%item};
21     $mode = 0; %item = ();
22     } elsif (/^(.+):$/) {
23     $section = $1;
24 wakaba 1.2 } elsif (/^\t(.*)$/) {
25 wakaba 1.1 my $l = $1;
26 wakaba 1.4 if ($section =~ /^(?:En|De)code/ || $section == 'Cversion') {
27 wakaba 1.3 if ($l =~ /^->(.+):C$/) {
28     my $name = $1;
29     if ($name eq 'iso2022') {
30 wakaba 1.4 $l = q(($s, %s) = Encode::ISO2022::internal_to_iso2022 ($s, $C););
31 wakaba 1.3 } elsif ($name eq 'sjis') {
32 wakaba 1.5 $l = q(($s, %s) = Encode::SJIS::internal_to_sjis ($s, $C););
33 wakaba 1.3 }
34     } elsif ($l =~ /^->(.+)$/) {
35 wakaba 1.1 $l = qq(my \$e = Encode::find_encoding (q($1))->__clone;\n\$e->{_encode_mapping} = 0;\n\$s = \$e->encode (\$s););
36 wakaba 1.3 $Info{__use_clone} = 1;
37     } elsif ($l =~ /^<-(.+):C$/) {
38     my $name = $1;
39     if ($name eq 'iso2022') {
40     $l = q($s = Encode::ISO2022::iso2022_to_internal ($s, $C););
41     } elsif ($name eq 'sjis') {
42     $l = q($s = Encode::SJIS::sjis_to_internal ($s, $C););
43     }
44 wakaba 1.1 } elsif ($l =~ /^<-(.+)$/) {
45     $l = qq(my \$e = Encode::find_encoding (q($1))->__clone;\n\$e->{_decode_mapping} = 0;\n\$s = \$e->decode (\$s););
46     } elsif ($l =~ /^(?:<=|=>)(.+)$/) {
47     $l = qq(\$s = Encode::Table::convert (\$s, [qw($1)], \%tblopt) if \$tbl;);
48     } elsif ($l =~ /^utf8:o(n|ff)$/) {
49     $l = qq(Encode::_utf8_o$1 (\$s););
50 wakaba 1.3 } elsif ($l =~ /^C:([GC][^=:]+)=([^:]+):([^\t]+)(\t\s*\#\#.+)?$/) {
51     $l = qq(\$C->{$1} = \$Encode::Charset::CHARSET{$2}->{'$3'};$4);
52 wakaba 1.4 } elsif ($l =~ /^C:option:([^=]+)=([^\t]+?)(\t\s*\#\#.+)?$/) {
53 wakaba 1.3 $l = qq(\$C->{option}->$1 = $2;$3);
54     } elsif ($l =~ /^C:designate:\*:default=(-?[0-3]+)$/) {
55     $l = qq(\$C->{option}->{designate_to}->{G94}->{default} = $1;\n).
56     qq(\$C->{option}->{designate_to}->{G96}->{default} = $1;\n).
57     qq(\$C->{option}->{designate_to}->{G94n}->{default} = $1;\n).
58     qq(\$C->{option}->{designate_to}->{G96n}->{default} = $1;\n).
59     qq(\$C->{option}->{designate_to}->{C0}->{default} = $1;\n).
60     qq(\$C->{option}->{designate_to}->{C1}->{default} = $1;);
61     } elsif ($l =~ /^C:designate:\*drcs:default=(-?[0-3]+)$/) {
62     $l = qq(for (0x30..0x7E) {\n).
63     qq(my \$F = chr \$_;\n).
64     qq( \$C->{option}->{designate_to}->{G94}->{\$F} = $1;\n).
65     qq( \$C->{option}->{designate_to}->{G96}->{\$F} = $1;\n).
66     qq( \$C->{option}->{designate_to}->{G94n}->{\$F} = $1;\n).
67     qq( \$C->{option}->{designate_to}->{G96n}->{\$F} = $1;\n).
68     qq(});
69     } elsif ($l =~ /^C:designate:\*private:default=(-?[0-3]+)$/) {
70     $l = qq(for (0x30..0x3F) {\n).
71     qq( my \$F = chr \$_;\n).
72     qq( for my \$c (qw/G94 G96 G94n G96n C0 C1/) {\n).
73     qq( \$C->{option}->{designate_to}->{\$c}->{\$F} = $1;\n).
74     qq( \$C->{option}->{designate_to}->{\$c}->{'\x21'.\$F} = $1;\n).
75     qq( \$C->{option}->{designate_to}->{\$c}->{'\x22'.\$F} = $1;\n).
76     qq( \$C->{option}->{designate_to}->{\$c}->{'\x23'.\$F} = $1;\n).
77 wakaba 1.5 qq( }\n).
78 wakaba 1.3 qq(});
79 wakaba 1.4 } elsif ($l =~ /^C:designate:([^:=]+):([^=]+)=(-?[0-3]+)(\t\s*\#\#.+)?$/) {
80 wakaba 1.3 $l = qq(\$C->{option}->{designate_to}->{$1}->{'$2'} = $3;$4);
81 wakaba 1.4 } elsif ($l =~ /^C:([GC][LR])=undef(\t\s*\#\#.+)?$/) {
82     $l = qq(\$C->{$1} = undef;$3);
83     } elsif ($l =~ /^C:([GC][LR])=(..)(\t\s*\#\#.+)?$/) {
84     $l = qq(\$C->{$1} = '$2';$3);
85     } elsif ($l =~ /^C:bit=([78])(\t\s*\#\#.+)?$/) {
86     $l = qq(\$C->{bit} = $1;$2);
87 wakaba 1.1 } elsif ($l =~ /^use:table:(.+)$/) {
88     $l = qq(eval q(use Encode::Table::$1) unless \$Encode::Table::$1::VERSION;);
89 wakaba 1.3 } elsif ($l =~ /^require:private:(.+)$/) {
90     $l = qq(eval q(use Encode::Charset::Private q(:$1)) or die \$\@;);
91     } elsif ($l =~ /^use:private:(.+)$/) {
92     $l = qq(eval q(use Encode::Charset::Private q(:$1)) or die \$\@;\neval q(Encode::Charset::Private::designate_$1 (\$C)););
93 wakaba 1.1 } elsif ($l =~ /^use:(.+)$/) {
94     $l = qq(eval q(use $1) unless \$$1::VERSION;);
95 wakaba 1.3 } elsif ($l =~ /^\#;/) {
96     $l = undef;
97 wakaba 1.1 }
98     }
99     if ($item{$section}) {
100 wakaba 1.3 $item{$section} .= "\n".$l if defined $l;
101 wakaba 1.1 } else {
102     $item{$section} = $l;
103     }
104     }
105     } else { ## Out of blocks
106     if (/^\{$/) {
107     $mode = 1;
108     } elsif (/^(.+):$/) {
109     $section = $1;
110     } elsif (/^\t(.*)$/) {
111     my $t = $1;
112     if ($Info{$section}) {
113     $Info{$section} .= "\n".$t;
114     } else {
115     $Info{$section} = $t;
116     }
117     }
118     }
119     }
120     $ReplaceText{MYSELF} = qq(Encode::$Info{Name});
121    
122     print <<EOH;
123     ## This file is auto-generated (at @{[ sprintf '%04d-%02d-%02dT%02d:%02d:%02dZ', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3,2,1,0] ]}).
124     ## Do not edit by hand!
125    
126     =head1 NAME
127    
128     $ReplaceText{MYSELF} --- $Info{ShortDescription}
129 wakaba 1.2 @{[ $Info{'POD:DESCRIPTION'} ? qq{
130     =head1 DESCRIPTION
131 wakaba 1.1
132     $Info{'POD:DESCRIPTION'}} : '']}
133 wakaba 1.2
134 wakaba 1.1 =cut
135    
136     package $ReplaceText{MYSELF};
137     use 5.7.3;
138     use strict;
139     our \$VERSION = q(@{[sprintf '%04d.%02d%02d', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3]]});
140    
141     =head1 ENCODINGS
142     @{[ $Info{'POD:ENCODING:PREAMBLE'} ? qq(
143     $Info{'POD:ENCODING:PREAMBLE'}
144     ) : '']}
145     =over 8
146    
147     =cut
148    
149     EOH
150    
151 wakaba 1.5 my @alias_def;
152    
153 wakaba 1.1 for my $encode (@{ $Info{encoding} }) {
154 wakaba 1.4 $encode->{EncodeFull} = $encode->{'Encode:Prepare'}."\n".$encode->{Encode};
155     $encode->{DecodeFull} = $encode->{'Decode:Prepare'}."\n".$encode->{Decode};
156     for my $ED (qw/Encode Decode EncodeFull DecodeFull Cversion/) {
157     my $ed = $ED =~ /Encode/ ? 'encode' : 'decode';
158 wakaba 1.1 if ($encode->{$ED} =~ /Encode::Table/) {
159     $encode->{$ED} = q/require Encode::Table;
160     my $tbl = defined $obj->{_/.$ed.q/_mapping} ? $obj->{_/.$ed.q/_mapping} : 1;
161     my %tblopt = (-autoload => defined $obj->{_/.$ed.q/_mapping_autoload} ? $obj->{_/.$ed.q/_mapping_autoload} : 1);
162     /.$encode->{$ED};
163     }
164 wakaba 1.3 if ($encode->{$ED} =~ /\$C/) {
165     if ($ED ne 'Cversion' && $encode->{Cversion}) {
166 wakaba 1.4 $encode->{$ED} = ($ED =~ /Full/ ? qq(my \$C = \$obj->__code_version;\n) : '')
167     .qq(\$C->{_encoder} = \$obj;\n)
168     .($ED eq 'EncodeFull' ? qq(\$C->{option}->{fallback_from_ucs} = \$obj->{_encode_fallback} ? \$obj->{_encode_fallback} :
169     \$chk & Encode::DIE_ON_ERR ? 'croak' :
170 wakaba 1.5 \$chk & Encode::RETURN_ON_ERR ? (\$chk & Encode::WARN_ON_ERR ? 'quiet+warn' : 'quiet') :
171 wakaba 1.4 \$chk & Encode::PERLQQ ? 'perl' : \$chk & Encode::HTMLCREF ? 'sgml' :
172     \$chk & Encode::XMLCREF ? 'sgml-hex' : 'replacement';
173     ) : '')
174     .$encode->{$ED};
175 wakaba 1.5 } elsif ($encode->{$ED} =~ /SJIS/i || $encode->{Encode} =~ /SJIS/i || $encode->{Decode} =~ /SJIS/i) {
176 wakaba 1.3 $encode->{$ED} = qq(require Encode::Charset;\nmy \$C = &Encode::Charset::new_object_sjis;\n).$encode->{$ED};
177     } else {
178     $encode->{$ED} = qq(require Encode::Charset;\nmy \$C = &Encode::Charset::new_object;\n).$encode->{$ED};
179     }
180     }
181     for (qw/ISO2022 SJIS/) {
182     if ($encode->{$ED} =~ /Encode::$_/) {
183     $encode->{$ED} = qq(require Encode::$_;\n).$encode->{$ED};
184     }
185     }
186 wakaba 1.1 $encode->{$ED} =~ s/\n/\n /g;
187     }
188 wakaba 1.5
189     ## Define a new encoding
190     if ($encode->{Name}) {
191     print <<EOH;
192 wakaba 1.1
193     package Encode::$Info{Name}::$encode->{ModuleName};
194     our \$VERSION = \$Encode::$Info{Name}::VERSION;
195     use base qw(Encode::Encoding);
196     __PACKAGE__->Define (qw/$encode->{Name} $encode->{Alias}/);
197    
198     =item $encode->{Name}
199    
200     $encode->{Description}@{[ $encode->{Alias} ? '
201     (Alias: ' . join (', ', split /\s+/, $encode->{Alias}) . ')' : '' ]}
202    
203     =cut
204    
205     sub encode (\$\$;\$) {
206     my (\$obj, \$s, \$chk) = \@_;
207 wakaba 1.4 my \%s;
208     $encode->{EncodeFull}
209     if (\$s{die}) { ## FB_CROAK
210     if (\$Carp::VERSION) { Carp::croak ('encode: '.\$s{reason}) }
211     else { die ('encode: '.\$s{reason}) }
212     } elsif (\$s{halfway}) { ## FB_QUIET, FB_WARNING
213     \$_[1] = substr (\$_[1], \$s{converted_length});
214     if (\$s{warn}) {
215     if (\$Carp::VERSION) { Carp::carp ('encode: '.\$s{reason}) }
216     else { warn ('encode: '.\$s{reason}) }
217     }
218     } else {
219     \$_[1] = '' if \$chk;
220     }
221     return \$s;
222     }
223    
224     sub _encode_internal (\$\$\$) {
225     my (\$obj, \$s, \$C) = \@_;
226     my \%s;
227 wakaba 1.1 $encode->{Encode}
228 wakaba 1.4 if (\$s{die}) {
229     if (\$Carp::VERSION) { Carp::croak ('encode: '.\$s{reason}) }
230     else { die ('encode: '.\$s{reason}) }
231     }
232 wakaba 1.1 return \$s;
233     }
234    
235     sub decode (\$\$;\$) {
236     my (\$obj, \$s, \$chk) = \@_;
237 wakaba 1.4 $encode->{DecodeFull}
238 wakaba 1.1 \$_[1] = '' if \$chk;
239     return \$s;
240     }
241 wakaba 1.3 @{[ $encode->{Cversion} ? qq(
242     sub __code_version (\$) {
243     $encode->{Cversion}
244     \$C;
245     }):'']}
246 wakaba 1.1 EOH
247 wakaba 1.5 ## Define a set of aliases
248     } elsif ($encode->{AliasOf}) {
249     my @alias = split /\s+/, $encode->{Alias};
250     my $alias1 = shift @alias;
251     print <<EOH;
252    
253     =item $alias1
254    
255     $encode->{Description}@{[ @alias ? '
256     (Alias: ' . join (', ', @alias) . ')' : '' ]}
257    
258     This name is an alias of $encode->{AliasOf}.
259    
260     =cut
261    
262     EOH
263     ## Encode::Alias is already 'use'ed by Encode, so we don't have to 'require' it.
264     for ($alias1, @alias) {
265     ## Aliases is not defined here, because 'parent' of aliases can be defined
266     ## in later part of THIS module.
267     push @alias_def, "Encode::Alias::define_alias ('$_' => '$encode->{AliasOf}');\n";
268     }
269     }
270 wakaba 1.1 }
271    
272 wakaba 1.5 print @alias_def;
273 wakaba 1.1 print <<EOH;
274    
275     =back
276     @{[ $Info{'POD:ENCODING:POSTAMBLE'} ? qq(
277     $Info{'POD:ENCODING:POSTAMBLE'}
278     ) : '']}
279     =cut
280 wakaba 1.3 @{[$Info{__use_clone} ? q(
281     sub Encode::Encoding::__clone ($) {
282     my $self = shift;
283     bless {%$self}, ref $self;
284     }):'']}
285 wakaba 1.1
286     EOH
287    
288     for my $name (qw/EXAMPLE/, 'SEE ALSO', 'TO DO', qw/AUTHORS LICENSE/) {
289     if ($Info{qq(POD:$name)}) {
290     $Info{qq(POD:$name)} =~ s/%%([A-Za-z0-9_]+)%%/$ReplaceText{$1}/g;
291     print <<EOH;
292     =head1 $name
293    
294     $Info{qq(POD:$name)}
295    
296     EOH
297     }
298     }
299    
300     print <<EOH;
301     =cut
302    
303     1;
304     EOH
305    
306     sub ReplaceText () {
307     my %RT = (
308     GNUGPL2 => q{This program is free software; you can redistribute it and/or modify
309     it under the terms of the GNU General Public License as published by
310     the Free Software Foundation; either version 2 of the License, or
311     (at your option) any later version.
312    
313     This program is distributed in the hope that it will be useful,
314     but WITHOUT ANY WARRANTY; without even the implied warranty of
315     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
316     GNU General Public License for more details.
317    
318     You should have received a copy of the GNU General Public License
319     along with this program; see the file COPYING. If not, write to
320     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
321     Boston, MA 02111-1307, USA.},
322     PerlLicense => q{This library is free software; you can redistribute it
323     and/or modify it under the same terms as Perl itself.},
324     ReferenceIANAREG => q([IANAREG] "CHARACTER SETS", IANA <http://www.iana.org/>,
325     <http://www.iana.org/assignments/character-sets>.
326     The charset registry for IETF <http://www.ietf.org/> standards.),
327 wakaba 1.3 ReferenceJISX0208_1978 => q(JIS C 6226 (JIS X 0208)-1978, "Code of Japanese graphic
328     character set for information interchange", Japan Industrial Standards
329     Committee (JISC) <http://www.jisc.go.jp/>, 1978.),
330     ReferenceJISX0208_1983 => q(JIS C 6226 (JIS X 0208)-1983, "Code of Japanese graphic
331     character set for information interchange", Japan Industrial Standards
332     Committee (JISC) <http://www.jisc.go.jp/>, 1983.),
333     ReferenceJISX0208_1990 => q(JIS X 0208-1990, "Code of Japanese graphic character
334     set for information interchange", Japan Industrial Standards
335     Committee (JISC) <http://www.jisc.go.jp/>, 1990.),
336     ReferenceJISX0212_1990 => q(JIS X 0212-1990, "Code of supplementary Japanese graphic
337     character set for information interchange", Japan Industrial Standards
338     Committee (JISC) <http://www.jisc.go.jp/>, 1990.),
339     ReferenceJISX0221_1995 => q(JIS X 0221-1995, "Universal multi-octet coded character
340 wakaba 1.2 set (UCS)", Japan Industrial Standards Committee
341     <http://www.jisc.go.jp/>, 1995. IDT with ISO/IEC 10646-1:1993
342     but three additional appendixes.),
343 wakaba 1.3 ReferenceJISX0201_1997 => q(JIS X 0201:1997, "7-bit and 8-bit coded character
344     set for information interchange", Japan Industrial Standards
345     Committee (JISC) <http://www.jisc.go.jp/>, 1997.),
346 wakaba 1.1 ReferenceJISX0208_1997 => q(JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji
347     set for information interchange", Japan Industrial Standards
348     Committee (JISC) <http://www.jisc.go.jp/>, 1997.),
349     ReferenceJISX0213_2000 => q(JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji
350     sets for information interchange", Japan Industrial Standards
351     Committee (JISC) <http://www.jisc.go.jp/>, 2000.),
352 wakaba 1.2 ReferenceRFC1468 => q(RFC 1468, "Japanese Character Encoding for Internet Messages",
353     J. Murai, et al, IETF <http://www.ietf.org/>, June 1993.
354     <urn:ietf:rfc:1468>.),
355 wakaba 1.1 YEAR => (gmtime)[5]+1900,
356     );
357     %RT;
358     }
359    
360     =head1 SEE ALSO
361    
362 wakaba 1.3 L<Encode>, L<Encode::Table>,
363     SuikaWiki:esr2pm <http://suika.fam.cx/~wakaba/-temp/wiki/wiki?esr2pm>
364 wakaba 1.1
365     =head1 LICENSE
366    
367     Copyright 2002 Wakaba <w@suika.fam.cx>
368    
369     This library is free software; you can redistribute it
370     and/or modify it under the same terms as Perl itself.
371    
372     Note that modules generated by this script should be
373     licensed by the licenser of source file so that copyright
374     holder of this script does not claim any right to them.
375    
376     =cut
377    
378 wakaba 1.5 # $Date: 2002/12/14 11:02:25 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24