/[suikacvs]/perl/lib/Encode/Table/tool/esr2pm.pl
Suika

Contents of /perl/lib/Encode/Table/tool/esr2pm.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (hide annotations) (download)
Wed Dec 18 12:57:40 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.6: +2 -2 lines
File MIME type: text/plain
Pictogram charsets support

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4     =head1 NEMA
5    
6     esr2pm.pl --- Simple Encode module generator
7    
8     =cut
9    
10     my %ReplaceText = &ReplaceText;
11     my %Info;
12     my $mode = 0;
13     my $section = '';
14     my %item;
15     while (<>) {
16     if ($mode == 1) { ## In a block
17     if (/^\}$/) {
18     $item{ModuleName} ||= $item{Name};
19     $item{ModuleName} =~ s/[^0-9A-Za-z_]/_/g;
20     push @{ $Info{encoding} }, {%item};
21     $mode = 0; %item = ();
22     } elsif (/^(.+):$/) {
23     $section = $1;
24 wakaba 1.2 } elsif (/^\t(.*)$/) {
25 wakaba 1.1 my $l = $1;
26 wakaba 1.4 if ($section =~ /^(?:En|De)code/ || $section == 'Cversion') {
27 wakaba 1.3 if ($l =~ /^->(.+):C$/) {
28     my $name = $1;
29     if ($name eq 'iso2022') {
30 wakaba 1.4 $l = q(($s, %s) = Encode::ISO2022::internal_to_iso2022 ($s, $C););
31 wakaba 1.3 } elsif ($name eq 'sjis') {
32 wakaba 1.5 $l = q(($s, %s) = Encode::SJIS::internal_to_sjis ($s, $C););
33 wakaba 1.3 }
34     } elsif ($l =~ /^->(.+)$/) {
35 wakaba 1.1 $l = qq(my \$e = Encode::find_encoding (q($1))->__clone;\n\$e->{_encode_mapping} = 0;\n\$s = \$e->encode (\$s););
36 wakaba 1.3 $Info{__use_clone} = 1;
37     } elsif ($l =~ /^<-(.+):C$/) {
38     my $name = $1;
39     if ($name eq 'iso2022') {
40     $l = q($s = Encode::ISO2022::iso2022_to_internal ($s, $C););
41     } elsif ($name eq 'sjis') {
42     $l = q($s = Encode::SJIS::sjis_to_internal ($s, $C););
43     }
44 wakaba 1.1 } elsif ($l =~ /^<-(.+)$/) {
45     $l = qq(my \$e = Encode::find_encoding (q($1))->__clone;\n\$e->{_decode_mapping} = 0;\n\$s = \$e->decode (\$s););
46     } elsif ($l =~ /^(?:<=|=>)(.+)$/) {
47     $l = qq(\$s = Encode::Table::convert (\$s, [qw($1)], \%tblopt) if \$tbl;);
48     } elsif ($l =~ /^utf8:o(n|ff)$/) {
49     $l = qq(Encode::_utf8_o$1 (\$s););
50 wakaba 1.3 } elsif ($l =~ /^C:([GC][^=:]+)=([^:]+):([^\t]+)(\t\s*\#\#.+)?$/) {
51     $l = qq(\$C->{$1} = \$Encode::Charset::CHARSET{$2}->{'$3'};$4);
52 wakaba 1.4 } elsif ($l =~ /^C:option:([^=]+)=([^\t]+?)(\t\s*\#\#.+)?$/) {
53 wakaba 1.3 $l = qq(\$C->{option}->$1 = $2;$3);
54     } elsif ($l =~ /^C:designate:\*:default=(-?[0-3]+)$/) {
55     $l = qq(\$C->{option}->{designate_to}->{G94}->{default} = $1;\n).
56     qq(\$C->{option}->{designate_to}->{G96}->{default} = $1;\n).
57     qq(\$C->{option}->{designate_to}->{G94n}->{default} = $1;\n).
58     qq(\$C->{option}->{designate_to}->{G96n}->{default} = $1;\n).
59     qq(\$C->{option}->{designate_to}->{C0}->{default} = $1;\n).
60     qq(\$C->{option}->{designate_to}->{C1}->{default} = $1;);
61     } elsif ($l =~ /^C:designate:\*drcs:default=(-?[0-3]+)$/) {
62     $l = qq(for (0x30..0x7E) {\n).
63     qq(my \$F = chr \$_;\n).
64     qq( \$C->{option}->{designate_to}->{G94}->{\$F} = $1;\n).
65     qq( \$C->{option}->{designate_to}->{G96}->{\$F} = $1;\n).
66     qq( \$C->{option}->{designate_to}->{G94n}->{\$F} = $1;\n).
67     qq( \$C->{option}->{designate_to}->{G96n}->{\$F} = $1;\n).
68     qq(});
69     } elsif ($l =~ /^C:designate:\*private:default=(-?[0-3]+)$/) {
70     $l = qq(for (0x30..0x3F) {\n).
71     qq( my \$F = chr \$_;\n).
72     qq( for my \$c (qw/G94 G96 G94n G96n C0 C1/) {\n).
73     qq( \$C->{option}->{designate_to}->{\$c}->{\$F} = $1;\n).
74     qq( \$C->{option}->{designate_to}->{\$c}->{'\x21'.\$F} = $1;\n).
75     qq( \$C->{option}->{designate_to}->{\$c}->{'\x22'.\$F} = $1;\n).
76     qq( \$C->{option}->{designate_to}->{\$c}->{'\x23'.\$F} = $1;\n).
77 wakaba 1.5 qq( }\n).
78 wakaba 1.3 qq(});
79 wakaba 1.4 } elsif ($l =~ /^C:designate:([^:=]+):([^=]+)=(-?[0-3]+)(\t\s*\#\#.+)?$/) {
80 wakaba 1.3 $l = qq(\$C->{option}->{designate_to}->{$1}->{'$2'} = $3;$4);
81 wakaba 1.4 } elsif ($l =~ /^C:([GC][LR])=undef(\t\s*\#\#.+)?$/) {
82     $l = qq(\$C->{$1} = undef;$3);
83     } elsif ($l =~ /^C:([GC][LR])=(..)(\t\s*\#\#.+)?$/) {
84     $l = qq(\$C->{$1} = '$2';$3);
85     } elsif ($l =~ /^C:bit=([78])(\t\s*\#\#.+)?$/) {
86     $l = qq(\$C->{bit} = $1;$2);
87 wakaba 1.1 } elsif ($l =~ /^use:table:(.+)$/) {
88 wakaba 1.6 $l = qq(eval q(use Encode::Table::$1) unless \$Encode::Table::${1}::VERSION;);
89 wakaba 1.3 } elsif ($l =~ /^require:private:(.+)$/) {
90     $l = qq(eval q(use Encode::Charset::Private q(:$1)) or die \$\@;);
91     } elsif ($l =~ /^use:private:(.+)$/) {
92     $l = qq(eval q(use Encode::Charset::Private q(:$1)) or die \$\@;\neval q(Encode::Charset::Private::designate_$1 (\$C)););
93 wakaba 1.6 } elsif ($l =~ /^use:charset:(\S+)(?:\s+(.+))?$/) {
94     $l = qq(eval q(require Encode::Charset::$1;\neval q(Encode::Charset::$1->import (qw(@{[$2?$2:':all']})));\nEncode::Charset::${1}::designate (\$C)););
95 wakaba 1.1 } elsif ($l =~ /^use:(.+)$/) {
96 wakaba 1.6 $l = qq(eval q(use $1) unless \$${1}::VERSION;);
97     } elsif ($l =~ /^!\&\s+(\S+)/) {
98     $l = qq(\$s = $1 (\$C, \$s););
99 wakaba 1.3 } elsif ($l =~ /^\#;/) {
100     $l = undef;
101 wakaba 1.1 }
102     }
103     if ($item{$section}) {
104 wakaba 1.3 $item{$section} .= "\n".$l if defined $l;
105 wakaba 1.1 } else {
106     $item{$section} = $l;
107     }
108     }
109     } else { ## Out of blocks
110     if (/^\{$/) {
111     $mode = 1;
112     } elsif (/^(.+):$/) {
113     $section = $1;
114     } elsif (/^\t(.*)$/) {
115     my $t = $1;
116     if ($Info{$section}) {
117     $Info{$section} .= "\n".$t;
118     } else {
119     $Info{$section} = $t;
120     }
121     }
122     }
123     }
124     $ReplaceText{MYSELF} = qq(Encode::$Info{Name});
125    
126     print <<EOH;
127     ## This file is auto-generated (at @{[ sprintf '%04d-%02d-%02dT%02d:%02d:%02dZ', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3,2,1,0] ]}).
128     ## Do not edit by hand!
129    
130     =head1 NAME
131    
132     $ReplaceText{MYSELF} --- $Info{ShortDescription}
133 wakaba 1.2 @{[ $Info{'POD:DESCRIPTION'} ? qq{
134     =head1 DESCRIPTION
135 wakaba 1.1
136     $Info{'POD:DESCRIPTION'}} : '']}
137 wakaba 1.2
138 wakaba 1.1 =cut
139    
140     package $ReplaceText{MYSELF};
141     use 5.7.3;
142     use strict;
143     our \$VERSION = q(@{[sprintf '%04d.%02d%02d', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3]]});
144    
145     =head1 ENCODINGS
146     @{[ $Info{'POD:ENCODING:PREAMBLE'} ? qq(
147     $Info{'POD:ENCODING:PREAMBLE'}
148     ) : '']}
149     =over 8
150    
151     =cut
152    
153     EOH
154    
155 wakaba 1.5 my @alias_def;
156    
157 wakaba 1.1 for my $encode (@{ $Info{encoding} }) {
158 wakaba 1.4 $encode->{EncodeFull} = $encode->{'Encode:Prepare'}."\n".$encode->{Encode};
159     $encode->{DecodeFull} = $encode->{'Decode:Prepare'}."\n".$encode->{Decode};
160     for my $ED (qw/Encode Decode EncodeFull DecodeFull Cversion/) {
161     my $ed = $ED =~ /Encode/ ? 'encode' : 'decode';
162 wakaba 1.6 if ($encode->{$ED} =~ /\$C->{option}->{fallback_from_ucs} = /) {
163     $encode->{$ED} =~ s((?=\$C->{option}->{fallback_from_ucs} = ))
164     (\$C->{option}->{fallback_from_ucs_2} = \$C->{option}->{fallback_from_ucs};\n);
165     }
166 wakaba 1.1 if ($encode->{$ED} =~ /Encode::Table/) {
167     $encode->{$ED} = q/require Encode::Table;
168     my $tbl = defined $obj->{_/.$ed.q/_mapping} ? $obj->{_/.$ed.q/_mapping} : 1;
169     my %tblopt = (-autoload => defined $obj->{_/.$ed.q/_mapping_autoload} ? $obj->{_/.$ed.q/_mapping_autoload} : 1);
170     /.$encode->{$ED};
171     }
172 wakaba 1.3 if ($encode->{$ED} =~ /\$C/) {
173     if ($ED ne 'Cversion' && $encode->{Cversion}) {
174 wakaba 1.4 $encode->{$ED} = ($ED =~ /Full/ ? qq(my \$C = \$obj->__code_version;\n) : '')
175     .qq(\$C->{_encoder} = \$obj;\n)
176     .($ED eq 'EncodeFull' ? qq(\$C->{option}->{fallback_from_ucs} = \$obj->{_encode_fallback} ? \$obj->{_encode_fallback} :
177     \$chk & Encode::DIE_ON_ERR ? 'croak' :
178 wakaba 1.5 \$chk & Encode::RETURN_ON_ERR ? (\$chk & Encode::WARN_ON_ERR ? 'quiet+warn' : 'quiet') :
179 wakaba 1.4 \$chk & Encode::PERLQQ ? 'perl' : \$chk & Encode::HTMLCREF ? 'sgml' :
180     \$chk & Encode::XMLCREF ? 'sgml-hex' : 'replacement';
181     ) : '')
182     .$encode->{$ED};
183 wakaba 1.7 } elsif ($encode->{Encode} =~ /internal_to_sjis/i || $encode->{Decode} =~ /sjis_to_internal/i) {
184 wakaba 1.3 $encode->{$ED} = qq(require Encode::Charset;\nmy \$C = &Encode::Charset::new_object_sjis;\n).$encode->{$ED};
185     } else {
186     $encode->{$ED} = qq(require Encode::Charset;\nmy \$C = &Encode::Charset::new_object;\n).$encode->{$ED};
187     }
188     }
189     for (qw/ISO2022 SJIS/) {
190     if ($encode->{$ED} =~ /Encode::$_/) {
191     $encode->{$ED} = qq(require Encode::$_;\n).$encode->{$ED};
192     }
193     }
194 wakaba 1.1 $encode->{$ED} =~ s/\n/\n /g;
195     }
196 wakaba 1.5
197     ## Define a new encoding
198     if ($encode->{Name}) {
199     print <<EOH;
200 wakaba 1.1
201     package Encode::$Info{Name}::$encode->{ModuleName};
202     our \$VERSION = \$Encode::$Info{Name}::VERSION;
203     use base qw(Encode::Encoding);
204     __PACKAGE__->Define (qw/$encode->{Name} $encode->{Alias}/);
205    
206     =item $encode->{Name}
207    
208     $encode->{Description}@{[ $encode->{Alias} ? '
209     (Alias: ' . join (', ', split /\s+/, $encode->{Alias}) . ')' : '' ]}
210    
211     =cut
212    
213     sub encode (\$\$;\$) {
214     my (\$obj, \$s, \$chk) = \@_;
215 wakaba 1.4 my \%s;
216     $encode->{EncodeFull}
217     if (\$s{die}) { ## FB_CROAK
218     if (\$Carp::VERSION) { Carp::croak ('encode: '.\$s{reason}) }
219     else { die ('encode: '.\$s{reason}) }
220     } elsif (\$s{halfway}) { ## FB_QUIET, FB_WARNING
221     \$_[1] = substr (\$_[1], \$s{converted_length});
222     if (\$s{warn}) {
223     if (\$Carp::VERSION) { Carp::carp ('encode: '.\$s{reason}) }
224     else { warn ('encode: '.\$s{reason}) }
225     }
226     } else {
227     \$_[1] = '' if \$chk;
228     }
229     return \$s;
230     }
231    
232     sub _encode_internal (\$\$\$) {
233     my (\$obj, \$s, \$C) = \@_;
234     my \%s;
235 wakaba 1.1 $encode->{Encode}
236 wakaba 1.4 if (\$s{die}) {
237     if (\$Carp::VERSION) { Carp::croak ('encode: '.\$s{reason}) }
238     else { die ('encode: '.\$s{reason}) }
239     }
240 wakaba 1.1 return \$s;
241     }
242    
243     sub decode (\$\$;\$) {
244     my (\$obj, \$s, \$chk) = \@_;
245 wakaba 1.4 $encode->{DecodeFull}
246 wakaba 1.1 \$_[1] = '' if \$chk;
247     return \$s;
248     }
249 wakaba 1.3 @{[ $encode->{Cversion} ? qq(
250     sub __code_version (\$) {
251     $encode->{Cversion}
252     \$C;
253     }):'']}
254 wakaba 1.1 EOH
255 wakaba 1.5 ## Define a set of aliases
256     } elsif ($encode->{AliasOf}) {
257     my @alias = split /\s+/, $encode->{Alias};
258     my $alias1 = shift @alias;
259     print <<EOH;
260    
261     =item $alias1
262    
263     $encode->{Description}@{[ @alias ? '
264     (Alias: ' . join (', ', @alias) . ')' : '' ]}
265    
266     This name is an alias of $encode->{AliasOf}.
267    
268     =cut
269    
270     EOH
271     ## Encode::Alias is already 'use'ed by Encode, so we don't have to 'require' it.
272     for ($alias1, @alias) {
273     ## Aliases is not defined here, because 'parent' of aliases can be defined
274     ## in later part of THIS module.
275     push @alias_def, "Encode::Alias::define_alias ('$_' => '$encode->{AliasOf}');\n";
276     }
277     }
278 wakaba 1.1 }
279    
280 wakaba 1.5 print @alias_def;
281 wakaba 1.1 print <<EOH;
282    
283     =back
284     @{[ $Info{'POD:ENCODING:POSTAMBLE'} ? qq(
285     $Info{'POD:ENCODING:POSTAMBLE'}
286     ) : '']}
287     =cut
288 wakaba 1.3 @{[$Info{__use_clone} ? q(
289     sub Encode::Encoding::__clone ($) {
290     my $self = shift;
291     bless {%$self}, ref $self;
292     }):'']}
293 wakaba 1.1
294     EOH
295    
296     for my $name (qw/EXAMPLE/, 'SEE ALSO', 'TO DO', qw/AUTHORS LICENSE/) {
297     if ($Info{qq(POD:$name)}) {
298     $Info{qq(POD:$name)} =~ s/%%([A-Za-z0-9_]+)%%/$ReplaceText{$1}/g;
299     print <<EOH;
300     =head1 $name
301    
302     $Info{qq(POD:$name)}
303    
304     EOH
305     }
306     }
307    
308     print <<EOH;
309     =cut
310    
311     1;
312     EOH
313    
314     sub ReplaceText () {
315     my %RT = (
316     GNUGPL2 => q{This program is free software; you can redistribute it and/or modify
317     it under the terms of the GNU General Public License as published by
318     the Free Software Foundation; either version 2 of the License, or
319     (at your option) any later version.
320    
321     This program is distributed in the hope that it will be useful,
322     but WITHOUT ANY WARRANTY; without even the implied warranty of
323     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
324     GNU General Public License for more details.
325    
326     You should have received a copy of the GNU General Public License
327     along with this program; see the file COPYING. If not, write to
328     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
329     Boston, MA 02111-1307, USA.},
330     PerlLicense => q{This library is free software; you can redistribute it
331     and/or modify it under the same terms as Perl itself.},
332     ReferenceIANAREG => q([IANAREG] "CHARACTER SETS", IANA <http://www.iana.org/>,
333     <http://www.iana.org/assignments/character-sets>.
334     The charset registry for IETF <http://www.ietf.org/> standards.),
335 wakaba 1.3 ReferenceJISX0208_1978 => q(JIS C 6226 (JIS X 0208)-1978, "Code of Japanese graphic
336     character set for information interchange", Japan Industrial Standards
337     Committee (JISC) <http://www.jisc.go.jp/>, 1978.),
338     ReferenceJISX0208_1983 => q(JIS C 6226 (JIS X 0208)-1983, "Code of Japanese graphic
339     character set for information interchange", Japan Industrial Standards
340     Committee (JISC) <http://www.jisc.go.jp/>, 1983.),
341     ReferenceJISX0208_1990 => q(JIS X 0208-1990, "Code of Japanese graphic character
342     set for information interchange", Japan Industrial Standards
343     Committee (JISC) <http://www.jisc.go.jp/>, 1990.),
344     ReferenceJISX0212_1990 => q(JIS X 0212-1990, "Code of supplementary Japanese graphic
345     character set for information interchange", Japan Industrial Standards
346     Committee (JISC) <http://www.jisc.go.jp/>, 1990.),
347     ReferenceJISX0221_1995 => q(JIS X 0221-1995, "Universal multi-octet coded character
348 wakaba 1.2 set (UCS)", Japan Industrial Standards Committee
349     <http://www.jisc.go.jp/>, 1995. IDT with ISO/IEC 10646-1:1993
350     but three additional appendixes.),
351 wakaba 1.3 ReferenceJISX0201_1997 => q(JIS X 0201:1997, "7-bit and 8-bit coded character
352     set for information interchange", Japan Industrial Standards
353     Committee (JISC) <http://www.jisc.go.jp/>, 1997.),
354 wakaba 1.1 ReferenceJISX0208_1997 => q(JIS X 0208:1997, "7-bit and 8-bit double byte coded Kanji
355     set for information interchange", Japan Industrial Standards
356     Committee (JISC) <http://www.jisc.go.jp/>, 1997.),
357     ReferenceJISX0213_2000 => q(JIS X 0213:2000, "7-bit and 8-bit double byte coded extended Kanji
358     sets for information interchange", Japan Industrial Standards
359     Committee (JISC) <http://www.jisc.go.jp/>, 2000.),
360 wakaba 1.2 ReferenceRFC1468 => q(RFC 1468, "Japanese Character Encoding for Internet Messages",
361     J. Murai, et al, IETF <http://www.ietf.org/>, June 1993.
362     <urn:ietf:rfc:1468>.),
363 wakaba 1.1 YEAR => (gmtime)[5]+1900,
364     );
365     %RT;
366     }
367    
368     =head1 SEE ALSO
369    
370 wakaba 1.3 L<Encode>, L<Encode::Table>,
371     SuikaWiki:esr2pm <http://suika.fam.cx/~wakaba/-temp/wiki/wiki?esr2pm>
372 wakaba 1.1
373     =head1 LICENSE
374    
375     Copyright 2002 Wakaba <w@suika.fam.cx>
376    
377     This library is free software; you can redistribute it
378     and/or modify it under the same terms as Perl itself.
379    
380     Note that modules generated by this script should be
381     licensed by the licenser of source file so that copyright
382     holder of this script does not claim any right to them.
383    
384     =cut
385    
386 wakaba 1.7 # $Date: 2002/12/18 10:21:09 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24