/[suikacvs]/perl/lib/Encode/Table/tool/tbr2tbl.pl
Suika

Contents of /perl/lib/Encode/Table/tool/tbr2tbl.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (hide annotations) (download)
Wed Dec 18 10:21:09 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
Changes since 1.7: +5 -5 lines
File MIME type: text/plain
*** empty log message ***

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4 wakaba 1.2 =head1 NAME
5    
6     tbr2tbl --- PETBL/1.0 source(s) to completed table converter
7    
8     =cut
9 wakaba 1.1
10     my %CMD;
11     my %C;
12     $C{tbl_std_cl} = [split /\n/, <<EOH];
13     0x00 U+0000 # <control>
14     0x01 U+0001 # <control>
15     0x02 U+0002 # <control>
16     0x03 U+0003 # <control>
17     0x04 U+0004 # <control>
18     0x05 U+0005 # <control>
19     0x06 U+0006 # <control>
20     0x07 U+0007 # <control>
21     0x08 U+0008 # <control>
22     0x09 U+0009 # <control>
23     0x0A U+000A # <control>
24     0x0B U+000B # <control>
25     0x0C U+000C # <control>
26     0x0D U+000D # <control>
27     0x0E U+000E # <control>
28     0x0F U+000F # <control>
29     0x10 U+0010 # <control>
30     0x11 U+0011 # <control>
31     0x12 U+0012 # <control>
32     0x13 U+0013 # <control>
33     0x14 U+0014 # <control>
34     0x15 U+0015 # <control>
35     0x16 U+0016 # <control>
36     0x17 U+0017 # <control>
37     0x18 U+0018 # <control>
38     0x19 U+0019 # <control>
39     0x1A U+001A # <control>
40     0x1B U+001B # <control>
41     0x1C U+001C # <control>
42     0x1D U+001D # <control>
43     0x1E U+001E # <control>
44     0x1F U+001F # <control>
45     EOH
46     $C{tbl_std_20} = q(0x20 U+0020 # SPACE);
47     $C{tbl_std_7f} = q(0x7F U+007F # DELETE);
48     $C{tbl_std_cr} = [split /\n/, <<EOH];
49     0x80 U+0080 # <control>
50     0x81 U+0081 # <control>
51     0x82 U+0082 # <control>
52     0x83 U+0083 # <control>
53     0x84 U+0084 # <control>
54     0x85 U+0085 # <control>
55     0x86 U+0086 # <control>
56     0x87 U+0087 # <control>
57     0x88 U+0088 # <control>
58     0x89 U+0089 # <control>
59     0x8A U+008A # <control>
60     0x8B U+008B # <control>
61     0x8C U+008C # <control>
62     0x8D U+008D # <control>
63     0x8E U+008E # <control>
64     0x8F U+008F # <control>
65     0x90 U+0090 # <control>
66     0x91 U+0091 # <control>
67     0x92 U+0092 # <control>
68     0x93 U+0093 # <control>
69     0x94 U+0094 # <control>
70     0x95 U+0095 # <control>
71     0x96 U+0096 # <control>
72     0x97 U+0097 # <control>
73     0x98 U+0098 # <control>
74     0x99 U+0099 # <control>
75     0x9A U+009A # <control>
76     0x9B U+009B # <control>
77     0x9C U+009C # <control>
78     0x9D U+009D # <control>
79     0x9E U+009E # <control>
80     0x9F U+009F # <control>
81     EOH
82     $C{tbl_std_a0} = q(0xA0 # <reserved>);
83     $C{tbl_std_ff} = q(0xFF # <reserved>);
84    
85     {
86     my @name = split /\n/, require 'unicore/Name.pl';
87     my %name;
88     for (@name) {
89     if (/^(....) ([^\t]+)/) {
90     $name{hex $1} = $2;
91     }
92     }
93     sub charname ($) {
94 wakaba 1.5 my $U = shift;
95     if ($U =~ /[^0-9]/) {
96     $U =~ s/^[Uu]\+|^0[Xx]//;
97     $U = hex $U;
98     }
99 wakaba 1.1 ## TODO: be more strict!
100 wakaba 1.5 $U < 0x0020 ? '<control>' :
101     $U < 0x007F ? $name{$U} :
102     $U < 0x00A0 ? '<control>' :
103     $name{$U} ? $name{$U} :
104     $U < 0x00A0 ? '<control>' :
105     $U < 0x3400 ? '' :
106     $U < 0xA000 ? '<cjk>' :
107     $U < 0xE000 ? '<hangul>' :
108     $U < 0xF900 ? '<private>' :
109 wakaba 1.1 '';
110     }
111     }
112    
113     sub array_to_table (@%) {
114     my ($source, $o) = @_;
115     my @r; $o->{mode}->{DEFAULT} = 1;
116     my $mode = 'DEFAULT';
117     for (@$source) {
118     if (/^#\?if-mode ([A-Za-z0-9-]+)/) {
119     $mode = $1;
120     } elsif (/^#\?end-if-mode/) {
121     $mode = 'DEFAULT';
122     } elsif ($o->{mode}->{$mode}) { ## mode is enabled
123    
124     if (/^#\?o/) { ## table option
125 wakaba 1.6 push @r, $_ unless $o->{_imported_file};
126 wakaba 1.1 } elsif (s/^#\?([A-Za-z0-9-]+)//) {
127     my %opt = (cmd => $1);
128     s{ ([A-Za-z0-9-]+)=(?:"((?:[^"\\]|\\.)*)"|([A-Za-z0-9-]+))
129     | ([A-Za-z0-9-]+)}{
130     my ($N, $V, $v, $n) = ($1, $2, $3, $4);
131     $V =~ s/\\(.)/$1/g;
132     $opt{ $N || $n } = $n ? 1 : ($V || $v);
133     }gex;
134 wakaba 1.6 push @r, &{ $CMD{ $opt{cmd} } } ($o, \%opt) if ref $CMD{ $opt{cmd} };
135 wakaba 1.1 } elsif (/^##/) { ## Comment
136     push @r, $_;
137 wakaba 1.3 } elsif (/^#;/) { ## Comment
138 wakaba 1.1 } elsif (/^#/) { ## Comment or unsupported function
139 wakaba 1.3 push @r, $_;
140 wakaba 1.8 } elsif (/^(0x|[0-9A-Za-z]+[+-])($o->{except} [0-9A-Fa-f]+)(?:\t([^\t]*)(?:\t([^\t]*)(?:\t(.*))?)?)?/x) {
141     my ($p, $u, $l, $f, $m) = ($1, hex $2, $3, $4, $5);
142 wakaba 1.1 $f = $o->{fallback} if $o->{fallback};
143     my $offset = $o->{offset};
144     $offset += $u + $offset > 0xFF ? 0x8080 : 0x80 if $o->{right};
145     $m =~ s/^#\s*//;
146 wakaba 1.8 push @r, sprintf qq{%s%02X\t%s\t%s\t# %s},
147     $p, $u+$offset, $l, $f, $m || charname ($l);
148 wakaba 1.1 } elsif (/^$/) {
149     } else {
150 wakaba 1.3 #push @r, $_;
151 wakaba 1.1 }
152    
153     } # / mode is enabled
154     }
155     @r;
156     }
157    
158     $CMD{import} = sub {
159 wakaba 1.6 my ($opt0, $opt) = @_;
160 wakaba 1.1 if ($opt->{src}) {
161     ## BUG: resolve of relative path
162     open TBL, $opt->{src} or die "$0: $opt->{src}: Imported table not found";
163     my @tbl = <TBL>; close TBL; map {s/[\x0D\x0A]+$//} @tbl;
164     my $m = {}; for (split /,/, $opt->{mode}) { $m->{$_} = 1 }
165     shift (@tbl) if $tbl[0] =~ m!^#\?PETBL/1.0 SOURCE!;
166 wakaba 1.3 $opt->{except} = $opt->{except} ? qq((?!(?i)$opt->{except})) : '';
167 wakaba 1.7 $opt->{except} .= $opt0->{except} if $opt0->{except};
168 wakaba 1.1 array_to_table (\@tbl, {offset => hex $opt->{offset},
169     fallback => $opt->{fallback}, mode => $m,
170 wakaba 1.6 except => $opt->{except}, right => $opt->{right},
171     _imported_file => 1});
172 wakaba 1.1 } elsif ($opt->{'std-cl'}) { @{ $C{tbl_std_cl} };
173     } elsif ($opt->{'std-cr'}) { @{ $C{tbl_std_cr} };
174     } elsif ($opt->{'std-0x20'} || $opt->{'std-sp'}) { $C{tbl_std_20};
175     } elsif ($opt->{'std-0x7F'} || $opt->{'std-del'}) { $C{tbl_std_7f};
176     } elsif ($opt->{'std-0xA0'}) { $C{tbl_std_a0};
177     } elsif ($opt->{'std-0xFF'}) { $C{tbl_std_ff};
178     }
179     };
180    
181     my @src;
182     while (<>) {
183     s/[\x0D\x0A]+$//;
184     push @src, $_;
185     }
186     shift (@src) if $src[0] =~ m!^#\?PETBL/1.0 SOURCE!;
187     @src = sort {
188     $a =~ /^#/ ? 0 :
189     $b =~ /^#/ ? 0 : $a cmp $b
190     } array_to_table (\@src);
191    
192     binmode STDOUT;
193     print "#?PETBL/1.0\n";
194     print join ("\n", @src)."\n";
195 wakaba 1.2
196    
197     =head1 AUTHOR
198    
199     Nanashi-san
200    
201     =head1 LICENSE
202    
203     Copyright 2002 AUTHOR
204    
205     This library is free software; you can redistribute it
206     and/or modify it under the same terms as Perl itself.
207    
208     AUTHOR does NOT claim any right to the data generated by
209     this script. License of generated data fully depends
210     author of source data.
211    
212     =cut
213    
214 wakaba 1.8 1; ## $Date: 2002/10/14 06:56:53 $
215 wakaba 1.2 ### tbr2tbl.pl ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24