/[suikacvs]/perl/lib/Encode/Table/tool/tbr2tbl.pl
Suika

Contents of /perl/lib/Encode/Table/tool/tbr2tbl.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (show annotations) (download)
Sat Nov 8 04:25:59 2008 UTC (16 years ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.8: +10 -11 lines
File MIME type: text/plain
New table for SuikaWiki data transition

1 #!/usr/bin/perl
2 use strict;
3
4 =head1 NAME
5
6 tbr2tbl --- PETBL/1.0 source(s) to completed table converter
7
8 =cut
9
10 my %CMD;
11 my %C;
12 $C{tbl_std_cl} = [split /\n/, <<EOH];
13 0x00 U+0000 # <control>
14 0x01 U+0001 # <control>
15 0x02 U+0002 # <control>
16 0x03 U+0003 # <control>
17 0x04 U+0004 # <control>
18 0x05 U+0005 # <control>
19 0x06 U+0006 # <control>
20 0x07 U+0007 # <control>
21 0x08 U+0008 # <control>
22 0x09 U+0009 # <control>
23 0x0A U+000A # <control>
24 0x0B U+000B # <control>
25 0x0C U+000C # <control>
26 0x0D U+000D # <control>
27 0x0E U+000E # <control>
28 0x0F U+000F # <control>
29 0x10 U+0010 # <control>
30 0x11 U+0011 # <control>
31 0x12 U+0012 # <control>
32 0x13 U+0013 # <control>
33 0x14 U+0014 # <control>
34 0x15 U+0015 # <control>
35 0x16 U+0016 # <control>
36 0x17 U+0017 # <control>
37 0x18 U+0018 # <control>
38 0x19 U+0019 # <control>
39 0x1A U+001A # <control>
40 0x1B U+001B # <control>
41 0x1C U+001C # <control>
42 0x1D U+001D # <control>
43 0x1E U+001E # <control>
44 0x1F U+001F # <control>
45 EOH
46 $C{tbl_std_20} = q(0x20 U+0020 # SPACE);
47 $C{tbl_std_7f} = q(0x7F U+007F # DELETE);
48 $C{tbl_std_cr} = [split /\n/, <<EOH];
49 0x80 U+0080 # <control>
50 0x81 U+0081 # <control>
51 0x82 U+0082 # <control>
52 0x83 U+0083 # <control>
53 0x84 U+0084 # <control>
54 0x85 U+0085 # <control>
55 0x86 U+0086 # <control>
56 0x87 U+0087 # <control>
57 0x88 U+0088 # <control>
58 0x89 U+0089 # <control>
59 0x8A U+008A # <control>
60 0x8B U+008B # <control>
61 0x8C U+008C # <control>
62 0x8D U+008D # <control>
63 0x8E U+008E # <control>
64 0x8F U+008F # <control>
65 0x90 U+0090 # <control>
66 0x91 U+0091 # <control>
67 0x92 U+0092 # <control>
68 0x93 U+0093 # <control>
69 0x94 U+0094 # <control>
70 0x95 U+0095 # <control>
71 0x96 U+0096 # <control>
72 0x97 U+0097 # <control>
73 0x98 U+0098 # <control>
74 0x99 U+0099 # <control>
75 0x9A U+009A # <control>
76 0x9B U+009B # <control>
77 0x9C U+009C # <control>
78 0x9D U+009D # <control>
79 0x9E U+009E # <control>
80 0x9F U+009F # <control>
81 EOH
82 $C{tbl_std_a0} = q(0xA0 # <reserved>);
83 $C{tbl_std_ff} = q(0xFF # <reserved>);
84
85 {
86 my @name = split /\n/, require 'unicore/Name.pl';
87 my %name;
88 for (@name) {
89 if (/^(....) ([^\t]+)/) {
90 $name{hex $1} = $2;
91 }
92 }
93 sub charname ($) {
94 my $U = shift;
95 if ($U =~ /[^0-9]/) {
96 $U =~ s/^[Uu]\+|^0[Xx]//;
97 $U = hex $U;
98 }
99 ## TODO: be more strict!
100 $U < 0x0020 ? '<control>' :
101 $U < 0x007F ? $name{$U} :
102 $U < 0x00A0 ? '<control>' :
103 $name{$U} ? $name{$U} :
104 $U < 0x00A0 ? '<control>' :
105 $U < 0x3400 ? '' :
106 $U < 0xA000 ? '<cjk>' :
107 $U < 0xE000 ? '<hangul>' :
108 $U < 0xF900 ? '<private>' :
109 '';
110 }
111 }
112
113 sub array_to_table (@%) {
114 my ($source, $o) = @_;
115 my @r; $o->{mode}->{DEFAULT} = 1;
116 my $mode = 'DEFAULT';
117 for (@$source) {
118 if (/^#\?if-mode ([A-Za-z0-9-]+)/) {
119 $mode = $1;
120 } elsif (/^#\?end-if-mode/) {
121 $mode = 'DEFAULT';
122 } elsif ($o->{mode}->{$mode}) { ## mode is enabled
123
124 if (/^#\?o/) { ## table option
125 push @r, $_ unless $o->{_imported_file};
126 } elsif (s/^#\?([A-Za-z0-9-]+)//) {
127 my %opt = (cmd => $1);
128 s{ ([A-Za-z0-9-]+)=(?:"((?:[^"\\]|\\.)*)"|([A-Za-z0-9-]+))
129 | ([A-Za-z0-9-]+)}{
130 my ($N, $V, $v, $n) = ($1, $2, $3, $4);
131 $V =~ s/\\(.)/$1/g;
132 $opt{ $N || $n } = $n ? 1 : ($V || $v);
133 }gex;
134 push @r, &{ $CMD{ $opt{cmd} } } ($o, \%opt) if ref $CMD{ $opt{cmd} };
135 } elsif (/^##/) { ## Comment
136 push @r, $_;
137 } elsif (/^#;/) { ## Comment
138 } elsif (/^#/) { ## Comment or unsupported function
139 push @r, $_;
140 } elsif (/^(0x|[0-9A-Za-z]+[+-])($o->{except} [0-9A-Fa-f]+)(?:\t([^\t]*)(?:\t([^\t]*)(?:\t(.*))?)?)?/x) {
141 my ($p, $u, $l, $f, $m) = ($1, hex $2, $3, $4, $5);
142 $f = $o->{fallback} if $o->{fallback};
143 my $offset = $o->{offset};
144 $offset += $u + $offset > 0xFF ? 0x8080 : 0x80 if $o->{right};
145 $m =~ s/^#\s*//;
146 push @r, sprintf qq{%s%02X\t%s\t%s\t# %s},
147 $p, $u+$offset, $l, $f,
148 #$m ||
149 charname ($l);
150 } elsif (/^$/) {
151 } else {
152 #push @r, $_;
153 }
154
155 } # / mode is enabled
156 }
157 @r;
158 }
159
160 $CMD{import} = sub {
161 my ($opt0, $opt) = @_;
162 if ($opt->{src}) {
163 ## BUG: resolve of relative path
164 open TBL, $opt->{src} or die "$0: $opt->{src}: Imported table not found";
165 my @tbl = <TBL>; close TBL; map {s/[\x0D\x0A]+$//} @tbl;
166 my $m = {}; for (split /,/, $opt->{mode}) { $m->{$_} = 1 }
167 shift (@tbl) if $tbl[0] =~ m!^#\?PETBL/1.0 SOURCE!;
168 $opt->{except} = $opt->{except} ? qq((?!(?i)$opt->{except})) : '';
169 $opt->{except} .= $opt0->{except} if $opt0->{except};
170 array_to_table (\@tbl, {offset => hex $opt->{offset},
171 fallback => $opt->{fallback}, mode => $m,
172 except => $opt->{except}, right => $opt->{right},
173 _imported_file => 1});
174 } elsif ($opt->{'std-cl'}) { @{ $C{tbl_std_cl} };
175 } elsif ($opt->{'std-cr'}) { @{ $C{tbl_std_cr} };
176 } elsif ($opt->{'std-0x20'} || $opt->{'std-sp'}) { $C{tbl_std_20};
177 } elsif ($opt->{'std-0x7F'} || $opt->{'std-del'}) { $C{tbl_std_7f};
178 } elsif ($opt->{'std-0xA0'}) { $C{tbl_std_a0};
179 } elsif ($opt->{'std-0xFF'}) { $C{tbl_std_ff};
180 }
181 };
182
183 my @src;
184 while (<>) {
185 s/[\x0D\x0A]+$//;
186 push @src, $_;
187 }
188 shift (@src) if $src[0] =~ m!^#\?PETBL/1.0 SOURCE!;
189 @src = sort {
190 $a =~ /^#/ ? 0 :
191 $b =~ /^#/ ? 0 : $a cmp $b
192 } array_to_table (\@src);
193
194 binmode STDOUT;
195 print "#?PETBL/1.0\n";
196 print join ("\n", @src)."\n";
197
198 =head1 LICENSE
199
200 Copyright 2002 Nanashi-san.
201
202 Copyright 2008 Wakaba <w@suika.fam.cx>.
203
204 This library is free software; you can redistribute it
205 and/or modify it under the same terms as Perl itself.
206
207 The authors do NOT claim any right to the data generated using this
208 script. License of generated data fully depends on the intention of
209 the author(s) of source data used to generate them.
210
211 =cut
212
213 1; ## $Date: 2002/12/18 10:21:09 $
214 ### tbr2tbl.pl ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24