/[suikacvs]/perl/lib/Encode/Table/tool/tbr2tbl.pl
Suika

Contents of /perl/lib/Encode/Table/tool/tbr2tbl.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.9 - (hide annotations) (download)
Sat Nov 8 04:25:59 2008 UTC (16 years ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.8: +10 -11 lines
File MIME type: text/plain
New table for SuikaWiki data transition

1 wakaba 1.1 #!/usr/bin/perl
2     use strict;
3    
4 wakaba 1.2 =head1 NAME
5    
6     tbr2tbl --- PETBL/1.0 source(s) to completed table converter
7    
8     =cut
9 wakaba 1.1
10     my %CMD;
11     my %C;
12     $C{tbl_std_cl} = [split /\n/, <<EOH];
13     0x00 U+0000 # <control>
14     0x01 U+0001 # <control>
15     0x02 U+0002 # <control>
16     0x03 U+0003 # <control>
17     0x04 U+0004 # <control>
18     0x05 U+0005 # <control>
19     0x06 U+0006 # <control>
20     0x07 U+0007 # <control>
21     0x08 U+0008 # <control>
22     0x09 U+0009 # <control>
23     0x0A U+000A # <control>
24     0x0B U+000B # <control>
25     0x0C U+000C # <control>
26     0x0D U+000D # <control>
27     0x0E U+000E # <control>
28     0x0F U+000F # <control>
29     0x10 U+0010 # <control>
30     0x11 U+0011 # <control>
31     0x12 U+0012 # <control>
32     0x13 U+0013 # <control>
33     0x14 U+0014 # <control>
34     0x15 U+0015 # <control>
35     0x16 U+0016 # <control>
36     0x17 U+0017 # <control>
37     0x18 U+0018 # <control>
38     0x19 U+0019 # <control>
39     0x1A U+001A # <control>
40     0x1B U+001B # <control>
41     0x1C U+001C # <control>
42     0x1D U+001D # <control>
43     0x1E U+001E # <control>
44     0x1F U+001F # <control>
45     EOH
46     $C{tbl_std_20} = q(0x20 U+0020 # SPACE);
47     $C{tbl_std_7f} = q(0x7F U+007F # DELETE);
48     $C{tbl_std_cr} = [split /\n/, <<EOH];
49     0x80 U+0080 # <control>
50     0x81 U+0081 # <control>
51     0x82 U+0082 # <control>
52     0x83 U+0083 # <control>
53     0x84 U+0084 # <control>
54     0x85 U+0085 # <control>
55     0x86 U+0086 # <control>
56     0x87 U+0087 # <control>
57     0x88 U+0088 # <control>
58     0x89 U+0089 # <control>
59     0x8A U+008A # <control>
60     0x8B U+008B # <control>
61     0x8C U+008C # <control>
62     0x8D U+008D # <control>
63     0x8E U+008E # <control>
64     0x8F U+008F # <control>
65     0x90 U+0090 # <control>
66     0x91 U+0091 # <control>
67     0x92 U+0092 # <control>
68     0x93 U+0093 # <control>
69     0x94 U+0094 # <control>
70     0x95 U+0095 # <control>
71     0x96 U+0096 # <control>
72     0x97 U+0097 # <control>
73     0x98 U+0098 # <control>
74     0x99 U+0099 # <control>
75     0x9A U+009A # <control>
76     0x9B U+009B # <control>
77     0x9C U+009C # <control>
78     0x9D U+009D # <control>
79     0x9E U+009E # <control>
80     0x9F U+009F # <control>
81     EOH
82     $C{tbl_std_a0} = q(0xA0 # <reserved>);
83     $C{tbl_std_ff} = q(0xFF # <reserved>);
84    
85     {
86     my @name = split /\n/, require 'unicore/Name.pl';
87     my %name;
88     for (@name) {
89     if (/^(....) ([^\t]+)/) {
90     $name{hex $1} = $2;
91     }
92     }
93     sub charname ($) {
94 wakaba 1.5 my $U = shift;
95     if ($U =~ /[^0-9]/) {
96     $U =~ s/^[Uu]\+|^0[Xx]//;
97     $U = hex $U;
98     }
99 wakaba 1.1 ## TODO: be more strict!
100 wakaba 1.5 $U < 0x0020 ? '<control>' :
101     $U < 0x007F ? $name{$U} :
102     $U < 0x00A0 ? '<control>' :
103     $name{$U} ? $name{$U} :
104     $U < 0x00A0 ? '<control>' :
105     $U < 0x3400 ? '' :
106     $U < 0xA000 ? '<cjk>' :
107     $U < 0xE000 ? '<hangul>' :
108     $U < 0xF900 ? '<private>' :
109 wakaba 1.1 '';
110     }
111     }
112    
113     sub array_to_table (@%) {
114     my ($source, $o) = @_;
115     my @r; $o->{mode}->{DEFAULT} = 1;
116     my $mode = 'DEFAULT';
117     for (@$source) {
118     if (/^#\?if-mode ([A-Za-z0-9-]+)/) {
119     $mode = $1;
120     } elsif (/^#\?end-if-mode/) {
121     $mode = 'DEFAULT';
122     } elsif ($o->{mode}->{$mode}) { ## mode is enabled
123    
124     if (/^#\?o/) { ## table option
125 wakaba 1.6 push @r, $_ unless $o->{_imported_file};
126 wakaba 1.1 } elsif (s/^#\?([A-Za-z0-9-]+)//) {
127     my %opt = (cmd => $1);
128     s{ ([A-Za-z0-9-]+)=(?:"((?:[^"\\]|\\.)*)"|([A-Za-z0-9-]+))
129     | ([A-Za-z0-9-]+)}{
130     my ($N, $V, $v, $n) = ($1, $2, $3, $4);
131     $V =~ s/\\(.)/$1/g;
132     $opt{ $N || $n } = $n ? 1 : ($V || $v);
133     }gex;
134 wakaba 1.6 push @r, &{ $CMD{ $opt{cmd} } } ($o, \%opt) if ref $CMD{ $opt{cmd} };
135 wakaba 1.1 } elsif (/^##/) { ## Comment
136     push @r, $_;
137 wakaba 1.3 } elsif (/^#;/) { ## Comment
138 wakaba 1.1 } elsif (/^#/) { ## Comment or unsupported function
139 wakaba 1.3 push @r, $_;
140 wakaba 1.8 } elsif (/^(0x|[0-9A-Za-z]+[+-])($o->{except} [0-9A-Fa-f]+)(?:\t([^\t]*)(?:\t([^\t]*)(?:\t(.*))?)?)?/x) {
141     my ($p, $u, $l, $f, $m) = ($1, hex $2, $3, $4, $5);
142 wakaba 1.1 $f = $o->{fallback} if $o->{fallback};
143     my $offset = $o->{offset};
144     $offset += $u + $offset > 0xFF ? 0x8080 : 0x80 if $o->{right};
145     $m =~ s/^#\s*//;
146 wakaba 1.8 push @r, sprintf qq{%s%02X\t%s\t%s\t# %s},
147 wakaba 1.9 $p, $u+$offset, $l, $f,
148     #$m ||
149     charname ($l);
150 wakaba 1.1 } elsif (/^$/) {
151     } else {
152 wakaba 1.3 #push @r, $_;
153 wakaba 1.1 }
154    
155     } # / mode is enabled
156     }
157     @r;
158     }
159    
160     $CMD{import} = sub {
161 wakaba 1.6 my ($opt0, $opt) = @_;
162 wakaba 1.1 if ($opt->{src}) {
163     ## BUG: resolve of relative path
164     open TBL, $opt->{src} or die "$0: $opt->{src}: Imported table not found";
165     my @tbl = <TBL>; close TBL; map {s/[\x0D\x0A]+$//} @tbl;
166     my $m = {}; for (split /,/, $opt->{mode}) { $m->{$_} = 1 }
167     shift (@tbl) if $tbl[0] =~ m!^#\?PETBL/1.0 SOURCE!;
168 wakaba 1.3 $opt->{except} = $opt->{except} ? qq((?!(?i)$opt->{except})) : '';
169 wakaba 1.7 $opt->{except} .= $opt0->{except} if $opt0->{except};
170 wakaba 1.1 array_to_table (\@tbl, {offset => hex $opt->{offset},
171     fallback => $opt->{fallback}, mode => $m,
172 wakaba 1.6 except => $opt->{except}, right => $opt->{right},
173     _imported_file => 1});
174 wakaba 1.1 } elsif ($opt->{'std-cl'}) { @{ $C{tbl_std_cl} };
175     } elsif ($opt->{'std-cr'}) { @{ $C{tbl_std_cr} };
176     } elsif ($opt->{'std-0x20'} || $opt->{'std-sp'}) { $C{tbl_std_20};
177     } elsif ($opt->{'std-0x7F'} || $opt->{'std-del'}) { $C{tbl_std_7f};
178     } elsif ($opt->{'std-0xA0'}) { $C{tbl_std_a0};
179     } elsif ($opt->{'std-0xFF'}) { $C{tbl_std_ff};
180     }
181     };
182    
183     my @src;
184     while (<>) {
185     s/[\x0D\x0A]+$//;
186     push @src, $_;
187     }
188     shift (@src) if $src[0] =~ m!^#\?PETBL/1.0 SOURCE!;
189     @src = sort {
190     $a =~ /^#/ ? 0 :
191     $b =~ /^#/ ? 0 : $a cmp $b
192     } array_to_table (\@src);
193    
194     binmode STDOUT;
195     print "#?PETBL/1.0\n";
196     print join ("\n", @src)."\n";
197 wakaba 1.2
198 wakaba 1.9 =head1 LICENSE
199 wakaba 1.2
200 wakaba 1.9 Copyright 2002 Nanashi-san.
201 wakaba 1.2
202 wakaba 1.9 Copyright 2008 Wakaba <w@suika.fam.cx>.
203 wakaba 1.2
204     This library is free software; you can redistribute it
205     and/or modify it under the same terms as Perl itself.
206    
207 wakaba 1.9 The authors do NOT claim any right to the data generated using this
208     script. License of generated data fully depends on the intention of
209     the author(s) of source data used to generate them.
210 wakaba 1.2
211     =cut
212    
213 wakaba 1.9 1; ## $Date: 2002/12/18 10:21:09 $
214 wakaba 1.2 ### tbr2tbl.pl ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24