| 1 |
wakaba |
1.1 |
#!/usr/bin/perl
|
| 2 |
|
|
use strict;
|
| 3 |
|
|
|
| 4 |
wakaba |
1.2 |
=head1 NAME
|
| 5 |
|
|
|
| 6 |
|
|
tbr2tbl --- PETBL/1.0 source(s) to completed table converter
|
| 7 |
|
|
|
| 8 |
|
|
=cut
|
| 9 |
wakaba |
1.1 |
|
| 10 |
|
|
my %CMD;
|
| 11 |
|
|
my %C;
|
| 12 |
|
|
$C{tbl_std_cl} = [split /\n/, <<EOH];
|
| 13 |
|
|
0x00 U+0000 # <control>
|
| 14 |
|
|
0x01 U+0001 # <control>
|
| 15 |
|
|
0x02 U+0002 # <control>
|
| 16 |
|
|
0x03 U+0003 # <control>
|
| 17 |
|
|
0x04 U+0004 # <control>
|
| 18 |
|
|
0x05 U+0005 # <control>
|
| 19 |
|
|
0x06 U+0006 # <control>
|
| 20 |
|
|
0x07 U+0007 # <control>
|
| 21 |
|
|
0x08 U+0008 # <control>
|
| 22 |
|
|
0x09 U+0009 # <control>
|
| 23 |
|
|
0x0A U+000A # <control>
|
| 24 |
|
|
0x0B U+000B # <control>
|
| 25 |
|
|
0x0C U+000C # <control>
|
| 26 |
|
|
0x0D U+000D # <control>
|
| 27 |
|
|
0x0E U+000E # <control>
|
| 28 |
|
|
0x0F U+000F # <control>
|
| 29 |
|
|
0x10 U+0010 # <control>
|
| 30 |
|
|
0x11 U+0011 # <control>
|
| 31 |
|
|
0x12 U+0012 # <control>
|
| 32 |
|
|
0x13 U+0013 # <control>
|
| 33 |
|
|
0x14 U+0014 # <control>
|
| 34 |
|
|
0x15 U+0015 # <control>
|
| 35 |
|
|
0x16 U+0016 # <control>
|
| 36 |
|
|
0x17 U+0017 # <control>
|
| 37 |
|
|
0x18 U+0018 # <control>
|
| 38 |
|
|
0x19 U+0019 # <control>
|
| 39 |
|
|
0x1A U+001A # <control>
|
| 40 |
|
|
0x1B U+001B # <control>
|
| 41 |
|
|
0x1C U+001C # <control>
|
| 42 |
|
|
0x1D U+001D # <control>
|
| 43 |
|
|
0x1E U+001E # <control>
|
| 44 |
|
|
0x1F U+001F # <control>
|
| 45 |
|
|
EOH
|
| 46 |
|
|
$C{tbl_std_20} = q(0x20 U+0020 # SPACE);
|
| 47 |
|
|
$C{tbl_std_7f} = q(0x7F U+007F # DELETE);
|
| 48 |
|
|
$C{tbl_std_cr} = [split /\n/, <<EOH];
|
| 49 |
|
|
0x80 U+0080 # <control>
|
| 50 |
|
|
0x81 U+0081 # <control>
|
| 51 |
|
|
0x82 U+0082 # <control>
|
| 52 |
|
|
0x83 U+0083 # <control>
|
| 53 |
|
|
0x84 U+0084 # <control>
|
| 54 |
|
|
0x85 U+0085 # <control>
|
| 55 |
|
|
0x86 U+0086 # <control>
|
| 56 |
|
|
0x87 U+0087 # <control>
|
| 57 |
|
|
0x88 U+0088 # <control>
|
| 58 |
|
|
0x89 U+0089 # <control>
|
| 59 |
|
|
0x8A U+008A # <control>
|
| 60 |
|
|
0x8B U+008B # <control>
|
| 61 |
|
|
0x8C U+008C # <control>
|
| 62 |
|
|
0x8D U+008D # <control>
|
| 63 |
|
|
0x8E U+008E # <control>
|
| 64 |
|
|
0x8F U+008F # <control>
|
| 65 |
|
|
0x90 U+0090 # <control>
|
| 66 |
|
|
0x91 U+0091 # <control>
|
| 67 |
|
|
0x92 U+0092 # <control>
|
| 68 |
|
|
0x93 U+0093 # <control>
|
| 69 |
|
|
0x94 U+0094 # <control>
|
| 70 |
|
|
0x95 U+0095 # <control>
|
| 71 |
|
|
0x96 U+0096 # <control>
|
| 72 |
|
|
0x97 U+0097 # <control>
|
| 73 |
|
|
0x98 U+0098 # <control>
|
| 74 |
|
|
0x99 U+0099 # <control>
|
| 75 |
|
|
0x9A U+009A # <control>
|
| 76 |
|
|
0x9B U+009B # <control>
|
| 77 |
|
|
0x9C U+009C # <control>
|
| 78 |
|
|
0x9D U+009D # <control>
|
| 79 |
|
|
0x9E U+009E # <control>
|
| 80 |
|
|
0x9F U+009F # <control>
|
| 81 |
|
|
EOH
|
| 82 |
|
|
$C{tbl_std_a0} = q(0xA0 # <reserved>);
|
| 83 |
|
|
$C{tbl_std_ff} = q(0xFF # <reserved>);
|
| 84 |
|
|
|
| 85 |
|
|
{
|
| 86 |
|
|
my @name = split /\n/, require 'unicore/Name.pl';
|
| 87 |
|
|
my %name;
|
| 88 |
|
|
for (@name) {
|
| 89 |
|
|
if (/^(....) ([^\t]+)/) {
|
| 90 |
|
|
$name{hex $1} = $2;
|
| 91 |
|
|
}
|
| 92 |
|
|
}
|
| 93 |
|
|
sub charname ($) {
|
| 94 |
wakaba |
1.5 |
my $U = shift;
|
| 95 |
|
|
if ($U =~ /[^0-9]/) {
|
| 96 |
|
|
$U =~ s/^[Uu]\+|^0[Xx]//;
|
| 97 |
|
|
$U = hex $U;
|
| 98 |
|
|
}
|
| 99 |
wakaba |
1.1 |
## TODO: be more strict!
|
| 100 |
wakaba |
1.5 |
$U < 0x0020 ? '<control>' :
|
| 101 |
|
|
$U < 0x007F ? $name{$U} :
|
| 102 |
|
|
$U < 0x00A0 ? '<control>' :
|
| 103 |
|
|
$name{$U} ? $name{$U} :
|
| 104 |
|
|
$U < 0x00A0 ? '<control>' :
|
| 105 |
|
|
$U < 0x3400 ? '' :
|
| 106 |
|
|
$U < 0xA000 ? '<cjk>' :
|
| 107 |
|
|
$U < 0xE000 ? '<hangul>' :
|
| 108 |
|
|
$U < 0xF900 ? '<private>' :
|
| 109 |
wakaba |
1.1 |
'';
|
| 110 |
|
|
}
|
| 111 |
|
|
}
|
| 112 |
|
|
|
| 113 |
|
|
sub array_to_table (@%) {
|
| 114 |
|
|
my ($source, $o) = @_;
|
| 115 |
|
|
my @r; $o->{mode}->{DEFAULT} = 1;
|
| 116 |
|
|
my $mode = 'DEFAULT';
|
| 117 |
|
|
for (@$source) {
|
| 118 |
|
|
if (/^#\?if-mode ([A-Za-z0-9-]+)/) {
|
| 119 |
|
|
$mode = $1;
|
| 120 |
|
|
} elsif (/^#\?end-if-mode/) {
|
| 121 |
|
|
$mode = 'DEFAULT';
|
| 122 |
|
|
} elsif ($o->{mode}->{$mode}) { ## mode is enabled
|
| 123 |
|
|
|
| 124 |
|
|
if (/^#\?o/) { ## table option
|
| 125 |
wakaba |
1.6 |
push @r, $_ unless $o->{_imported_file};
|
| 126 |
wakaba |
1.1 |
} elsif (s/^#\?([A-Za-z0-9-]+)//) {
|
| 127 |
|
|
my %opt = (cmd => $1);
|
| 128 |
|
|
s{ ([A-Za-z0-9-]+)=(?:"((?:[^"\\]|\\.)*)"|([A-Za-z0-9-]+))
|
| 129 |
|
|
| ([A-Za-z0-9-]+)}{
|
| 130 |
|
|
my ($N, $V, $v, $n) = ($1, $2, $3, $4);
|
| 131 |
|
|
$V =~ s/\\(.)/$1/g;
|
| 132 |
|
|
$opt{ $N || $n } = $n ? 1 : ($V || $v);
|
| 133 |
|
|
}gex;
|
| 134 |
wakaba |
1.6 |
push @r, &{ $CMD{ $opt{cmd} } } ($o, \%opt) if ref $CMD{ $opt{cmd} };
|
| 135 |
wakaba |
1.1 |
} elsif (/^##/) { ## Comment
|
| 136 |
|
|
push @r, $_;
|
| 137 |
wakaba |
1.3 |
} elsif (/^#;/) { ## Comment
|
| 138 |
wakaba |
1.1 |
} elsif (/^#/) { ## Comment or unsupported function
|
| 139 |
wakaba |
1.3 |
push @r, $_;
|
| 140 |
wakaba |
1.8 |
} elsif (/^(0x|[0-9A-Za-z]+[+-])($o->{except} [0-9A-Fa-f]+)(?:\t([^\t]*)(?:\t([^\t]*)(?:\t(.*))?)?)?/x) {
|
| 141 |
|
|
my ($p, $u, $l, $f, $m) = ($1, hex $2, $3, $4, $5);
|
| 142 |
wakaba |
1.1 |
$f = $o->{fallback} if $o->{fallback};
|
| 143 |
|
|
my $offset = $o->{offset};
|
| 144 |
|
|
$offset += $u + $offset > 0xFF ? 0x8080 : 0x80 if $o->{right};
|
| 145 |
|
|
$m =~ s/^#\s*//;
|
| 146 |
wakaba |
1.8 |
push @r, sprintf qq{%s%02X\t%s\t%s\t# %s},
|
| 147 |
|
|
$p, $u+$offset, $l, $f, $m || charname ($l);
|
| 148 |
wakaba |
1.1 |
} elsif (/^$/) {
|
| 149 |
|
|
} else {
|
| 150 |
wakaba |
1.3 |
#push @r, $_;
|
| 151 |
wakaba |
1.1 |
}
|
| 152 |
|
|
|
| 153 |
|
|
} # / mode is enabled
|
| 154 |
|
|
}
|
| 155 |
|
|
@r;
|
| 156 |
|
|
}
|
| 157 |
|
|
|
| 158 |
|
|
$CMD{import} = sub {
|
| 159 |
wakaba |
1.6 |
my ($opt0, $opt) = @_;
|
| 160 |
wakaba |
1.1 |
if ($opt->{src}) {
|
| 161 |
|
|
## BUG: resolve of relative path
|
| 162 |
|
|
open TBL, $opt->{src} or die "$0: $opt->{src}: Imported table not found";
|
| 163 |
|
|
my @tbl = <TBL>; close TBL; map {s/[\x0D\x0A]+$//} @tbl;
|
| 164 |
|
|
my $m = {}; for (split /,/, $opt->{mode}) { $m->{$_} = 1 }
|
| 165 |
|
|
shift (@tbl) if $tbl[0] =~ m!^#\?PETBL/1.0 SOURCE!;
|
| 166 |
wakaba |
1.3 |
$opt->{except} = $opt->{except} ? qq((?!(?i)$opt->{except})) : '';
|
| 167 |
wakaba |
1.7 |
$opt->{except} .= $opt0->{except} if $opt0->{except};
|
| 168 |
wakaba |
1.1 |
array_to_table (\@tbl, {offset => hex $opt->{offset},
|
| 169 |
|
|
fallback => $opt->{fallback}, mode => $m,
|
| 170 |
wakaba |
1.6 |
except => $opt->{except}, right => $opt->{right},
|
| 171 |
|
|
_imported_file => 1});
|
| 172 |
wakaba |
1.1 |
} elsif ($opt->{'std-cl'}) { @{ $C{tbl_std_cl} };
|
| 173 |
|
|
} elsif ($opt->{'std-cr'}) { @{ $C{tbl_std_cr} };
|
| 174 |
|
|
} elsif ($opt->{'std-0x20'} || $opt->{'std-sp'}) { $C{tbl_std_20};
|
| 175 |
|
|
} elsif ($opt->{'std-0x7F'} || $opt->{'std-del'}) { $C{tbl_std_7f};
|
| 176 |
|
|
} elsif ($opt->{'std-0xA0'}) { $C{tbl_std_a0};
|
| 177 |
|
|
} elsif ($opt->{'std-0xFF'}) { $C{tbl_std_ff};
|
| 178 |
|
|
}
|
| 179 |
|
|
};
|
| 180 |
|
|
|
| 181 |
|
|
my @src;
|
| 182 |
|
|
while (<>) {
|
| 183 |
|
|
s/[\x0D\x0A]+$//;
|
| 184 |
|
|
push @src, $_;
|
| 185 |
|
|
}
|
| 186 |
|
|
shift (@src) if $src[0] =~ m!^#\?PETBL/1.0 SOURCE!;
|
| 187 |
|
|
@src = sort {
|
| 188 |
|
|
$a =~ /^#/ ? 0 :
|
| 189 |
|
|
$b =~ /^#/ ? 0 : $a cmp $b
|
| 190 |
|
|
} array_to_table (\@src);
|
| 191 |
|
|
|
| 192 |
|
|
binmode STDOUT;
|
| 193 |
|
|
print "#?PETBL/1.0\n";
|
| 194 |
|
|
print join ("\n", @src)."\n";
|
| 195 |
wakaba |
1.2 |
|
| 196 |
|
|
|
| 197 |
|
|
=head1 AUTHOR
|
| 198 |
|
|
|
| 199 |
|
|
Nanashi-san
|
| 200 |
|
|
|
| 201 |
|
|
=head1 LICENSE
|
| 202 |
|
|
|
| 203 |
|
|
Copyright 2002 AUTHOR
|
| 204 |
|
|
|
| 205 |
|
|
This library is free software; you can redistribute it
|
| 206 |
|
|
and/or modify it under the same terms as Perl itself.
|
| 207 |
|
|
|
| 208 |
|
|
AUTHOR does NOT claim any right to the data generated by
|
| 209 |
|
|
this script. License of generated data fully depends
|
| 210 |
|
|
author of source data.
|
| 211 |
|
|
|
| 212 |
|
|
=cut
|
| 213 |
|
|
|
| 214 |
wakaba |
1.8 |
1; ## $Date: 2002/10/14 06:56:53 $
|
| 215 |
wakaba |
1.2 |
### tbr2tbl.pl ends here
|