1 |
wakaba |
1.1 |
#!/usr/bin/perl
|
2 |
|
|
use strict;
|
3 |
|
|
|
4 |
wakaba |
1.2 |
=head1 NAME
|
5 |
|
|
|
6 |
|
|
tbl2ucm --- PETBL/1.0 -> ucm converter
|
7 |
|
|
|
8 |
|
|
=cut
|
9 |
|
|
|
10 |
wakaba |
1.1 |
my @l;
|
11 |
|
|
my %i;
|
12 |
|
|
while (<>) {
|
13 |
|
|
if (/^0x([0-9A-F]+)\tU\+([0-9A-F]+)\t([^\t]*)\t# ?(.*)/) {
|
14 |
|
|
push @l, {local => hex $1, ucs => hex $2, comment => $4};
|
15 |
|
|
my $info = $3;
|
16 |
|
|
if ($info =~ /<->/) { $l[-1]->{fallback} = 0 }
|
17 |
|
|
elsif ($info =~ /<-/) { $l[-1]->{fallback} = 1 }
|
18 |
|
|
elsif ($info =~ /->/) { $l[-1]->{fallback} = 3 }
|
19 |
wakaba |
1.3 |
} elsif (/^#\?o ([a-z0-9:_<>-]+)="(.+)"/) {
|
20 |
|
|
my ($n,$v) = ($1,$2); $v =~ s/\\(.)/$1/g;
|
21 |
|
|
$i{$n} = $i{$n} ? $i{$n} . "\t" . $v : $v;
|
22 |
|
|
} elsif (/^## ([a-z0-9:_<>-]+)\t(.+)/) {
|
23 |
|
|
$i{$1} = $i{$1} ? $i{$1} . "\t" . $2 : $2;
|
24 |
wakaba |
1.1 |
}
|
25 |
|
|
}
|
26 |
|
|
|
27 |
|
|
@l = sort { $a->{ucs} <=> $b->{ucs} || $a->{fallback} <=> $b->{fallback} || $a->{local} <=> $b->{local} } @l;
|
28 |
|
|
|
29 |
|
|
print <<EOH;
|
30 |
|
|
#
|
31 |
|
|
<code_set_name> "@{[ $i{'ucm:code_set_name'} || $i{name} ]}"
|
32 |
|
|
@{[&{sub{
|
33 |
|
|
my $s = '';
|
34 |
wakaba |
1.3 |
for (sort split /\t/, lc $i{'ucm:code_set_alias'}) {
|
35 |
wakaba |
1.1 |
$s .= sprintf '<code_set_alias> "%s"%s', $_, "\n";
|
36 |
|
|
}
|
37 |
|
|
$s;
|
38 |
|
|
}}]}<mb_cur_min> @{[ $i{'ucm:mb_cur_min'} || 1 ]}
|
39 |
|
|
<mb_cur_max> @{[ $i{'ucm:mb_cur_max'} || 1 ]}
|
40 |
|
|
<subchar> @{[&{sub{
|
41 |
wakaba |
1.3 |
my $s = uc ($i{'ucm:subchar'}||$i{'<-ucs-substition'}||$i{substition} || '0x3F');
|
42 |
wakaba |
1.1 |
$s =~ s/^[0\\]X/\\x/;
|
43 |
|
|
$s =~ s/^\\x([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])/\\x$1\\x$2/;
|
44 |
|
|
$s;
|
45 |
|
|
}}]}
|
46 |
|
|
#
|
47 |
|
|
CHARMAP
|
48 |
|
|
EOH
|
49 |
|
|
|
50 |
|
|
for (@l) {
|
51 |
wakaba |
1.4 |
my $localv = $_->{local};
|
52 |
|
|
my $local = '';
|
53 |
|
|
$local = '\x00' if $localv == 0;
|
54 |
|
|
while ($localv) {
|
55 |
|
|
$local = (sprintf '\x%02X', $localv % 0x100) . $local;
|
56 |
|
|
$localv >>= 8;
|
57 |
|
|
}
|
58 |
|
|
printf q{<U%04X> %s |%d # %s}, $_->{ucs}, $local, $_->{fallback}, $_->{comment}."\n";
|
59 |
wakaba |
1.1 |
}
|
60 |
|
|
|
61 |
|
|
print "END CHARMAP\n";
|
62 |
wakaba |
1.2 |
|
63 |
|
|
|
64 |
|
|
=head1 SEE ALSO
|
65 |
|
|
|
66 |
|
|
perlunicode, enc2xs, tbr2tbl.pl
|
67 |
|
|
|
68 |
|
|
=head1 AUTHOR
|
69 |
|
|
|
70 |
|
|
Nanashi-san
|
71 |
|
|
|
72 |
|
|
=head1 LICENSE
|
73 |
|
|
|
74 |
|
|
Copyright 2002 AUTHOR
|
75 |
|
|
|
76 |
|
|
This library is free software; you can redistribute it
|
77 |
|
|
and/or modify it under the same terms as Perl itself.
|
78 |
|
|
|
79 |
|
|
AUTHOR does NOT claim any right to the data generated by
|
80 |
|
|
this script. License of generated data fully depends
|
81 |
|
|
author of source data.
|
82 |
|
|
|
83 |
|
|
=cut
|
84 |
|
|
|
85 |
wakaba |
1.4 |
1; ## $Date: 2002/12/12 07:45:17 $
|
86 |
wakaba |
1.2 |
### tbl2ucm.pl ends here
|