| 1 |
wakaba |
1.1 |
#!/usr/bin/perl
|
| 2 |
|
|
use strict;
|
| 3 |
|
|
|
| 4 |
wakaba |
1.2 |
=head1 NAME
|
| 5 |
|
|
|
| 6 |
|
|
tbl2ucm --- PETBL/1.0 -> ucm converter
|
| 7 |
|
|
|
| 8 |
|
|
=cut
|
| 9 |
|
|
|
| 10 |
wakaba |
1.1 |
my @l;
|
| 11 |
|
|
my %i;
|
| 12 |
|
|
while (<>) {
|
| 13 |
|
|
if (/^0x([0-9A-F]+)\tU\+([0-9A-F]+)\t([^\t]*)\t# ?(.*)/) {
|
| 14 |
|
|
push @l, {local => hex $1, ucs => hex $2, comment => $4};
|
| 15 |
|
|
my $info = $3;
|
| 16 |
|
|
if ($info =~ /<->/) { $l[-1]->{fallback} = 0 }
|
| 17 |
|
|
elsif ($info =~ /<-/) { $l[-1]->{fallback} = 1 }
|
| 18 |
|
|
elsif ($info =~ /->/) { $l[-1]->{fallback} = 3 }
|
| 19 |
wakaba |
1.3 |
} elsif (/^#\?o ([a-z0-9:_<>-]+)="(.+)"/) {
|
| 20 |
|
|
my ($n,$v) = ($1,$2); $v =~ s/\\(.)/$1/g;
|
| 21 |
|
|
$i{$n} = $i{$n} ? $i{$n} . "\t" . $v : $v;
|
| 22 |
|
|
} elsif (/^## ([a-z0-9:_<>-]+)\t(.+)/) {
|
| 23 |
|
|
$i{$1} = $i{$1} ? $i{$1} . "\t" . $2 : $2;
|
| 24 |
wakaba |
1.1 |
}
|
| 25 |
|
|
}
|
| 26 |
|
|
|
| 27 |
|
|
@l = sort { $a->{ucs} <=> $b->{ucs} || $a->{fallback} <=> $b->{fallback} || $a->{local} <=> $b->{local} } @l;
|
| 28 |
|
|
|
| 29 |
|
|
print <<EOH;
|
| 30 |
|
|
#
|
| 31 |
|
|
<code_set_name> "@{[ $i{'ucm:code_set_name'} || $i{name} ]}"
|
| 32 |
|
|
@{[&{sub{
|
| 33 |
|
|
my $s = '';
|
| 34 |
wakaba |
1.3 |
for (sort split /\t/, lc $i{'ucm:code_set_alias'}) {
|
| 35 |
wakaba |
1.1 |
$s .= sprintf '<code_set_alias> "%s"%s', $_, "\n";
|
| 36 |
|
|
}
|
| 37 |
|
|
$s;
|
| 38 |
|
|
}}]}<mb_cur_min> @{[ $i{'ucm:mb_cur_min'} || 1 ]}
|
| 39 |
|
|
<mb_cur_max> @{[ $i{'ucm:mb_cur_max'} || 1 ]}
|
| 40 |
|
|
<subchar> @{[&{sub{
|
| 41 |
wakaba |
1.3 |
my $s = uc ($i{'ucm:subchar'}||$i{'<-ucs-substition'}||$i{substition} || '0x3F');
|
| 42 |
wakaba |
1.1 |
$s =~ s/^[0\\]X/\\x/;
|
| 43 |
|
|
$s =~ s/^\\x([0-9A-F][0-9A-F])([0-9A-F][0-9A-F])/\\x$1\\x$2/;
|
| 44 |
|
|
$s;
|
| 45 |
|
|
}}]}
|
| 46 |
|
|
#
|
| 47 |
|
|
CHARMAP
|
| 48 |
|
|
EOH
|
| 49 |
|
|
|
| 50 |
|
|
for (@l) {
|
| 51 |
wakaba |
1.4 |
my $localv = $_->{local};
|
| 52 |
|
|
my $local = '';
|
| 53 |
|
|
$local = '\x00' if $localv == 0;
|
| 54 |
|
|
while ($localv) {
|
| 55 |
|
|
$local = (sprintf '\x%02X', $localv % 0x100) . $local;
|
| 56 |
|
|
$localv >>= 8;
|
| 57 |
|
|
}
|
| 58 |
|
|
printf q{<U%04X> %s |%d # %s}, $_->{ucs}, $local, $_->{fallback}, $_->{comment}."\n";
|
| 59 |
wakaba |
1.1 |
}
|
| 60 |
|
|
|
| 61 |
|
|
print "END CHARMAP\n";
|
| 62 |
wakaba |
1.2 |
|
| 63 |
|
|
|
| 64 |
|
|
=head1 SEE ALSO
|
| 65 |
|
|
|
| 66 |
|
|
perlunicode, enc2xs, tbr2tbl.pl
|
| 67 |
|
|
|
| 68 |
|
|
=head1 AUTHOR
|
| 69 |
|
|
|
| 70 |
|
|
Nanashi-san
|
| 71 |
|
|
|
| 72 |
|
|
=head1 LICENSE
|
| 73 |
|
|
|
| 74 |
|
|
Copyright 2002 AUTHOR
|
| 75 |
|
|
|
| 76 |
|
|
This library is free software; you can redistribute it
|
| 77 |
|
|
and/or modify it under the same terms as Perl itself.
|
| 78 |
|
|
|
| 79 |
|
|
AUTHOR does NOT claim any right to the data generated by
|
| 80 |
|
|
this script. License of generated data fully depends
|
| 81 |
|
|
author of source data.
|
| 82 |
|
|
|
| 83 |
|
|
=cut
|
| 84 |
|
|
|
| 85 |
wakaba |
1.4 |
1; ## $Date: 2002/12/12 07:45:17 $
|
| 86 |
wakaba |
1.2 |
### tbl2ucm.pl ends here
|