| 1 |
#!/usr/bin/perl |
| 2 |
|
| 3 |
=head1 NAME

ucm2tbl --- Mapping table converter, ucm to PETBL/1.0 format

=head1 USAGE

  $ perl ucm2tbl.pl cp932.ucm > cp932.tbl

=cut
| 12 |
|
| 13 |
use strict; |
| 14 |
use charnames ();

## charname ($code)
##
## Return the label that is written after "#" on a table line for one
## code point.
##
## $code is either a number (a string made only of the digits 0-9 is
## always read as decimal) or a hexadecimal string with an optional
## "U+" or "0x" prefix, e.g. "U+3042".
##
## Returns the character name when one is known; otherwise one of the
## placeholders '<control>', '<cjk>', '<hangul>', '<private>', or ''.
##
## Names are obtained through the documented charnames::viacode
## interface rather than by parsing unicore/Name.pl, which is an
## internal data file of charnames.pm and whose layout is not stable.
sub charname ($) {
  my $U = shift;
  if ($U =~ /[^0-9]/) {
    $U =~ s/^[Uu]\+|^0[Xx]//;
    $U = hex $U;
  }

  ## C0 controls, DELETE and C1 controls never get an individual name.
  return '<control>' if $U < 0x0020;
  return '<control>' if 0x007F <= $U && $U < 0x00A0;

  ## viacode complains about values beyond the Unicode code space, so
  ## only ask it about real code points.
  my $name;
  if ($U <= 0x10FFFF) {
    $name = charnames::viacode (sprintf 'U+%04X', $U);
    ## Algorithmically generated names (one per ideograph / syllable)
    ## are deliberately ignored so that those characters keep getting
    ## the short range placeholders below.
    undef $name
      if defined $name
         && $name =~ /\A(?:CJK UNIFIED IDEOGRAPH-|HANGUL SYLLABLE )/;
  }
  return $name if defined $name && length $name;

  ## TODO: be more strict!
  ## Coarse range-based fallbacks for code points without a usable name.
  return ''          if $U < 0x3400;
  return '<cjk>'     if $U < 0xA000;
  return '<hangul>'  if $U < 0xE000;
  return '<private>' if $U < 0xF900;
  return '';
}
| 41 |
|
| 42 |
## Main conversion: write the PETBL/1.0 header line, then read the UCM
## input (files named on the command line, or STDIN) line by line.
##
##   <Uxxxx> \xNN... |f    -> one mapping row, collected in @char
##   <code_set_name> "..." -> "#?o name=..." option line
##   <other> value         -> "#?o ucm:other=..." option line
##   # comment             -> "## comment"
##
## Lines matching none of the above are dropped. Option and comment
## lines are printed as they are read; the mapping rows are printed
## last, in string-sorted order.
print qq(#?PETBL/1.0\n);

## Marker emitted for each UCM "|n" flag digit; digits without an entry
## here produce an empty marker.
my @direction = ('', '<-', '', '->');

my @char;
while (<>) {
  if (/^<U([0-9A-Fa-f]+)>\s+([0-9A-Fa-f\\Xx]+)\s+\|(\d)/) {
    my ($u, $c, $f) = (hex $1, $2, $3);
    ## "\x81\x40" -> "8140" -> number
    $c =~ tr/\\Xx//d;
    $c = hex $c;
    ## Codes below 0x100 are printed with two hex digits, all others
    ## with at least four.
    my $width = $c < 0x100 ? 2 : 4;
    push @char, sprintf q(0x%0*X U+%04X %s # %s%s),
      $width, $c, $u, $direction[$f], charname ($u), "\n";
  } elsif (/^<code_set_name>\s+"([^"]+)"/) {
    print qq(#?o name="$1"\n);
  } elsif (/^<([^>]+)>\s+(.+)/) {
    my ($n, $v) = ($1, $2);
    ## "." matches CR, so CRLF input would otherwise leave a stray CR
    ## inside the quoted value; strip line-end characters just as the
    ## comment branch below does.
    $v =~ tr/\x0A\x0D//d;
    $v =~ s/([\\"])/\\$1/g;
    print qq(#?o ucm:$n="$v"\n);
  } elsif (s/^#\s?// && (tr/\x0A\x0D//d || 1)) {
    ## The "|| 1" keeps the branch taken when tr deleted nothing.
    print qq(## $_\n);
  }
}
print sort @char;
| 63 |
|
| 64 |
=head1 AUTHOR |
| 65 |
|
| 66 |
Nanashi-san (SuikaWiki:WindowsCodePage |
| 67 |
<http://suika.fam.cx/~wakaba/-temp/wiki/wiki?WindowsCodePage>) |
| 68 |
|
| 69 |
=head1 LICENSE |
| 70 |
|
| 71 |
Public Domain. |
| 72 |
|
| 73 |
=cut |
| 74 |
|
| 75 |
# $Date: 2002/11/02 12:10:45 $ |