1 |
#!/usr/bin/perl |
2 |
|
3 |
=head1 NAME |
4 |
|
5 |
ucm2tbl --- Mapping table converter, ucm to PETBL/1.0 format |
6 |
|
7 |
=head1 USAGE |
8 |
|
9 |
  $ perl ucm2tbl.pl cp932.ucm > cp932.tbl
10 |
|
11 |
=cut |
12 |
|
13 |
use strict; |
14 |
{
  # Load charnames only for its viacode() lookup function.  The empty
  # import list means no \N{...} handling is switched on for this file.
  use charnames ();

  # charname ($code)
  #
  # Returns the text used to annotate a code point in the generated table.
  #
  #   $code - a code point, either as a decimal integer or as a hexadecimal
  #           string with an optional "U+" or "0x" prefix.
  #
  # Result, in order of precedence:
  #   * '<control>' for U+0000..U+001F and U+007F..U+009F;
  #   * the Unicode character name, when the character has an individual
  #     name;
  #   * a range placeholder ('<cjk>', '<hangul>', '<private>') for code
  #     points without an individual name in U+3400..U+F8FF;
  #   * the empty string otherwise.
  #
  # The result is always a defined string.
  #
  # Names come from charnames::viacode, the documented lookup interface,
  # instead of parsing unicore/Name.pl directly: that file is internal to
  # perl and its layout is not something a script can rely on.
  sub charname ($) {
    my $U = shift;
    # Anything that is not purely decimal digits is read as hexadecimal.
    if ($U =~ /[^0-9]/) {
      $U =~ s/^[Uu]\+|^0[Xx]//;
      $U = hex $U;
    }
    ## TODO: be more strict!
    return '<control>' if $U < 0x0020;
    return '<control>' if 0x007F <= $U && $U < 0x00A0;

    my $name = charnames::viacode ($U);
    $name = '' unless defined $name;
    # Ideograph and Hangul syllable names are derived from the code point
    # itself and add nothing to the "U+XXXX" column, so those characters
    # get the short range placeholder below instead.
    return $name
      if length $name
         and $name !~ /^(?:CJK UNIFIED IDEOGRAPH-|HANGUL SYLLABLE )/;

    # No individual name: fall back to a coarse label chosen by range.
    return $U < 0x3400 ? ''
         : $U < 0xA000 ? '<cjk>'
         : $U < 0xE000 ? '<hangul>'
         : $U < 0xF900 ? '<private>'
         :               '';
  }
}
41 |
|
42 |
# Every PETBL/1.0 table starts with this signature line.
print qq(#?PETBL/1.0\n);

# Arrow column of a mapping line, indexed by the ucm precision indicator
# (the digit after "|").  Indicators 0 and 2 print no arrow; digits without
# an entry here print nothing either.
my @arrow = ('', '<-', '', '->');

# Formatted mapping lines.  They are collected rather than printed at once
# so that they can be emitted in sorted order after the input is exhausted;
# header options and comments are printed as soon as they are read.
my @char;
while (<>) {
  # Remove the line terminator first, so that the CR of a CRLF-terminated
  # file cannot end up inside a captured header value.
  s/[\x0D\x0A]+\z//;

  if (/^<U([0-9A-Fa-f]+)>\s+([0-9A-Fa-f\\Xx]+)(?:\s*\|(\d))?/) {
    # Mapping line: "<UXXXX> \xHH[\xHH...] [|n]".  A line without the
    # "|n" indicator is treated like "|0" instead of being mistaken for a
    # header option by the generic "<name> value" branch below.
    my ($u, $c) = (hex $1, $2);
    my $f = defined $3 ? $3 : 0;
    # "\x81\x40" -> "8140" -> 0x8140
    $c =~ tr/\\Xx//d;
    $c = hex $c;
    my $mark = defined $arrow[$f] ? $arrow[$f] : '';
    # Single-byte codes are written with two hex digits, longer ones with
    # at least four.
    my $width = $c < 0x100 ? 2 : 4;
    push @char, sprintf "0x%0*X U+%04X %s # %s\n",
                        $width, $c, $u, $mark, charname ($u);
  } elsif (/^<code_set_name>\s+"([^"]+)"/) {
    print qq(#?o name="$1"\n);
  } elsif (/^<([^>]+)>\s+(.+)/) {
    # Any other "<name> value" header is passed through as a ucm: option,
    # with backslashes and double quotes in the value escaped.
    my ($n, $v) = ($1, $2);
    $v =~ s/([\\"])/\\$1/g;
    print qq(#?o ucm:$n="$v"\n);
  } elsif (s/^#\s?//) {
    # Comment line: drop the "#" and one following whitespace character,
    # plus any CR/LF left inside the text.
    tr/\x0A\x0D//d;
    print qq(## $_\n);
  }
  # Everything else is ignored.
}
# NOTE: this is a plain string sort of the formatted lines, not a numeric
# sort on the code value.
print sort @char;
63 |
|
64 |
=head1 AUTHOR |
65 |
|
66 |
Nanashi-san (SuikaWiki:WindowsCodePage |
67 |
<http://suika.fam.cx/~wakaba/-temp/wiki/wiki?WindowsCodePage>) |
68 |
|
69 |
=head1 LICENSE |
70 |
|
71 |
Public Domain. |
72 |
|
73 |
=cut |
74 |
|
75 |
# $Date: 2002/11/02 12:10:45 $ |