1 |
wakaba |
1.1 |
#!/usr/bin/perl |
2 |
|
|
|
3 |
|
|
=head1 NAME |
4 |
|
|
|
5 |
|
|
ucm2tbl --- Mapping table converter, ucm to PETBL/1.0 format |
6 |
|
|
|
7 |
|
|
=head1 USAGE |
8 |
|
|
|
9 |
|
|
$ perl ucm2tbl.pl cp932.ucm > cp932.tbl |
10 |
|
|
|
11 |
|
|
=cut |
12 |
|
|
|
13 |
|
|
use strict; |
14 |
|
|
{
  ## Character names are obtained through the documented charnames
  ## interface.  Parsing the text returned by unicore/Name.pl is avoided
  ## on purpose: that file is internal to perl and its layout is not
  ## stable between releases.
  use charnames ();

  ## charname ($code)
  ##
  ## Returns a descriptive label for a Unicode code point, for use in the
  ## trailing comment of a table line.
  ##
  ##   $code  - either a number, or a hexadecimal string.  Any argument
  ##            containing a character other than 0-9 is read as
  ##            hexadecimal after an optional leading "U+" or "0x" has
  ##            been removed; an all-digit argument is taken as a number
  ##            as is.
  ##
  ## Returns '<control>' for U+0000..U+001F and U+007F..U+009F, the
  ## character name when one is known, and otherwise a range placeholder
  ## ('<cjk>', '<hangul>', '<private>') or the empty string.
  ##
  ## The ($) prototype is kept so that existing call sites parse exactly
  ## as before.
  sub charname ($) {
    my $U = shift;
    if ($U =~ /[^0-9]/) {
      $U =~ s/^[Uu]\+|^0[Xx]//;
      $U = hex $U;
    }

    ## C0 controls, DEL and C1 controls never get an individual name.
    return '<control>' if $U < 0x0020;
    return '<control>' if $U >= 0x007F and $U < 0x00A0;

    ## Names that are derived mechanically from the code point (unified
    ## ideographs, Hangul syllables) are skipped so that those characters
    ## keep their short range placeholder below.
    my $name = charnames::viacode ($U);
    return $name
        if defined $name and length $name
           and $name !~ /^(?:CJK UNIFIED IDEOGRAPH-|HANGUL SYLLABLE )/;

    ## TODO: be more strict!  The ranges below are coarse: everything
    ## unnamed in U+A000..U+DFFF is labelled '<hangul>'.
    return ''          if $U < 0x3400;
    return '<cjk>'     if $U < 0xA000;
    return '<hangul>'  if $U < 0xE000;
    return '<private>' if $U < 0xF900;
    return '';
  }
}
41 |
|
|
|
42 |
|
|
## Main conversion: reads UCM source from the files named on the command
## line (or standard input) and writes the PETBL/1.0 table to standard
## output.  Header properties and comments are written as they are read;
## mapping lines are collected and written, sorted, at the end.
print qq(#?PETBL/1.0\n);

## Direction column of a mapping line, indexed by the digit that follows
## "|" in the UCM mapping line.
my @arrow = ('', '<-', '', '->');

my @char;   # formatted mapping lines
while (defined (my $line = <>)) {
  if ($line =~ /^<U([0-9A-Fa-f]+)>\s+([0-9A-Fa-f\\Xx]+)\s+\|(\d)/) {
    ## Mapping line: <Uxxxx> \xNN[\xNN...] |d
    my ($ucs, $bytes, $flag) = (hex $1, $2, $3);
    $bytes =~ tr/\\Xx//d;       # "\x81\x40" -> "8140"
    my $code = hex $bytes;
    ## Codes below 0x100 are printed with two hex digits, all others
    ## with at least four.
    my $digits = $code < 0x100 ? 2 : 4;
    ## A digit with no entry in @arrow gives an empty direction column.
    my $mark = defined $arrow[$flag] ? $arrow[$flag] : '';
    ## NOTE(review): the field separators are single spaces as found in
    ## this copy of the source -- confirm against the PETBL format.
    push @char, sprintf q(0x%0*X U+%04X %s # %s%s),
        $digits, $code, $ucs, $mark, charname ($ucs), "\n";
  } elsif ($line =~ /^<code_set_name>\s+"([^"]+)"/) {
    print qq(#?o name="$1"\n);
  } elsif ($line =~ /^<([^>]+)>\s+(.+)/) {
    ## Any other <key> value header line becomes a ucm:key property.
    my ($key, $value) = ($1, $2);
    ## "." matches CR, so a CRLF-terminated line would otherwise leave
    ## a CR inside the quoted value.
    $value =~ tr/\x0A\x0D//d;
    $value =~ s/([\\"])/\\$1/g;
    print qq(#?o ucm:$key="$value"\n);
  } elsif ($line =~ s/^#\s?//) {
    ## Comment line: drop the line terminator and re-emit with "## ".
    $line =~ tr/\x0A\x0D//d;
    print qq(## $line\n);
  }
}
## Plain string sort of the formatted lines.
print sort @char;
63 |
|
|
|
64 |
|
|
=head1 AUTHOR |
65 |
|
|
|
66 |
|
|
Nanashi-san (SuikaWiki:WindowsCodePage |
67 |
|
|
<http://suika.fam.cx/~wakaba/-temp/wiki/wiki?WindowsCodePage>) |
68 |
|
|
|
69 |
|
|
=head1 LICENSE |
70 |
|
|
|
71 |
|
|
Public Domain. |
72 |
|
|
|
73 |
|
|
=cut |
74 |
|
|
|
75 |
|
|
# $Date: 2002/11/02 12:10:45 $ |