| 1 |
wakaba |
1.1 |
#!/usr/bin/perl
|
| 2 |
|
|
|
| 3 |
|
|
=head1 NAME
|
| 4 |
|
|
|
| 5 |
|
|
winapi2tbl --- Mapping table generator (from Win32 API)
|
| 6 |
|
|
|
| 7 |
|
|
=head1 USAGE
|
| 8 |
|
|
|
| 9 |
|
|
$ perl winapi2tbl.pl 932 > winapi-cp932.tbl
|
| 10 |
|
|
|
| 11 |
|
|
=cut
|
| 12 |
|
|
|
| 13 |
|
|
use strict;
|
| 14 |
|
|
require Win32::API;
|
| 15 |
|
|
|
| 16 |
|
|
my $CodePage = shift || 932;
|
| 17 |
|
|
my %S_range = (
|
| 18 |
|
|
932 => [0x40..0x7E,0x80..0xFC],
|
| 19 |
|
|
936 => [0x40..0x7E,0x80..0xFE],
|
| 20 |
|
|
949 => [0x41..0x5A,0x61..0x7A,0x81..0xFE],
|
| 21 |
|
|
950 => [0x40..0x7E,0xA1..0xFE],
|
| 22 |
|
|
1361 => [0x31..0xFE],
|
| 23 |
|
|
20000 => [0x21..0x7E,0xA1..0xFE],
|
| 24 |
wakaba |
1.2 |
20932 => [0xA1..0xFE],
|
| 25 |
wakaba |
1.1 |
20936 => [0xA1..0xFE],
|
| 26 |
|
|
20949 => [0xA1..0xFE],
|
| 27 |
|
|
);
|
| 28 |
|
|
|
| 29 |
|
|
{
|
| 30 |
|
|
my @name = split /\n/, require 'unicore/Name.pl';
|
| 31 |
|
|
my %name;
|
| 32 |
|
|
for (@name) {
|
| 33 |
|
|
if (/^(....) ([^\t]+)/) {
|
| 34 |
|
|
$name{hex $1} = $2;
|
| 35 |
|
|
}
|
| 36 |
|
|
}
|
| 37 |
|
|
sub NAME ($) {
|
| 38 |
|
|
$_[0] < 0x0020 ? '<control>' :
|
| 39 |
|
|
$_[0] < 0x007F ? $name{$_[0]} :
|
| 40 |
|
|
$_[0] < 0x00A0 ? '<control>' :
|
| 41 |
|
|
$name{$_[0]} ? $name{$_[0]} :
|
| 42 |
|
|
$_[0] < 0x00A0 ? '<control>' :
|
| 43 |
|
|
$_[0] < 0x3400 ? '' :
|
| 44 |
|
|
$_[0] < 0xA000 ? '<cjk>' :
|
| 45 |
|
|
$_[0] < 0xE000 ? '<hangul>' :
|
| 46 |
|
|
$_[0] < 0xF900 ? '<private>' :
|
| 47 |
|
|
'';
|
| 48 |
|
|
}
|
| 49 |
|
|
}
|
| 50 |
|
|
|
| 51 |
|
|
my @hdr;
|
| 52 |
wakaba |
1.2 |
print STDERR "Checking system version...\n";
|
| 53 |
wakaba |
1.1 |
if (eval q{require Message::Field::UA}) {
|
| 54 |
|
|
my $ua = new Message::Field::UA;
|
| 55 |
|
|
$ua->add_our_name;
|
| 56 |
|
|
$ua->replace_system_version ('ie', -prepend => 0);
|
| 57 |
|
|
push @hdr, "#; Environment: $ua";
|
| 58 |
|
|
print STDERR $hdr[$#hdr],"\n";
|
| 59 |
|
|
}
|
| 60 |
|
|
|
| 61 |
|
|
print STDERR "MultiByte -> WideChar...\n";
|
| 62 |
|
|
#int MultiByteToWideChar(
|
| 63 |
|
|
# UINT CodePage, // code page
|
| 64 |
|
|
# DWORD dwFlags, // mapping flag
|
| 65 |
|
|
# LPCSTR lpMultiByteStr, // multibyte string (source)
|
| 66 |
|
|
# int cchMultiByte, // length of multibyte string
|
| 67 |
|
|
# LPWSTR lpWideCharStr, // wide string (result)
|
| 68 |
|
|
# int cchWideChar // length of wide string
|
| 69 |
|
|
#);
|
| 70 |
|
|
## <http://www.microsoft.com/japan/developer/library/jpwinpf/_win32_multibytetowidechar.htm>
|
| 71 |
|
|
|
| 72 |
|
|
my $M2W = new Win32::API (kernel32 => "MultiByteToWideChar", INPIPI => 'I');
|
| 73 |
|
|
my %M2U;
|
| 74 |
|
|
for my $F (0x00..0xFF) { ## one byte
|
| 75 |
|
|
my $mb = pack 'C', $F;
|
| 76 |
|
|
my $wc = "\x00" x 10;
|
| 77 |
|
|
my $len = $M2W->Call ($CodePage, 0, $mb, length ($mb) => $wc, length ($wc));
|
| 78 |
|
|
$wc = substr ($wc, 0, $len * 2);
|
| 79 |
wakaba |
1.2 |
if (length ($wc) >= 2 && $wc ne "\x00\x00") {
|
| 80 |
wakaba |
1.1 |
$wc =~ s/(.)(.)/$2$1/gs;
|
| 81 |
|
|
$wc =~ s/(.)/sprintf '%02X', ord $1/ges;
|
| 82 |
|
|
$M2U{ $F } = hex $wc;
|
| 83 |
|
|
}
|
| 84 |
|
|
}
|
| 85 |
|
|
for my $F (0x80..0xFF) { ## two bytes
|
| 86 |
|
|
for my $S (@{$S_range{$CodePage} || [0x01..0xFF] }) {
|
| 87 |
|
|
my $mb = pack 'CC', $F, $S;
|
| 88 |
|
|
my $wc = "\x00" x 10;
|
| 89 |
|
|
my $len = $M2W->Call ($CodePage, 0, $mb, length ($mb) => $wc, length ($wc));
|
| 90 |
|
|
$wc = substr ($wc, 0, $len * 2);
|
| 91 |
wakaba |
1.2 |
if (length ($wc) >= 2) {
|
| 92 |
wakaba |
1.1 |
$wc =~ s/(.)(.)/$2$1/gs;
|
| 93 |
|
|
$wc =~ s/(.)/sprintf '%02X', ord $1/ges;
|
| 94 |
wakaba |
1.2 |
$M2U{ 0x100 * $F + $S } = hex ($wc) unless 0x10000 * $M2U{$F} + $M2U{$S} == hex ($wc);
|
| 95 |
wakaba |
1.1 |
}
|
| 96 |
|
|
}}
|
| 97 |
|
|
$M2U{0x00} = 0x0000;
|
| 98 |
|
|
|
| 99 |
|
|
print STDERR "WideChar -> MultiByte...\n";
|
| 100 |
|
|
#int WideCharToMultiByte(
|
| 101 |
|
|
# UINT CodePage, // code page
|
| 102 |
|
|
# DWORD dwFlags, // mapping flag
|
| 103 |
|
|
# LPCWSTR lpWideCharStr, // wide string (source)
|
| 104 |
|
|
# int cchWideChar, // length of wide string
|
| 105 |
|
|
# LPSTR lpMultiByteStr, // multibyte string (result)
|
| 106 |
|
|
# int cchMultiByte, // length of multibyte
|
| 107 |
|
|
# LPCSTR lpDefaultChar, // substition char of unmappable one
|
| 108 |
|
|
# LPBOOL lpUsedDefaultChar // substition char is used?
|
| 109 |
|
|
#);
|
| 110 |
|
|
## <http://www.microsoft.com/japan/developer/library/jpwinpf/_win32_widechartomultibyte.htm>
|
| 111 |
|
|
|
| 112 |
|
|
my $W2M = new Win32::API (kernel32 => "WideCharToMultiByte", INPIPIPP => 'I');
|
| 113 |
|
|
my %U2M;
|
| 114 |
|
|
for my $R (0x00..0xFF) {
|
| 115 |
|
|
for my $C (0x00..0xFF) {
|
| 116 |
|
|
my $wc = pack 'CC', $C, $R; ## UTF-16LE
|
| 117 |
|
|
my $mb = "\x00" x 10;
|
| 118 |
|
|
$W2M->Call ($CodePage, 0, $wc, length ($wc) / 2 => $mb, length ($mb), "\x00", 0);
|
| 119 |
|
|
$mb =~ s/\x00.*$//;
|
| 120 |
|
|
if (length $mb) {
|
| 121 |
|
|
$mb =~ s/(.)/sprintf '%02X', ord $1/ges;
|
| 122 |
|
|
$U2M{ 0x100 * $R + $C } = hex $mb;
|
| 123 |
|
|
}
|
| 124 |
|
|
}}
|
| 125 |
|
|
$U2M{0x0000} = 0x00;
|
| 126 |
wakaba |
1.2 |
|
| 127 |
|
|
for my $R1 (0xD8..0xDB) { for my $C1 (0x00..0xFF) {
|
| 128 |
|
|
for my $R2 (0xDC..0xDF) { for my $C2 (0x00..0xFF) {
|
| 129 |
|
|
my $wc = pack 'CC', $C1, $R1, $C2, $R2; ## UTF-16LE
|
| 130 |
|
|
my $mb = "\x00" x 10;
|
| 131 |
|
|
$W2M->Call ($CodePage, 0, $wc, length ($wc) / 2 => $mb, length ($mb), "\x00", 0);
|
| 132 |
|
|
$mb =~ s/\x00.*$//;
|
| 133 |
|
|
if (length $mb) {
|
| 134 |
|
|
$mb =~ s/(.)/sprintf '%02X', ord $1/ges;
|
| 135 |
|
|
$U2M{ 0x10000 + (0x100 * $R1 + $C1 - 0xD800) * 0x400
|
| 136 |
|
|
+ (0x100 * $R2 + $C2 - 0xDC00) } = hex ($mb);
|
| 137 |
|
|
}
|
| 138 |
|
|
}}}}
|
| 139 |
wakaba |
1.1 |
|
| 140 |
|
|
print STDERR "Creating table...\n";
|
| 141 |
|
|
|
| 142 |
|
|
my @t;
|
| 143 |
|
|
for my $U (keys %U2M) {
|
| 144 |
|
|
if ($M2U{$U2M{$U}} == $U) {
|
| 145 |
|
|
push @t, sprintf '0x%04X U+%04X # %s',
|
| 146 |
|
|
$U2M{$U}, $U, NAME ($U);
|
| 147 |
|
|
delete $M2U{ $U2M{$U} };
|
| 148 |
|
|
} else {
|
| 149 |
|
|
push @t, sprintf '0x%04X U+%04X <- # %s',
|
| 150 |
|
|
$U2M{$U}, $U, NAME ($U);
|
| 151 |
|
|
}
|
| 152 |
|
|
}
|
| 153 |
|
|
for my $C (keys %M2U) {
|
| 154 |
|
|
push @t, sprintf '0x%04X U+%04X -> # %s',
|
| 155 |
|
|
$C, $M2U{$C}, NAME ($M2U{$C});
|
| 156 |
|
|
}
|
| 157 |
|
|
for (@t) {
|
| 158 |
|
|
s/^0x00(..)/0x$1/;
|
| 159 |
|
|
}
|
| 160 |
|
|
|
| 161 |
|
|
print "#; This file is auto-generated (at @{[ sprintf '%04d-%02d-%02dT%02d:%02d:%02dZ', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3,2,1,0] ]}).\n";
|
| 162 |
|
|
print join "\n", @hdr, sort @t;
|
| 163 |
|
|
print "\n";
|
| 164 |
|
|
|
| 165 |
|
|
=head1 REQUIRES
|
| 166 |
|
|
|
| 167 |
|
|
L<Win32::API> (MUST), unicore/Name.pl (MUST; in perl 5.8.0),
|
| 168 |
|
|
L<Message::Field::UA> (SHOULD)
|
| 169 |
|
|
|
| 170 |
|
|
=head1 BUGS
|
| 171 |
|
|
|
| 172 |
|
|
0x00 or U+0000 can't be treated correctly so that
|
| 173 |
|
|
0x00 <-> U+0000 is hardcoded. This may cause problem
|
| 174 |
|
|
if the code page does not have 0x00 <-> U+0000 mapping.
|
| 175 |
|
|
|
| 176 |
|
|
Mapping to 0x00 or U+0000 is also having problem.
|
| 177 |
|
|
For example, CP20127, Microsoft mapping of ASCII,
|
| 178 |
|
|
has an entry of 0x80 -> U+0000. But this entry does
|
| 179 |
|
|
not occur in outputed table.
|
| 180 |
|
|
|
| 181 |
|
|
In other points, this script has a lot of "hardcoded"
|
| 182 |
|
|
code, so that making table of some unknown code page
|
| 183 |
|
|
can result incorrect mapping.
|
| 184 |
|
|
|
| 185 |
|
|
=head1 AUTHOR
|
| 186 |
|
|
|
| 187 |
|
|
Nanashi-san <nanashi@san.invalid>
|
| 188 |
|
|
|
| 189 |
|
|
=head1 LICENSE
|
| 190 |
|
|
|
| 191 |
|
|
Public Domain.
|
| 192 |
|
|
|
| 193 |
|
|
=cut
|
| 194 |
|
|
|
| 195 |
|
|
# $Date: 2002/11/03 11:08:15 $
|