/[suikacvs]/perl/lib/Encode/Table/tool/winapi2tbl.pl
Suika

Contents of /perl/lib/Encode/Table/tool/winapi2tbl.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (show annotations) (download)
Thu Dec 12 07:47:19 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +18 -19 lines
File MIME type: text/plain
*** empty log message ***

1 #!/usr/bin/perl
2
3 =head1 NAME
4
5 winapi2tbl --- Mapping table generator (from Win32 API)
6
7 =head1 USAGE
8
9 $ perl winapi2tbl.pl 932 > winapi-cp932.tbl
10
11 =cut
12
13 use strict;
14 require Win32::API;
15
16 my $CodePage = shift || 932;
17 my %S_range = (
18 932 => [0x40..0x7E,0x80..0xFC],
19 936 => [0x40..0x7E,0x80..0xFE],
20 949 => [0x41..0x5A,0x61..0x7A,0x81..0xFE],
21 950 => [0x40..0x7E,0xA1..0xFE],
22 1361 => [0x31..0xFE],
23 20000 => [0x21..0x7E,0xA1..0xFE],
24 20932 => [0xA1..0xFE],
25 20936 => [0xA1..0xFE],
26 20949 => [0xA1..0xFE],
27 );
28
29 {
30 my @name = split /\n/, require 'unicore/Name.pl';
31 my %name;
32 for (@name) {
33 if (/^(....) ([^\t]+)/) {
34 $name{hex $1} = $2;
35 }
36 }
37 sub NAME ($) {
38 $_[0] < 0x0020 ? '<control>' :
39 $_[0] < 0x007F ? $name{$_[0]} :
40 $_[0] < 0x00A0 ? '<control>' :
41 $name{$_[0]} ? $name{$_[0]} :
42 $_[0] < 0x00A0 ? '<control>' :
43 $_[0] < 0x3400 ? '' :
44 $_[0] < 0xA000 ? '<cjk>' :
45 $_[0] < 0xE000 ? '<hangul>' :
46 $_[0] < 0xF900 ? '<private>' :
47 '';
48 }
49 }
50
51 my @hdr;
52 print STDERR "Checking system version...\n";
53 if (eval q{require Message::Field::UA}) {
54 my $ua = new Message::Field::UA;
55 $ua->add_our_name;
56 $ua->replace_system_version ('ie', -prepend => 0);
57 push @hdr, "#; Environment: $ua";
58 print STDERR $hdr[$#hdr],"\n";
59 }
60
61 print STDERR "MultiByte -> WideChar...\n";
62 #int MultiByteToWideChar(
63 # UINT CodePage, // code page
64 # DWORD dwFlags, // mapping flag
65 # LPCSTR lpMultiByteStr, // multibyte string (source)
66 # int cchMultiByte, // length of multibyte string
67 # LPWSTR lpWideCharStr, // wide string (result)
68 # int cchWideChar // length of wide string
69 #);
70 ## <http://www.microsoft.com/japan/developer/library/jpwinpf/_win32_multibytetowidechar.htm>
71
72 my $M2W = new Win32::API (kernel32 => "MultiByteToWideChar", INPIPI => 'I');
73 my %M2U;
74 for my $F (0x00..0xFF) { ## one byte
75 my $mb = pack 'C', $F;
76 my $wc = "\x00" x 10;
77 my $len = $M2W->Call ($CodePage, 0, $mb, length ($mb) => $wc, length ($wc));
78 $wc = substr ($wc, 0, $len * 2);
79 if (length ($wc) >= 2 && $wc ne "\x00\x00") {
80 $wc =~ s/(.)(.)/$2$1/gs;
81 $wc =~ s/(.)/sprintf '%02X', ord $1/ges;
82 $M2U{ $F } = hex $wc;
83 }
84 }
85 for my $F (0x80..0xFF) { ## two bytes
86 for my $S (@{$S_range{$CodePage} || [0x01..0xFF] }) {
87 my $mb = pack 'CC', $F, $S;
88 my $wc = "\x00" x 10;
89 my $len = $M2W->Call ($CodePage, 0, $mb, length ($mb) => $wc, length ($wc));
90 $wc = substr ($wc, 0, $len * 2);
91 if (length ($wc) >= 2) {
92 $wc =~ s/(.)(.)/$2$1/gs;
93 $wc =~ s/(.)/sprintf '%02X', ord $1/ges;
94 $M2U{ 0x100 * $F + $S } = hex ($wc) unless 0x10000 * $M2U{$F} + $M2U{$S} == hex ($wc);
95 }
96 }}
97 $M2U{0x00} = 0x0000;
98
99 print STDERR "WideChar -> MultiByte...\n";
100 #int WideCharToMultiByte(
101 # UINT CodePage, // code page
102 # DWORD dwFlags, // mapping flag
103 # LPCWSTR lpWideCharStr, // wide string (source)
104 # int cchWideChar, // length of wide string
105 # LPSTR lpMultiByteStr, // multibyte string (result)
106 # int cchMultiByte, // length of multibyte
107 # LPCSTR lpDefaultChar, // substition char of unmappable one
108 # LPBOOL lpUsedDefaultChar // substition char is used?
109 #);
110 ## <http://www.microsoft.com/japan/developer/library/jpwinpf/_win32_widechartomultibyte.htm>
111
112 my $W2M = new Win32::API (kernel32 => "WideCharToMultiByte", INPIPIPP => 'I');
113 my %U2M;
114 for my $R (0x00..0xFF) {
115 for my $C (0x00..0xFF) {
116 my $wc = pack 'CC', $C, $R; ## UTF-16LE
117 my $mb = "\x00" x 10;
118 $W2M->Call ($CodePage, 0, $wc, length ($wc) / 2 => $mb, length ($mb), "\x00", 0);
119 $mb =~ s/\x00.*$//;
120 if (length $mb) {
121 $mb =~ s/(.)/sprintf '%02X', ord $1/ges;
122 $U2M{ 0x100 * $R + $C } = hex $mb;
123 }
124 }}
125 $U2M{0x0000} = 0x00;
126
127 for my $R1 (0xD8..0xDB) { for my $C1 (0x00..0xFF) {
128 for my $R2 (0xDC..0xDF) { for my $C2 (0x00..0xFF) {
129 my $wc = pack 'CC', $C1, $R1, $C2, $R2; ## UTF-16LE
130 my $mb = "\x00" x 10;
131 $W2M->Call ($CodePage, 0, $wc, length ($wc) / 2 => $mb, length ($mb), "\x00", 0);
132 $mb =~ s/\x00.*$//;
133 if (length $mb) {
134 $mb =~ s/(.)/sprintf '%02X', ord $1/ges;
135 $U2M{ 0x10000 + (0x100 * $R1 + $C1 - 0xD800) * 0x400
136 + (0x100 * $R2 + $C2 - 0xDC00) } = hex ($mb);
137 }
138 }}}}
139
140 print STDERR "Creating table...\n";
141
142 my @t;
143 for my $U (keys %U2M) {
144 if ($M2U{$U2M{$U}} == $U) {
145 push @t, sprintf '0x%04X U+%04X # %s',
146 $U2M{$U}, $U, NAME ($U);
147 delete $M2U{ $U2M{$U} };
148 } else {
149 push @t, sprintf '0x%04X U+%04X <- # %s',
150 $U2M{$U}, $U, NAME ($U);
151 }
152 }
153 for my $C (keys %M2U) {
154 push @t, sprintf '0x%04X U+%04X -> # %s',
155 $C, $M2U{$C}, NAME ($M2U{$C});
156 }
157 for (@t) {
158 s/^0x00(..)/0x$1/;
159 }
160
161 print "#; This file is auto-generated (at @{[ sprintf '%04d-%02d-%02dT%02d:%02d:%02dZ', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3,2,1,0] ]}).\n";
162 print join "\n", @hdr, sort @t;
163 print "\n";
164
165 =head1 REQUIRES
166
167 L<Win32::API> (MUST), unicore/Name.pl (MUST; in perl 5.8.0),
168 L<Message::Field::UA> (SHOULD)
169
170 =head1 BUGS
171
172 0x00 or U+0000 can't be treated correctly so that
173 0x00 <-> U+0000 is hardcoded. This may cause problem
174 if the code page does not have 0x00 <-> U+0000 mapping.
175
176 Mapping to 0x00 or U+0000 is also having problem.
177 For example, CP20127, Microsoft mapping of ASCII,
178 has an entry of 0x80 -> U+0000. But this entry does
179 not occur in outputed table.
180
181 In other points, this script has a lot of "hardcoded"
182 code, so that making table of some unknown code page
183 can result incorrect mapping.
184
185 =head1 AUTHOR
186
187 Nanashi-san <nanashi@san.invalid>
188
189 =head1 LICENSE
190
191 Public Domain.
192
193 =cut
194
195 # $Date: 2002/11/03 11:08:15 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24