/[suikacvs]/test/oldencodeutils/lib/Encode/Table/tool/winapi2tbl.pl
Suika

Contents of /test/oldencodeutils/lib/Encode/Table/tool/winapi2tbl.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Thu Dec 12 07:45:49 2002 UTC (23 years, 5 months ago) by wakaba
Branch: MAIN
File MIME type: text/plain
*** empty log message ***

1 wakaba 1.1 #!/usr/bin/perl
2    
3     =head1 NAME
4    
5     winapi2tbl --- Mapping table generator (from Win32 API)
6    
7     =head1 USAGE
8    
9     $ perl winapi2tbl.pl 932 > winapi-cp932.tbl
10    
11     =cut
12    
13     use strict;
14     require Win32::API;
15    
16     my $CodePage = shift || 932;
17     my %S_range = (
18     932 => [0x40..0x7E,0x80..0xFC],
19     936 => [0x40..0x7E,0x80..0xFE],
20     949 => [0x41..0x5A,0x61..0x7A,0x81..0xFE],
21     950 => [0x40..0x7E,0xA1..0xFE],
22     1361 => [0x31..0xFE],
23     20000 => [0x21..0x7E,0xA1..0xFE],
24     20932 => [0x21..0x7E,0xA1..0xFE],
25     20936 => [0xA1..0xFE],
26     20949 => [0xA1..0xFE],
27     );
28    
29     {
30     my @name = split /\n/, require 'unicore/Name.pl';
31     my %name;
32     for (@name) {
33     if (/^(....) ([^\t]+)/) {
34     $name{hex $1} = $2;
35     }
36     }
37     sub NAME ($) {
38     $_[0] < 0x0020 ? '<control>' :
39     $_[0] < 0x007F ? $name{$_[0]} :
40     $_[0] < 0x00A0 ? '<control>' :
41     $name{$_[0]} ? $name{$_[0]} :
42     $_[0] < 0x00A0 ? '<control>' :
43     $_[0] < 0x3400 ? '' :
44     $_[0] < 0xA000 ? '<cjk>' :
45     $_[0] < 0xE000 ? '<hangul>' :
46     $_[0] < 0xF900 ? '<private>' :
47     '';
48     }
49     }
50    
51     my @hdr;
52    
53     if (eval q{require Message::Field::UA}) {
54     my $ua = new Message::Field::UA;
55     $ua->add_our_name;
56     $ua->replace_system_version ('ie', -prepend => 0);
57     push @hdr, "#; Environment: $ua";
58     print STDERR $hdr[$#hdr],"\n";
59     }
60    
61     print STDERR "MultiByte -> WideChar...\n";
62     #int MultiByteToWideChar(
63     # UINT CodePage, // code page
64     # DWORD dwFlags, // mapping flag
65     # LPCSTR lpMultiByteStr, // multibyte string (source)
66     # int cchMultiByte, // length of multibyte string
67     # LPWSTR lpWideCharStr, // wide string (result)
68     # int cchWideChar // length of wide string
69     #);
70     ## <http://www.microsoft.com/japan/developer/library/jpwinpf/_win32_multibytetowidechar.htm>
71    
72     my $M2W = new Win32::API (kernel32 => "MultiByteToWideChar", INPIPI => 'I');
73     my %M2U;
74     for my $F (0x00..0xFF) { ## one byte
75     my $mb = pack 'C', $F;
76     my $wc = "\x00" x 10;
77     my $len = $M2W->Call ($CodePage, 0, $mb, length ($mb) => $wc, length ($wc));
78     $wc = substr ($wc, 0, $len * 2);
79     if (length ($wc) > 2 && $wc ne "\x00\x00") {
80     $wc =~ s/(.)(.)/$2$1/gs;
81     $wc =~ s/(.)/sprintf '%02X', ord $1/ges;
82     $M2U{ $F } = hex $wc;
83     }
84     }
85     for my $F (0x80..0xFF) { ## two bytes
86     for my $S (@{$S_range{$CodePage} || [0x01..0xFF] }) {
87     my $mb = pack 'CC', $F, $S;
88     my $wc = "\x00" x 10;
89     my $len = $M2W->Call ($CodePage, 0, $mb, length ($mb) => $wc, length ($wc));
90     $wc = substr ($wc, 0, $len * 2);
91     if (length ($wc) > 2) {
92     $wc =~ s/(.)(.)/$2$1/gs;
93     $wc =~ s/(.)/sprintf '%02X', ord $1/ges;
94     $M2U{ 0x100 * $F + $S } = hex $wc;
95     }
96     }}
97     if ($CodePage == 20932) {
98     for my $F (0xA1..0xFE) { ## three bytes
99     for my $S (@{$S_range{$CodePage} || [0x01..0xFF] }) {
100     my $mb = pack 'CCC', 0x8F, $F, $S;
101     my $wc = "\x00" x 10;
102     my $len = $M2W->Call ($CodePage, 0, $mb, length ($mb) => $wc, length ($wc));
103     $wc = substr ($wc, 0, $len * 2);
104     if (length ($wc) > 2) {
105     $wc =~ s/(.)(.)/$2$1/gs;
106     $wc =~ s/(.)/sprintf '%02X', ord $1/ges;
107     $M2U{ 0x100 * $F + $S } = hex $wc;
108     }
109     }}
110     }
111     $M2U{0x00} = 0x0000;
112    
113     print STDERR "WideChar -> MultiByte...\n";
114     #int WideCharToMultiByte(
115     # UINT CodePage, // code page
116     # DWORD dwFlags, // mapping flag
117     # LPCWSTR lpWideCharStr, // wide string (source)
118     # int cchWideChar, // length of wide string
119     # LPSTR lpMultiByteStr, // multibyte string (result)
120     # int cchMultiByte, // length of multibyte
121     # LPCSTR lpDefaultChar, // substition char of unmappable one
122     # LPBOOL lpUsedDefaultChar // substition char is used?
123     #);
124     ## <http://www.microsoft.com/japan/developer/library/jpwinpf/_win32_widechartomultibyte.htm>
125    
126     my $W2M = new Win32::API (kernel32 => "WideCharToMultiByte", INPIPIPP => 'I');
127     my %U2M;
128     for my $R (0x00..0xFF) {
129     for my $C (0x00..0xFF) {
130     my $wc = pack 'CC', $C, $R; ## UTF-16LE
131     my $mb = "\x00" x 10;
132     $W2M->Call ($CodePage, 0, $wc, length ($wc) / 2 => $mb, length ($mb), "\x00", 0);
133     $mb =~ s/\x00.*$//;
134     if (length $mb) {
135     $mb =~ s/(.)/sprintf '%02X', ord $1/ges;
136     $U2M{ 0x100 * $R + $C } = hex $mb;
137     }
138     }}
139     $U2M{0x0000} = 0x00;
140    
141     print STDERR "Creating table...\n";
142    
143     my @t;
144     for my $U (keys %U2M) {
145     if ($M2U{$U2M{$U}} == $U) {
146     push @t, sprintf '0x%04X U+%04X # %s',
147     $U2M{$U}, $U, NAME ($U);
148     delete $M2U{ $U2M{$U} };
149     } else {
150     push @t, sprintf '0x%04X U+%04X <- # %s',
151     $U2M{$U}, $U, NAME ($U);
152     }
153     }
154     for my $C (keys %M2U) {
155     push @t, sprintf '0x%04X U+%04X -> # %s',
156     $C, $M2U{$C}, NAME ($M2U{$C});
157     }
158     for (@t) {
159     s/^0x00(..)/0x$1/;
160     }
161    
162     print "#; This file is auto-generated (at @{[ sprintf '%04d-%02d-%02dT%02d:%02d:%02dZ', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3,2,1,0] ]}).\n";
163     print join "\n", @hdr, sort @t;
164     print "\n";
165    
166     =head1 REQUIRES
167    
168     L<Win32::API> (MUST), unicore/Name.pl (MUST; in perl 5.8.0),
169     L<Message::Field::UA> (SHOULD)
170    
171     =head1 BUGS
172    
173     0x00 or U+0000 can't be treated correctly so that
174     0x00 <-> U+0000 is hardcoded. This may cause problem
175     if the code page does not have 0x00 <-> U+0000 mapping.
176    
177     Mapping to 0x00 or U+0000 is also having problem.
178     For example, CP20127, Microsoft mapping of ASCII,
179     has an entry of 0x80 -> U+0000. But this entry does
180     not occur in outputed table.
181    
182     In other points, this script has a lot of "hardcoded"
183     code, so that making table of some unknown code page
184     can result incorrect mapping.
185    
186     =head1 AUTHOR
187    
188     Nanashi-san <nanashi@san.invalid>
189    
190     =head1 LICENSE
191    
192     Public Domain.
193    
194     =cut
195    
196     # $Date: 2002/11/03 11:08:15 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24