/[suikacvs]/perl/lib/Encode/Table/tool/winapi2tbl.pl
Suika

Contents of /perl/lib/Encode/Table/tool/winapi2tbl.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download)
Thu Dec 12 07:47:19 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.1: +18 -19 lines
File MIME type: text/plain
*** empty log message ***

1 wakaba 1.1 #!/usr/bin/perl
2    
3     =head1 NAME
4    
5     winapi2tbl --- Mapping table generator (from Win32 API)
6    
7     =head1 USAGE
8    
9     $ perl winapi2tbl.pl 932 > winapi-cp932.tbl
10    
11     =cut
12    
13     use strict;
14     require Win32::API;
15    
16     my $CodePage = shift || 932;
17     my %S_range = (
18     932 => [0x40..0x7E,0x80..0xFC],
19     936 => [0x40..0x7E,0x80..0xFE],
20     949 => [0x41..0x5A,0x61..0x7A,0x81..0xFE],
21     950 => [0x40..0x7E,0xA1..0xFE],
22     1361 => [0x31..0xFE],
23     20000 => [0x21..0x7E,0xA1..0xFE],
24 wakaba 1.2 20932 => [0xA1..0xFE],
25 wakaba 1.1 20936 => [0xA1..0xFE],
26     20949 => [0xA1..0xFE],
27     );
28    
29     {
30     my @name = split /\n/, require 'unicore/Name.pl';
31     my %name;
32     for (@name) {
33     if (/^(....) ([^\t]+)/) {
34     $name{hex $1} = $2;
35     }
36     }
37     sub NAME ($) {
38     $_[0] < 0x0020 ? '<control>' :
39     $_[0] < 0x007F ? $name{$_[0]} :
40     $_[0] < 0x00A0 ? '<control>' :
41     $name{$_[0]} ? $name{$_[0]} :
42     $_[0] < 0x00A0 ? '<control>' :
43     $_[0] < 0x3400 ? '' :
44     $_[0] < 0xA000 ? '<cjk>' :
45     $_[0] < 0xE000 ? '<hangul>' :
46     $_[0] < 0xF900 ? '<private>' :
47     '';
48     }
49     }
50    
51     my @hdr;
52 wakaba 1.2 print STDERR "Checking system version...\n";
53 wakaba 1.1 if (eval q{require Message::Field::UA}) {
54     my $ua = new Message::Field::UA;
55     $ua->add_our_name;
56     $ua->replace_system_version ('ie', -prepend => 0);
57     push @hdr, "#; Environment: $ua";
58     print STDERR $hdr[$#hdr],"\n";
59     }
60    
61     print STDERR "MultiByte -> WideChar...\n";
62     #int MultiByteToWideChar(
63     # UINT CodePage, // code page
64     # DWORD dwFlags, // mapping flag
65     # LPCSTR lpMultiByteStr, // multibyte string (source)
66     # int cchMultiByte, // length of multibyte string
67     # LPWSTR lpWideCharStr, // wide string (result)
68     # int cchWideChar // length of wide string
69     #);
70     ## <http://www.microsoft.com/japan/developer/library/jpwinpf/_win32_multibytetowidechar.htm>
71    
72     my $M2W = new Win32::API (kernel32 => "MultiByteToWideChar", INPIPI => 'I');
73     my %M2U;
74     for my $F (0x00..0xFF) { ## one byte
75     my $mb = pack 'C', $F;
76     my $wc = "\x00" x 10;
77     my $len = $M2W->Call ($CodePage, 0, $mb, length ($mb) => $wc, length ($wc));
78     $wc = substr ($wc, 0, $len * 2);
79 wakaba 1.2 if (length ($wc) >= 2 && $wc ne "\x00\x00") {
80 wakaba 1.1 $wc =~ s/(.)(.)/$2$1/gs;
81     $wc =~ s/(.)/sprintf '%02X', ord $1/ges;
82     $M2U{ $F } = hex $wc;
83     }
84     }
85     for my $F (0x80..0xFF) { ## two bytes
86     for my $S (@{$S_range{$CodePage} || [0x01..0xFF] }) {
87     my $mb = pack 'CC', $F, $S;
88     my $wc = "\x00" x 10;
89     my $len = $M2W->Call ($CodePage, 0, $mb, length ($mb) => $wc, length ($wc));
90     $wc = substr ($wc, 0, $len * 2);
91 wakaba 1.2 if (length ($wc) >= 2) {
92 wakaba 1.1 $wc =~ s/(.)(.)/$2$1/gs;
93     $wc =~ s/(.)/sprintf '%02X', ord $1/ges;
94 wakaba 1.2 $M2U{ 0x100 * $F + $S } = hex ($wc) unless 0x10000 * $M2U{$F} + $M2U{$S} == hex ($wc);
95 wakaba 1.1 }
96     }}
97     $M2U{0x00} = 0x0000;
98    
99     print STDERR "WideChar -> MultiByte...\n";
100     #int WideCharToMultiByte(
101     # UINT CodePage, // code page
102     # DWORD dwFlags, // mapping flag
103     # LPCWSTR lpWideCharStr, // wide string (source)
104     # int cchWideChar, // length of wide string
105     # LPSTR lpMultiByteStr, // multibyte string (result)
106     # int cchMultiByte, // length of multibyte
107     # LPCSTR lpDefaultChar, // substition char of unmappable one
108     # LPBOOL lpUsedDefaultChar // substition char is used?
109     #);
110     ## <http://www.microsoft.com/japan/developer/library/jpwinpf/_win32_widechartomultibyte.htm>
111    
112     my $W2M = new Win32::API (kernel32 => "WideCharToMultiByte", INPIPIPP => 'I');
113     my %U2M;
114     for my $R (0x00..0xFF) {
115     for my $C (0x00..0xFF) {
116     my $wc = pack 'CC', $C, $R; ## UTF-16LE
117     my $mb = "\x00" x 10;
118     $W2M->Call ($CodePage, 0, $wc, length ($wc) / 2 => $mb, length ($mb), "\x00", 0);
119     $mb =~ s/\x00.*$//;
120     if (length $mb) {
121     $mb =~ s/(.)/sprintf '%02X', ord $1/ges;
122     $U2M{ 0x100 * $R + $C } = hex $mb;
123     }
124     }}
125     $U2M{0x0000} = 0x00;
126 wakaba 1.2
127     for my $R1 (0xD8..0xDB) { for my $C1 (0x00..0xFF) {
128     for my $R2 (0xDC..0xDF) { for my $C2 (0x00..0xFF) {
129     my $wc = pack 'CC', $C1, $R1, $C2, $R2; ## UTF-16LE
130     my $mb = "\x00" x 10;
131     $W2M->Call ($CodePage, 0, $wc, length ($wc) / 2 => $mb, length ($mb), "\x00", 0);
132     $mb =~ s/\x00.*$//;
133     if (length $mb) {
134     $mb =~ s/(.)/sprintf '%02X', ord $1/ges;
135     $U2M{ 0x10000 + (0x100 * $R1 + $C1 - 0xD800) * 0x400
136     + (0x100 * $R2 + $C2 - 0xDC00) } = hex ($mb);
137     }
138     }}}}
139 wakaba 1.1
140     print STDERR "Creating table...\n";
141    
142     my @t;
143     for my $U (keys %U2M) {
144     if ($M2U{$U2M{$U}} == $U) {
145     push @t, sprintf '0x%04X U+%04X # %s',
146     $U2M{$U}, $U, NAME ($U);
147     delete $M2U{ $U2M{$U} };
148     } else {
149     push @t, sprintf '0x%04X U+%04X <- # %s',
150     $U2M{$U}, $U, NAME ($U);
151     }
152     }
153     for my $C (keys %M2U) {
154     push @t, sprintf '0x%04X U+%04X -> # %s',
155     $C, $M2U{$C}, NAME ($M2U{$C});
156     }
157     for (@t) {
158     s/^0x00(..)/0x$1/;
159     }
160    
161     print "#; This file is auto-generated (at @{[ sprintf '%04d-%02d-%02dT%02d:%02d:%02dZ', (gmtime)[5]+1900, (gmtime)[4]+1, (gmtime)[3,2,1,0] ]}).\n";
162     print join "\n", @hdr, sort @t;
163     print "\n";
164    
165     =head1 REQUIRES
166    
167     L<Win32::API> (MUST), unicore/Name.pl (MUST; in perl 5.8.0),
168     L<Message::Field::UA> (SHOULD)
169    
170     =head1 BUGS
171    
172     0x00 or U+0000 can't be treated correctly so that
173     0x00 <-> U+0000 is hardcoded. This may cause problem
174     if the code page does not have 0x00 <-> U+0000 mapping.
175    
176     Mapping to 0x00 or U+0000 is also having problem.
177     For example, CP20127, Microsoft mapping of ASCII,
178     has an entry of 0x80 -> U+0000. But this entry does
179     not occur in outputed table.
180    
181     In other points, this script has a lot of "hardcoded"
182     code, so that making table of some unknown code page
183     can result incorrect mapping.
184    
185     =head1 AUTHOR
186    
187     Nanashi-san <nanashi@san.invalid>
188    
189     =head1 LICENSE
190    
191     Public Domain.
192    
193     =cut
194    
195     # $Date: 2002/11/03 11:08:15 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24