/[suikacvs]/test/oldencodeutils/lib/Encode/lib/jcode.pl
Suika

Contents of /test/oldencodeutils/lib/Encode/lib/jcode.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Mon Oct 14 07:02:02 2002 UTC (22 years, 6 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
File MIME type: text/plain
2002-10-14  Nanashi-san <nanashi@san.invalid>

	* jcode.pl: New script.
	(Commited by Wakaba <w@suika.fam.cx>.)

1 wakaba 1.1
2     =head1 NAME
3    
4     jcode.pl wrapper module --- jcode.pl compatible interface of Encode
5     modules
6    
7     =head1 DESCRIPTION
8    
9     Since the perl4 age, C<jcode.pl> is widely used to convert Japanese
10     coding systems. With the release of perl 5.8, which includes
11     new standarized convertion module of C<Encode>, jcode.pl is
12     finally expected to be obsoleted. But there are so many scripts
13     using jcode.pl that we cannot delete old jcode.pl soon.
14    
15     This library provides the same interface with original jcode.pl
16     but actual convertion process is done by Encode modules.
17    
18     =cut
19    
20     package jcode;
21     use strict;
22     use 5.7.3;
23     require Encode::ISO2022::SevenBit;
24     require Encode::ISO2022::EightBit;
25     require Encode::SJIS::JIS;
26     require Encode::Table;
27     require Encode::Table::JISEditions;
28    
29     ## jcode.pl interface
30     our $rcsid = q$Id: jcode.pl,v 1.1 2002/10/13 12:32:38 wakaba Exp $;
31     our $version;
32     our (%convf, %h2zf, %z2hf);
33    
34     ## Original variables
35     our $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
36     my $__encoder = {};
37     our $WIDTH_COMPATIBLE = 1;
38     our $UNIFY_LATIN_IRV = 1;
39     my $__re_hyphen = '';
40    
41     =item $jcode::VERSION
42    
43     Version of this wrapper library, in standard perl5 style.
44     (This variable is not implemented in original jcode.pl.)
45    
46     =item $jcode::UNIFY_LATIN_IRV = 0/1
47    
48     If true, JIS X 0201 latin set is code-point-unified with
49     ISO/IEC 646 IRV (ASCII). Default is 1. (This variable is
50     not implemented in original jcode.pl.)
51    
52     =item $jcode::WIDTH_COMPATIBLE = 0/1
53    
54     If true, separate fullwidth and halfwidth characters
55     so that convertion result will be same as that of original
56     jcode.pl. (This variable is not implemented in original jcode.pl.)
57    
58     Default is 1.
59    
60     =cut
61    
62     &init unless defined $version;
63    
64     =item jcode::init ()
65    
66     Initialization of this wrapper module. Like original jcode.pl,
67     this procedure NEED NOT be called by hand. It is automatically
68     called.
69    
70     In spite of sameness of function name, its process is not
71     same as that of original jcode.pl's.
72    
73     =cut
74    
75     sub init () {
76     $version = $rcsid =~ /,v ([0-9.]+)/ ? $1 : 'unknown';
77     $__encoder->{jis} = Encode::find_encoding ('iso-2022-jp');
78     $__encoder->{euc} = Encode::find_encoding ('euc-jp');
79     $__encoder->{sjis} = Encode::find_encoding ('shift_jis');
80     $__encoder->{sjisA} = Encode::find_encoding ('shift_jis-ascii');
81     for (qw/jis euc sjis sjisA/) {
82     $__encoder->{$_}->{_encode_mapping} = 0;
83     $__encoder->{$_}->{_decode_mapping} = 0;
84     }
85     my $start94n = 0xE9F6C0; ## HYPHEN == 01-30 == 0x213E
86     for (0x40-0x30, 0x42-0x30, 79, 0x4F-0x30) {
87     $__re_hyphen .= chr ($start94n + 94*94*$_ + 29);
88     }
89    
90     no strict 'refs';
91     for my $in (qw/jis sjis euc/) {
92     for my $out (qw/jis sjis euc/) {
93     $convf{$in, $out} = *{ qq(${in}2${out}) };
94     }
95     $h2zf{$in} = *{ qq(h2z_${in}) };
96     $z2hf{$in} = *{ qq(z2h_${in}) };
97     }
98     }
99    
100     =item jcode::get_inout (@)
101    
102     This function does not work with this wrapper library.
103    
104     =item jcode::jis_inout (@)
105    
106     This function does not work with this wrapper library.
107    
108     =cut
109    
110     sub get_inout (@) {
111     warn "$0: jcode.pl: get_inout is called but do nothing";
112     }
113    
114     sub jis_inout (@) {
115     warn "$0: jcode.pl: jis_inout is called but do nothing";
116     }
117    
118     =item jcode::getcode (\$s)
119    
120     Guess coding system of $s and return it. Possible returned
121     values are 'jis' (7-bit ISO/IEC 2022 (superset of ISO-2022-JP)),
122     'euc' (Japanese EUC), 'sjis' (Shift JIS) or undef (detection
123     is failed). Although original jcode.pl may return 'binary',
124     this library does not return it.
125    
126     =cut
127    
128     sub getcode (\$) {
129     my $s = shift;
130     require Encode::Guess;
131     my $e = Encode::Guess::guess_encoding ($$s, qw/euc-jp shiftjis 7bit-jis/);
132     $e = $e->name if ref $e;
133     my $code;
134     if ($e =~ /shift/) {
135     $code = 'sjis';
136     } elsif ($e =~ /euc/) {
137     $code = 'euc';
138     } elsif ($e =~ /jis/) {
139     $code = 'jis';
140     } elsif ($e =~ /utf-?8/) {
141     $code = 'utf8';
142     }
143     my $matched; ## $matched is not implemented.
144     wantarray ? ($matched, $code) : $code;
145     }
146     # Internal-used function jcode::max is not implemented.
147    
148     =item jcode::convert (\$s, [$output_code, $input_code, $option])
149    
150     Convert $s from $input_code to $output_code (with $option).
151     If $input_code is not specified, guessed value is used.
152     Likewise unless $output_code 'jis' is supporsed.
153    
154     =cut
155    
156     sub convert (\$;$$$) {
157     my ($s, $output, $input, $option) = @_;
158     return (undef, undef) unless $input = $input || &getcode ($s);
159     return (undef, $input) if $input eq 'binary';
160     $output ||= 'jis';
161     $output = $input if $output eq 'noconv';
162     my $f = $convf{$input, $output};
163     &$f ($s, $option);
164     wantarray ? ($f, $input) : $input;
165     }
166    
167     =item jcode::jis ($output_code, $s, [$input_code, $option])
168    
169     Return $s as 7-bit ISO/IEC 2022 string.
170    
171     =item jcode::euc ($output_code, $s, [$input_code, $option])
172    
173     Return $s as Japanese EUC string.
174    
175     =item jcode::sjis ($output_code, $s, [$input_code, $option])
176    
177     Return $s as Shift JIS string.
178    
179     =cut
180    
181     sub jis ($;$$) { &to (jis => @_) }
182     sub euc ($;$$) { &to (euc => @_) }
183     sub sjis ($;$$) { &to (sjis => @_) }
184    
185     =item jcode::to ($output_code, $s, [$input_code, $option])
186    
187     Return-by-value interface of &convert.
188    
189     =item jcode::what ($s)
190    
191     Return-by-value interface of &getcode.
192    
193     =item jcode::trans ($s, @)
194    
195     Return-by-value interface of &tr.
196    
197     =cut
198    
199     sub to ($$;$$) {
200     my ($output, $s, $input, $option) = @_;
201     &convert (\$s, $output, $input, $option);
202     $s;
203     }
204    
205     sub what ($) {
206     my $s = shift;
207     &getcode (\$s);
208     }
209    
210     sub trans ($@) {
211     my $s = shift;
212     &tr (\$s, @_);
213     $s;
214     }
215    
216     sub sjis2jis ($;$$) {
217     my ($s, $option, $n) = @_;
218     $$s = $__encoder->{sjis}->decode ($$s);
219     my @tbl;
220     push @tbl, 'jisx0201_katakana_to_jisx0208_1990' if !$WIDTH_COMPATIBLE || $option =~ /z/;
221     push @tbl, 'jisx0208_1990_to_jisx0201_katakana' if $option =~ /h/;
222     push @tbl, 'jisx0201_latin_to_ascii__cpunify' if $UNIFY_LATIN_IRV;
223     $$s = Encode::Table::convert ($$s, \@tbl);
224     $$s = Encode::Table::convert ($$s, [qw/jisx0208_1990_to_ascii/]) unless $WIDTH_COMPATIBLE;
225     #my @tbl = qw/jisx0208_1990_to_1983__cpunify/;
226     #push @tbl, 'jisx0201_latin_to_ascii__cpunify' if $UNIFY_LATIN_IRV;
227     #$$s = Encode::Table::convert ($$s, \@tbl);
228     $$s = $__encoder->{jis}->encode ($$s);
229     $n;
230     }
231     # Internal-use-functions _sjis2jis, __sjis2jis are not implemented.
232    
233     sub euc2jis ($;$$) {
234     my ($s, $option, $n) = @_;
235     $$s = $__encoder->{euc}->decode ($$s);
236     my @tbl;
237     push @tbl, 'jisx0201_katakana_to_jisx0208_1990' if !$WIDTH_COMPATIBLE || $option =~ /z/;
238     push @tbl, 'jisx0208_1990_to_jisx0201_katakana' if $option =~ /h/;
239     $$s = Encode::Table::convert ($$s, \@tbl);
240     $$s = Encode::Table::convert ($$s, [qw/jisx0208_1990_to_ascii/]) unless $WIDTH_COMPATIBLE;
241     #my @tbl = qw/jisx0208_1990_to_1983__cpunify/;
242     #push @tbl, 'jisx0201_latin_to_ascii__cpunify' if $UNIFY_LATIN_IRV;
243     $$s = Encode::Table::convert ($$s, \@tbl);
244     $$s = $__encoder->{jis}->encode ($$s);
245     $n;
246     }
247     # Internal-use-functions _euc2jis, __euc2jis are not implemented.
248    
249     sub jis2euc ($;$$) {
250     my ($s, $option, $n) = @_;
251     $$s = $__encoder->{jis}->decode ($$s);
252     my @tbl = qw/jisx0208_1983_to_1990__cpunify/;
253     $$s = Encode::Table::convert ($$s, \@tbl);
254     $$s = Encode::Table::convert ($$s, [qw/jisx0208_1990_to_ascii/]) unless $WIDTH_COMPATIBLE;
255     my @tbl;
256     push @tbl, 'jisx0201_latin_to_ascii__cpunify' if $UNIFY_LATIN_IRV;
257     push @tbl, 'jisx0201_katakana_to_jisx0208_1990' if !$WIDTH_COMPATIBLE || $option =~ /z/;
258     push @tbl, 'jisx0208_1990_to_jisx0201_katakana' if $option =~ /h/;
259     $$s = Encode::Table::convert ($$s, \@tbl);
260     $$s = $__encoder->{euc}->encode ($$s);
261     $n;
262     }
263     # Internal-use-function _jis2euc is not implemented.
264    
265     sub jis2sjis (\$;$$) {
266     my ($s, $option, $n) = @_;
267     $$s = $__encoder->{jis}->decode ($$s);
268     my @tbl = qw/jisx0208_1983_to_1990__cpunify/;
269     $$s = Encode::Table::convert ($$s, \@tbl);
270     $$s = Encode::Table::convert ($$s, [qw/jisx0208_1990_to_ascii/]) unless $WIDTH_COMPATIBLE;
271     my @tbl;
272     push @tbl, 'ascii_to_jisx0201_latin__cpunify' if $UNIFY_LATIN_IRV;
273     push @tbl, 'jisx0201_katakana_to_jisx0208_1990' if !$WIDTH_COMPATIBLE || $option =~ /z/;
274     push @tbl, 'jisx0208_1990_to_jisx0201_katakana' if $option =~ /h/;
275     $$s = Encode::Table::convert ($$s, \@tbl);
276     $$s = $__encoder->{sjis}->encode ($$s);
277     $n;
278     }
279     # Internal-use-function _jis2sjis is not implemented.
280    
281     sub sjis2euc ($;$$) {
282     my ($s, $option, $n) = @_;
283     my @tbl;
284     push @tbl, 'jisx0201_latin_to_ascii__cpunify' if $UNIFY_LATIN_IRV;
285     push @tbl, 'jisx0201_katakana_to_jisx0208_1990' if !$WIDTH_COMPATIBLE || $option =~ /z/;
286     push @tbl, 'jisx0208_1990_to_jisx0201_katakana' if $option =~ /h/;
287     $$s = $__encoder->{sjis}->decode ($$s);
288     $$s = Encode::Table::convert ($$s, [qw/jisx0208_1990_to_ascii/]) unless $WIDTH_COMPATIBLE;
289     $$s = Encode::Table::convert ($$s, \@tbl);
290     $$s = $__encoder->{euc}->encode ($$s);
291     $n;
292     }
293     # Internal-use-function s2e is not implemented.
294    
295     sub euc2sjis (\$;$$) {
296     my ($s, $option, $n) = @_;
297     my @tbl;
298     push @tbl, 'ascii_to_jisx0201_latin__cpunify' if $UNIFY_LATIN_IRV;
299     push @tbl, 'jisx0201_katakana_to_jisx0208_1990' if !$WIDTH_COMPATIBLE || $option =~ /z/;
300     push @tbl, 'jisx0208_1990_to_jisx0201_katakana' if $option =~ /h/;
301     $$s = $__encoder->{euc}->decode ($$s);
302     $$s = Encode::Table::convert ($$s, [qw/jisx0208_1990_to_ascii/]) unless $WIDTH_COMPATIBLE;
303     $$s = Encode::Table::convert ($$s, \@tbl);
304     $$s = $__encoder->{sjis}->encode ($$s);
305     $n;
306     }
307     # Internal-use-function e2s is not implemented.
308    
309     sub jis2jis ($;$) {
310     my ($s, $option) = @_;
311     # $$s =~ s/\x1B\x24[\x40\x42]/\x1B\x24\x42/g;
312     # $$s =~ s/\x1B\x28[BJ]/\x1B\x28\x42/g;
313     &h2z_jis ($s) if $option =~ /z/;
314     &z2h_jis ($s) if $option =~ /h/;
315     }
316    
317     sub sjis2sjis ($;$) {
318     my ($s, $option) = @_;
319     &h2z_sjis ($s) if $option =~ /z/;
320     &z2h_sjis ($s) if $option =~ /h/;
321     }
322    
323     sub euc2euc ($;$) {
324     my ($s, $option) = @_;
325     &h2z_euc ($s) if $option =~ /z/;
326     &z2h_euc ($s) if $option =~ /h/;
327     }
328    
329     =item jcode::cache ()
330    
331     =item jcode::nocache ()
332    
333     =item jcode::flushcache ()
334    
335     These method does not work with this wrapper library.
336    
337     =cut
338    
339     sub cache () { 0 }
340     sub nocache () { 0 }
341     sub flushcache () { 0 }
342    
343     sub h2z_jis (\$) {
344     my $s = shift;
345     $$s = $__encoder->{jis}->decode ($$s);
346     $$s = Encode::Table::convert ($$s, [qw/jisx0201_katakana_to_jisx0208_1990/]);
347     $$s = $__encoder->{jis}->encode ($$s);
348     }
349     # Internal-use-function _h2z_jis is not implemented.
350    
351     sub h2z_euc (\$) {
352     my $s = shift;
353     $$s = $__encoder->{euc}->decode ($$s);
354     $$s = Encode::Table::convert ($$s, [qw/jisx0201_katakana_to_jisx0208_1990/]);
355     $$s = $__encoder->{euc}->encode ($$s);
356     }
357    
358     sub h2z_sjis (\$) {
359     my $s = shift;
360     $$s = $__encoder->{sjis}->decode ($$s);
361     $$s = Encode::Table::convert ($$s, [qw/jisx0201_katakana_to_jisx0208_1990/]);
362     $$s = $__encoder->{sjis}->encode ($$s);
363     }
364    
365     sub z2h_jis (\$) {
366     my $s = shift;
367     # not implemented.
368     }
369     # Internal-use-functions _z2h_jis, __z2h_jis are not implemented.
370    
371     sub z2h_euc ($) {
372     my $s = shift;
373     $$s = $__encoder->{euc}->decode ($$s);
374     $$s = Encode::Table::convert ($$s, [qw/jisx0208_1990_to_jisx0201_katakana/]);
375     $$s = $__encoder->{euc}->encode ($$s);
376     }
377    
378     sub z2h_sjis ($) {
379     my $s = shift;
380     $$s = $__encoder->{sjis}->decode ($$s);
381     $$s = Encode::Table::convert ($$s, [qw/jisx0208_1990_to_jisx0201_katakana/]);
382     $$s = $__encoder->{sjis}->encode ($$s);
383     }
384    
385     # Internal-use-functions init_z2h_euc, init_z2h_sjis are not implemented.
386    
387     =item jcode::tr (\$s, $from => $to, $option)
388    
389     jcode version of tr///. Instead of '-' (HYPHEN-MINUS SIGN)
390     of ISO/IEC 646 IRV (ASCII), HYPHEN of JIS X 0208 is able to
391     be used to specify ranges of code point.
392    
393     =cut
394    
395     sub tr (\$$$;$) {
396     my ($s, $from => $to, $opt) = @_;
397     my $input = $$s =~ /\x1B/ ? 'jis' : 'euc';
398     $$s = $__encoder->{$input}->decode ($$s);
399     $from = $__encoder->{$input}->decode ($from);
400     $to = $__encoder->{$input}->decode ($to);
401     $from =~ s/\x5C([\x5C$__re_hyphen])|[$__re_hyphen]/$1||'-'/ge;
402     $to =~ s/\x5C([\x5C$__re_hyphen])|[$__re_hyphen]/$1||'-'/ge;
403     $from =~ s/([{}\\])/\\$1/g;
404     $to =~ s/([{}\\])/\\$1/g;
405     eval qq{\$\$s =~ tr{$from}{$to}$opt} or warn $@;
406     print &Encode::encode ('shift-jis', qq{\$\$s =~ tr{$from}{$to}$opt\n} );
407     $$s = $__encoder->{$input}->encode ($$s);
408     }
409     # Internal-use-functions _maketable, _expnd1, _expnd2 are not implemented.
410    
411     =head1 USAGE
412    
413     You just place this library instead of original jcode.pl.
414     Usually it is enough.
415    
416     =head1 SEE ALSO
417    
418     L<Encode>, L<Encode::ISO2022>, L<Encode::SJIS>,
419     L<Encode::Table>
420    
421     =head1 BUGS
422    
423     Details of convertion process is different from original jcode.pl's
424     because of difference between it and Encode.
425    
426     Some less frequently used functions of jcode.pl is not implemented
427     in this wrapper library.
428    
429     =head1 AUTHORS
430    
431     Nanashi-san
432    
433     =head1 LICENSE
434    
435     This library is a Public Domain software.
436    
437     =cut
438    
439     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24  
Google Analytics is used in this page; Cookies are used. 忍者AdMax is used in this page; Cookies are used. Privacy policy.