/[suikacvs]/perl/lib/Encode/HZ.pm
Suika

Contents of /perl/lib/Encode/HZ.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (hide annotations) (download)
Thu Dec 12 08:17:16 2002 UTC (21 years, 10 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.4: +11 -9 lines
]

1 wakaba 1.1 package Encode::HZ;
2     use strict;
3    
4     use vars qw($VERSION);
5 wakaba 1.5 $VERSION = do {my @r =(q$Revision: 1.4 $ =~ /\d+/g);sprintf "%d."."%02d" x $#r, @r};
6 wakaba 1.1 use base qw(Encode::Encoding);
7 wakaba 1.4 __PACKAGE__->Define(qw/hz chinese-hz hz-gb-2312 hz-gb2312 cp52936/);
8 wakaba 1.1
9     sub needs_lines { 1 }
10    
11     sub perlio_ok {
12     return 0; # for the time being
13     }
14    
15     sub decode
16     {
17     my ($obj,$str,$chk) = @_;
18 wakaba 1.3 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
19 wakaba 1.1
20     $str =~ s{~ # starting tilde
21     (?:
22     (~) # another tilde - escaped (set $1)
23     | # or
24     \x0D?\x0A # \n - output nothing
25     | # or
26     \{ # opening brace of GB data
27     ( # set $2 to any number of...
28 wakaba 1.5 (?:[\x21-\x7D][\x21-\x7E])*
29 wakaba 1.1 )
30     ~\} # closing brace of GB data
31 wakaba 1.5 |
32     \{
33     ((?:[\x21-\x7D][\x21-\x7E])+[\x0D\x0A])
34     # | # XXX: invalid escape - maybe die on $chk?
35 wakaba 1.1 )
36     }{
37 wakaba 1.5 my ($t, $c, $d) = ($1, $2, $3);
38 wakaba 1.1 if (defined $t) { # two tildes make one tilde
39     '~';
40     } elsif (defined $c) { # decode the characters
41 wakaba 1.3 $c =~ tr/\x21-\x7E/\xA1-\xFE/;
42 wakaba 1.1 $gb->decode($c, $chk);
43 wakaba 1.5 } elsif (defined $d) { # decode the characters
44     $d =~ tr/\x21-\x7E/\xA1-\xFE/;
45     $gb->decode($d, $chk);
46 wakaba 1.1 } else { # ~\n and invalid escape = ''
47     '';
48     }
49     }egx;
50    
51     return $str;
52     }
53    
54     sub encode ($$;$) {
55     my ($obj,$str,$chk) = @_;
56     $_[1] = '';
57 wakaba 1.3 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
58 wakaba 1.1
59     $str =~ s/~/~~/g;
60     $str = $gb->encode ($str, 1);
61    
62     $str =~ s{ ((?:[\xA1-\xFE][\xA1-\xFE])+) }{
63     my $c = $1;
64     $c =~ tr/\xA1-\xFE/\x21-\x7E/;
65     sprintf q(~{%s~}), $c;
66     }goex;
67     $str;
68     }
69    
70 wakaba 1.3 sub __hz_encoding_name { 'euc-cn' }
71    
72 wakaba 1.1 package Encode::HZ::HZ8;
73     use base qw(Encode::HZ);
74 wakaba 1.2 __PACKAGE__->Define(qw/hz8 x-hz8/);
75 wakaba 1.1
76     sub encode ($$;$) {
77     my ($obj,$str,$chk) = @_;
78     $_[1] = '';
79 wakaba 1.3 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
80 wakaba 1.1
81     $str =~ s/~/~~/g;
82     $str = $gb->encode ($str, 1);
83    
84     $str =~ s{ ((?:[\xA1-\xFE][\xA1-\xFE])+) }{
85     sprintf q(~{%s~}), $1;
86     }goex;
87     $str;
88     }
89    
90 wakaba 1.3 package Encode::HZ::HZ165;
91     use base qw(Encode::HZ);
92 wakaba 1.4 __PACKAGE__->Define(qw/hz-iso-ir-165 hz-isoir165 x-iso-ir-165-hz/);
93 wakaba 1.3
94     sub __hz_encoding_name { 'cn-gb-isoir165' }
95    
96 wakaba 1.1 1;
97     __END__
98    
99     =head1 NAME
100    
101 wakaba 1.3 Encode::HZ --- Encode module for HZ (HZ-GB-2312 and HZ for
102     ISO-IR 165) and HZ8
103 wakaba 1.1
104     =head1 DESCRIPTION
105    
106     This module make the module Encode of Perl (5.7.3 or later)
107     to be able to encode/decode HZ and its variant coding systems.
108    
109     Note that Encode::CN::HZ, standard module of Perl, can encode/decode
110     HZ (HZ-GB-2312 in IANA name), but other variants such as
111     HZ8 can't be encoded/decode.
112 wakaba 1.2
113 wakaba 1.3 =head1 ENCODINGS
114    
115     =over 4
116    
117     =item hz-gb-2312
118    
119     HZ 7-bit encoding for Chinese with GB 2312-80,
120     defined by RFC 1842 and RFC 1843.
121 wakaba 1.4 (Alias: hz, chinese-hz (emacsen), CP52936 (M$),
122     hz-gb2312)
123    
124     Note that hz8 is also decodable with this encoding.
125 wakaba 1.3
126     =item hz8
127    
128     HZ 8-bit encoding for Chinese with GB 2312-80.
129     (Alias: x-hz8)
130    
131 wakaba 1.4 Note that hz-gb-2312 is also decodable with this encoding.
132    
133     =item hz-iso-ir-165
134 wakaba 1.3
135     HZ 7-bit encoding for Chinese with ISO-IR 165
136     (syntax is same as hz-gb-2312, but coded character
137 wakaba 1.4 set is differ) (Alias: hz-isoir165, x-iso-ir-165-hz)
138 wakaba 1.3
139     Note that you need load Encode module that support
140     'cn-gb-isoir165' encoding (defined by RFC 1922),
141     such as Encode::ISO2022::EightBit.
142    
143 wakaba 1.4 Also note that since ISO-IR 165 is nealy superset of GB 2312-80,
144     hz-iso-ir-165 is also considerable as a superset of
145     hz-gb-2312.
146    
147 wakaba 1.3 =back
148    
149 wakaba 1.2 =head1 TODO
150    
151     Support of extended HZ such as EHZ.
152 wakaba 1.1
153     =head1 ACKNOWLEDGEMENTS
154    
155     Most part of this module is taken from Encode::CN::HZ.
156    
157     =head1 COPYRIGHT
158    
159     Copyright 2002 Wakaba <w@suika.fam.cx>
160    
161     This library is free software; you can redistribute it
162     and/or modify it under the same terms as Perl itself.
163    
164     =cut
165 wakaba 1.3
166 wakaba 1.5 # $Date: 2002/10/14 06:58:35 $
167 wakaba 1.3 ### HZ.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24