/[suikacvs]/perl/lib/Encode/HZ.pm
Suika

Contents of /perl/lib/Encode/HZ.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (show annotations) (download)
Thu Dec 12 08:17:16 2002 UTC (21 years, 5 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.4: +11 -9 lines
]

1 package Encode::HZ;
2 use strict;
3
4 use vars qw($VERSION);
5 $VERSION = do {my @r =(q$Revision: 1.4 $ =~ /\d+/g);sprintf "%d."."%02d" x $#r, @r};
6 use base qw(Encode::Encoding);
7 __PACKAGE__->Define(qw/hz chinese-hz hz-gb-2312 hz-gb2312 cp52936/);
8
9 sub needs_lines { 1 }
10
11 sub perlio_ok {
12 return 0; # for the time being
13 }
14
15 sub decode
16 {
17 my ($obj,$str,$chk) = @_;
18 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
19
20 $str =~ s{~ # starting tilde
21 (?:
22 (~) # another tilde - escaped (set $1)
23 | # or
24 \x0D?\x0A # \n - output nothing
25 | # or
26 \{ # opening brace of GB data
27 ( # set $2 to any number of...
28 (?:[\x21-\x7D][\x21-\x7E])*
29 )
30 ~\} # closing brace of GB data
31 |
32 \{
33 ((?:[\x21-\x7D][\x21-\x7E])+[\x0D\x0A])
34 # | # XXX: invalid escape - maybe die on $chk?
35 )
36 }{
37 my ($t, $c, $d) = ($1, $2, $3);
38 if (defined $t) { # two tildes make one tilde
39 '~';
40 } elsif (defined $c) { # decode the characters
41 $c =~ tr/\x21-\x7E/\xA1-\xFE/;
42 $gb->decode($c, $chk);
43 } elsif (defined $d) { # decode the characters
44 $d =~ tr/\x21-\x7E/\xA1-\xFE/;
45 $gb->decode($d, $chk);
46 } else { # ~\n and invalid escape = ''
47 '';
48 }
49 }egx;
50
51 return $str;
52 }
53
54 sub encode ($$;$) {
55 my ($obj,$str,$chk) = @_;
56 $_[1] = '';
57 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
58
59 $str =~ s/~/~~/g;
60 $str = $gb->encode ($str, 1);
61
62 $str =~ s{ ((?:[\xA1-\xFE][\xA1-\xFE])+) }{
63 my $c = $1;
64 $c =~ tr/\xA1-\xFE/\x21-\x7E/;
65 sprintf q(~{%s~}), $c;
66 }goex;
67 $str;
68 }
69
70 sub __hz_encoding_name { 'euc-cn' }
71
72 package Encode::HZ::HZ8;
73 use base qw(Encode::HZ);
74 __PACKAGE__->Define(qw/hz8 x-hz8/);
75
76 sub encode ($$;$) {
77 my ($obj,$str,$chk) = @_;
78 $_[1] = '';
79 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
80
81 $str =~ s/~/~~/g;
82 $str = $gb->encode ($str, 1);
83
84 $str =~ s{ ((?:[\xA1-\xFE][\xA1-\xFE])+) }{
85 sprintf q(~{%s~}), $1;
86 }goex;
87 $str;
88 }
89
90 package Encode::HZ::HZ165;
91 use base qw(Encode::HZ);
92 __PACKAGE__->Define(qw/hz-iso-ir-165 hz-isoir165 x-iso-ir-165-hz/);
93
94 sub __hz_encoding_name { 'cn-gb-isoir165' }
95
96 1;
97 __END__
98
99 =head1 NAME
100
101 Encode::HZ --- Encode module for HZ (HZ-GB-2312 and HZ for
102 ISO-IR 165) and HZ8
103
104 =head1 DESCRIPTION
105
106 This module make the module Encode of Perl (5.7.3 or later)
107 to be able to encode/decode HZ and its variant coding systems.
108
109 Note that Encode::CN::HZ, standard module of Perl, can encode/decode
110 HZ (HZ-GB-2312 in IANA name), but other variants such as
111 HZ8 can't be encoded/decode.
112
113 =head1 ENCODINGS
114
115 =over 4
116
117 =item hz-gb-2312
118
119 HZ 7-bit encoding for Chinese with GB 2312-80,
120 defined by RFC 1842 and RFC 1843.
121 (Alias: hz, chinese-hz (emacsen), CP52936 (M$),
122 hz-gb2312)
123
124 Note that hz8 is also decodable with this encoding.
125
126 =item hz8
127
128 HZ 8-bit encoding for Chinese with GB 2312-80.
129 (Alias: x-hz8)
130
131 Note that hz-gb-2312 is also decodable with this encoding.
132
133 =item hz-iso-ir-165
134
135 HZ 7-bit encoding for Chinese with ISO-IR 165
136 (syntax is same as hz-gb-2312, but coded character
137 set is differ) (Alias: hz-isoir165, x-iso-ir-165-hz)
138
139 Note that you need load Encode module that support
140 'cn-gb-isoir165' encoding (defined by RFC 1922),
141 such as Encode::ISO2022::EightBit.
142
143 Also note that since ISO-IR 165 is nealy superset of GB 2312-80,
144 hz-iso-ir-165 is also considerable as a superset of
145 hz-gb-2312.
146
147 =back
148
149 =head1 TODO
150
151 Support of extended HZ such as EHZ.
152
153 =head1 ACKNOWLEDGEMENTS
154
155 Most part of this module is taken from Encode::CN::HZ.
156
157 =head1 COPYRIGHT
158
159 Copyright 2002 Wakaba <w@suika.fam.cx>
160
161 This library is free software; you can redistribute it
162 and/or modify it under the same terms as Perl itself.
163
164 =cut
165
166 # $Date: 2002/10/14 06:58:35 $
167 ### HZ.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24