/[suikacvs]/perl/lib/Encode/HZ.pm
Suika

Contents of /perl/lib/Encode/HZ.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (show annotations) (download)
Mon Oct 14 06:58:35 2002 UTC (22 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.3: +16 -10 lines
2002-10-14  Nanashi-san

	* ISO2022.pm, SJIS.pm: Bug fix of utf8 flag control.
	(Committed by Wakaba <w@suika.fam.cx>.)

1 package Encode::HZ;
2 use strict;
3
4 use vars qw($VERSION);
5 $VERSION = do {my @r =(q$Revision: 1.3 $ =~ /\d+/g);sprintf "%d."."%02d" x $#r, @r};
6 use base qw(Encode::Encoding);
7 __PACKAGE__->Define(qw/hz chinese-hz hz-gb-2312 hz-gb2312 cp52936/);
8
9 sub needs_lines { 1 }
10
11 sub perlio_ok {
12 return 0; # for the time being
13 }
14
15 sub decode
16 {
17 my ($obj,$str,$chk) = @_;
18 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
19
20 $str =~ s{~ # starting tilde
21 (?:
22 (~) # another tilde - escaped (set $1)
23 | # or
24 \x0D?\x0A # \n - output nothing
25 | # or
26 \{ # opening brace of GB data
27 ( # set $2 to any number of...
28 (?:
29 [^~] # non-tilde GB character
30 | # or
31 ~(?!\}) # tilde not followed by a closing brace
32 )*
33 )
34 ~\} # closing brace of GB data
35 | # XXX: invalid escape - maybe die on $chk?
36 )
37 }{
38 my ($t, $c) = ($1, $2);
39 if (defined $t) { # two tildes make one tilde
40 '~';
41 } elsif (defined $c) { # decode the characters
42 $c =~ tr/\x21-\x7E/\xA1-\xFE/;
43 $gb->decode($c, $chk);
44 } else { # ~\n and invalid escape = ''
45 '';
46 }
47 }egx;
48
49 return $str;
50 }
51
52 sub encode ($$;$) {
53 my ($obj,$str,$chk) = @_;
54 $_[1] = '';
55 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
56
57 $str =~ s/~/~~/g;
58 $str = $gb->encode ($str, 1);
59
60 $str =~ s{ ((?:[\xA1-\xFE][\xA1-\xFE])+) }{
61 my $c = $1;
62 $c =~ tr/\xA1-\xFE/\x21-\x7E/;
63 sprintf q(~{%s~}), $c;
64 }goex;
65 $str;
66 }
67
68 sub __hz_encoding_name { 'euc-cn' }
69
70 package Encode::HZ::HZ8;
71 use base qw(Encode::HZ);
72 __PACKAGE__->Define(qw/hz8 x-hz8/);
73
74 sub encode ($$;$) {
75 my ($obj,$str,$chk) = @_;
76 $_[1] = '';
77 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
78
79 $str =~ s/~/~~/g;
80 $str = $gb->encode ($str, 1);
81
82 $str =~ s{ ((?:[\xA1-\xFE][\xA1-\xFE])+) }{
83 sprintf q(~{%s~}), $1;
84 }goex;
85 $str;
86 }
87
88 package Encode::HZ::HZ165;
89 use base qw(Encode::HZ);
90 __PACKAGE__->Define(qw/hz-iso-ir-165 hz-isoir165 x-iso-ir-165-hz/);
91
92 sub __hz_encoding_name { 'cn-gb-isoir165' }
93
94 1;
95 __END__
96
97 =head1 NAME
98
99 Encode::HZ --- Encode module for HZ (HZ-GB-2312 and HZ for
100 ISO-IR 165) and HZ8
101
102 =head1 DESCRIPTION
103
104 This module make the module Encode of Perl (5.7.3 or later)
105 to be able to encode/decode HZ and its variant coding systems.
106
107 Note that Encode::CN::HZ, standard module of Perl, can encode/decode
108 HZ (HZ-GB-2312 in IANA name), but other variants such as
109 HZ8 can't be encoded/decode.
110
111 =head1 ENCODINGS
112
113 =over 4
114
115 =item hz-gb-2312
116
117 HZ 7-bit encoding for Chinese with GB 2312-80,
118 defined by RFC 1842 and RFC 1843.
119 (Alias: hz, chinese-hz (emacsen), CP52936 (M$),
120 hz-gb2312)
121
122 Note that hz8 is also decodable with this encoding.
123
124 =item hz8
125
126 HZ 8-bit encoding for Chinese with GB 2312-80.
127 (Alias: x-hz8)
128
129 Note that hz-gb-2312 is also decodable with this encoding.
130
131 =item hz-iso-ir-165
132
133 HZ 7-bit encoding for Chinese with ISO-IR 165
134 (syntax is same as hz-gb-2312, but coded character
135 set is differ) (Alias: hz-isoir165, x-iso-ir-165-hz)
136
137 Note that you need load Encode module that support
138 'cn-gb-isoir165' encoding (defined by RFC 1922),
139 such as Encode::ISO2022::EightBit.
140
141 Also note that since ISO-IR 165 is nealy superset of GB 2312-80,
142 hz-iso-ir-165 is also considerable as a superset of
143 hz-gb-2312.
144
145 =back
146
147 =head1 TODO
148
149 Support of extended HZ such as EHZ.
150
151 =head1 ACKNOWLEDGEMENTS
152
153 Most part of this module is taken from Encode::CN::HZ.
154
155 =head1 COPYRIGHT
156
157 Copyright 2002 Wakaba <w@suika.fam.cx>
158
159 This library is free software; you can redistribute it
160 and/or modify it under the same terms as Perl itself.
161
162 =cut
163
164 # $Date: 2002/09/16 06:35:16 $
165 ### HZ.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24