3 |
|
|
4 |
use vars qw($VERSION); |
use vars qw($VERSION); |
5 |
$VERSION = do {my @r =(q$Revision$ =~ /\d+/g);sprintf "%d."."%02d" x $#r, @r}; |
$VERSION = do {my @r =(q$Revision$ =~ /\d+/g);sprintf "%d."."%02d" x $#r, @r}; |
|
|
|
|
use Encode (); |
|
|
require Encode::CN; |
|
6 |
use base qw(Encode::Encoding); |
use base qw(Encode::Encoding); |
7 |
__PACKAGE__->Define(qw/hz chinese-hz hz-gb-2312 cp52936/); |
__PACKAGE__->Define(qw/hz chinese-hz hz-gb-2312 hz-gb2312 cp52936/); |
8 |
|
|
9 |
sub needs_lines { 1 } |
sub needs_lines { 1 } |
10 |
|
|
25 |
| # or |
| # or |
26 |
\{ # opening brace of GB data |
\{ # opening brace of GB data |
27 |
( # set $2 to any number of... |
( # set $2 to any number of... |
28 |
(?: |
(?:[\x21-\x7D][\x21-\x7E])* |
|
[^~] # non-tilde GB character |
|
|
| # or |
|
|
~(?!\}) # tilde not followed by a closing brace |
|
|
)* |
|
29 |
) |
) |
30 |
~\} # closing brace of GB data |
~\} # closing brace of GB data |
31 |
| # XXX: invalid escape - maybe die on $chk? |
| |
32 |
|
\{ |
33 |
|
((?:[\x21-\x7D][\x21-\x7E])+[\x0D\x0A]) |
34 |
|
# | # XXX: invalid escape - maybe die on $chk? |
35 |
) |
) |
36 |
}{ |
}{ |
37 |
my ($t, $c) = ($1, $2); |
my ($t, $c, $d) = ($1, $2, $3); |
38 |
if (defined $t) { # two tildes make one tilde |
if (defined $t) { # two tildes make one tilde |
39 |
'~'; |
'~'; |
40 |
} elsif (defined $c) { # decode the characters |
} elsif (defined $c) { # decode the characters |
41 |
$c =~ tr/\x21-\x7E/\xA1-\xFE/; |
$c =~ tr/\x21-\x7E/\xA1-\xFE/; |
42 |
$gb->decode($c, $chk); |
$gb->decode($c, $chk); |
43 |
|
} elsif (defined $d) { # decode the characters |
44 |
|
$d =~ tr/\x21-\x7E/\xA1-\xFE/; |
45 |
|
$gb->decode($d, $chk); |
46 |
} else { # ~\n and invalid escape = '' |
} else { # ~\n and invalid escape = '' |
47 |
''; |
''; |
48 |
} |
} |
89 |
|
|
90 |
package Encode::HZ::HZ165; |
package Encode::HZ::HZ165; |
91 |
use base qw(Encode::HZ); |
use base qw(Encode::HZ); |
92 |
__PACKAGE__->Define(qw/hz-isoir165 x-iso-ir-165-hz/); |
__PACKAGE__->Define(qw/hz-iso-ir-165 hz-isoir165 x-iso-ir-165-hz/); |
93 |
|
|
94 |
sub __hz_encoding_name { 'cn-gb-isoir165' } |
sub __hz_encoding_name { 'cn-gb-isoir165' } |
95 |
|
|
118 |
|
|
119 |
HZ 7-bit encoding for Chinese with GB 2312-80, |
HZ 7-bit encoding for Chinese with GB 2312-80, |
120 |
defined by RFC 1842 and RFC 1843. |
defined by RFC 1842 and RFC 1843. |
121 |
(Alias: hz, chinese-hz (emacsen), CP52936 (M$)) |
(Alias: hz, chinese-hz (emacsen), CP52936 (M$), |
122 |
|
hz-gb2312) |
123 |
|
|
124 |
|
Note that hz8 is also decodable with this encoding. |
125 |
|
|
126 |
=item hz8 |
=item hz8 |
127 |
|
|
128 |
HZ 8-bit encoding for Chinese with GB 2312-80. |
HZ 8-bit encoding for Chinese with GB 2312-80. |
129 |
(Alias: x-hz8) |
(Alias: x-hz8) |
130 |
|
|
131 |
=item hz-isoir165 |
Note that hz-gb-2312 is also decodable with this encoding. |
132 |
|
|
133 |
|
=item hz-iso-ir-165 |
134 |
|
|
135 |
HZ 7-bit encoding for Chinese with ISO-IR 165 |
HZ 7-bit encoding for Chinese with ISO-IR 165 |
136 |
(syntax is same as hz-gb-2312, but coded character |
(syntax is same as hz-gb-2312, but coded character |
137 |
set is different) (Alias: x-iso-ir-165-hz) |
set is different) (Alias: hz-isoir165, x-iso-ir-165-hz) |
138 |
|
|
139 |
Note that you need to load an Encode module that supports |
Note that you need to load an Encode module that supports |
140 |
'cn-gb-isoir165' encoding (defined by RFC 1922), |
'cn-gb-isoir165' encoding (defined by RFC 1922), |
141 |
such as Encode::ISO2022::EightBit. |
such as Encode::ISO2022::EightBit. |
142 |
|
|
143 |
|
Also note that since ISO-IR 165 is nearly a superset of GB 2312-80, |
144 |
|
hz-iso-ir-165 can also be considered a superset of |
145 |
|
hz-gb-2312. |
146 |
|
|
147 |
=back |
=back |
148 |
|
|
149 |
=head1 TODO |
=head1 TODO |