/[suikacvs]/perl/lib/Encode/HZ.pm
Suika

Contents of /perl/lib/Encode/HZ.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations) (download)
Mon Oct 14 06:58:35 2002 UTC (22 years ago) by wakaba
Branch: MAIN
Changes since 1.3: +16 -10 lines
2002-10-14  Nanashi-san

	* ISO2022.pm, SJIS.pm: Bug fix of utf8 flag control.
	(Committed by Wakaba <w@suika.fam.cx>.)

# Package preamble for Encode::HZ, a subclass of Encode::Encoding.
1 wakaba 1.1 package Encode::HZ;
2     use strict;
3    
4     use vars qw($VERSION);
# Derive $VERSION from the CVS Revision keyword: every run of digits is
# collected, the first is printed as-is and each following one is printed
# zero-padded to two digits after a dot (so "1.3" yields "1.03").
5 wakaba 1.4 $VERSION = do {my @r =(q$Revision: 1.3 $ =~ /\d+/g);sprintf "%d."."%02d" x $#r, @r};
6 wakaba 1.1 use base qw(Encode::Encoding);
# Pass the encoding names this class answers to to Define.
# NOTE(review): Define is not defined in this file; presumably it is the
# registration method inherited from Encode::Encoding -- confirm.
7 wakaba 1.4 __PACKAGE__->Define(qw/hz chinese-hz hz-gb-2312 hz-gb2312 cp52936/);
8 wakaba 1.1
# Always returns 1 (true).
# NOTE(review): presumably the Encode::Encoding hook that tells PerlIO this
# encoding must be fed whole lines -- confirm against Encode::Encoding.
9     sub needs_lines { 1 }
10    
# Always returns 0 (false); the inline remark marks this as provisional.
# NOTE(review): presumably the Encode::Encoding hook reporting whether the
# encoding may be used as a PerlIO layer -- confirm.
11     sub perlio_ok {
12     return 0; # for the time being
13     }
14    
# Decode HZ-escaped octets into a string.
#
# Arguments: ($obj, $str, $chk)
#   $obj - encoding object; its __hz_encoding_name method names the
#          underlying encoding that is looked up with Encode::find_encoding.
#   $str - the HZ data to decode (a copy is edited; $_[1] is not touched).
#   $chk - check value, passed through unchanged to the underlying decode.
#
# A single substitution pass rewrites every tilde escape:
#   "~~"               becomes a literal "~";
#   "~" + (CR) LF      is removed (line continuation);
#   "~{" ... "~}"      has its bytes 0x21-0x7E shifted up to 0xA1-0xFE and is
#                      then handed to the underlying encoding's decode;
#                      bytes outside 0x21-0x7E are left as they are by the tr;
#   any other "~"      matches the empty alternative and is dropped.
#
# Returns the rewritten string.
#
# NOTE(review): nothing here checks the result of find_encoding; if the
# underlying encoding is not available the first "~{...~}" run would fail on
# the $gb->decode call -- confirm how callers are expected to handle that.
15     sub decode
16     {
17     my ($obj,$str,$chk) = @_;
18 wakaba 1.3 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
19 wakaba 1.1
20     $str =~ s{~ # starting tilde
21     (?:
22     (~) # another tilde - escaped (set $1)
23     | # or
24     \x0D?\x0A # \n - output nothing
25     | # or
26     \{ # opening brace of GB data
27     ( # set $2 to any number of...
28     (?:
29     [^~] # non-tilde GB character
30     | # or
31     ~(?!\}) # tilde not followed by a closing brace
32     )*
33     )
34     ~\} # closing brace of GB data
35     | # XXX: invalid escape - maybe die on $chk?
36     )
37     }{
38     my ($t, $c) = ($1, $2);
39     if (defined $t) { # two tildes make one tilde
40     '~';
41     } elsif (defined $c) { # decode the characters
42 wakaba 1.3 $c =~ tr/\x21-\x7E/\xA1-\xFE/;
43 wakaba 1.1 $gb->decode($c, $chk);
44     } else { # ~\n and invalid escape = ''
45     '';
46     }
47     }egx;
48    
49     return $str;
50     }
51    
# Encode a character string into 7-bit HZ octets.
#
# Arguments: ($obj, $str, $chk)
#   $obj - encoding object; its __hz_encoding_name method names the
#          underlying double-byte encoding used for the conversion.
#   $str - the string to encode.
#   $chk - optional check value. When true, the caller's source string
#          ($_[1]) is emptied after a successful conversion (in-place edit).
#          When false or omitted the caller's string is left untouched, so
#          read-only values such as string literals can be encoded.
#
# Returns the HZ-encoded octets.
# Croaks if the underlying encoding is not available, or (from the
# underlying encoder, which is always called with check value 1) if the
# string contains a character that encoding cannot represent.
sub encode ($$;$) {
    my ($obj, $str, $chk) = @_;

    my $gb_name = $obj->__hz_encoding_name;
    my $gb      = Encode::find_encoding($gb_name);
    unless (defined $gb) {
        require Carp;
        Carp::croak("Encode::HZ: underlying encoding '$gb_name' is not available");
    }

    # The tilde is the HZ escape character, so literal tildes are doubled
    # before any escape sequences are generated.
    $str =~ s/~/~~/g;

    # Check value 1 is passed regardless of $chk, as in the original code.
    $str = $gb->encode($str, 1);

    # Wrap every run of 8-bit byte pairs in "~{" ... "~}" and shift the
    # bytes from 0xA1-0xFE down to 0x21-0x7E so that the output is 7-bit.
    $str =~ s{ ((?:[\xA1-\xFE][\xA1-\xFE])+) }{
        my $c = $1;
        $c =~ tr/\xA1-\xFE/\x21-\x7E/;
        sprintf q(~{%s~}), $c;
    }gex;

    # Consume the caller's source only when a check was requested; doing it
    # unconditionally destroyed the caller's data and died on literals.
    $_[1] = '' if $chk;

    return $str;
}
67    
# Name of the underlying encoding that decode and encode look up with
# Encode::find_encoding; the Encode::HZ::HZ165 package in this file
# overrides it.
68 wakaba 1.3 sub __hz_encoding_name { 'euc-cn' }
69    
# Encode::HZ::HZ8: subclass of Encode::HZ for the hz8 / x-hz8 names.
# It supplies its own encode; decode is inherited from Encode::HZ.
70 wakaba 1.1 package Encode::HZ::HZ8;
71     use base qw(Encode::HZ);
72 wakaba 1.2 __PACKAGE__->Define(qw/hz8 x-hz8/);
73 wakaba 1.1
# Encode a character string into HZ8 octets.
#
# The output uses the same "~{" ... "~}" framing as 7-bit HZ, but the
# double-byte data inside the frame keeps its 8-bit values (0xA1-0xFE).
#
# Arguments: ($obj, $str, $chk)
#   $obj - encoding object; its __hz_encoding_name method names the
#          underlying double-byte encoding used for the conversion.
#   $str - the string to encode.
#   $chk - optional check value. When true, the caller's source string
#          ($_[1]) is emptied after a successful conversion (in-place edit).
#          When false or omitted the caller's string is left untouched, so
#          read-only values such as string literals can be encoded.
#
# Returns the HZ8-encoded octets.
# Croaks if the underlying encoding is not available, or (from the
# underlying encoder, which is always called with check value 1) if the
# string contains a character that encoding cannot represent.
sub encode ($$;$) {
    my ($obj, $str, $chk) = @_;

    my $gb_name = $obj->__hz_encoding_name;
    my $gb      = Encode::find_encoding($gb_name);
    unless (defined $gb) {
        require Carp;
        Carp::croak("Encode::HZ::HZ8: underlying encoding '$gb_name' is not available");
    }

    # The tilde is the HZ escape character, so literal tildes are doubled
    # before any escape sequences are generated.
    $str =~ s/~/~~/g;

    # Check value 1 is passed regardless of $chk, as in the original code.
    $str = $gb->encode($str, 1);

    # Wrap every run of 8-bit byte pairs in "~{" ... "~}" without shifting.
    $str =~ s{ ((?:[\xA1-\xFE][\xA1-\xFE])+) }{
        sprintf q(~{%s~}), $1;
    }gex;

    # Consume the caller's source only when a check was requested; doing it
    # unconditionally destroyed the caller's data and died on literals.
    $_[1] = '' if $chk;

    return $str;
}
87    
# Encode::HZ::HZ165: subclass of Encode::HZ for the hz-iso-ir-165 names.
# Only the underlying encoding name differs; decode and encode are
# inherited from Encode::HZ.
88 wakaba 1.3 package Encode::HZ::HZ165;
89     use base qw(Encode::HZ);
90 wakaba 1.4 __PACKAGE__->Define(qw/hz-iso-ir-165 hz-isoir165 x-iso-ir-165-hz/);
91 wakaba 1.3
# Underlying encoding looked up by the inherited decode/encode.
92     sub __hz_encoding_name { 'cn-gb-isoir165' }
93    
94 wakaba 1.1 1;
95     __END__
96    
97     =head1 NAME
98    
99 wakaba 1.3 Encode::HZ --- Encode module for HZ (HZ-GB-2312 and HZ for
100     ISO-IR 165) and HZ8
101 wakaba 1.1
102     =head1 DESCRIPTION
103    
104     This module enables Perl's Encode module (Perl 5.7.3 or later)
105     to encode and decode HZ and its variant coding systems.
106    
107     Note that Encode::CN::HZ, a standard module of Perl, can encode/decode
108     HZ (HZ-GB-2312 in IANA name), but other variants such as
109     HZ8 cannot be encoded or decoded with it.
110 wakaba 1.2
111 wakaba 1.3 =head1 ENCODINGS
112    
113     =over 4
114    
115     =item hz-gb-2312
116    
117     HZ 7-bit encoding for Chinese with GB 2312-80,
118     defined by RFC 1842 and RFC 1843.
119 wakaba 1.4 (Alias: hz, chinese-hz (emacsen), CP52936 (M$),
120     hz-gb2312)
121    
122     Note that hz8 is also decodable with this encoding.
123 wakaba 1.3
124     =item hz8
125    
126     HZ 8-bit encoding for Chinese with GB 2312-80.
127     (Alias: x-hz8)
128    
129 wakaba 1.4 Note that hz-gb-2312 is also decodable with this encoding.
130    
131     =item hz-iso-ir-165
132 wakaba 1.3
133     HZ 7-bit encoding for Chinese with ISO-IR 165
134     (the syntax is the same as hz-gb-2312, but the coded character
135 wakaba 1.4 set differs) (Alias: hz-isoir165, x-iso-ir-165-hz)
136 wakaba 1.3
137     Note that you need to load an Encode module that supports
138     the 'cn-gb-isoir165' encoding (defined by RFC 1922),
139     such as Encode::ISO2022::EightBit.
140    
141 wakaba 1.4 Also note that since ISO-IR 165 is nearly a superset of GB 2312-80,
142     hz-iso-ir-165 can also be regarded as a superset of
143     hz-gb-2312.
144    
145 wakaba 1.3 =back
146    
147 wakaba 1.2 =head1 TODO
148    
149     Support of extended HZ such as EHZ.
150 wakaba 1.1
151     =head1 ACKNOWLEDGEMENTS
152    
153     Most of this module is taken from Encode::CN::HZ.
154    
155     =head1 COPYRIGHT
156    
157     Copyright 2002 Wakaba <w@suika.fam.cx>
158    
159     This library is free software; you can redistribute it
160     and/or modify it under the same terms as Perl itself.
161    
162     =cut
163 wakaba 1.3
164 wakaba 1.4 # $Date: 2002/09/16 06:35:16 $
165 wakaba 1.3 ### HZ.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24