/[suikacvs]/perl/lib/Encode/HZ.pm
Suika

Contents of /perl/lib/Encode/HZ.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations) (download)
Mon Sep 16 06:35:16 2002 UTC (22 years, 1 month ago) by wakaba
Branch: MAIN
Changes since 1.2: +46 -9 lines
2002-09-16  Wakaba <w@suika.fam.cx>

	* ISO2022.pm:
	- (iso2022_to_internal): Invoke G1,G2,G3 by locking
	shifts of ESC Fs style.
	- (make_initial_charset): Create charset definition
	of 94^2 DRCSes.
	- (undef_char): New option.
	- (pod:TODO): New section.
	* HZ.pm:
	- (__hz_encoding_name): New function.
	- (Encode::HZ): Added new alias names.
	- (Encode::HZ::HZ165): New package.
	- (pod:ENCODINGS): New section.

1 wakaba 1.1 package Encode::HZ;
2     use strict;
3    
4     use vars qw($VERSION);
5 wakaba 1.3 $VERSION = do {my @r =(q$Revision: 1.2 $ =~ /\d+/g);sprintf "%d."."%02d" x $#r, @r};
6 wakaba 1.1
7     use Encode ();
8     require Encode::CN;
9     use base qw(Encode::Encoding);
10 wakaba 1.3 __PACKAGE__->Define(qw/hz chinese-hz hz-gb-2312 cp52936/);
11 wakaba 1.1
12     sub needs_lines { 1 }
13    
14     sub perlio_ok {
15     return 0; # for the time being
16     }
17    
18     sub decode
19     {
20     my ($obj,$str,$chk) = @_;
21 wakaba 1.3 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
22 wakaba 1.1
23     $str =~ s{~ # starting tilde
24     (?:
25     (~) # another tilde - escaped (set $1)
26     | # or
27     \x0D?\x0A # \n - output nothing
28     | # or
29     \{ # opening brace of GB data
30     ( # set $2 to any number of...
31     (?:
32     [^~] # non-tilde GB character
33     | # or
34     ~(?!\}) # tilde not followed by a closing brace
35     )*
36     )
37     ~\} # closing brace of GB data
38     | # XXX: invalid escape - maybe die on $chk?
39     )
40     }{
41     my ($t, $c) = ($1, $2);
42     if (defined $t) { # two tildes make one tilde
43     '~';
44     } elsif (defined $c) { # decode the characters
45 wakaba 1.3 $c =~ tr/\x21-\x7E/\xA1-\xFE/;
46 wakaba 1.1 $gb->decode($c, $chk);
47     } else { # ~\n and invalid escape = ''
48     '';
49     }
50     }egx;
51    
52     return $str;
53     }
54    
55     sub encode ($$;$) {
56     my ($obj,$str,$chk) = @_;
57     $_[1] = '';
58 wakaba 1.3 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
59 wakaba 1.1
60     $str =~ s/~/~~/g;
61     $str = $gb->encode ($str, 1);
62    
63     $str =~ s{ ((?:[\xA1-\xFE][\xA1-\xFE])+) }{
64     my $c = $1;
65     $c =~ tr/\xA1-\xFE/\x21-\x7E/;
66     sprintf q(~{%s~}), $c;
67     }goex;
68     $str;
69     }
70    
71 wakaba 1.3 sub __hz_encoding_name { 'euc-cn' }
72    
73 wakaba 1.1 package Encode::HZ::HZ8;
74     use base qw(Encode::HZ);
75 wakaba 1.2 __PACKAGE__->Define(qw/hz8 x-hz8/);
76 wakaba 1.1
77     sub encode ($$;$) {
78     my ($obj,$str,$chk) = @_;
79     $_[1] = '';
80 wakaba 1.3 my $gb = Encode::find_encoding($obj->__hz_encoding_name);
81 wakaba 1.1
82     $str =~ s/~/~~/g;
83     $str = $gb->encode ($str, 1);
84    
85     $str =~ s{ ((?:[\xA1-\xFE][\xA1-\xFE])+) }{
86     sprintf q(~{%s~}), $1;
87     }goex;
88     $str;
89     }
90    
91 wakaba 1.3 package Encode::HZ::HZ165;
92     use base qw(Encode::HZ);
93     __PACKAGE__->Define(qw/hz-isoir165 x-iso-ir-165-hz/);
94    
95     sub __hz_encoding_name { 'cn-gb-isoir165' }
96    
97 wakaba 1.1 1;
98     __END__
99    
100     =head1 NAME
101    
102 wakaba 1.3 Encode::HZ --- Encode module for HZ (HZ-GB-2312 and HZ for
103     ISO-IR 165) and HZ8
104 wakaba 1.1
105     =head1 DESCRIPTION
106    
107     This module make the module Encode of Perl (5.7.3 or later)
108     to be able to encode/decode HZ and its variant coding systems.
109    
110     Note that Encode::CN::HZ, standard module of Perl, can encode/decode
111     HZ (HZ-GB-2312 in IANA name), but other variants such as
112     HZ8 can't be encoded/decode.
113 wakaba 1.2
114 wakaba 1.3 =head1 ENCODINGS
115    
116     =over 4
117    
118     =item hz-gb-2312
119    
120     HZ 7-bit encoding for Chinese with GB 2312-80,
121     defined by RFC 1842 and RFC 1843.
122     (Alias: hz, chinese-hz (emacsen), CP52936 (M$))
123    
124     =item hz8
125    
126     HZ 8-bit encoding for Chinese with GB 2312-80.
127     (Alias: x-hz8)
128    
129     =item hz-isoir165
130    
131     HZ 7-bit encoding for Chinese with ISO-IR 165
132     (syntax is same as hz-gb-2312, but coded character
133     set is differ) (Alias: x-iso-ir-165-hz)
134    
135     Note that you need load Encode module that support
136     'cn-gb-isoir165' encoding (defined by RFC 1922),
137     such as Encode::ISO2022::EightBit.
138    
139     =back
140    
141 wakaba 1.2 =head1 TODO
142    
143     Support of extended HZ such as EHZ.
144 wakaba 1.1
145     =head1 ACKNOWLEDGEMENTS
146    
147     Most part of this module is taken from Encode::CN::HZ.
148    
149     =head1 COPYRIGHT
150    
151     Copyright 2002 Wakaba <w@suika.fam.cx>
152    
153     This library is free software; you can redistribute it
154     and/or modify it under the same terms as Perl itself.
155    
156     =cut
157 wakaba 1.3
158     # $Date: $
159     ### HZ.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24