/[suikacvs]/perl/lib/Encode/HZ.pm
Suika

Diff of /perl/lib/Encode/HZ.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2 by wakaba, Sun Sep 15 04:15:51 2002 UTC revision 1.5 by wakaba, Thu Dec 12 08:17:16 2002 UTC
# Line 3  use strict; Line 3  use strict;
3    
4  use vars qw($VERSION);  use vars qw($VERSION);
5  $VERSION = do {my @r =(q$Revision$ =~ /\d+/g);sprintf "%d."."%02d" x $#r, @r};  $VERSION = do {my @r =(q$Revision$ =~ /\d+/g);sprintf "%d."."%02d" x $#r, @r};
   
 use Encode ();  
 require Encode::CN;  
6  use base qw(Encode::Encoding);  use base qw(Encode::Encoding);
7  __PACKAGE__->Define(qw/hz hz-gb-2312/);  __PACKAGE__->Define(qw/hz chinese-hz hz-gb-2312 hz-gb2312 cp52936/);
8    
9  sub needs_lines  { 1 }  sub needs_lines  { 1 }
10    
# Line 18  sub perlio_ok { Line 15  sub perlio_ok {
15  sub decode  sub decode
16  {  {
17      my ($obj,$str,$chk) = @_;      my ($obj,$str,$chk) = @_;
18      my $gb = Encode::find_encoding('gb2312-raw');      my $gb = Encode::find_encoding($obj->__hz_encoding_name);
19    
20      $str =~ s{~                 # starting tilde      $str =~ s{~                 # starting tilde
21          (?:          (?:
# Line 28  sub decode Line 25  sub decode
25                  |               #     or                  |               #     or
26              \{                  # opening brace of GB data              \{                  # opening brace of GB data
27                  (               #  set $2 to any number of...                  (               #  set $2 to any number of...
28                      (?:                      (?:[\x21-\x7D][\x21-\x7E])*
                         [^~]    #  non-tilde GB character  
                             |   #     or  
                         ~(?!\}) #  tilde not followed by a closing brace  
                     )*  
29                  )                  )
30              ~\}                 # closing brace of GB data              ~\}                 # closing brace of GB data
31                  |               # XXX: invalid escape - maybe die on $chk?                  |
32                \{
33                    ((?:[\x21-\x7D][\x21-\x7E])+[\x0D\x0A])
34            #       |               # XXX: invalid escape - maybe die on $chk?
35          )          )
36      }{      }{
37        my ($t, $c) = ($1, $2);        my ($t, $c, $d) = ($1, $2, $3);
38        if (defined $t) { # two tildes make one tilde        if (defined $t) { # two tildes make one tilde
39          '~';          '~';
40        } elsif (defined $c) {    # decode the characters        } elsif (defined $c) {    # decode the characters
41          $c =~ tr/\xA1-\xFE/\x21-\x7E/;          $c =~ tr/\x21-\x7E/\xA1-\xFE/;
42          $gb->decode($c, $chk);          $gb->decode($c, $chk);
43          } elsif (defined $d) {    # decode the characters
44            $d =~ tr/\x21-\x7E/\xA1-\xFE/;
45            $gb->decode($d, $chk);
46        } else {  # ~\n and invalid escape = ''        } else {  # ~\n and invalid escape = ''
47          '';          '';
48        }        }
# Line 55  sub decode Line 54  sub decode
54  sub encode ($$;$) {  sub encode ($$;$) {
55    my ($obj,$str,$chk) = @_;    my ($obj,$str,$chk) = @_;
56    $_[1] = '';    $_[1] = '';
57    my $gb = Encode::find_encoding('euc-cn');    my $gb = Encode::find_encoding($obj->__hz_encoding_name);
58        
59    $str =~ s/~/~~/g;    $str =~ s/~/~~/g;
60    $str = $gb->encode ($str, 1);    $str = $gb->encode ($str, 1);
# Line 68  sub encode ($$;$) { Line 67  sub encode ($$;$) {
67    $str;    $str;
68  }  }
69    
70  package Encode::HZ::HZ8;  sub __hz_encoding_name { 'euc-cn' }
71    
72    package Encode::HZ::HZ8;
73  use base qw(Encode::HZ);  use base qw(Encode::HZ);
74  __PACKAGE__->Define(qw/hz8 x-hz8/);  __PACKAGE__->Define(qw/hz8 x-hz8/);
75    
76  sub encode ($$;$) {  sub encode ($$;$) {
77    my ($obj,$str,$chk) = @_;    my ($obj,$str,$chk) = @_;
78    $_[1] = '';    $_[1] = '';
79    my $gb = Encode::find_encoding('euc-cn');    my $gb = Encode::find_encoding($obj->__hz_encoding_name);
80        
81    $str =~ s/~/~~/g;    $str =~ s/~/~~/g;
82    $str = $gb->encode ($str, 1);    $str = $gb->encode ($str, 1);
# Line 87  sub encode ($$;$) { Line 87  sub encode ($$;$) {
87    $str;    $str;
88  }  }
89    
90    package Encode::HZ::HZ165;
91    use base qw(Encode::HZ);
92    __PACKAGE__->Define(qw/hz-iso-ir-165 hz-isoir165 x-iso-ir-165-hz/);
93    
94    sub __hz_encoding_name { 'cn-gb-isoir165' }
95    
96  1;  1;
97  __END__  __END__
98    
   
99  =head1 NAME  =head1 NAME
100    
101  Encode::HZ --- Encode module for HZ (HZ-GB-2312), HZ8  Encode::HZ --- Encode module for HZ (HZ-GB-2312 and HZ for
102    ISO-IR 165) and HZ8
103    
104  =head1 DESCRIPTION  =head1 DESCRIPTION
105    
# Line 104  Note that Encode::CN::HZ, standard modul Line 110  Note that Encode::CN::HZ, standard modul
110  HZ (HZ-GB-2312 in IANA name), but other variants such as  HZ (HZ-GB-2312 in IANA name), but other variants such as
111  HZ8 can't be encoded/decode.  HZ8 can't be encoded/decode.
112    
113    =head1 ENCODINGS
114    
115    =over 4
116    
117    =item hz-gb-2312
118    
119    HZ 7-bit encoding for Chinese with GB 2312-80,
120    defined by RFC 1842 and RFC 1843.
121    (Alias: hz, chinese-hz (emacsen), CP52936 (M$),
122    hz-gb2312)
123    
124    Note that hz8 is also decodable with this encoding.
125    
126    =item hz8
127    
128    HZ 8-bit encoding for Chinese with GB 2312-80.
129    (Alias: x-hz8)
130    
131    Note that hz-gb-2312 is also decodable with this encoding.
132    
133    =item hz-iso-ir-165
134    
135    HZ 7-bit encoding for Chinese with ISO-IR 165
136    (syntax is same as hz-gb-2312, but coded character
137    set is differ) (Alias: hz-isoir165, x-iso-ir-165-hz)
138    
139    Note that you need load Encode module that support
140    'cn-gb-isoir165' encoding (defined by RFC 1922),
141    such as Encode::ISO2022::EightBit.
142    
143    Also note that since ISO-IR 165 is nealy superset of GB 2312-80,
144    hz-iso-ir-165 is also considerable as a superset of
145    hz-gb-2312.
146    
147    =back
148    
149  =head1 TODO  =head1 TODO
150    
151  Support of extended HZ such as EHZ.  Support of extended HZ such as EHZ.
# Line 120  This library is free software; you can r Line 162  This library is free software; you can r
162  and/or modify it under the same terms as Perl itself.  and/or modify it under the same terms as Perl itself.
163    
164  =cut  =cut
165    
166    # $Date$
167    ### HZ.pm ends here

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.5

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24