/[suikacvs]/perl/lib/Encode/HZ.pm
Suika

Diff of /perl/lib/Encode/HZ.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.3 by wakaba, Mon Sep 16 06:35:16 2002 UTC revision 1.5 by wakaba, Thu Dec 12 08:17:16 2002 UTC
# Line 3  use strict; Line 3  use strict;
3    
4  use vars qw($VERSION);  use vars qw($VERSION);
5  $VERSION = do {my @r =(q$Revision$ =~ /\d+/g);sprintf "%d."."%02d" x $#r, @r};  $VERSION = do {my @r =(q$Revision$ =~ /\d+/g);sprintf "%d."."%02d" x $#r, @r};
   
 use Encode ();  
 require Encode::CN;  
6  use base qw(Encode::Encoding);  use base qw(Encode::Encoding);
7  __PACKAGE__->Define(qw/hz chinese-hz hz-gb-2312 cp52936/);  __PACKAGE__->Define(qw/hz chinese-hz hz-gb-2312 hz-gb2312 cp52936/);
8    
9  sub needs_lines  { 1 }  sub needs_lines  { 1 }
10    
# Line 28  sub decode Line 25  sub decode
25                  |               #     or                  |               #     or
26              \{                  # opening brace of GB data              \{                  # opening brace of GB data
27                  (               #  set $2 to any number of...                  (               #  set $2 to any number of...
28                      (?:                      (?:[\x21-\x7D][\x21-\x7E])*
                         [^~]    #  non-tilde GB character  
                             |   #     or  
                         ~(?!\}) #  tilde not followed by a closing brace  
                     )*  
29                  )                  )
30              ~\}                 # closing brace of GB data              ~\}                 # closing brace of GB data
31                  |               # XXX: invalid escape - maybe die on $chk?                  |
32                \{
33                    ((?:[\x21-\x7D][\x21-\x7E])+[\x0D\x0A])
34            #       |               # XXX: invalid escape - maybe die on $chk?
35          )          )
36      }{      }{
37        my ($t, $c) = ($1, $2);        my ($t, $c, $d) = ($1, $2, $3);
38        if (defined $t) { # two tildes make one tilde        if (defined $t) { # two tildes make one tilde
39          '~';          '~';
40        } elsif (defined $c) {    # decode the characters        } elsif (defined $c) {    # decode the characters
41          $c =~ tr/\x21-\x7E/\xA1-\xFE/;          $c =~ tr/\x21-\x7E/\xA1-\xFE/;
42          $gb->decode($c, $chk);          $gb->decode($c, $chk);
43          } elsif (defined $d) {    # decode the characters
44            $d =~ tr/\x21-\x7E/\xA1-\xFE/;
45            $gb->decode($d, $chk);
46        } else {  # ~\n and invalid escape = ''        } else {  # ~\n and invalid escape = ''
47          '';          '';
48        }        }
# Line 90  sub encode ($$;$) { Line 89  sub encode ($$;$) {
89    
90  package Encode::HZ::HZ165;  package Encode::HZ::HZ165;
91  use base qw(Encode::HZ);  use base qw(Encode::HZ);
92  __PACKAGE__->Define(qw/hz-isoir165 x-iso-ir-165-hz/);  __PACKAGE__->Define(qw/hz-iso-ir-165 hz-isoir165 x-iso-ir-165-hz/);
93    
94  sub __hz_encoding_name { 'cn-gb-isoir165' }  sub __hz_encoding_name { 'cn-gb-isoir165' }
95    
# Line 119  HZ8 can't be encoded/decode. Line 118  HZ8 can't be encoded/decode.
118    
119  HZ 7-bit encoding for Chinese with GB 2312-80,  HZ 7-bit encoding for Chinese with GB 2312-80,
120  defined by RFC 1842 and RFC 1843.  defined by RFC 1842 and RFC 1843.
121  (Alias: hz, chinese-hz (emacsen), CP52936 (M$))  (Alias: hz, chinese-hz (emacsen), CP52936 (M$),
122    hz-gb2312)
123    
124    Note that hz8 is also decodable with this encoding.
125    
126  =item hz8  =item hz8
127    
128  HZ 8-bit encoding for Chinese with GB 2312-80.  HZ 8-bit encoding for Chinese with GB 2312-80.
129  (Alias: x-hz8)  (Alias: x-hz8)
130    
131  =item hz-isoir165  Note that hz-gb-2312 is also decodable with this encoding.
132    
133    =item hz-iso-ir-165
134    
135  HZ 7-bit encoding for Chinese with ISO-IR 165  HZ 7-bit encoding for Chinese with ISO-IR 165
136  (syntax is the same as hz-gb-2312, but the coded character  (syntax is the same as hz-gb-2312, but the coded character
137  set differs) (Alias: x-iso-ir-165-hz)  set differs) (Alias: hz-isoir165, x-iso-ir-165-hz)
138    
139  Note that you need to load an Encode module that supports  Note that you need to load an Encode module that supports
140  'cn-gb-isoir165' encoding (defined by RFC 1922),  'cn-gb-isoir165' encoding (defined by RFC 1922),
141  such as Encode::ISO2022::EightBit.  such as Encode::ISO2022::EightBit.
142    
143    Also note that since ISO-IR 165 is nearly a superset of GB 2312-80,
144    hz-iso-ir-165 can also be considered a superset of
145    hz-gb-2312.
146    
147  =back  =back
148    
149  =head1 TODO  =head1 TODO

Legend:
Removed from v.1.3  
changed lines
  Added in v.1.5

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24