/[suikacvs]/perl/lib/Encode/Unicode/UTF1.pm
Suika

Contents of /perl/lib/Encode/Unicode/UTF1.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (hide annotations) (download)
Mon Sep 23 08:28:39 2002 UTC (22 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.4: +2 -3 lines
2002-09-23  Nanashi-san

	* UTF8.pm, UTF9.pm: New modules.  (Committed by
	Wakaba <w@suika.fam.cx>.)

1 wakaba 1.3 require 5.7.3;
2 wakaba 1.1 package Encode::Unicode::UTF1;
3     use strict;
4 wakaba 1.3 use vars qw($VERSION);
5 wakaba 1.5 $VERSION=do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
6 wakaba 1.1 use base qw(Encode::Encoding);
7 wakaba 1.2 __PACKAGE__->Define (qw/ISO-10646-UTF-1 utf-1 utf1 csISO10646UTF1 iso-ir-178/);
8 wakaba 1.1
9     sub encode ($$;$) {
10     my ($obj, $str, $chk) = @_;
11     my @str = split //, $str;
12     $str = '';
13     for (@str) {
14     $str .= join '', map {chr} _ucs4toutf1 (ord $_);
15     }
16     $_[1] = '' if $chk;
17     return $str;
18     }
19    
20     sub decode ($$;$) {
21     my ($obj, $str, $chk) = @_;
22     $str =~ s{([\xA0-\xF5].|[\xF6-\xFB]..|[\xFC-\xFF]....)}{
23     chr (_utf1toucs4 (unpack 'C*', $1))
24     }gex;
25     $_[1] = '' if $chk;
26     return $str;
27     }
28    
29     sub _ucs4toutf1 ($) {
30     my $U = shift;
31     return ($U) if $U <= 0x9F;
32     return (0xA0, $U) if $U <= 0xFF;
33     return (int (0xA1 + ( $U - 0x100 ) / 0xBE),
34     T( ( $U - 0x100 ) % 0xBE)) if $U <= 0x4015;
35     return (int (0xF6 + ( $U - 0x4016 ) / ( 0xBE**2 )),
36     T( ( $U - 0x4016 ) / 0xBE % 0xBE),
37     T( ( $U - 0x4016 ) % 0xBE )) if $U <= 0x38E2D;
38     return (int (0xFC + ( $U - 0x38E2E ) / ( 0xBE**4 )),
39     T( ( $U - 0x38E2E ) / ( 0xBE**3 ) % 0xBE),
40     T( ( $U - 0x38E2E ) / ( 0xBE**2 ) % 0xBE),
41     T( ( $U - 0x38E2E ) / 0xBE % 0xBE),
42     T( ( $U - 0x38E2E ) % 0xBE ));
43     }
44    
45     sub _utf1toucs4 (@) {
46     my ($x, $y, $z, $v, $w) = @_;
47     return $x if @_ == 1 && $x <= 0x9F;
48     return $y if $x == 0xA0;
49     return ($x - 0xA1) * 0xBE + U($y) + 0x100
50     if 0xA1 <= $x && $x <= 0xF5;
51     return ($x - 0xF6) * ( 0xBE**2 ) + U($y) * 0xBE + U($z) + 0x4016
52     if 0xF6 <= $x && $x <= 0xFB;
53     return ($x - 0xFC) * ( 0xBE**4 ) + U($y) * ( 0xBE**3 )
54     + U($z) * ( 0xBE**2 ) + U($v) * 0xBE
55     + U($w) + 0x38E2E;
56     }
57    
58    
59     sub T ($) {
60     my $z = int (shift);
61     return $z + 0x21 if $z <= 0x5D;
62     return $z + 0x42 if $z <= 0xBD;
63     return $z - 0xBE if $z <= 0xDE;
64     return $z - 0x60;
65     }
66    
67    
68     sub U ($) {
69     my $z = shift;
70     return $z + 0xBE if $z <= 0x20;
71     return $z - 0x21 if $z <= 0x7E;
72     return $z + 0x60 if $z <= 0x9F;
73     return $z - 0x42;
74     }
75    
76     1;
77     __END__
78    
79     =head1 NAME
80    
81     Encode::Unicode::UTF1 --- Encode/decode of UTF-1
82    
83     =head1 EXAMPLE
84    
85     use Encode;
86     my $s = "some string in utf-8 (to be converted to utf-\x{4E00})";
87     print encode ('utf-1', $s);
88    
89     my $b = "\xE0\xC2\xE0\xC4\xE0\xC6\xE0\xC8\xE0\xCA";
90     print decode ('utf-1', $b);
91    
92     =head1 LICENSE
93    
94 wakaba 1.4 Copyright 2002 Wakaba E<lt>w@suika.fam.cxE<gt>.
95 wakaba 1.1
96     This program is free software; you can redistribute it and/or modify
97     it under the terms of the GNU General Public License as published by
98     the Free Software Foundation; either version 2 of the License, or
99     (at your option) any later version.
100    
101     This program is distributed in the hope that it will be useful,
102     but WITHOUT ANY WARRANTY; without even the implied warranty of
103     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
104     GNU General Public License for more details.
105    
106     You should have received a copy of the GNU General Public License
107     along with this program; see the file COPYING. If not, write to
108     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
109     Boston, MA 02111-1307, USA.
110    
111 wakaba 1.4 =cut
112 wakaba 1.1
113 wakaba 1.5 # $Date: 2002/09/20 14:01:45 $
114 wakaba 1.4 ### UTF1.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24