/[suikacvs]/perl/lib/Encode/Unicode/UTF1.pm
Suika

Contents of /perl/lib/Encode/Unicode/UTF1.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.5 - (show annotations) (download)
Mon Sep 23 08:28:39 2002 UTC (22 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
Changes since 1.4: +2 -3 lines
2002-09-23  Nanashi-san

	* UTF8.pm, UTF9.pm: New modules.  (Committed by
	Wakaba <w@suika.fam.cx>.)

1 require 5.7.3;
2 package Encode::Unicode::UTF1;
3 use strict;
4 use vars qw($VERSION);
5 $VERSION=do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
6 use base qw(Encode::Encoding);
7 __PACKAGE__->Define (qw/ISO-10646-UTF-1 utf-1 utf1 csISO10646UTF1 iso-ir-178/);
8
9 sub encode ($$;$) {
10 my ($obj, $str, $chk) = @_;
11 my @str = split //, $str;
12 $str = '';
13 for (@str) {
14 $str .= join '', map {chr} _ucs4toutf1 (ord $_);
15 }
16 $_[1] = '' if $chk;
17 return $str;
18 }
19
20 sub decode ($$;$) {
21 my ($obj, $str, $chk) = @_;
22 $str =~ s{([\xA0-\xF5].|[\xF6-\xFB]..|[\xFC-\xFF]....)}{
23 chr (_utf1toucs4 (unpack 'C*', $1))
24 }gex;
25 $_[1] = '' if $chk;
26 return $str;
27 }
28
29 sub _ucs4toutf1 ($) {
30 my $U = shift;
31 return ($U) if $U <= 0x9F;
32 return (0xA0, $U) if $U <= 0xFF;
33 return (int (0xA1 + ( $U - 0x100 ) / 0xBE),
34 T( ( $U - 0x100 ) % 0xBE)) if $U <= 0x4015;
35 return (int (0xF6 + ( $U - 0x4016 ) / ( 0xBE**2 )),
36 T( ( $U - 0x4016 ) / 0xBE % 0xBE),
37 T( ( $U - 0x4016 ) % 0xBE )) if $U <= 0x38E2D;
38 return (int (0xFC + ( $U - 0x38E2E ) / ( 0xBE**4 )),
39 T( ( $U - 0x38E2E ) / ( 0xBE**3 ) % 0xBE),
40 T( ( $U - 0x38E2E ) / ( 0xBE**2 ) % 0xBE),
41 T( ( $U - 0x38E2E ) / 0xBE % 0xBE),
42 T( ( $U - 0x38E2E ) % 0xBE ));
43 }
44
45 sub _utf1toucs4 (@) {
46 my ($x, $y, $z, $v, $w) = @_;
47 return $x if @_ == 1 && $x <= 0x9F;
48 return $y if $x == 0xA0;
49 return ($x - 0xA1) * 0xBE + U($y) + 0x100
50 if 0xA1 <= $x && $x <= 0xF5;
51 return ($x - 0xF6) * ( 0xBE**2 ) + U($y) * 0xBE + U($z) + 0x4016
52 if 0xF6 <= $x && $x <= 0xFB;
53 return ($x - 0xFC) * ( 0xBE**4 ) + U($y) * ( 0xBE**3 )
54 + U($z) * ( 0xBE**2 ) + U($v) * 0xBE
55 + U($w) + 0x38E2E;
56 }
57
58
59 sub T ($) {
60 my $z = int (shift);
61 return $z + 0x21 if $z <= 0x5D;
62 return $z + 0x42 if $z <= 0xBD;
63 return $z - 0xBE if $z <= 0xDE;
64 return $z - 0x60;
65 }
66
67
68 sub U ($) {
69 my $z = shift;
70 return $z + 0xBE if $z <= 0x20;
71 return $z - 0x21 if $z <= 0x7E;
72 return $z + 0x60 if $z <= 0x9F;
73 return $z - 0x42;
74 }
75
76 1;
77 __END__
78
79 =head1 NAME
80
81 Encode::Unicode::UTF1 --- Encode/decode of UTF-1
82
83 =head1 EXAMPLE
84
85 use Encode;
86 my $s = "some string in utf-8 (to be converted to utf-\x{4E00})";
87 print encode ('utf-1', $s);
88
89 my $b = "\xE0\xC2\xE0\xC4\xE0\xC6\xE0\xC8\xE0\xCA";
90 print decode ('utf-1', $b);
91
92 =head1 LICENSE
93
94 Copyright 2002 Wakaba E<lt>w@suika.fam.cxE<gt>.
95
96 This program is free software; you can redistribute it and/or modify
97 it under the terms of the GNU General Public License as published by
98 the Free Software Foundation; either version 2 of the License, or
99 (at your option) any later version.
100
101 This program is distributed in the hope that it will be useful,
102 but WITHOUT ANY WARRANTY; without even the implied warranty of
103 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
104 GNU General Public License for more details.
105
106 You should have received a copy of the GNU General Public License
107 along with this program; see the file COPYING. If not, write to
108 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
109 Boston, MA 02111-1307, USA.
110
111 =cut
112
113 # $Date: 2002/09/20 14:01:45 $
114 ### UTF1.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24