1 |
wakaba |
1.3 |
require 5.7.3;
|
2 |
wakaba |
1.1 |
package Encode::Unicode::UTF1;
|
3 |
|
|
use strict;
|
4 |
wakaba |
1.3 |
use vars qw($VERSION);
|
5 |
wakaba |
1.5 |
# Build $VERSION from the digits of the CVS Revision keyword: the first number
# is printed as-is and every following one is zero-padded to two digits
# (e.g. revision "1.4" yields "1.04").
$VERSION=do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
|
6 |
wakaba |
1.1 |
use base qw(Encode::Encoding);
|
7 |
wakaba |
1.2 |
# Register this class with Encode under the listed encoding names
# (Define is inherited from the Encode::Encoding base class).
__PACKAGE__->Define (qw/ISO-10646-UTF-1 utf-1 utf1 csISO10646UTF1 iso-ir-178/);
|
8 |
wakaba |
1.1 |
|
9 |
|
|
# Encode a Perl character string into UTF-1.
#
#   $_[0] - encoding object (not used by this method)
#   $_[1] - character string to encode
#   $_[2] - optional check flag; when true the caller's source string is
#           emptied in place, because all of it has been consumed
#
# Returns a string holding one character per UTF-1 byte value.
sub encode ($$;$) {
    my ($self, $text, $check) = @_;

    my $octets = '';
    for my $char (split //, $text) {
        # _ucs4toutf1 yields the byte values for one code point; turn each
        # value into a character and append the lot.
        $octets .= join '', map { chr $_ } _ucs4toutf1 (ord $char);
    }

    # In-place edit of the caller's variable, as the check flag requests.
    $_[1] = '' if $check;
    return $octets;
}
|
19 |
|
|
|
20 |
|
|
# Decode a string of UTF-1 octets into a Perl character string.
#
#   $obj - encoding object (not used by this method)
#   $str - octet string holding UTF-1 data
#   $chk - optional check flag; when true the caller's source string is
#          emptied in place, because all of it has been consumed
#
# Single bytes 0x00-0x9F pass through unchanged.  A lead byte 0xA0-0xFF is
# combined with the bytes after it only when those are legal UTF-1 trail
# bytes (0x21-0x7E or 0xA0-0xFF; after a 0xA0 lead, 0xA0-0xFF only).
# Control characters, SPACE and DEL are therefore never swallowed into a
# preceding lead byte.  A lead byte that is not followed by a complete,
# legal tail is left in place as the character with the same code.
#
# Returns the decoded character string.
sub decode ($$;$) {
    my ($obj, $str, $chk) = @_;

    $str =~ s{
        (   \xA0        [\xA0-\xFF]                 # U+00A0 .. U+00FF
        |   [\xA1-\xF5] [\x21-\x7E\xA0-\xFF]        # U+0100 .. U+4015
        |   [\xF6-\xFB] [\x21-\x7E\xA0-\xFF]{2}     # U+4016 .. U+38E2D
        |   [\xFC-\xFF] [\x21-\x7E\xA0-\xFF]{4}     # U+38E2E and above
        )
    }{
        chr (_utf1toucs4 (unpack 'C*', $1))
    }gex;

    # In-place edit of the caller's variable, as the check flag requests.
    $_[1] = '' if $chk;
    return $str;
}
|
28 |
|
|
|
29 |
|
|
# Convert one UCS-4 code point into its UTF-1 byte sequence.
#
#   $_[0] - code point (non-negative number)
#
# Returns a list of 1, 2, 3 or 5 byte values.  For the multi-byte forms the
# code point's offset into its range is split into base-0xBE digits, and
# each trailing digit is mapped to a byte by T().
sub _ucs4toutf1 ($) {
    my ($ucs) = @_;

    # One byte: values up to 0x9F stand for themselves.
    return ($ucs) if $ucs <= 0x9F;

    # Two bytes: values up to 0xFF are prefixed with 0xA0.
    return (0xA0, $ucs) if $ucs <= 0xFF;

    # Two bytes: lead byte 0xA1 and up.
    if ($ucs <= 0x4015) {
        my $off = $ucs - 0x100;
        return (int (0xA1 + $off / 0xBE),
                T($off % 0xBE));
    }

    # Three bytes: lead byte 0xF6 and up.
    if ($ucs <= 0x38E2D) {
        my $off = $ucs - 0x4016;
        return (int (0xF6 + $off / ( 0xBE**2 )),
                T($off / 0xBE % 0xBE),
                T($off % 0xBE));
    }

    # Five bytes: lead byte 0xFC and up.
    my $off = $ucs - 0x38E2E;
    return (int (0xFC + $off / ( 0xBE**4 )),
            T($off / ( 0xBE**3 ) % 0xBE),
            T($off / ( 0xBE**2 ) % 0xBE),
            T($off / 0xBE % 0xBE),
            T($off % 0xBE));
}
|
44 |
|
|
|
45 |
|
|
# Convert the byte values of one UTF-1 sequence into a UCS-4 code point.
#
#   @_ - byte values of a single sequence, lead byte first
#
# Returns the code point as a number.  The form of the sequence is chosen
# from the lead byte; trail bytes are turned back into base-0xBE digits by
# U().
sub _utf1toucs4 (@) {
    my ($lead, $t1, $t2, $t3, $t4) = @_;

    # A lone byte up to 0x9F stands for itself.
    if (@_ == 1 && $lead <= 0x9F) {
        return $lead;
    }
    # After a 0xA0 prefix the next byte is the code point.
    elsif ($lead == 0xA0) {
        return $t1;
    }
    # Two-byte form.
    elsif (0xA1 <= $lead && $lead <= 0xF5) {
        my $high = ($lead - 0xA1) * 0xBE;
        return $high + U($t1) + 0x100;
    }
    # Three-byte form.
    elsif (0xF6 <= $lead && $lead <= 0xFB) {
        my $high = ($lead - 0xF6) * ( 0xBE**2 );
        return $high + U($t1) * 0xBE + U($t2) + 0x4016;
    }

    # Everything else is treated as the five-byte form.
    my $high = ($lead - 0xFC) * ( 0xBE**4 );
    return $high + U($t1) * ( 0xBE**3 )
                 + U($t2) * ( 0xBE**2 ) + U($t3) * 0xBE
                 + U($t4) + 0x38E2E;
}
|
57 |
|
|
|
58 |
|
|
|
59 |
|
|
# Map a number onto a UTF-1 trail byte value.
#
# The argument is truncated to an integer first, then shifted by range:
#   up to 0x5D -> value + 0x21
#   up to 0xBD -> value + 0x42
#   up to 0xDE -> value - 0xBE
#   above 0xDE -> value - 0x60
sub T ($) {
    my $digit = int $_[0];
    return $digit <= 0x5D ? $digit + 0x21
         : $digit <= 0xBD ? $digit + 0x42
         : $digit <= 0xDE ? $digit - 0xBE
         :                  $digit - 0x60;
}
|
66 |
|
|
|
67 |
|
|
|
68 |
|
|
# Map a UTF-1 trail byte value back onto the number T() made it from.
#
# The argument is shifted by range:
#   up to 0x20 -> value + 0xBE
#   up to 0x7E -> value - 0x21
#   up to 0x9F -> value + 0x60
#   above 0x9F -> value - 0x42
sub U ($) {
    my ($byte) = @_;
    if    ($byte <= 0x20) { return $byte + 0xBE }
    elsif ($byte <= 0x7E) { return $byte - 0x21 }
    elsif ($byte <= 0x9F) { return $byte + 0x60 }
    return $byte - 0x42;
}
|
75 |
|
|
|
76 |
|
|
1;
|
77 |
|
|
__END__
|
78 |
|
|
|
79 |
|
|
=head1 NAME
|
80 |
|
|
|
81 |
|
|
Encode::Unicode::UTF1 - Encoder and decoder for UTF-1 (ISO-10646-UTF-1)
|
82 |
|
|
|
83 |
|
|
=head1 EXAMPLE
|
84 |
|
|
|
85 |
|
|
use Encode;
|
86 |
|
|
my $s = "some string in utf-8 (to be converted to utf-\x{4E00})";
|
87 |
|
|
print encode ('utf-1', $s);
|
88 |
|
|
|
89 |
|
|
my $b = "\xE0\xC2\xE0\xC4\xE0\xC6\xE0\xC8\xE0\xCA";
|
90 |
|
|
print decode ('utf-1', $b);
|
91 |
|
|
|
92 |
|
|
=head1 LICENSE
|
93 |
|
|
|
94 |
wakaba |
1.4 |
Copyright 2002 Wakaba E<lt>w@suika.fam.cxE<gt>.
|
95 |
wakaba |
1.1 |
|
96 |
|
|
This program is free software; you can redistribute it and/or modify
|
97 |
|
|
it under the terms of the GNU General Public License as published by
|
98 |
|
|
the Free Software Foundation; either version 2 of the License, or
|
99 |
|
|
(at your option) any later version.
|
100 |
|
|
|
101 |
|
|
This program is distributed in the hope that it will be useful,
|
102 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
103 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
104 |
|
|
GNU General Public License for more details.
|
105 |
|
|
|
106 |
|
|
You should have received a copy of the GNU General Public License
|
107 |
|
|
along with this program; see the file COPYING. If not, write to
|
108 |
|
|
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
109 |
|
|
Boston, MA 02111-1307, USA.
|
110 |
|
|
|
111 |
wakaba |
1.4 |
=cut
|
112 |
wakaba |
1.1 |
|
113 |
wakaba |
1.5 |
# $Date: 2002/09/20 14:01:45 $
|
114 |
wakaba |
1.4 |
### UTF1.pm ends here
|