| 1 |
wakaba |
1.3 |
require 5.7.3;
|
| 2 |
wakaba |
1.1 |
package Encode::Unicode::UTF1;
|
| 3 |
|
|
use strict;
|
| 4 |
wakaba |
1.3 |
use vars qw($VERSION);
|
| 5 |
wakaba |
1.5 |
$VERSION=do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
|
| 6 |
wakaba |
1.1 |
use base qw(Encode::Encoding);
|
| 7 |
wakaba |
1.2 |
__PACKAGE__->Define (qw/ISO-10646-UTF-1 utf-1 utf1 csISO10646UTF1 iso-ir-178/);
|
| 8 |
wakaba |
1.1 |
|
| 9 |
|
|
sub encode ($$;$) {
|
| 10 |
|
|
my ($obj, $str, $chk) = @_;
|
| 11 |
|
|
my @str = split //, $str;
|
| 12 |
|
|
$str = '';
|
| 13 |
|
|
for (@str) {
|
| 14 |
|
|
$str .= join '', map {chr} _ucs4toutf1 (ord $_);
|
| 15 |
|
|
}
|
| 16 |
|
|
$_[1] = '' if $chk;
|
| 17 |
|
|
return $str;
|
| 18 |
|
|
}
|
| 19 |
|
|
|
| 20 |
|
|
sub decode ($$;$) {
|
| 21 |
|
|
my ($obj, $str, $chk) = @_;
|
| 22 |
|
|
$str =~ s{([\xA0-\xF5].|[\xF6-\xFB]..|[\xFC-\xFF]....)}{
|
| 23 |
|
|
chr (_utf1toucs4 (unpack 'C*', $1))
|
| 24 |
|
|
}gex;
|
| 25 |
|
|
$_[1] = '' if $chk;
|
| 26 |
|
|
return $str;
|
| 27 |
|
|
}
|
| 28 |
|
|
|
| 29 |
|
|
sub _ucs4toutf1 ($) {
|
| 30 |
|
|
my $U = shift;
|
| 31 |
|
|
return ($U) if $U <= 0x9F;
|
| 32 |
|
|
return (0xA0, $U) if $U <= 0xFF;
|
| 33 |
|
|
return (int (0xA1 + ( $U - 0x100 ) / 0xBE),
|
| 34 |
|
|
T( ( $U - 0x100 ) % 0xBE)) if $U <= 0x4015;
|
| 35 |
|
|
return (int (0xF6 + ( $U - 0x4016 ) / ( 0xBE**2 )),
|
| 36 |
|
|
T( ( $U - 0x4016 ) / 0xBE % 0xBE),
|
| 37 |
|
|
T( ( $U - 0x4016 ) % 0xBE )) if $U <= 0x38E2D;
|
| 38 |
|
|
return (int (0xFC + ( $U - 0x38E2E ) / ( 0xBE**4 )),
|
| 39 |
|
|
T( ( $U - 0x38E2E ) / ( 0xBE**3 ) % 0xBE),
|
| 40 |
|
|
T( ( $U - 0x38E2E ) / ( 0xBE**2 ) % 0xBE),
|
| 41 |
|
|
T( ( $U - 0x38E2E ) / 0xBE % 0xBE),
|
| 42 |
|
|
T( ( $U - 0x38E2E ) % 0xBE ));
|
| 43 |
|
|
}
|
| 44 |
|
|
|
| 45 |
|
|
sub _utf1toucs4 (@) {
|
| 46 |
|
|
my ($x, $y, $z, $v, $w) = @_;
|
| 47 |
|
|
return $x if @_ == 1 && $x <= 0x9F;
|
| 48 |
|
|
return $y if $x == 0xA0;
|
| 49 |
|
|
return ($x - 0xA1) * 0xBE + U($y) + 0x100
|
| 50 |
|
|
if 0xA1 <= $x && $x <= 0xF5;
|
| 51 |
|
|
return ($x - 0xF6) * ( 0xBE**2 ) + U($y) * 0xBE + U($z) + 0x4016
|
| 52 |
|
|
if 0xF6 <= $x && $x <= 0xFB;
|
| 53 |
|
|
return ($x - 0xFC) * ( 0xBE**4 ) + U($y) * ( 0xBE**3 )
|
| 54 |
|
|
+ U($z) * ( 0xBE**2 ) + U($v) * 0xBE
|
| 55 |
|
|
+ U($w) + 0x38E2E;
|
| 56 |
|
|
}
|
| 57 |
|
|
|
| 58 |
|
|
|
| 59 |
|
|
sub T ($) {
|
| 60 |
|
|
my $z = int (shift);
|
| 61 |
|
|
return $z + 0x21 if $z <= 0x5D;
|
| 62 |
|
|
return $z + 0x42 if $z <= 0xBD;
|
| 63 |
|
|
return $z - 0xBE if $z <= 0xDE;
|
| 64 |
|
|
return $z - 0x60;
|
| 65 |
|
|
}
|
| 66 |
|
|
|
| 67 |
|
|
|
| 68 |
|
|
sub U ($) {
|
| 69 |
|
|
my $z = shift;
|
| 70 |
|
|
return $z + 0xBE if $z <= 0x20;
|
| 71 |
|
|
return $z - 0x21 if $z <= 0x7E;
|
| 72 |
|
|
return $z + 0x60 if $z <= 0x9F;
|
| 73 |
|
|
return $z - 0x42;
|
| 74 |
|
|
}
|
| 75 |
|
|
|
| 76 |
|
|
1;
|
| 77 |
|
|
__END__
|
| 78 |
|
|
|
| 79 |
|
|
=head1 NAME
|
| 80 |
|
|
|
| 81 |
|
|
Encode::Unicode::UTF1 --- Encode/decode of UTF-1
|
| 82 |
|
|
|
| 83 |
|
|
=head1 EXAMPLE
|
| 84 |
|
|
|
| 85 |
|
|
use Encode;
|
| 86 |
|
|
my $s = "some string in utf-8 (to be converted to utf-\x{4E00})";
|
| 87 |
|
|
print encode ('utf-1', $s);
|
| 88 |
|
|
|
| 89 |
|
|
my $b = "\xE0\xC2\xE0\xC4\xE0\xC6\xE0\xC8\xE0\xCA";
|
| 90 |
|
|
print decode ('utf-1', $b);
|
| 91 |
|
|
|
| 92 |
|
|
=head1 LICENSE
|
| 93 |
|
|
|
| 94 |
wakaba |
1.4 |
Copyright 2002 Wakaba E<lt>w@suika.fam.cxE<gt>.
|
| 95 |
wakaba |
1.1 |
|
| 96 |
|
|
This program is free software; you can redistribute it and/or modify
|
| 97 |
|
|
it under the terms of the GNU General Public License as published by
|
| 98 |
|
|
the Free Software Foundation; either version 2 of the License, or
|
| 99 |
|
|
(at your option) any later version.
|
| 100 |
|
|
|
| 101 |
|
|
This program is distributed in the hope that it will be useful,
|
| 102 |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 103 |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 104 |
|
|
GNU General Public License for more details.
|
| 105 |
|
|
|
| 106 |
|
|
You should have received a copy of the GNU General Public License
|
| 107 |
|
|
along with this program; see the file COPYING. If not, write to
|
| 108 |
|
|
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
| 109 |
|
|
Boston, MA 02111-1307, USA.
|
| 110 |
|
|
|
| 111 |
wakaba |
1.4 |
=cut
|
| 112 |
wakaba |
1.1 |
|
| 113 |
wakaba |
1.5 |
# $Date: 2002/09/20 14:01:45 $
|
| 114 |
wakaba |
1.4 |
### UTF1.pm ends here
|