/[suikacvs]/perl/lib/Encode/Unicode/UTF1.pm
Suika

Contents of /perl/lib/Encode/Unicode/UTF1.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (show annotations) (download)
Fri Sep 20 14:01:45 2002 UTC (21 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.3: +5 -7 lines
2002-09-20  Wakaba <w@suika.fam.cx>

	* ISO2022.pm:
	- (iso2022_to_internal): New function.
	- (_iso2022_to_internal): Renamed from iso2022_to_internal.
	- (iso2022_to_internal): Experimental support of DOCS.
	- (internal_to_iso2022): Output in UCS coding systems
	if the character is unable to be encoded in ISO/IEC 2022
	coded character sets.
	- (_i2o): New procedure.
	- ($C->{option}->{designate_to}->{coding_system}): New option
	property object.
	- ($C->{coding_system}): New property.
	- (%CODING_SYSTEM): New hash.  (Alias to Encode::Charset's one.)
	* Charset.pm (make_initial_coding_system): Set 'reset_state'
	property with 1 value to coding systems of DOCS with 02/14 I byte.

1 require 5.7.3;
2 package Encode::Unicode::UTF1;
3 use strict;
4 use vars qw($VERSION);
5 $VERSION=do{my @r=(q$Revision: 1.3 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
6 use base qw(Encode::Encoding);
7 __PACKAGE__->Define (qw/ISO-10646-UTF-1 utf-1 utf1 csISO10646UTF1 iso-ir-178/);
8
9 sub encode ($$;$) {
10 my ($obj, $str, $chk) = @_;
11 my @str = split //, $str;
12 $str = '';
13 for (@str) {
14 $str .= join '', map {chr} _ucs4toutf1 (ord $_);
15 }
16 $_[1] = '' if $chk;
17 return $str;
18 }
19
20 sub decode ($$;$) {
21 my ($obj, $str, $chk) = @_;
22 $str =~ s{([\xA0-\xF5].|[\xF6-\xFB]..|[\xFC-\xFF]....)}{
23 chr (_utf1toucs4 (unpack 'C*', $1))
24 }gex;
25 Encode::_utf8_on ($str);
26 $_[1] = '' if $chk;
27 return $str;
28 }
29
30 sub _ucs4toutf1 ($) {
31 my $U = shift;
32 return ($U) if $U <= 0x9F;
33 return (0xA0, $U) if $U <= 0xFF;
34 return (int (0xA1 + ( $U - 0x100 ) / 0xBE),
35 T( ( $U - 0x100 ) % 0xBE)) if $U <= 0x4015;
36 return (int (0xF6 + ( $U - 0x4016 ) / ( 0xBE**2 )),
37 T( ( $U - 0x4016 ) / 0xBE % 0xBE),
38 T( ( $U - 0x4016 ) % 0xBE )) if $U <= 0x38E2D;
39 return (int (0xFC + ( $U - 0x38E2E ) / ( 0xBE**4 )),
40 T( ( $U - 0x38E2E ) / ( 0xBE**3 ) % 0xBE),
41 T( ( $U - 0x38E2E ) / ( 0xBE**2 ) % 0xBE),
42 T( ( $U - 0x38E2E ) / 0xBE % 0xBE),
43 T( ( $U - 0x38E2E ) % 0xBE ));
44 }
45
46 sub _utf1toucs4 (@) {
47 my ($x, $y, $z, $v, $w) = @_;
48 return $x if @_ == 1 && $x <= 0x9F;
49 return $y if $x == 0xA0;
50 return ($x - 0xA1) * 0xBE + U($y) + 0x100
51 if 0xA1 <= $x && $x <= 0xF5;
52 return ($x - 0xF6) * ( 0xBE**2 ) + U($y) * 0xBE + U($z) + 0x4016
53 if 0xF6 <= $x && $x <= 0xFB;
54 return ($x - 0xFC) * ( 0xBE**4 ) + U($y) * ( 0xBE**3 )
55 + U($z) * ( 0xBE**2 ) + U($v) * 0xBE
56 + U($w) + 0x38E2E;
57 }
58
59
60 sub T ($) {
61 my $z = int (shift);
62 return $z + 0x21 if $z <= 0x5D;
63 return $z + 0x42 if $z <= 0xBD;
64 return $z - 0xBE if $z <= 0xDE;
65 return $z - 0x60;
66 }
67
68
69 sub U ($) {
70 my $z = shift;
71 return $z + 0xBE if $z <= 0x20;
72 return $z - 0x21 if $z <= 0x7E;
73 return $z + 0x60 if $z <= 0x9F;
74 return $z - 0x42;
75 }
76
77 1;
78 __END__
79
80 =head1 NAME
81
82 Encode::Unicode::UTF1 --- Encode/decode of UTF-1
83
84 =head1 EXAMPLE
85
86 use Encode;
87 my $s = "some string in utf-8 (to be converted to utf-\x{4E00})";
88 print encode ('utf-1', $s);
89
90 my $b = "\xE0\xC2\xE0\xC4\xE0\xC6\xE0\xC8\xE0\xCA";
91 print decode ('utf-1', $b);
92
93 =head1 LICENSE
94
95 Copyright 2002 Wakaba E<lt>w@suika.fam.cxE<gt>.
96
97 This program is free software; you can redistribute it and/or modify
98 it under the terms of the GNU General Public License as published by
99 the Free Software Foundation; either version 2 of the License, or
100 (at your option) any later version.
101
102 This program is distributed in the hope that it will be useful,
103 but WITHOUT ANY WARRANTY; without even the implied warranty of
104 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
105 GNU General Public License for more details.
106
107 You should have received a copy of the GNU General Public License
108 along with this program; see the file COPYING. If not, write to
109 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
110 Boston, MA 02111-1307, USA.
111
112 =cut
113
114 # $Date: $
115 ### UTF1.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24