/[suikacvs]/perl/lib/Encode/Unicode/UTF1.pm
Suika

Contents of /perl/lib/Encode/Unicode/UTF1.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations) (download)
Fri Sep 20 14:01:45 2002 UTC (21 years, 8 months ago) by wakaba
Branch: MAIN
Changes since 1.3: +5 -7 lines
2002-09-20  Wakaba <w@suika.fam.cx>

	* ISO2022.pm:
	- (iso2022_to_internal): New function.
	- (_iso2022_to_internal): Renamed from iso2022_to_internal.
	- (iso2022_to_internal): Experimental support of DOCS.
	- (internal_to_iso2022): Output in UCS coding systems
	if the character is unable to be encoded in ISO/IEC 2022
	coded character sets.
	- (_i2o): New procedure.
	- ($C->{option}->{designate_to}->{coding_system}): New option
	property object.
	- ($C->{coding_system}): New property.
	- (%CODING_SYSTEM): New hash.  (Alias to Encode::Charset's one.)
	* Charset.pm (make_initial_coding_system): Set 'reset_state'
	property with 1 value to coding systems of DOCS with 02/14 I byte.

1 wakaba 1.3 require 5.7.3;
2 wakaba 1.1 package Encode::Unicode::UTF1;
3     use strict;
4 wakaba 1.3 use vars qw($VERSION);
5 wakaba 1.4 $VERSION=do{my @r=(q$Revision: 1.3 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
6 wakaba 1.1 use base qw(Encode::Encoding);
7 wakaba 1.2 __PACKAGE__->Define (qw/ISO-10646-UTF-1 utf-1 utf1 csISO10646UTF1 iso-ir-178/);
8 wakaba 1.1
9     sub encode ($$;$) {
10     my ($obj, $str, $chk) = @_;
11     my @str = split //, $str;
12     $str = '';
13     for (@str) {
14     $str .= join '', map {chr} _ucs4toutf1 (ord $_);
15     }
16     $_[1] = '' if $chk;
17     return $str;
18     }
19    
20     sub decode ($$;$) {
21     my ($obj, $str, $chk) = @_;
22     $str =~ s{([\xA0-\xF5].|[\xF6-\xFB]..|[\xFC-\xFF]....)}{
23     chr (_utf1toucs4 (unpack 'C*', $1))
24     }gex;
25     Encode::_utf8_on ($str);
26     $_[1] = '' if $chk;
27     return $str;
28     }
29    
30     sub _ucs4toutf1 ($) {
31     my $U = shift;
32     return ($U) if $U <= 0x9F;
33     return (0xA0, $U) if $U <= 0xFF;
34     return (int (0xA1 + ( $U - 0x100 ) / 0xBE),
35     T( ( $U - 0x100 ) % 0xBE)) if $U <= 0x4015;
36     return (int (0xF6 + ( $U - 0x4016 ) / ( 0xBE**2 )),
37     T( ( $U - 0x4016 ) / 0xBE % 0xBE),
38     T( ( $U - 0x4016 ) % 0xBE )) if $U <= 0x38E2D;
39     return (int (0xFC + ( $U - 0x38E2E ) / ( 0xBE**4 )),
40     T( ( $U - 0x38E2E ) / ( 0xBE**3 ) % 0xBE),
41     T( ( $U - 0x38E2E ) / ( 0xBE**2 ) % 0xBE),
42     T( ( $U - 0x38E2E ) / 0xBE % 0xBE),
43     T( ( $U - 0x38E2E ) % 0xBE ));
44     }
45    
46     sub _utf1toucs4 (@) {
47     my ($x, $y, $z, $v, $w) = @_;
48     return $x if @_ == 1 && $x <= 0x9F;
49     return $y if $x == 0xA0;
50     return ($x - 0xA1) * 0xBE + U($y) + 0x100
51     if 0xA1 <= $x && $x <= 0xF5;
52     return ($x - 0xF6) * ( 0xBE**2 ) + U($y) * 0xBE + U($z) + 0x4016
53     if 0xF6 <= $x && $x <= 0xFB;
54     return ($x - 0xFC) * ( 0xBE**4 ) + U($y) * ( 0xBE**3 )
55     + U($z) * ( 0xBE**2 ) + U($v) * 0xBE
56     + U($w) + 0x38E2E;
57     }
58    
59    
60     sub T ($) {
61     my $z = int (shift);
62     return $z + 0x21 if $z <= 0x5D;
63     return $z + 0x42 if $z <= 0xBD;
64     return $z - 0xBE if $z <= 0xDE;
65     return $z - 0x60;
66     }
67    
68    
69     sub U ($) {
70     my $z = shift;
71     return $z + 0xBE if $z <= 0x20;
72     return $z - 0x21 if $z <= 0x7E;
73     return $z + 0x60 if $z <= 0x9F;
74     return $z - 0x42;
75     }
76    
77     1;
78     __END__
79    
80     =head1 NAME
81    
82     Encode::Unicode::UTF1 --- Encode/decode of UTF-1
83    
84     =head1 EXAMPLE
85    
86     use Encode;
87     my $s = "some string in utf-8 (to be converted to utf-\x{4E00})";
88     print encode ('utf-1', $s);
89    
90     my $b = "\xE0\xC2\xE0\xC4\xE0\xC6\xE0\xC8\xE0\xCA";
91     print decode ('utf-1', $b);
92    
93     =head1 LICENSE
94    
95 wakaba 1.4 Copyright 2002 Wakaba E<lt>w@suika.fam.cxE<gt>.
96 wakaba 1.1
97     This program is free software; you can redistribute it and/or modify
98     it under the terms of the GNU General Public License as published by
99     the Free Software Foundation; either version 2 of the License, or
100     (at your option) any later version.
101    
102     This program is distributed in the hope that it will be useful,
103     but WITHOUT ANY WARRANTY; without even the implied warranty of
104     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
105     GNU General Public License for more details.
106    
107     You should have received a copy of the GNU General Public License
108     along with this program; see the file COPYING. If not, write to
109     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
110     Boston, MA 02111-1307, USA.
111    
112 wakaba 1.4 =cut
113 wakaba 1.1
114 wakaba 1.4 # $Date: $
115     ### UTF1.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24