/[suikacvs]/perl/lib/Encode/Unicode/UTF9.pm
Suika

Contents of /perl/lib/Encode/Unicode/UTF9.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Mon Sep 23 08:28:39 2002 UTC (22 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
2002-09-23  Nanashi-san

	* UTF8.pm, UTF9.pm: New modules.  (Committed by
	Wakaba <w@suika.fam.cx>.)

1 wakaba 1.1 =head1 NAME
2    
3     Encode::Unicode::UTF9 --- Encode/decode of UTF-9
4    
5     =head1 ENCODINGS
6    
7     =over 4
8    
9     =item utf-9
10    
11     UTF-9, defined in draft-abela-utf9-00. (Alias: utf9)
12    
13     =back
14    
15     =cut
16    
17     require v5.7.3;
18     package Encode::Unicode::UTF9;
19     use strict;
20     use vars qw($VERSION);
21     $VERSION=do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
22     use base qw(Encode::Encoding);
23     __PACKAGE__->Define (qw/utf-9 utf9/);
24    
25     my %_4to9;
26     sub encode ($$;$) {
27     my ($obj, $str, $chk) = @_;
28     my $r = '';
29     for (split //, $str) {
30     unless ($_4to9{$_}) {
31     my $U = ord $_;
32     if ($U <= 0x7F) {
33     $_4to9{$_} = $_;
34     } else {
35     $_4to9{$_} = _ucs4_to_utf9 ($U);
36     }
37     }
38     $r .= $_4to9{$_};
39     }
40     $_[1] = '' if $chk;
41     return $r;
42     }
43    
44     my %_9to4;
45     sub decode ($$;$) {
46     my ($obj, $str, $chk) = @_;
47     $str =~ s{
48     ([\x80-\x8F][\x80-\xFF])
49     |([\x90-\x93][\x80-\xFF][\x80-\xFF])
50     |([\x94-\x97][\x80-\xFF][\x80-\xFF][\x80-\xFF])
51     |([\x98-\x9F][\x80-\xFF][\x80-\xFF][\x80-\xFF][\x80-\xFF])
52     |([\x80-\xFF])
53     }{
54     my ($o2,$o3,$o4,$o5,$o1) = ($1,$2,$3,$4,$5);
55     unless ($_9to4{$o2.$o3.$o4.$o5.$o1}) {
56     if ($o1) {
57     $_9to4{$o1} = $o1;
58     } elsif ($o2) {
59     my @o = split //, $o2;
60     $_9to4{$o2} =
61     chr (((ord ($o[0]) & 0x7F) << 7) + (ord ($o[1]) & 0x7F));
62     } elsif ($o3) {
63     my @o = split //, $o3;
64     $_9to4{$o3} =
65     chr (((ord ($o[0]) & 0x03) << 14) + ((ord ($o[1]) & 0x7F) << 7)
66     + (ord ($o[2]) & 0x7F));
67     } elsif ($o4) {
68     my @o = split //, $o4;
69     $_9to4{$o4} =
70     chr (((ord ($o[0]) & 0x03) << 21) + ((ord ($o[1]) & 0x7F) << 14)
71     + ((ord ($o[2]) & 0x7F) << 7) + (ord ($o[3]) & 0x7F));
72     } else {
73     my @o = split //, $o5;
74     $_9to4{$o5} =
75     chr (((ord ($o[0]) & 0x07) << 28) + ((ord ($o[1]) & 0x7F) << 21)
76     + ((ord ($o[2]) & 0x7F) << 14) + ((ord ($o[3]) & 0x7F) << 7)
77     + (ord ($o[4]) & 0x7F));
78     }
79     }
80     $_9to4{$o2.$o3.$o4.$o5.$o1};
81     }goex;
82     $_[1] = '' if $chk;
83     return $str;
84     }
85    
86     sub _ucs4_to_utf9 ($) {
87     my $U = shift;
88     if ($U <= 0x007F || (0x00A0 <= $U && $U <= 0x00FF)) {
89     return pack 'C', $U;
90     } elsif ($U <= 0x07FF) {
91     return pack 'C2', (0x80 | ($U >> 7)), (0x80 | ($U & 0x7F));
92     } elsif ($U <= 0xFFFF) {
93     return pack 'C3', (0x90 | ($U >> 14)), (0x80 | (($U >> 7) & 0x7F)),
94     (0x80 | ($U & 0x7F));
95     } elsif ($U <= 0x7FFFFF) {
96     return pack 'C4', (0x94 | ($U >> 21)), (0x80 | (($U >> 14) & 0x7F)),
97     (0x80 | (($U >> 7) & 0x7F)), (0x80 | ($U & 0x7F));
98     } else {#if ($U <= 0x7FFFFFFF) {
99     return pack 'C5', (0x98 | (($U >> 28) & 0x07)),
100     (0x80 | (($U >> 21) & 0x7F)), (0x80 | (($U >> 14) & 0x7F)),
101     (0x80 | (($U >> 7) & 0x7F)), (0x80 | ($U & 0x7F));
102     }
103     }
104    
105     1;
106    
107     =head1 LICENSE
108    
109     Copyright 2002 Nanashi-san
110    
111     This program is free software; you can redistribute it and/or modify
112     it under the terms of the GNU General Public License as published by
113     the Free Software Foundation; either version 2 of the License, or
114     (at your option) any later version.
115    
116     This program is distributed in the hope that it will be useful,
117     but WITHOUT ANY WARRANTY; without even the implied warranty of
118     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
119     GNU General Public License for more details.
120    
121     You should have received a copy of the GNU General Public License
122     along with this program; see the file COPYING. If not, write to
123     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
124     Boston, MA 02111-1307, USA.
125    
126     =cut
127    
128     ## $Date: 2002/09/15 04:15:51 $
129     ### UTF9.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24