/[suikacvs]/perl/lib/Encode/Unicode/UTF9.pm
Suika

Contents of /perl/lib/Encode/Unicode/UTF9.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (show annotations) (download)
Mon Sep 23 08:28:39 2002 UTC (22 years, 2 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
2002-09-23  Nanashi-san

	* UTF8.pm, UTF9.pm: New modules.  (Committed by
	Wakaba <w@suika.fam.cx>.)

1 =head1 NAME
2
3 Encode::Unicode::UTF9 --- Encode/decode of UTF-9
4
5 =head1 ENCODINGS
6
7 =over 4
8
9 =item utf-9
10
11 UTF-9, defined in draft-abela-utf9-00. (Alias: utf9)
12
13 =back
14
15 =cut
16
17 require v5.7.3;
18 package Encode::Unicode::UTF9;
19 use strict;
20 use vars qw($VERSION);
21 $VERSION=do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
22 use base qw(Encode::Encoding);
23 __PACKAGE__->Define (qw/utf-9 utf9/);
24
25 my %_4to9;
26 sub encode ($$;$) {
27 my ($obj, $str, $chk) = @_;
28 my $r = '';
29 for (split //, $str) {
30 unless ($_4to9{$_}) {
31 my $U = ord $_;
32 if ($U <= 0x7F) {
33 $_4to9{$_} = $_;
34 } else {
35 $_4to9{$_} = _ucs4_to_utf9 ($U);
36 }
37 }
38 $r .= $_4to9{$_};
39 }
40 $_[1] = '' if $chk;
41 return $r;
42 }
43
44 my %_9to4;
45 sub decode ($$;$) {
46 my ($obj, $str, $chk) = @_;
47 $str =~ s{
48 ([\x80-\x8F][\x80-\xFF])
49 |([\x90-\x93][\x80-\xFF][\x80-\xFF])
50 |([\x94-\x97][\x80-\xFF][\x80-\xFF][\x80-\xFF])
51 |([\x98-\x9F][\x80-\xFF][\x80-\xFF][\x80-\xFF][\x80-\xFF])
52 |([\x80-\xFF])
53 }{
54 my ($o2,$o3,$o4,$o5,$o1) = ($1,$2,$3,$4,$5);
55 unless ($_9to4{$o2.$o3.$o4.$o5.$o1}) {
56 if ($o1) {
57 $_9to4{$o1} = $o1;
58 } elsif ($o2) {
59 my @o = split //, $o2;
60 $_9to4{$o2} =
61 chr (((ord ($o[0]) & 0x7F) << 7) + (ord ($o[1]) & 0x7F));
62 } elsif ($o3) {
63 my @o = split //, $o3;
64 $_9to4{$o3} =
65 chr (((ord ($o[0]) & 0x03) << 14) + ((ord ($o[1]) & 0x7F) << 7)
66 + (ord ($o[2]) & 0x7F));
67 } elsif ($o4) {
68 my @o = split //, $o4;
69 $_9to4{$o4} =
70 chr (((ord ($o[0]) & 0x03) << 21) + ((ord ($o[1]) & 0x7F) << 14)
71 + ((ord ($o[2]) & 0x7F) << 7) + (ord ($o[3]) & 0x7F));
72 } else {
73 my @o = split //, $o5;
74 $_9to4{$o5} =
75 chr (((ord ($o[0]) & 0x07) << 28) + ((ord ($o[1]) & 0x7F) << 21)
76 + ((ord ($o[2]) & 0x7F) << 14) + ((ord ($o[3]) & 0x7F) << 7)
77 + (ord ($o[4]) & 0x7F));
78 }
79 }
80 $_9to4{$o2.$o3.$o4.$o5.$o1};
81 }goex;
82 $_[1] = '' if $chk;
83 return $str;
84 }
85
86 sub _ucs4_to_utf9 ($) {
87 my $U = shift;
88 if ($U <= 0x007F || (0x00A0 <= $U && $U <= 0x00FF)) {
89 return pack 'C', $U;
90 } elsif ($U <= 0x07FF) {
91 return pack 'C2', (0x80 | ($U >> 7)), (0x80 | ($U & 0x7F));
92 } elsif ($U <= 0xFFFF) {
93 return pack 'C3', (0x90 | ($U >> 14)), (0x80 | (($U >> 7) & 0x7F)),
94 (0x80 | ($U & 0x7F));
95 } elsif ($U <= 0x7FFFFF) {
96 return pack 'C4', (0x94 | ($U >> 21)), (0x80 | (($U >> 14) & 0x7F)),
97 (0x80 | (($U >> 7) & 0x7F)), (0x80 | ($U & 0x7F));
98 } else {#if ($U <= 0x7FFFFFFF) {
99 return pack 'C5', (0x98 | (($U >> 28) & 0x07)),
100 (0x80 | (($U >> 21) & 0x7F)), (0x80 | (($U >> 14) & 0x7F)),
101 (0x80 | (($U >> 7) & 0x7F)), (0x80 | ($U & 0x7F));
102 }
103 }
104
105 1;
106
107 =head1 LICENSE
108
109 Copyright 2002 Nanashi-san
110
111 This program is free software; you can redistribute it and/or modify
112 it under the terms of the GNU General Public License as published by
113 the Free Software Foundation; either version 2 of the License, or
114 (at your option) any later version.
115
116 This program is distributed in the hope that it will be useful,
117 but WITHOUT ANY WARRANTY; without even the implied warranty of
118 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
119 GNU General Public License for more details.
120
121 You should have received a copy of the GNU General Public License
122 along with this program; see the file COPYING. If not, write to
123 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
124 Boston, MA 02111-1307, USA.
125
126 =cut
127
128 ## $Date: 2002/09/15 04:15:51 $
129 ### UTF9.pm ends here

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24