/[suikacvs]/perl/lib/Encode/Unicode/Escape.pm
Suika

Contents of /perl/lib/Encode/Unicode/Escape.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Tue Dec 24 05:33:25 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
x-u-escaped support

1 wakaba 1.1 =head1 NAME
2    
3     Encode::Unicode::Escape --- Encode/decode of Unicode escape (x-u-escaped)
4     of Java
5    
6     =head1 ENCODINGS
7    
8     =over 4
9    
10     =cut
11    
12     package Encode::Unicode::Escape;
13     use strict;
14     use vars qw($VERSION);
15     $VERSION=do{my @r=(q$Revision: 1.2 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
16    
17     use base qw(Encode::Encoding);
18     __PACKAGE__->Define (qw/unicode-escape x-u-escaped/);
19    
20     =item unicode-escape
21    
22     Unicode escape encoding used with Java, Mozilla, etc. (Alias: x-u-escaped)
23    
24     =cut
25    
# encode ($obj, $str, $chk)
#
# Convert the character string $str into its ASCII-only Unicode-escape
# form and return it as an octet string.
#
#   * Every character outside US-ASCII becomes \uXXXX.  A character above
#     U+FFFF is written as a UTF-16 surrogate pair (\uD8xx\uDCxx), which is
#     the representation the Java Language Specification uses for
#     supplementary characters.
#   * An existing escape \uXXXX (with any number of "u"s) gains one extra
#     "u", so that decoding gives the original text back.
#   * A doubled backslash is copied through unchanged, so that the "u"
#     following it is never mistaken for the start of an escape.
#
# $obj is the encoding object (unused).  When $chk is true the caller's
# source buffer ($_[1]) is emptied, signalling that all input was consumed.
sub encode ($$;$) {
    use integer;
    my ($obj, $str, $chk) = @_;
    $_[1] = '' if $chk;
    $str =~ s{([^\x00-\x7F])|\\\\|\\(u+[0-9A-Fa-f]{4})}{
        my ($char, $escape) = ($1, $2);
        if (defined $char) {
            ## Non-ASCII character.
            my $code = ord $char;
            if ($code < 0x10000) {
                sprintf '\u%04X', $code;
            } elsif ($code <= 0x10FFFF) {
                ## Supplementary character: high surrogate, low surrogate.
                my $offset = $code - 0x10000;
                sprintf '\u%04X\u%04X',
                    0xD800 + $offset / 0x400,
                    0xDC00 + $offset % 0x400;
            } else {
                ## Beyond the Unicode range; no surrogate pair exists,
                ## so keep the legacy eight-digit form rather than lose data.
                sprintf '\U%08X', $code;
            }
        } elsif (defined $escape) {
            ## Existing \u+.... escape: add one more "u".
            '\u' . $escape;
        } else {
            ## Doubled backslash.
            '\\\\';
        }
    }ge;
    ## The result is pure ASCII, so dropping the UTF-8 flag is lossless.
    Encode::_utf8_off ($str);
    return $str;
}
48     # chr ((ord ($u) - 0x10000) / 0x400 + 0xD800)
49     # chr ((ord ($u) - 0x10000) % 0x400 + 0xDC00)
50    
# decode ($obj, $str, $chk)
#
# Convert the Unicode-escaped octet string $str back into a character
# string and return it.
#
#   * \uXXXX (exactly one "u") becomes the character U+XXXX.
#   * A high-surrogate escape immediately followed by a low-surrogate
#     escape (\uD8xx\uDCxx, one "u" each) becomes the single supplementary
#     character they represent, as in UTF-16.
#   * \uu...XXXX (two or more "u"s) loses one "u" and stays an escape.
#   * A doubled backslash is copied through unchanged, so that the "u"
#     following it is not treated as an escape.
#
# $obj is the encoding object (unused).  When $chk is true the caller's
# source buffer ($_[1]) is emptied, signalling that all input was consumed.
sub decode ($$;$) {
    no warnings;
    my ($obj, $str, $chk) = @_;
    $_[1] = '' if $chk;
    $str =~ s{
        \\\\
      | \\u([Dd][89ABab][0-9A-Fa-f]{2})\\u([Dd][C-Fc-f][0-9A-Fa-f]{2})
      | \\(u+)([0-9A-Fa-f]{4})
    }{
        my ($high, $low, $us, $hex) = ($1, $2, $3, $4);
        if (defined $high) {
            ## Surrogate pair: rebuild the supplementary code point.
            chr (0x10000 + (hex ($high) - 0xD800) * 0x400
                         + (hex ($low)  - 0xDC00));
        } elsif (not defined $us) {
            ## Doubled backslash.
            '\\\\';
        } elsif ($us eq 'u') {
            chr (hex $hex);
        } else {
            ## Multi-"u" escape: strip one "u".
            '\\' . substr ($us, 1) . $hex;
        }
    }gex;
    return $str;
}
68     # chr (0x10000+(ord($u1)-0xD800)*0x400+(ord($u2)-0xDC00))
69    
70     =back
71    
72     =head1 BUGS
73    
74     I don't know how characters at U+10000 or above should be treated,
75     so this implementation may be incorrect...
76    
77     =head1 SEE ALSO
78    
79     "Java Language Specification" -- 3.3 Unicode Escapes.
80     <http://java.sun.com/docs/books/jls/second_edition/html/lexical.doc.html#100850>,
81     <http://java.sun.com/docs/books/jls/first_edition/html/3.doc.html#100850>.
82     Japanese translation is available as TR X 0005:2002. See
83     <http://www.y-adagio.com/public/standards/tr_javalang2/lexical.doc.html>,
84     <http://www.y-adagio.com/public/standards/tr_javalang/3.doc.htm>.
85    
86     =head1 LICENSE
87    
88     Copyright 2002 Nanashi-san <nanashi-san@nanashi.invalid>
89    
90     This program is free software; you can redistribute it and/or modify
91     it under the terms of the GNU General Public License as published by
92     the Free Software Foundation; either version 2 of the License, or
93     (at your option) any later version.
94    
95     This program is distributed in the hope that it will be useful,
96     but WITHOUT ANY WARRANTY; without even the implied warranty of
97     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
98     GNU General Public License for more details.
99    
100     You should have received a copy of the GNU General Public License
101     along with this program; see the file COPYING. If not, write to
102     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
103     Boston, MA 02111-1307, USA.
104    
105     =cut
106    
107     1; # $Date: 2002/09/15 04:15:51 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24