/[suikacvs]/perl/lib/Encode/MNEM/VIQR.pm
Suika

Contents of /perl/lib/Encode/MNEM/VIQR.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Thu Dec 12 08:17:16 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
]

1 wakaba 1.1
2     =head1 NAME
3    
4     Encode::MNEM::VIQR --- VIQR (Vietnamese Quoted Readable) encoding
5    
6     =head1 ENCODINGS
7    
8     =over 4
9    
10     =cut
11    
12     package Encode::MNEM::VIQR;
13     use strict;
14     our $VERSION = do{my @r=(q$Revision: 1.10 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
15     use base qw(Encode::Encoding);
16     __PACKAGE__->Define (qw!viqr csviqr!);
17    
18     our %_VIQR_TO_UCS = (
19     q[A(?] => "\x{1EB2}", ## LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
20     q[A(~] => "\x{1EB4}", ## LATIN CAPITAL LETTER A WITH BREVE AND TILDE
21     q[A^~] => "\x{1EAA}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
22     q[Y?] => "\x{1EF6}", ## LATIN CAPITAL LETTER Y WITH HOOK ABOVE
23     q[Y~] => "\x{1EF8}", ## LATIN CAPITAL LETTER Y WITH TILDE
24     q[Y.] => "\x{1EF4}", ## LATIN CAPITAL LETTER Y WITH DOT BELOW
25     q[A.] => "\x{1EA0}", ## LATIN CAPITAL LETTER A WITH DOT BELOW
26     q[A('] => "\x{1EAE}", ## LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
27     q[A(`] => "\x{1EB0}", ## LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
28     q[A(.] => "\x{1EB6}", ## LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
29     q[A^'] => "\x{1EA4}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
30     q[A^`] => "\x{1EA6}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
31     q[A^?] => "\x{1EA8}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
32     q[A^.] => "\x{1EAC}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
33     q[E~] => "\x{1EBC}", ## LATIN CAPITAL LETTER E WITH TILDE
34     q[E.] => "\x{1EB8}", ## LATIN CAPITAL LETTER E WITH DOT BELOW
35     q[E^'] => "\x{1EBE}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
36     q[E^`] => "\x{1EC0}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
37     q[E^?] => "\x{1EC2}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
38     q[E^~] => "\x{1EC4}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
39     q[E^.] => "\x{1EC6}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
40     q[O^'] => "\x{1ED0}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
41     q[O^`] => "\x{1ED2}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
42     q[O^?] => "\x{1ED4}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
43     q[O^~] => "\x{1ED6}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
44     q[O^.] => "\x{1ED8}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
45     q[O+.] => "\x{1EE2}", ## LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
46     q[O+'] => "\x{1EDA}", ## LATIN CAPITAL LETTER O WITH HORN AND ACUTE
47     q[O+`] => "\x{1EDC}", ## LATIN CAPITAL LETTER O WITH HORN AND GRAVE
48     q[O+?] => "\x{1EDE}", ## LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
49     q[I.] => "\x{1ECA}", ## LATIN CAPITAL LETTER I WITH DOT BELOW
50     q[O?] => "\x{1ECE}", ## LATIN CAPITAL LETTER O WITH HOOK ABOVE
51     q[O.] => "\x{1ECC}", ## LATIN CAPITAL LETTER O WITH DOT BELOW
52     q[I?] => "\x{1EC8}", ## LATIN CAPITAL LETTER I WITH HOOK ABOVE
53     q[U?] => "\x{1EE6}", ## LATIN CAPITAL LETTER U WITH HOOK ABOVE
54     q[U~] => "\x{0168}", ## LATIN CAPITAL LETTER U WITH TILDE
55     q[U.] => "\x{1EE4}", ## LATIN CAPITAL LETTER U WITH DOT BELOW
56     q[Y`] => "\x{1EF2}", ## LATIN CAPITAL LETTER Y WITH GRAVE
57     q[O~] => "\x{00D5}", ## LATIN CAPITAL LETTER O WITH TILDE
58     q[a('] => "\x{1EAF}", ## LATIN SMALL LETTER A WITH BREVE AND ACUTE
59     q[a(`] => "\x{1EB1}", ## LATIN SMALL LETTER A WITH BREVE AND GRAVE
60     q[a(.] => "\x{1EB7}", ## LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
61     q[a^'] => "\x{1EA5}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
62     q[a^`] => "\x{1EA7}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
63     q[a^?] => "\x{1EA9}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
64     q[a^.] => "\x{1EAD}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
65     q[e~] => "\x{1EBD}", ## LATIN SMALL LETTER E WITH TILDE
66     q[e.] => "\x{1EB9}", ## LATIN SMALL LETTER E WITH DOT BELOW
67     q[e^'] => "\x{1EBF}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
68     q[e^`] => "\x{1EC1}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
69     q[e^?] => "\x{1EC3}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
70     q[e^~] => "\x{1EC5}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
71     q[e^.] => "\x{1EC7}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
72     q[o^'] => "\x{1ED1}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
73     q[o^`] => "\x{1ED3}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
74     q[o^?] => "\x{1ED5}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
75     q[o^~] => "\x{1ED7}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
76     q[O+~] => "\x{1EE0}", ## LATIN CAPITAL LETTER O WITH HORN AND TILDE
77     q[O+] => "\x{01A0}", ## LATIN CAPITAL LETTER O WITH HORN
78     q[o^.] => "\x{1ED9}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
79     q[o+`] => "\x{1EDD}", ## LATIN SMALL LETTER O WITH HORN AND GRAVE
80     q[o+?] => "\x{1EDF}", ## LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE
81     q[i.] => "\x{1ECB}", ## LATIN SMALL LETTER I WITH DOT BELOW
82     q[U+.] => "\x{1EF0}", ## LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
83     q[U+'] => "\x{1EE8}", ## LATIN CAPITAL LETTER U WITH HORN AND ACUTE
84     q[U+`] => "\x{1EEA}", ## LATIN CAPITAL LETTER U WITH HORN AND GRAVE
85     q[U+?] => "\x{1EEC}", ## LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
86     q[o+] => "\x{01A1}", ## LATIN SMALL LETTER O WITH HORN
87     q[o+'] => "\x{1EDB}", ## LATIN SMALL LETTER O WITH HORN AND ACUTE
88     q[U+] => "\x{01AF}", ## LATIN CAPITAL LETTER U WITH HORN
89     q[A`] => "\x{00C0}", ## LATIN CAPITAL LETTER A WITH GRAVE
90     q[A'] => "\x{00C1}", ## LATIN CAPITAL LETTER A WITH ACUTE
91     q[A^] => "\x{00C2}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX
92     q[A~] => "\x{00C3}", ## LATIN CAPITAL LETTER A WITH TILDE
93     q[A?] => "\x{1EA2}", ## LATIN CAPITAL LETTER A WITH HOOK ABOVE
94     q[A(] => "\x{0102}", ## LATIN CAPITAL LETTER A WITH BREVE
95     q[a(?] => "\x{1EB3}", ## LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
96     q[a(~] => "\x{1EB5}", ## LATIN SMALL LETTER A WITH BREVE AND TILDE
97     q[E`] => "\x{00C8}", ## LATIN CAPITAL LETTER E WITH GRAVE
98     q[E'] => "\x{00C9}", ## LATIN CAPITAL LETTER E WITH ACUTE
99     q[E^] => "\x{00CA}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX
100     q[E?] => "\x{1EBA}", ## LATIN CAPITAL LETTER E WITH HOOK ABOVE
101     q[I`] => "\x{00CC}", ## LATIN CAPITAL LETTER I WITH GRAVE
102     q[I'] => "\x{00CD}", ## LATIN CAPITAL LETTER I WITH ACUTE
103     q[I~] => "\x{0128}", ## LATIN CAPITAL LETTER I WITH TILDE
104     q[y`] => "\x{1EF3}", ## LATIN SMALL LETTER Y WITH GRAVE
105     q[DD] => "\x{0110}", ## LATIN CAPITAL LETTER D WITH STROKE
106     q[u+'] => "\x{1EE9}", ## LATIN SMALL LETTER U WITH HORN AND ACUTE
107     q[O`] => "\x{00D2}", ## LATIN CAPITAL LETTER O WITH GRAVE
108     q[O'] => "\x{00D3}", ## LATIN CAPITAL LETTER O WITH ACUTE
109     q[O^] => "\x{00D4}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX
110     q[a.] => "\x{1EA1}", ## LATIN SMALL LETTER A WITH DOT BELOW
111     q[y?] => "\x{1EF7}", ## LATIN SMALL LETTER Y WITH HOOK ABOVE
112     q[u+`] => "\x{1EEB}", ## LATIN SMALL LETTER U WITH HORN AND GRAVE
113     q[u+?] => "\x{1EED}", ## LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE
114     q[U`] => "\x{00D9}", ## LATIN CAPITAL LETTER U WITH GRAVE
115     q[U'] => "\x{00DA}", ## LATIN CAPITAL LETTER U WITH ACUTE
116     q[y~] => "\x{1EF9}", ## LATIN SMALL LETTER Y WITH TILDE
117     q[y.] => "\x{1EF5}", ## LATIN SMALL LETTER Y WITH DOT BELOW
118     q[Y'] => "\x{00DD}", ## LATIN CAPITAL LETTER Y WITH ACUTE
119     q[o+~] => "\x{1EE1}", ## LATIN SMALL LETTER O WITH HORN AND TILDE
120     q[u+] => "\x{01B0}", ## LATIN SMALL LETTER U WITH HORN
121     q[a`] => "\x{00E0}", ## LATIN SMALL LETTER A WITH GRAVE
122     q[a'] => "\x{00E1}", ## LATIN SMALL LETTER A WITH ACUTE
123     q[a^] => "\x{00E2}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX
124     q[a~] => "\x{00E3}", ## LATIN SMALL LETTER A WITH TILDE
125     q[a?] => "\x{1EA3}", ## LATIN SMALL LETTER A WITH HOOK ABOVE
126     q[a(] => "\x{0103}", ## LATIN SMALL LETTER A WITH BREVE
127     q[u+~] => "\x{1EEF}", ## LATIN SMALL LETTER U WITH HORN AND TILDE
128     q[a^~] => "\x{1EAB}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
129     q[e`] => "\x{00E8}", ## LATIN SMALL LETTER E WITH GRAVE
130     q[e'] => "\x{00E9}", ## LATIN SMALL LETTER E WITH ACUTE
131     q[e^] => "\x{00EA}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX
132     q[e?] => "\x{1EBB}", ## LATIN SMALL LETTER E WITH HOOK ABOVE
133     q[i`] => "\x{00EC}", ## LATIN SMALL LETTER I WITH GRAVE
134     q[i'] => "\x{00ED}", ## LATIN SMALL LETTER I WITH ACUTE
135     q[i~] => "\x{0129}", ## LATIN SMALL LETTER I WITH TILDE
136     q[i?] => "\x{1EC9}", ## LATIN SMALL LETTER I WITH HOOK ABOVE
137     q[dd] => "\x{0111}", ## LATIN SMALL LETTER D WITH STROKE
138     q[u+.] => "\x{1EF1}", ## LATIN SMALL LETTER U WITH HORN AND DOT BELOW
139     q[o`] => "\x{00F2}", ## LATIN SMALL LETTER O WITH GRAVE
140     q[o'] => "\x{00F3}", ## LATIN SMALL LETTER O WITH ACUTE
141     q[o^] => "\x{00F4}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX
142     q[o~] => "\x{00F5}", ## LATIN SMALL LETTER O WITH TILDE
143     q[o?] => "\x{1ECF}", ## LATIN SMALL LETTER O WITH HOOK ABOVE
144     q[o.] => "\x{1ECD}", ## LATIN SMALL LETTER O WITH DOT BELOW
145     q[u.] => "\x{1EE5}", ## LATIN SMALL LETTER U WITH DOT BELOW
146     q[u`] => "\x{00F9}", ## LATIN SMALL LETTER U WITH GRAVE
147     q[u'] => "\x{00FA}", ## LATIN SMALL LETTER U WITH ACUTE
148     q[u~] => "\x{0169}", ## LATIN SMALL LETTER U WITH TILDE
149     q[u?] => "\x{1EE7}", ## LATIN SMALL LETTER U WITH HOOK ABOVE
150     q[y'] => "\x{00FD}", ## LATIN SMALL LETTER Y WITH ACUTE
151     q[o+.] => "\x{1EE3}", ## LATIN SMALL LETTER O WITH HORN AND DOT BELOW
152     q[U+~] => "\x{1EEE}", ## LATIN CAPITAL LETTER U WITH HORN AND TILDE
153    
154     ## Some people use [OoUu]\* instead of [OoUu]\+.
155     ## VNTeX uses [Oo][Oo]|[Uu][Uu] instead of [OoUu]\+.
156     );
157     our %_UCS_TO_VIQR = reverse %_VIQR_TO_UCS;
158    
159     =item viqr
160    
161     Vietnamese quoted readable -- the mnemonic encoding for
162     Vietnamese. (Alias: csviqr (IANA))
163    
164     =cut
165    
166     sub encode ($$;$) {
167     my ($obj, $str, $chk) = @_;
168     $_[1] = '' if $chk;
169     my $char = $obj->__reg_mnem;
170     $str =~ s{(\\(?:$char|.)|$char|\x01)}{
171     my $c = $1;
172     if (length ($c) > 1) {
173     "\\" . substr ($c, 0, 1) . "\\" . substr ($c, 1);
174     } else {
175     "\\" . $c;
176     }
177     }ge;
178     $str =~ s{([\x{00C0}-\x{01FF}\x{1EA0}-\x{1EFF}])}{
179     $_UCS_TO_VIQR{$1} || $1;
180     }ge;
181     if ($chk == 0x0100) {
182     $str =~ s/([^\x00-\x7F])/sprintf '\x{%04X}', ord $1/ge;
183     } elsif ($chk == 0x0200) {
184     $str =~ s/([^\x00-\x7F])/sprintf '&#%d;', ord $1/ge;
185     } elsif ($chk == 0x0200) {
186     $str =~ s/([^\x00-\x7F])/sprintf '&#x%04X;', ord $1/ge;
187     } else {
188     $str =~ s/[^\x00-\x7F]/\?/g;
189     }
190     Encode::_utf8_off ($str);
191     return "\\V".$str;
192     }
193    
194     sub decode ($$;$) {
195     my ($obj, $str, $chk) = @_;
196     $_[1] = '' if $chk;
197     my $char = $obj->__reg_mnem;
198     $str = "\\V".$str;
199     $str =~ s{
200     \\([LlMmVv])((?:(?!\\[LlMmVv]).)+)
201     }{
202     my ($mode, $s) = (uc ($1), $2);
203     if ($mode eq 'V') { ## Vietnamese
204     $s =~ s{\\($char|.)|($char)|\x01}{
205     my ($e, $c) = ($1, $2);
206     if ($c || length ($e) > 1) {
207     $_VIQR_TO_UCS{$c || $e} || $c || $e;
208     } else {
209     $e;
210     }
211     }ges;
212     $s;
213     } elsif ($mode eq 'M') { ## English
214     $s =~ s{\\($char|.)}{
215     my ($e) = ($1);
216     if (length ($e) > 1) {
217     $_VIQR_TO_UCS{$e} || $e;
218     } else {
219     $e;
220     }
221     }ges;
222     $s;
223     } else { ## Literala
224     $s;
225     }
226     }gesx;
227     return $str;
228     }
229    
230     sub __reg_mnem ($) { q/[AEIOUYaeiouy][(^+,`?~.]+|[Dd][Dd]/ }
231    
232     1;
233     __END__
234    
235     =back
236    
237     Note that it is known that there is variants of VIQR.
238     One use "O*" / "U*" instead of "O+" / "U+". Other
239     use "OO" / "UU". Since I don't know how these variants
240     are used and inclution of these notation is incompatible,
241     this version of this module does not implement these.
242    
243     =head1 SEE ALSO
244    
245     <http://www.vietstd.org/report/rep92.htm>,
246     RFC 1456 <urn:ietf:rfc:1456>
247    
248     =head1 LICENSE
249    
250     Copyright 2002 Nanashi-san <nanashi@san.invalid>
251    
252     This library is free software; you can redistribute it
253     and/or modify it under the same terms as Perl itself.
254    
255     =cut
256    
257     # $Date: 2002/10/16 10:39:35 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24