/[suikacvs]/perl/lib/Encode/MNEM/VIQR.pm
Suika

Contents of /perl/lib/Encode/MNEM/VIQR.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (show annotations) (download)
Thu Dec 12 08:17:16 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: HEAD
]

1
2 =head1 NAME
3
4 Encode::MNEM::VIQR --- VIQR (Vietnamese Quoted Readable) encoding
5
6 =head1 ENCODINGS
7
8 =over 4
9
10 =cut
11
12 package Encode::MNEM::VIQR;
13 use strict;
14 our $VERSION = do{my @r=(q$Revision: 1.10 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
15 use base qw(Encode::Encoding);
16 __PACKAGE__->Define (qw!viqr csviqr!);
17
18 our %_VIQR_TO_UCS = (
19 q[A(?] => "\x{1EB2}", ## LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
20 q[A(~] => "\x{1EB4}", ## LATIN CAPITAL LETTER A WITH BREVE AND TILDE
21 q[A^~] => "\x{1EAA}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
22 q[Y?] => "\x{1EF6}", ## LATIN CAPITAL LETTER Y WITH HOOK ABOVE
23 q[Y~] => "\x{1EF8}", ## LATIN CAPITAL LETTER Y WITH TILDE
24 q[Y.] => "\x{1EF4}", ## LATIN CAPITAL LETTER Y WITH DOT BELOW
25 q[A.] => "\x{1EA0}", ## LATIN CAPITAL LETTER A WITH DOT BELOW
26 q[A('] => "\x{1EAE}", ## LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
27 q[A(`] => "\x{1EB0}", ## LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
28 q[A(.] => "\x{1EB6}", ## LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
29 q[A^'] => "\x{1EA4}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
30 q[A^`] => "\x{1EA6}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
31 q[A^?] => "\x{1EA8}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
32 q[A^.] => "\x{1EAC}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
33 q[E~] => "\x{1EBC}", ## LATIN CAPITAL LETTER E WITH TILDE
34 q[E.] => "\x{1EB8}", ## LATIN CAPITAL LETTER E WITH DOT BELOW
35 q[E^'] => "\x{1EBE}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
36 q[E^`] => "\x{1EC0}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
37 q[E^?] => "\x{1EC2}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
38 q[E^~] => "\x{1EC4}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
39 q[E^.] => "\x{1EC6}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
40 q[O^'] => "\x{1ED0}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
41 q[O^`] => "\x{1ED2}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
42 q[O^?] => "\x{1ED4}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
43 q[O^~] => "\x{1ED6}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
44 q[O^.] => "\x{1ED8}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
45 q[O+.] => "\x{1EE2}", ## LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
46 q[O+'] => "\x{1EDA}", ## LATIN CAPITAL LETTER O WITH HORN AND ACUTE
47 q[O+`] => "\x{1EDC}", ## LATIN CAPITAL LETTER O WITH HORN AND GRAVE
48 q[O+?] => "\x{1EDE}", ## LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
49 q[I.] => "\x{1ECA}", ## LATIN CAPITAL LETTER I WITH DOT BELOW
50 q[O?] => "\x{1ECE}", ## LATIN CAPITAL LETTER O WITH HOOK ABOVE
51 q[O.] => "\x{1ECC}", ## LATIN CAPITAL LETTER O WITH DOT BELOW
52 q[I?] => "\x{1EC8}", ## LATIN CAPITAL LETTER I WITH HOOK ABOVE
53 q[U?] => "\x{1EE6}", ## LATIN CAPITAL LETTER U WITH HOOK ABOVE
54 q[U~] => "\x{0168}", ## LATIN CAPITAL LETTER U WITH TILDE
55 q[U.] => "\x{1EE4}", ## LATIN CAPITAL LETTER U WITH DOT BELOW
56 q[Y`] => "\x{1EF2}", ## LATIN CAPITAL LETTER Y WITH GRAVE
57 q[O~] => "\x{00D5}", ## LATIN CAPITAL LETTER O WITH TILDE
58 q[a('] => "\x{1EAF}", ## LATIN SMALL LETTER A WITH BREVE AND ACUTE
59 q[a(`] => "\x{1EB1}", ## LATIN SMALL LETTER A WITH BREVE AND GRAVE
60 q[a(.] => "\x{1EB7}", ## LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
61 q[a^'] => "\x{1EA5}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
62 q[a^`] => "\x{1EA7}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
63 q[a^?] => "\x{1EA9}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
64 q[a^.] => "\x{1EAD}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
65 q[e~] => "\x{1EBD}", ## LATIN SMALL LETTER E WITH TILDE
66 q[e.] => "\x{1EB9}", ## LATIN SMALL LETTER E WITH DOT BELOW
67 q[e^'] => "\x{1EBF}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
68 q[e^`] => "\x{1EC1}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
69 q[e^?] => "\x{1EC3}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
70 q[e^~] => "\x{1EC5}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
71 q[e^.] => "\x{1EC7}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
72 q[o^'] => "\x{1ED1}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
73 q[o^`] => "\x{1ED3}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
74 q[o^?] => "\x{1ED5}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
75 q[o^~] => "\x{1ED7}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
76 q[O+~] => "\x{1EE0}", ## LATIN CAPITAL LETTER O WITH HORN AND TILDE
77 q[O+] => "\x{01A0}", ## LATIN CAPITAL LETTER O WITH HORN
78 q[o^.] => "\x{1ED9}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
79 q[o+`] => "\x{1EDD}", ## LATIN SMALL LETTER O WITH HORN AND GRAVE
80 q[o+?] => "\x{1EDF}", ## LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE
81 q[i.] => "\x{1ECB}", ## LATIN SMALL LETTER I WITH DOT BELOW
82 q[U+.] => "\x{1EF0}", ## LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
83 q[U+'] => "\x{1EE8}", ## LATIN CAPITAL LETTER U WITH HORN AND ACUTE
84 q[U+`] => "\x{1EEA}", ## LATIN CAPITAL LETTER U WITH HORN AND GRAVE
85 q[U+?] => "\x{1EEC}", ## LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
86 q[o+] => "\x{01A1}", ## LATIN SMALL LETTER O WITH HORN
87 q[o+'] => "\x{1EDB}", ## LATIN SMALL LETTER O WITH HORN AND ACUTE
88 q[U+] => "\x{01AF}", ## LATIN CAPITAL LETTER U WITH HORN
89 q[A`] => "\x{00C0}", ## LATIN CAPITAL LETTER A WITH GRAVE
90 q[A'] => "\x{00C1}", ## LATIN CAPITAL LETTER A WITH ACUTE
91 q[A^] => "\x{00C2}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX
92 q[A~] => "\x{00C3}", ## LATIN CAPITAL LETTER A WITH TILDE
93 q[A?] => "\x{1EA2}", ## LATIN CAPITAL LETTER A WITH HOOK ABOVE
94 q[A(] => "\x{0102}", ## LATIN CAPITAL LETTER A WITH BREVE
95 q[a(?] => "\x{1EB3}", ## LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
96 q[a(~] => "\x{1EB5}", ## LATIN SMALL LETTER A WITH BREVE AND TILDE
97 q[E`] => "\x{00C8}", ## LATIN CAPITAL LETTER E WITH GRAVE
98 q[E'] => "\x{00C9}", ## LATIN CAPITAL LETTER E WITH ACUTE
99 q[E^] => "\x{00CA}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX
100 q[E?] => "\x{1EBA}", ## LATIN CAPITAL LETTER E WITH HOOK ABOVE
101 q[I`] => "\x{00CC}", ## LATIN CAPITAL LETTER I WITH GRAVE
102 q[I'] => "\x{00CD}", ## LATIN CAPITAL LETTER I WITH ACUTE
103 q[I~] => "\x{0128}", ## LATIN CAPITAL LETTER I WITH TILDE
104 q[y`] => "\x{1EF3}", ## LATIN SMALL LETTER Y WITH GRAVE
105 q[DD] => "\x{0110}", ## LATIN CAPITAL LETTER D WITH STROKE
106 q[u+'] => "\x{1EE9}", ## LATIN SMALL LETTER U WITH HORN AND ACUTE
107 q[O`] => "\x{00D2}", ## LATIN CAPITAL LETTER O WITH GRAVE
108 q[O'] => "\x{00D3}", ## LATIN CAPITAL LETTER O WITH ACUTE
109 q[O^] => "\x{00D4}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX
110 q[a.] => "\x{1EA1}", ## LATIN SMALL LETTER A WITH DOT BELOW
111 q[y?] => "\x{1EF7}", ## LATIN SMALL LETTER Y WITH HOOK ABOVE
112 q[u+`] => "\x{1EEB}", ## LATIN SMALL LETTER U WITH HORN AND GRAVE
113 q[u+?] => "\x{1EED}", ## LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE
114 q[U`] => "\x{00D9}", ## LATIN CAPITAL LETTER U WITH GRAVE
115 q[U'] => "\x{00DA}", ## LATIN CAPITAL LETTER U WITH ACUTE
116 q[y~] => "\x{1EF9}", ## LATIN SMALL LETTER Y WITH TILDE
117 q[y.] => "\x{1EF5}", ## LATIN SMALL LETTER Y WITH DOT BELOW
118 q[Y'] => "\x{00DD}", ## LATIN CAPITAL LETTER Y WITH ACUTE
119 q[o+~] => "\x{1EE1}", ## LATIN SMALL LETTER O WITH HORN AND TILDE
120 q[u+] => "\x{01B0}", ## LATIN SMALL LETTER U WITH HORN
121 q[a`] => "\x{00E0}", ## LATIN SMALL LETTER A WITH GRAVE
122 q[a'] => "\x{00E1}", ## LATIN SMALL LETTER A WITH ACUTE
123 q[a^] => "\x{00E2}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX
124 q[a~] => "\x{00E3}", ## LATIN SMALL LETTER A WITH TILDE
125 q[a?] => "\x{1EA3}", ## LATIN SMALL LETTER A WITH HOOK ABOVE
126 q[a(] => "\x{0103}", ## LATIN SMALL LETTER A WITH BREVE
127 q[u+~] => "\x{1EEF}", ## LATIN SMALL LETTER U WITH HORN AND TILDE
128 q[a^~] => "\x{1EAB}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
129 q[e`] => "\x{00E8}", ## LATIN SMALL LETTER E WITH GRAVE
130 q[e'] => "\x{00E9}", ## LATIN SMALL LETTER E WITH ACUTE
131 q[e^] => "\x{00EA}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX
132 q[e?] => "\x{1EBB}", ## LATIN SMALL LETTER E WITH HOOK ABOVE
133 q[i`] => "\x{00EC}", ## LATIN SMALL LETTER I WITH GRAVE
134 q[i'] => "\x{00ED}", ## LATIN SMALL LETTER I WITH ACUTE
135 q[i~] => "\x{0129}", ## LATIN SMALL LETTER I WITH TILDE
136 q[i?] => "\x{1EC9}", ## LATIN SMALL LETTER I WITH HOOK ABOVE
137 q[dd] => "\x{0111}", ## LATIN SMALL LETTER D WITH STROKE
138 q[u+.] => "\x{1EF1}", ## LATIN SMALL LETTER U WITH HORN AND DOT BELOW
139 q[o`] => "\x{00F2}", ## LATIN SMALL LETTER O WITH GRAVE
140 q[o'] => "\x{00F3}", ## LATIN SMALL LETTER O WITH ACUTE
141 q[o^] => "\x{00F4}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX
142 q[o~] => "\x{00F5}", ## LATIN SMALL LETTER O WITH TILDE
143 q[o?] => "\x{1ECF}", ## LATIN SMALL LETTER O WITH HOOK ABOVE
144 q[o.] => "\x{1ECD}", ## LATIN SMALL LETTER O WITH DOT BELOW
145 q[u.] => "\x{1EE5}", ## LATIN SMALL LETTER U WITH DOT BELOW
146 q[u`] => "\x{00F9}", ## LATIN SMALL LETTER U WITH GRAVE
147 q[u'] => "\x{00FA}", ## LATIN SMALL LETTER U WITH ACUTE
148 q[u~] => "\x{0169}", ## LATIN SMALL LETTER U WITH TILDE
149 q[u?] => "\x{1EE7}", ## LATIN SMALL LETTER U WITH HOOK ABOVE
150 q[y'] => "\x{00FD}", ## LATIN SMALL LETTER Y WITH ACUTE
151 q[o+.] => "\x{1EE3}", ## LATIN SMALL LETTER O WITH HORN AND DOT BELOW
152 q[U+~] => "\x{1EEE}", ## LATIN CAPITAL LETTER U WITH HORN AND TILDE
153
154 ## Some people use [OoUu]\* instead of [OoUu]\+.
155 ## VNTeX uses [Oo][Oo]|[Uu][Uu] instead of [OoUu]\+.
156 );
157 our %_UCS_TO_VIQR = reverse %_VIQR_TO_UCS;
158
159 =item viqr
160
161 Vietnamese quoted readable -- the mnemonic encoding for
162 Vietnamese. (Alias: csviqr (IANA))
163
164 =cut
165
166 sub encode ($$;$) {
167 my ($obj, $str, $chk) = @_;
168 $_[1] = '' if $chk;
169 my $char = $obj->__reg_mnem;
170 $str =~ s{(\\(?:$char|.)|$char|\x01)}{
171 my $c = $1;
172 if (length ($c) > 1) {
173 "\\" . substr ($c, 0, 1) . "\\" . substr ($c, 1);
174 } else {
175 "\\" . $c;
176 }
177 }ge;
178 $str =~ s{([\x{00C0}-\x{01FF}\x{1EA0}-\x{1EFF}])}{
179 $_UCS_TO_VIQR{$1} || $1;
180 }ge;
181 if ($chk == 0x0100) {
182 $str =~ s/([^\x00-\x7F])/sprintf '\x{%04X}', ord $1/ge;
183 } elsif ($chk == 0x0200) {
184 $str =~ s/([^\x00-\x7F])/sprintf '&#%d;', ord $1/ge;
185 } elsif ($chk == 0x0200) {
186 $str =~ s/([^\x00-\x7F])/sprintf '&#x%04X;', ord $1/ge;
187 } else {
188 $str =~ s/[^\x00-\x7F]/\?/g;
189 }
190 Encode::_utf8_off ($str);
191 return "\\V".$str;
192 }
193
194 sub decode ($$;$) {
195 my ($obj, $str, $chk) = @_;
196 $_[1] = '' if $chk;
197 my $char = $obj->__reg_mnem;
198 $str = "\\V".$str;
199 $str =~ s{
200 \\([LlMmVv])((?:(?!\\[LlMmVv]).)+)
201 }{
202 my ($mode, $s) = (uc ($1), $2);
203 if ($mode eq 'V') { ## Vietnamese
204 $s =~ s{\\($char|.)|($char)|\x01}{
205 my ($e, $c) = ($1, $2);
206 if ($c || length ($e) > 1) {
207 $_VIQR_TO_UCS{$c || $e} || $c || $e;
208 } else {
209 $e;
210 }
211 }ges;
212 $s;
213 } elsif ($mode eq 'M') { ## English
214 $s =~ s{\\($char|.)}{
215 my ($e) = ($1);
216 if (length ($e) > 1) {
217 $_VIQR_TO_UCS{$e} || $e;
218 } else {
219 $e;
220 }
221 }ges;
222 $s;
223 } else { ## Literala
224 $s;
225 }
226 }gesx;
227 return $str;
228 }
229
230 sub __reg_mnem ($) { q/[AEIOUYaeiouy][(^+,`?~.]+|[Dd][Dd]/ }
231
232 1;
233 __END__
234
235 =back
236
237 Note that it is known that there is variants of VIQR.
238 One use "O*" / "U*" instead of "O+" / "U+". Other
239 use "OO" / "UU". Since I don't know how these variants
240 are used and inclution of these notation is incompatible,
241 this version of this module does not implement these.
242
243 =head1 SEE ALSO
244
245 <http://www.vietstd.org/report/rep92.htm>,
246 RFC 1456 <urn:ietf:rfc:1456>
247
248 =head1 LICENSE
249
250 Copyright 2002 Nanashi-san <nanashi@san.invalid>
251
252 This library is free software; you can redistribute it
253 and/or modify it under the same terms as Perl itself.
254
255 =cut
256
257 # $Date: 2002/10/16 10:39:35 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24