1 |
|
2 |
=head1 NAME |
3 |
|
4 |
Encode::MNEM::VIQR --- VIQR (Vietnamese Quoted Readable) encoding |
5 |
|
6 |
=head1 ENCODINGS |
7 |
|
8 |
=over 4 |
9 |
|
10 |
=cut |
11 |
|
12 |
package Encode::MNEM::VIQR; |
13 |
use strict; |
14 |
our $VERSION = do{my @r=(q$Revision: 1.10 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r}; |
15 |
use base qw(Encode::Encoding); |
16 |
__PACKAGE__->Define (qw!viqr csviqr!); |
17 |
|
18 |
our %_VIQR_TO_UCS = ( |
19 |
q[A(?] => "\x{1EB2}", ## LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE |
20 |
q[A(~] => "\x{1EB4}", ## LATIN CAPITAL LETTER A WITH BREVE AND TILDE |
21 |
q[A^~] => "\x{1EAA}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE |
22 |
q[Y?] => "\x{1EF6}", ## LATIN CAPITAL LETTER Y WITH HOOK ABOVE |
23 |
q[Y~] => "\x{1EF8}", ## LATIN CAPITAL LETTER Y WITH TILDE |
24 |
q[Y.] => "\x{1EF4}", ## LATIN CAPITAL LETTER Y WITH DOT BELOW |
25 |
q[A.] => "\x{1EA0}", ## LATIN CAPITAL LETTER A WITH DOT BELOW |
26 |
q[A('] => "\x{1EAE}", ## LATIN CAPITAL LETTER A WITH BREVE AND ACUTE |
27 |
q[A(`] => "\x{1EB0}", ## LATIN CAPITAL LETTER A WITH BREVE AND GRAVE |
28 |
q[A(.] => "\x{1EB6}", ## LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW |
29 |
q[A^'] => "\x{1EA4}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE |
30 |
q[A^`] => "\x{1EA6}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE |
31 |
q[A^?] => "\x{1EA8}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE |
32 |
q[A^.] => "\x{1EAC}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW |
33 |
q[E~] => "\x{1EBC}", ## LATIN CAPITAL LETTER E WITH TILDE |
34 |
q[E.] => "\x{1EB8}", ## LATIN CAPITAL LETTER E WITH DOT BELOW |
35 |
q[E^'] => "\x{1EBE}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE |
36 |
q[E^`] => "\x{1EC0}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE |
37 |
q[E^?] => "\x{1EC2}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE |
38 |
q[E^~] => "\x{1EC4}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE |
39 |
q[E^.] => "\x{1EC6}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW |
40 |
q[O^'] => "\x{1ED0}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE |
41 |
q[O^`] => "\x{1ED2}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE |
42 |
q[O^?] => "\x{1ED4}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE |
43 |
q[O^~] => "\x{1ED6}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE |
44 |
q[O^.] => "\x{1ED8}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW |
45 |
q[O+.] => "\x{1EE2}", ## LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW |
46 |
q[O+'] => "\x{1EDA}", ## LATIN CAPITAL LETTER O WITH HORN AND ACUTE |
47 |
q[O+`] => "\x{1EDC}", ## LATIN CAPITAL LETTER O WITH HORN AND GRAVE |
48 |
q[O+?] => "\x{1EDE}", ## LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE |
49 |
q[I.] => "\x{1ECA}", ## LATIN CAPITAL LETTER I WITH DOT BELOW |
50 |
q[O?] => "\x{1ECE}", ## LATIN CAPITAL LETTER O WITH HOOK ABOVE |
51 |
q[O.] => "\x{1ECC}", ## LATIN CAPITAL LETTER O WITH DOT BELOW |
52 |
q[I?] => "\x{1EC8}", ## LATIN CAPITAL LETTER I WITH HOOK ABOVE |
53 |
q[U?] => "\x{1EE6}", ## LATIN CAPITAL LETTER U WITH HOOK ABOVE |
54 |
q[U~] => "\x{0168}", ## LATIN CAPITAL LETTER U WITH TILDE |
55 |
q[U.] => "\x{1EE4}", ## LATIN CAPITAL LETTER U WITH DOT BELOW |
56 |
q[Y`] => "\x{1EF2}", ## LATIN CAPITAL LETTER Y WITH GRAVE |
57 |
q[O~] => "\x{00D5}", ## LATIN CAPITAL LETTER O WITH TILDE |
58 |
q[a('] => "\x{1EAF}", ## LATIN SMALL LETTER A WITH BREVE AND ACUTE |
59 |
q[a(`] => "\x{1EB1}", ## LATIN SMALL LETTER A WITH BREVE AND GRAVE |
60 |
q[a(.] => "\x{1EB7}", ## LATIN SMALL LETTER A WITH BREVE AND DOT BELOW |
61 |
q[a^'] => "\x{1EA5}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE |
62 |
q[a^`] => "\x{1EA7}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE |
63 |
q[a^?] => "\x{1EA9}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE |
64 |
q[a^.] => "\x{1EAD}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW |
65 |
q[e~] => "\x{1EBD}", ## LATIN SMALL LETTER E WITH TILDE |
66 |
q[e.] => "\x{1EB9}", ## LATIN SMALL LETTER E WITH DOT BELOW |
67 |
q[e^'] => "\x{1EBF}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE |
68 |
q[e^`] => "\x{1EC1}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE |
69 |
q[e^?] => "\x{1EC3}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE |
70 |
q[e^~] => "\x{1EC5}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE |
71 |
q[e^.] => "\x{1EC7}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW |
72 |
q[o^'] => "\x{1ED1}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE |
73 |
q[o^`] => "\x{1ED3}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE |
74 |
q[o^?] => "\x{1ED5}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE |
75 |
q[o^~] => "\x{1ED7}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE |
76 |
q[O+~] => "\x{1EE0}", ## LATIN CAPITAL LETTER O WITH HORN AND TILDE |
77 |
q[O+] => "\x{01A0}", ## LATIN CAPITAL LETTER O WITH HORN |
78 |
q[o^.] => "\x{1ED9}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW |
79 |
q[o+`] => "\x{1EDD}", ## LATIN SMALL LETTER O WITH HORN AND GRAVE |
80 |
q[o+?] => "\x{1EDF}", ## LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE |
81 |
q[i.] => "\x{1ECB}", ## LATIN SMALL LETTER I WITH DOT BELOW |
82 |
q[U+.] => "\x{1EF0}", ## LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW |
83 |
q[U+'] => "\x{1EE8}", ## LATIN CAPITAL LETTER U WITH HORN AND ACUTE |
84 |
q[U+`] => "\x{1EEA}", ## LATIN CAPITAL LETTER U WITH HORN AND GRAVE |
85 |
q[U+?] => "\x{1EEC}", ## LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE |
86 |
q[o+] => "\x{01A1}", ## LATIN SMALL LETTER O WITH HORN |
87 |
q[o+'] => "\x{1EDB}", ## LATIN SMALL LETTER O WITH HORN AND ACUTE |
88 |
q[U+] => "\x{01AF}", ## LATIN CAPITAL LETTER U WITH HORN |
89 |
q[A`] => "\x{00C0}", ## LATIN CAPITAL LETTER A WITH GRAVE |
90 |
q[A'] => "\x{00C1}", ## LATIN CAPITAL LETTER A WITH ACUTE |
91 |
q[A^] => "\x{00C2}", ## LATIN CAPITAL LETTER A WITH CIRCUMFLEX |
92 |
q[A~] => "\x{00C3}", ## LATIN CAPITAL LETTER A WITH TILDE |
93 |
q[A?] => "\x{1EA2}", ## LATIN CAPITAL LETTER A WITH HOOK ABOVE |
94 |
q[A(] => "\x{0102}", ## LATIN CAPITAL LETTER A WITH BREVE |
95 |
q[a(?] => "\x{1EB3}", ## LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE |
96 |
q[a(~] => "\x{1EB5}", ## LATIN SMALL LETTER A WITH BREVE AND TILDE |
97 |
q[E`] => "\x{00C8}", ## LATIN CAPITAL LETTER E WITH GRAVE |
98 |
q[E'] => "\x{00C9}", ## LATIN CAPITAL LETTER E WITH ACUTE |
99 |
q[E^] => "\x{00CA}", ## LATIN CAPITAL LETTER E WITH CIRCUMFLEX |
100 |
q[E?] => "\x{1EBA}", ## LATIN CAPITAL LETTER E WITH HOOK ABOVE |
101 |
q[I`] => "\x{00CC}", ## LATIN CAPITAL LETTER I WITH GRAVE |
102 |
q[I'] => "\x{00CD}", ## LATIN CAPITAL LETTER I WITH ACUTE |
103 |
q[I~] => "\x{0128}", ## LATIN CAPITAL LETTER I WITH TILDE |
104 |
q[y`] => "\x{1EF3}", ## LATIN SMALL LETTER Y WITH GRAVE |
105 |
q[DD] => "\x{0110}", ## LATIN CAPITAL LETTER D WITH STROKE |
106 |
q[u+'] => "\x{1EE9}", ## LATIN SMALL LETTER U WITH HORN AND ACUTE |
107 |
q[O`] => "\x{00D2}", ## LATIN CAPITAL LETTER O WITH GRAVE |
108 |
q[O'] => "\x{00D3}", ## LATIN CAPITAL LETTER O WITH ACUTE |
109 |
q[O^] => "\x{00D4}", ## LATIN CAPITAL LETTER O WITH CIRCUMFLEX |
110 |
q[a.] => "\x{1EA1}", ## LATIN SMALL LETTER A WITH DOT BELOW |
111 |
q[y?] => "\x{1EF7}", ## LATIN SMALL LETTER Y WITH HOOK ABOVE |
112 |
q[u+`] => "\x{1EEB}", ## LATIN SMALL LETTER U WITH HORN AND GRAVE |
113 |
q[u+?] => "\x{1EED}", ## LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE |
114 |
q[U`] => "\x{00D9}", ## LATIN CAPITAL LETTER U WITH GRAVE |
115 |
q[U'] => "\x{00DA}", ## LATIN CAPITAL LETTER U WITH ACUTE |
116 |
q[y~] => "\x{1EF9}", ## LATIN SMALL LETTER Y WITH TILDE |
117 |
q[y.] => "\x{1EF5}", ## LATIN SMALL LETTER Y WITH DOT BELOW |
118 |
q[Y'] => "\x{00DD}", ## LATIN CAPITAL LETTER Y WITH ACUTE |
119 |
q[o+~] => "\x{1EE1}", ## LATIN SMALL LETTER O WITH HORN AND TILDE |
120 |
q[u+] => "\x{01B0}", ## LATIN SMALL LETTER U WITH HORN |
121 |
q[a`] => "\x{00E0}", ## LATIN SMALL LETTER A WITH GRAVE |
122 |
q[a'] => "\x{00E1}", ## LATIN SMALL LETTER A WITH ACUTE |
123 |
q[a^] => "\x{00E2}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX |
124 |
q[a~] => "\x{00E3}", ## LATIN SMALL LETTER A WITH TILDE |
125 |
q[a?] => "\x{1EA3}", ## LATIN SMALL LETTER A WITH HOOK ABOVE |
126 |
q[a(] => "\x{0103}", ## LATIN SMALL LETTER A WITH BREVE |
127 |
q[u+~] => "\x{1EEF}", ## LATIN SMALL LETTER U WITH HORN AND TILDE |
128 |
q[a^~] => "\x{1EAB}", ## LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE |
129 |
q[e`] => "\x{00E8}", ## LATIN SMALL LETTER E WITH GRAVE |
130 |
q[e'] => "\x{00E9}", ## LATIN SMALL LETTER E WITH ACUTE |
131 |
q[e^] => "\x{00EA}", ## LATIN SMALL LETTER E WITH CIRCUMFLEX |
132 |
q[e?] => "\x{1EBB}", ## LATIN SMALL LETTER E WITH HOOK ABOVE |
133 |
q[i`] => "\x{00EC}", ## LATIN SMALL LETTER I WITH GRAVE |
134 |
q[i'] => "\x{00ED}", ## LATIN SMALL LETTER I WITH ACUTE |
135 |
q[i~] => "\x{0129}", ## LATIN SMALL LETTER I WITH TILDE |
136 |
q[i?] => "\x{1EC9}", ## LATIN SMALL LETTER I WITH HOOK ABOVE |
137 |
q[dd] => "\x{0111}", ## LATIN SMALL LETTER D WITH STROKE |
138 |
q[u+.] => "\x{1EF1}", ## LATIN SMALL LETTER U WITH HORN AND DOT BELOW |
139 |
q[o`] => "\x{00F2}", ## LATIN SMALL LETTER O WITH GRAVE |
140 |
q[o'] => "\x{00F3}", ## LATIN SMALL LETTER O WITH ACUTE |
141 |
q[o^] => "\x{00F4}", ## LATIN SMALL LETTER O WITH CIRCUMFLEX |
142 |
q[o~] => "\x{00F5}", ## LATIN SMALL LETTER O WITH TILDE |
143 |
q[o?] => "\x{1ECF}", ## LATIN SMALL LETTER O WITH HOOK ABOVE |
144 |
q[o.] => "\x{1ECD}", ## LATIN SMALL LETTER O WITH DOT BELOW |
145 |
q[u.] => "\x{1EE5}", ## LATIN SMALL LETTER U WITH DOT BELOW |
146 |
q[u`] => "\x{00F9}", ## LATIN SMALL LETTER U WITH GRAVE |
147 |
q[u'] => "\x{00FA}", ## LATIN SMALL LETTER U WITH ACUTE |
148 |
q[u~] => "\x{0169}", ## LATIN SMALL LETTER U WITH TILDE |
149 |
q[u?] => "\x{1EE7}", ## LATIN SMALL LETTER U WITH HOOK ABOVE |
150 |
q[y'] => "\x{00FD}", ## LATIN SMALL LETTER Y WITH ACUTE |
151 |
q[o+.] => "\x{1EE3}", ## LATIN SMALL LETTER O WITH HORN AND DOT BELOW |
152 |
q[U+~] => "\x{1EEE}", ## LATIN CAPITAL LETTER U WITH HORN AND TILDE |
153 |
|
154 |
## Some people use [OoUu]\* instead of [OoUu]\+. |
155 |
## VNTeX uses [Oo][Oo]|[Uu][Uu] instead of [OoUu]\+. |
156 |
); |
157 |
our %_UCS_TO_VIQR = reverse %_VIQR_TO_UCS; |
158 |
|
159 |
=item viqr |
160 |
|
161 |
Vietnamese quoted readable -- the mnemonic encoding for |
162 |
Vietnamese. (Alias: csviqr (IANA)) |
163 |
|
164 |
=cut |
165 |
|
166 |
sub encode ($$;$) { |
167 |
my ($obj, $str, $chk) = @_; |
168 |
$_[1] = '' if $chk; |
169 |
my $char = $obj->__reg_mnem; |
170 |
$str =~ s{(\\(?:$char|.)|$char|\x01)}{ |
171 |
my $c = $1; |
172 |
if (length ($c) > 1) { |
173 |
"\\" . substr ($c, 0, 1) . "\\" . substr ($c, 1); |
174 |
} else { |
175 |
"\\" . $c; |
176 |
} |
177 |
}ge; |
178 |
$str =~ s{([\x{00C0}-\x{01FF}\x{1EA0}-\x{1EFF}])}{ |
179 |
$_UCS_TO_VIQR{$1} || $1; |
180 |
}ge; |
181 |
if ($chk == 0x0100) { |
182 |
$str =~ s/([^\x00-\x7F])/sprintf '\x{%04X}', ord $1/ge; |
183 |
} elsif ($chk == 0x0200) { |
184 |
$str =~ s/([^\x00-\x7F])/sprintf '&#%d;', ord $1/ge; |
185 |
} elsif ($chk == 0x0200) { |
186 |
$str =~ s/([^\x00-\x7F])/sprintf '&#x%04X;', ord $1/ge; |
187 |
} else { |
188 |
$str =~ s/[^\x00-\x7F]/\?/g; |
189 |
} |
190 |
Encode::_utf8_off ($str); |
191 |
return "\\V".$str; |
192 |
} |
193 |
|
194 |
sub decode ($$;$) { |
195 |
my ($obj, $str, $chk) = @_; |
196 |
$_[1] = '' if $chk; |
197 |
my $char = $obj->__reg_mnem; |
198 |
$str = "\\V".$str; |
199 |
$str =~ s{ |
200 |
\\([LlMmVv])((?:(?!\\[LlMmVv]).)+) |
201 |
}{ |
202 |
my ($mode, $s) = (uc ($1), $2); |
203 |
if ($mode eq 'V') { ## Vietnamese |
204 |
$s =~ s{\\($char|.)|($char)|\x01}{ |
205 |
my ($e, $c) = ($1, $2); |
206 |
if ($c || length ($e) > 1) { |
207 |
$_VIQR_TO_UCS{$c || $e} || $c || $e; |
208 |
} else { |
209 |
$e; |
210 |
} |
211 |
}ges; |
212 |
$s; |
213 |
} elsif ($mode eq 'M') { ## English |
214 |
$s =~ s{\\($char|.)}{ |
215 |
my ($e) = ($1); |
216 |
if (length ($e) > 1) { |
217 |
$_VIQR_TO_UCS{$e} || $e; |
218 |
} else { |
219 |
$e; |
220 |
} |
221 |
}ges; |
222 |
$s; |
223 |
} else { ## Literala |
224 |
$s; |
225 |
} |
226 |
}gesx; |
227 |
return $str; |
228 |
} |
229 |
|
230 |
sub __reg_mnem ($) { q/[AEIOUYaeiouy][(^+,`?~.]+|[Dd][Dd]/ } |
231 |
|
232 |
1; |
233 |
__END__ |
234 |
|
235 |
=back |
236 |
|
237 |
Note that it is known that there is variants of VIQR. |
238 |
One use "O*" / "U*" instead of "O+" / "U+". Other |
239 |
use "OO" / "UU". Since I don't know how these variants |
240 |
are used and inclution of these notation is incompatible, |
241 |
this version of this module does not implement these. |
242 |
|
243 |
=head1 SEE ALSO |
244 |
|
245 |
<http://www.vietstd.org/report/rep92.htm>, |
246 |
RFC 1456 <urn:ietf:rfc:1456> |
247 |
|
248 |
=head1 LICENSE |
249 |
|
250 |
Copyright 2002 Nanashi-san <nanashi@san.invalid> |
251 |
|
252 |
This library is free software; you can redistribute it |
253 |
and/or modify it under the same terms as Perl itself. |
254 |
|
255 |
=cut |
256 |
|
257 |
# $Date: 2002/10/16 10:39:35 $ |