1 |
|
2 |
=head1 NAME
|
3 |
|
4 |
Message::Util -- Utilities for Message::* Perl modules.
|
5 |
|
6 |
=head1 DESCRIPTION
|
7 |
|
8 |
Useful functions for Message::* Perl modules.
|
9 |
This module is only intended for internal use.
|
10 |
Note that there is another module, Message::Tool.
|
11 |
|
12 |
=cut
|
13 |
|
14 |
package Message::Util;
|
15 |
#require 5.6.0;
|
16 |
use strict;
|
17 |
use vars qw(%FMT2STR %OPTION %REG $VERSION);
|
18 |
$VERSION=do{my @r=(q$Revision: 1.26 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
|
19 |
|
20 |
require Carp;
|
21 |
|
22 |
=head1 REGEXPS (%Message::Util::REG)
|
23 |
|
24 |
=head2 Naming Rules
|
25 |
|
26 |
key = *(prefix) [format] token-name
|
27 |
|
28 |
prefix = 'M_' ;; With matching "(" ")"s
|
29 |
/ 'S_' ;; Simple (not strict) expression
|
30 |
/ 'NON_' ;; Negative character class
|
31 |
format = E<lt>specification id, such as C<http>E<gt> ;; if necessary
|
32 |
token-name = E<lt>BNF name =~ tr/-/_/E<gt>
|
33 |
|
34 |
=cut
|
35 |
|
36 |
$REG{MATCH_NONE} = qr/(?!)/;
|
37 |
$REG{MATCH_ALL} = qr/[\x00-\xFF]/;
|
38 |
## Whitespace
|
39 |
$REG{WSP} = qr/[\x09\x20]/;
|
40 |
$REG{FWS} = qr/[\x09\x20]*/; ## not same as 2822's
|
41 |
## Basic structure
|
42 |
if (defined $^V) { # $^V gt v6.5.0
|
43 |
$REG{comment} = qr/\x28(?:\x5C[\x00-\xFF]|[^\x28\x29\x5C]|(??{$REG{comment}}))*\x29/;
|
44 |
$REG{M_comment} = qr/\x28((?:\x5C[\x00-\xFF]|[^\x28\x29\x5C]|(??{$REG{comment}}))*)\x29/;
|
45 |
} else {
|
46 |
$REG{comment} = qr/\x28(?:\x5C(?:.|[\x0A\x0D])|[^\x28\x29\x5C]|\x28(?:\x5C(?:.|[\x0A\x0D])|[^\x28\x29\x5C])*\x29)*\x29/;
|
47 |
$REG{M_comment} = qr/\x28((?:\x5C(?:.|[\x0A\x0D])|[^\x28\x29\x5C]|\x28(?:\x5C(?:.|[\x0A\x0D])|[^\x28\x29\x5C])*\x29)*)\x29/;
|
48 |
}
|
49 |
|
50 |
$REG{quoted_string} = qr/\x22(?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\xFF])*\x22/;
|
51 |
$REG{M_quoted_string} = qr/\x22((?:\x5C[\x00-\xFF]|[^\x0D\x22\x5C])*)\x22/;
|
52 |
|
53 |
$REG{domain_literal} = qr/\x5B(?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x5A\x5E-\xFF])*\x5D/;
|
54 |
$REG{M_domain_literal} = qr/\x5B((?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x5A\x5E-\xFF])*)\x5D/;
|
55 |
|
56 |
#$REG{angle_quoted} = qr/\x3C[\x09\x20\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]*\x3E/;
|
57 |
$REG{angle_qcontent} = qr/(?:$REG{quoted_string}|$REG{domain_literal}|[^\x3C\x3E\x22\x5B])+/;
|
58 |
$REG{angle_quoted} = qr/<$REG{angle_qcontent}>|<>/;
|
59 |
$REG{M_angle_quoted} = qr/<($REG{angle_qcontent})>|<>/;
|
60 |
|
61 |
|
62 |
=head2 tokens
|
63 |
|
64 |
atext NON_atext 822.atext
|
65 |
atext_dot NON_atext_dot 822.atext / "."
|
66 |
NON_atext_dot_wsp 822.atext / "." / WSP
|
67 |
http_token NON_http_token http.token
|
68 |
NON_http_token_wsp http.token / WSP
|
69 |
attribute_char rfc2231.attribute-char
|
70 |
NON_http_attribute_char http.token AND rfc2231.attribute-char
|
71 |
|
72 |
=cut
|
73 |
|
74 |
$REG{atext} = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2F\x30-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]+/;
|
75 |
$REG{atext_dot} = qr/[\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]+/;
|
76 |
$REG{atext_dot_wsp} = qr/[\x09\x20\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]+/;
|
77 |
$REG{atext_dot8} = qr/[\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E\x80-\xFF]+/;
|
78 |
$REG{token} = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
|
79 |
$REG{http_token} = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]+/;
|
80 |
$REG{attribute_char} = qr/[\x21\x23-\x24\x26\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
|
81 |
|
82 |
$REG{NON_atext} = qr/[^\x21\x23-\x27\x2A\x2B\x2D\x2F\x30-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
|
83 |
$REG{NON_atext_wsp} = qr/[^\x09\x20\x21\x23-\x27\x2A\x2B\x2D\x2F\x30-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
|
84 |
$REG{NON_atext_dot} = qr/[^\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
|
85 |
$REG{NON_atext_dot_wsp} = qr/[^\x09\x20\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
|
86 |
$REG{NON_token} = qr/[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/;
|
87 |
$REG{NON_http_token} = qr/[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]/;
|
88 |
$REG{NON_http_token_wsp} = qr/[^\x09\x20\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]/;
|
89 |
$REG{NON_attribute_char} = qr/[^\x21\x23-\x24\x26\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/;
|
90 |
$REG{NON_http_attribute_char} = qr/[^\x21\x23-\x24\x26\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]/;
|
91 |
$REG{NON_http_attribute_char_wsp} = qr/[^\x09\x20\x21\x23-\x24\x26\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]/;
|
92 |
## Yes, C<attribute-char> does not appear in HTTP spec.
|
93 |
|
94 |
$REG{S_uri} = qr#[\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]+#;
|
95 |
|
96 |
$REG{NON_base64alphabet} = qr#[^A-Za-z0-9+/=]#;
|
97 |
|
98 |
$REG{dot_atom} = qr/$REG{atext}(?:$REG{FWS}\x2E$REG{FWS}$REG{atext})*/;
|
99 |
$REG{dot_atom_dot} = qr/$REG{atext_dot}(?:$REG{FWS}\x2E$REG{FWS}$REG{atext_dot})*/;
|
100 |
$REG{dot_word} = qr/(?:$REG{atext}|$REG{quoted_string})(?:$REG{FWS}\x2E$REG{FWS}(?:$REG{atext}|$REG{quoted_string}))*/;
|
101 |
$REG{dot_word_dot} = qr/(?:$REG{atext_dot}|$REG{quoted_string})(?:$REG{FWS}\x2E$REG{FWS}(?:$REG{atext_dot}|$REG{quoted_string}))*/;
|
102 |
$REG{phrase} = qr/(?:$REG{atext}|$REG{quoted_string})(?:$REG{atext}|$REG{quoted_string}|\.|$REG{FWS})*/;
|
103 |
## RFC 822 phrase (not strict)
|
104 |
|
105 |
#$REG{domain} = qr/(?:$REG{dot_atom}|$REG{domain_literal})/;
|
106 |
$REG{domain} = qr/(?:$REG{dot_atom_dot}|$REG{domain_literal})/;
|
107 |
#$REG{addr_spec} = qr/$REG{dot_word}$REG{FWS}\x40$REG{FWS}$REG{domain}/;
|
108 |
$REG{addr_spec} = qr/$REG{dot_word_dot}$REG{FWS}\x40$REG{FWS}$REG{domain}/;
|
109 |
$REG{msg_id} = qr/<$REG{FWS}$REG{addr_spec}$REG{FWS}>/;
|
110 |
|
111 |
$REG{M_addr_spec} = qr/($REG{dot_word_dot})$REG{FWS}\x40$REG{FWS}($REG{domain})/;
|
112 |
|
113 |
$REG{date_time} = qr/(?:[A-Za-z]+$REG{FWS},$REG{FWS})?[0-9]+$REG{WSP}*[A-Za-z]+$REG{WSP}*[0-9]+$REG{WSP}+[0-9]+$REG{FWS}:$REG{WSP}*[0-9]+(?:$REG{FWS}:$REG{WSP}*[0-9]+)?$REG{FWS}(?:[A-Za-z]+|[+-]$REG{WSP}*[0-9]+)/;
|
114 |
$REG{asctime} = qr/[A-Za-z]+$REG{WSP}*[A-Za-z]+$REG{WSP}*[0-9]+$REG{WSP}+[0-9]+$REG{FWS}:$REG{WSP}*[0-9]+$REG{FWS}:$REG{WSP}*[0-9]+$REG{WSP}+[0-9]+/;
|
115 |
|
116 |
## MIME encoded-word
|
117 |
$REG{M_encoded_word} = qr/=\x3F($REG{attribute_char})(?:\x2A($REG{attribute_char}))?\x3F($REG{attribute_char})\x3F([\x21-\x3E\x40-\x7E]+)\x3F=/;
|
118 |
$REG{S_encoded_word} = qr/=\x3F$REG{atext_dot}\x3F=/;
|
119 |
#$REG{S_encoded_word_comment} = qr/=\x3F[\x21-\x27\x2A-\x5B\x5D-\x7E]+\x3F=/;
|
120 |
## not used anywhere
|
121 |
|
122 |
## obsoleted
|
123 |
*FMT2STR = \%Message::Util::Formatter::FMT2STR;
|
124 |
|
125 |
=head1 STRUCTURED FIELD FUNCTIONS
|
126 |
|
127 |
=over 4
|
128 |
|
129 |
=item $nocomment:-) = Message::Util::delete_comment ($string)
|
130 |
|
131 |
Gets rid of all C<comment>s. Inserts a SP instead.
|
132 |
|
133 |
=cut
|
134 |
|
135 |
sub delete_comment ($) {
|
136 |
use re 'eval';
|
137 |
my $body = shift;
|
138 |
$body =~ s{($REG{quoted_string}|$REG{domain_literal}|$REG{angle_quoted})|$REG{comment}}{
|
139 |
my $o = $1; $o? $o : ' ';
|
140 |
}gex;
|
141 |
$body;
|
142 |
}
|
143 |
|
144 |
sub delete_wsp ($) {
|
145 |
my $body = shift;
|
146 |
$body =~ s{($REG{quoted_string}|$REG{domain_literal})|((?:$REG{token}|$REG{S_encoded_word})(?:$REG{WSP}+(?:$REG{token}|$REG{S_encoded_word}))+)|$REG{WSP}+}{
|
147 |
my ($o,$p) = ($1,$2);
|
148 |
if ($o) {$o}
|
149 |
elsif ($p) {$p=~s/$REG{WSP}+/\x20/g;$p}
|
150 |
else {''}
|
151 |
}gex;
|
152 |
$body;
|
153 |
}
|
154 |
|
155 |
sub remove_meaningless_wsp ($) {
|
156 |
my $body = shift;
|
157 |
$body =~ s{($REG{quoted_string}|$REG{domain_literal})|$REG{WSP}+}{
|
158 |
$1 || '';
|
159 |
}gex;
|
160 |
$body;
|
161 |
}
|
162 |
|
163 |
sub wsps_to_sp ($) {
|
164 |
my $body = shift;
|
165 |
$body =~ s{($REG{quoted_string}|$REG{domain_literal})|$REG{WSP}+}{
|
166 |
$1 || ' ';
|
167 |
}gex;
|
168 |
$body;
|
169 |
}
|
170 |
|
171 |
=item Message::Util::quote_ccontent ($yourself, $ccontent)
|
172 |
|
173 |
Quotes C<ccontent> (to be).
|
174 |
|
175 |
=cut
|
176 |
|
177 |
sub quote_ccontent ($;%) {
|
178 |
my $ccontent = shift;
|
179 |
my %option = @_;
|
180 |
if ($option{strict_quoted_pair}) {
|
181 |
$ccontent =~ s/([\x28\x29\x5C]|\x3D\x3F)/\x5C$1/g;
|
182 |
} else {
|
183 |
$ccontent =~ s/([\x28\x29\x5C]|\x3D\x3F)([\x21-\x7E])?/"\x5C$1".(defined $2?"\x5C$2":'')/ge;
|
184 |
}
|
185 |
$ccontent;
|
186 |
}
|
187 |
|
188 |
=item $unquoted = Message::Util::unquote_ccontent ($string)
|
189 |
|
190 |
Unquotes C<quoted-pair> in C<comment>s.
|
191 |
|
192 |
=cut
|
193 |
|
194 |
sub unquote_ccontent ($) {
|
195 |
use re 'eval';
|
196 |
my $comment = shift;
|
197 |
$comment =~ s{$REG{M_comment}}{
|
198 |
my $ctext = $1;
|
199 |
$ctext =~ s/\x5C([\x00-\xFF])/$1/g;
|
200 |
'('.$ctext.')';
|
201 |
}goex;
|
202 |
$comment;
|
203 |
}
|
204 |
|
205 |
=item $unquoted = Message::Util::unquote_quoted_string ($string)
|
206 |
|
207 |
Unquotes C<quoted-pair> in C<quoted-string>s and
|
208 |
unquotes C<quoted-string> (or gets rid of C<DQUOTE>s).
|
209 |
|
210 |
=cut
|
211 |
|
212 |
sub unquote_quoted_string ($) {
|
213 |
my $quoted_string = shift;
|
214 |
$quoted_string =~ s{$REG{M_quoted_string}}{
|
215 |
my $qtext = $1;
|
216 |
$qtext =~ s/\x5C([\x00-\xFF])/$1/g;
|
217 |
$qtext;
|
218 |
}goex;
|
219 |
$quoted_string;
|
220 |
}
|
221 |
|
222 |
=item Message::Util::unquote_if_quoted_string ($string)
|
223 |
|
224 |
Unquotes if and only if given string is A C<quoted-string>.
|
225 |
This function returns two value, (C<$unquoted-string>,
|
226 |
C<$was-quoted-string?>).
|
227 |
|
228 |
=cut
|
229 |
|
230 |
sub unquote_if_quoted_string ($) {
|
231 |
my $quoted_string = shift; my $isq = 0;
|
232 |
$quoted_string =~ s{^$REG{M_quoted_string}$}{
|
233 |
my $qtext = $1;
|
234 |
$qtext =~ s/\x5C([\x00-\xFF])/$1/gs;
|
235 |
$isq = 1;
|
236 |
$qtext;
|
237 |
}goex;
|
238 |
wantarray? ($quoted_string, $isq): $quoted_string;
|
239 |
}
|
240 |
|
241 |
sub unquote_if_angle_quoted ($) {
|
242 |
my $quoted_string = shift; my $isq = 0;
|
243 |
$quoted_string =~ s{^$REG{M_angle_quoted}$}{
|
244 |
my $qtext = $1;
|
245 |
$qtext =~ s/\x5C([\x00-\xFF])/$1/g;
|
246 |
$isq = 1;
|
247 |
$qtext;
|
248 |
}goex;
|
249 |
wantarray? ($quoted_string, $isq): $quoted_string;
|
250 |
}
|
251 |
|
252 |
sub unquote_if_domain_literal ($) {
|
253 |
my $quoted_string = shift; my $isq = 0;
|
254 |
$quoted_string =~ s{^$REG{M_domain_literal}$}{
|
255 |
my $qtext = $1;
|
256 |
$qtext =~ s/\x5C([\x00-\xFF])/$1/g;
|
257 |
$isq = 1;
|
258 |
$qtext;
|
259 |
}goex;
|
260 |
wantarray? ($quoted_string, $isq): $quoted_string;
|
261 |
}
|
262 |
|
263 |
=item $quoted = Message::Util::quote_unsafe_string ($string)
|
264 |
|
265 |
Quotes string itself by C<DQUOTES> if it contains of
|
266 |
I<unsafe> character.
|
267 |
|
268 |
Default I<unsafe> is defined as E<lt>not ( atom / "." / %x09 / %x20 ) E<gt>.
|
269 |
|
270 |
=cut
|
271 |
|
272 |
sub quote_unsafe_string ($;%) {
|
273 |
my $string = shift;
|
274 |
my %option = @_;
|
275 |
$option{unsafe} ||= 'NON_atext_dot';
|
276 |
$option{unsafe_regex} = $option{unsafe} if $option{unsafe} =~ /^\(\?-xism:/;
|
277 |
$option{unsafe_regex} ||= qr/$REG{$option{unsafe}}|$REG{WSP}$REG{WSP}|^$REG{WSP}|$REG{WSP}$|^=\x3F/;
|
278 |
my $r = qr/([\x22\x5C])([\x21-\x7E])?/;
|
279 |
$r = qr/([\x22\x5C])/ if $option{strict}; ## usefor-article
|
280 |
if ($string =~ /$option{unsafe_regex}/) {
|
281 |
$string =~ s/$r/"\x5C$1".(defined $2?"\x5C$2":'')/ge;
|
282 |
$string = '"'.$string.'"';
|
283 |
}
|
284 |
$string;
|
285 |
}
|
286 |
|
287 |
sub quote_unsafe_domain ($) {
|
288 |
my $string = shift;
|
289 |
if ($string =~ /^\[[^\[\]]+\]$/) {
|
290 |
#
|
291 |
} elsif ($string =~ /$REG{NON_atext_dot}/ || $string =~ /^\.|\.$/) {
|
292 |
$string =~ s/([\x0D\x5B-\x5D])/\x5C$1/g;
|
293 |
$string = '['.$string.']';
|
294 |
}
|
295 |
$string;
|
296 |
}
|
297 |
|
298 |
sub remove_wsp ($) {
|
299 |
my $s = shift;
|
300 |
$s =~ s{($REG{quoted_string}|$REG{domain_literal}|$REG{angle_quoted})|$REG{WSP}+}{
|
301 |
$1
|
302 |
}gex;
|
303 |
$s;
|
304 |
}
|
305 |
|
306 |
=item $encoded = Message::Util::encode_printable_string ($string)
|
307 |
=item $decoded = Message::Util::decode_printable_string ($string)
|
308 |
|
309 |
Encodes or decodes string in PrintableString, described in
|
310 |
RFC 1327, RFC 2156. These functions supports 8bit octets encoded
|
311 |
as '(ddd)' format, although RFC 2156 allows it for only 7bit
|
312 |
octets.
|
313 |
|
314 |
=cut
|
315 |
|
316 |
my %To_Printable_String = (
|
317 |
'@' => '(a)',
|
318 |
'%' => '(p)',
|
319 |
'!' => '(b)',
|
320 |
'"' => '(q)',
|
321 |
'_' => '(u)',
|
322 |
'(' => '(l)',
|
323 |
')' => '(r)',
|
324 |
);
|
325 |
my %From_Printable_String = reverse %To_Printable_String;
|
326 |
sub encode_printable_string ($) {
|
327 |
my $s = shift;
|
328 |
$s =~ s{ ([^0-9A-Za-z\x20'+,./:=?-]) }{
|
329 |
my $c = $1;
|
330 |
unless ($To_Printable_String{$c}) {
|
331 |
$To_Printable_String{$c} = sprintf '(%03d)', ord $c;
|
332 |
}
|
333 |
$To_Printable_String{$c};
|
334 |
}gex;
|
335 |
$s;
|
336 |
}
|
337 |
sub decode_printable_string ($) {
|
338 |
my $s = shift;
|
339 |
$s =~ s{ \( ([0-9A-Za-z]+) \) }{
|
340 |
my $c = lc $1; my $p = "($c)";
|
341 |
if ($c !~ /[^0-9]/) {
|
342 |
$From_Printable_String{$p} = pack 'C', 0+$c;
|
343 |
} elsif (!defined $From_Printable_String{$p}) {
|
344 |
$From_Printable_String{$p} = $p; ## Invalid!
|
345 |
}
|
346 |
$From_Printable_String{$p};
|
347 |
}gex;
|
348 |
$s;
|
349 |
}
|
350 |
|
351 |
=item $encoded = Message::Util::encode_t61_string ($string)
|
352 |
=item $decoded = Message::Util::decode_t61_string ($string)
|
353 |
|
354 |
Encodes or decodes string in T.61String described in RFC 1327,
|
355 |
RFC 2056.
|
356 |
|
357 |
=cut
|
358 |
|
359 |
sub encode_t61_string ($) {
|
360 |
my $s = shift;
|
361 |
$s =~ s{ ([^0-9A-Za-z\x20'+,./:=?-]) }{
|
362 |
sprintf '{%03d}', ord $1;
|
363 |
}gex;
|
364 |
$s;
|
365 |
}
|
366 |
sub decode_t61_string ($) {
|
367 |
my $s = shift;
|
368 |
$s =~ s{ \{ ([0-9]+) \} }{
|
369 |
my $c = $1; my $i = 0;
|
370 |
my $r = '';
|
371 |
while (my $d = substr ($c, $i * 3, 3)) {
|
372 |
$r .= pack 'C', 0+$d; $i++;
|
373 |
}
|
374 |
$r;
|
375 |
}gex;
|
376 |
$s;
|
377 |
}
|
378 |
=item $encoded = Message::Util::encode_printable_string ($string)
|
379 |
=item $decoded = Message::Util::decode_printable_string ($string)
|
380 |
|
381 |
Encodes or decodes string in RFC 822 with restricted encoding,
|
382 |
defined by RFC 1137.
|
383 |
|
384 |
=cut
|
385 |
|
386 |
my %To_Encoded_Atom = (
|
387 |
"\x20" => '_',
|
388 |
'_' => '#u#',
|
389 |
'(' => '#l#',
|
390 |
')' => '#r#',
|
391 |
',' => '#m#',
|
392 |
':' => '#c#',
|
393 |
"\x5C" => '#b#',
|
394 |
'#' => '#h#',
|
395 |
'=' => '#e#',
|
396 |
'/' => '#s#',
|
397 |
);
|
398 |
my %From_Encoded_Atom = reverse %To_Encoded_Atom;
|
399 |
sub encode_restricted_rfc822 ($) {
|
400 |
my $s = shift;
|
401 |
$s =~ s{ ([^\x21\x24-\x27\x2A\x2B\x2D\x2F\x30-\x39\x3D\x3F\x41-\x5A\x5E\x60-\x7E]) }{
|
402 |
my $c = $1; ## \x2E \x40
|
403 |
unless ($To_Encoded_Atom{$c}) {
|
404 |
$To_Encoded_Atom{$c} = sprintf '#%03d#', ord $c;
|
405 |
}
|
406 |
$To_Encoded_Atom{$c};
|
407 |
}gex;
|
408 |
$s;
|
409 |
}
|
410 |
sub decode_restricted_rfc822 ($) {
|
411 |
my $s = shift;
|
412 |
$s =~ s{ \# ([0-9A-Za-z]+) \# | _ }{
|
413 |
my $c = lc $1; my $p = "#$c#";
|
414 |
if ($c !~ /[^0-9]/) {
|
415 |
$From_Encoded_Atom{$p} = pack 'C', 0+$c;
|
416 |
} elsif (!defined $From_Encoded_Atom{$p}) {
|
417 |
$From_Encoded_Atom{$p} = $p; ## Invalid!
|
418 |
}
|
419 |
$From_Encoded_Atom{$p};
|
420 |
}gex;
|
421 |
$s;
|
422 |
}
|
423 |
|
424 |
=item $Message::Util::make_clone ($parent)
|
425 |
|
426 |
Returns clone.
|
427 |
|
428 |
=cut
|
429 |
|
430 |
sub make_clone ($) {
|
431 |
my $s = shift;
|
432 |
if (ref $s eq 'ARRAY') {
|
433 |
$s = [map {make_clone ($_)} @$s];
|
434 |
} elsif (ref $s eq 'HASH') {
|
435 |
$s = {map {make_clone ($_)} (%$s)};
|
436 |
} elsif (ref $s && ref $s ne 'CODE' && ref $s ne 'Regexp') {
|
437 |
$s = $s->clone;
|
438 |
}
|
439 |
$s;
|
440 |
}
|
441 |
|
442 |
=head1 ENCODER and DECODER
|
443 |
|
444 |
=over 4
|
445 |
|
446 |
=item Message::Util::encode_header_string ($yourself, $string, [%options])
|
447 |
|
448 |
=cut
|
449 |
|
450 |
sub encode_header_string ($$;%) {
|
451 |
require Message::MIME::Charset;
|
452 |
my $yourself = shift; my $s = shift; my %o = @_;
|
453 |
$o{charset} ||= $yourself->{option}->{encoding_after_encode};
|
454 |
$o{charset} = Message::MIME::Charset::name_normalize ($o{charset});
|
455 |
$o{current_charset} = Message::MIME::Charset::name_normalize ($o{current_charset});
|
456 |
my ($t,%r) = Message::MIME::Charset::encode ($o{charset}, $s);
|
457 |
my @o = (language => $o{language});
|
458 |
if ($r{success}) { ## Convertion succeed
|
459 |
$o{charset} = $r{charset} if $r{charset};
|
460 |
$o{charset} = '' if $o{charset} =~ /\*/;
|
461 |
(value => $t, @o, Message::MIME::Charset::name_minimumize ($o{charset}, $t));
|
462 |
} else { ## Fault
|
463 |
$o{current_charset} = '' if $o{current_charset} =~ /\*/;
|
464 |
(value => $t, failed => 1, @o,
|
465 |
Message::MIME::Charset::name_minimumize ($o{current_charset}, $t));
|
466 |
}
|
467 |
}
|
468 |
|
469 |
sub decode_header_string ($$;%) {
|
470 |
require Message::MIME::Charset;
|
471 |
require Message::MIME::EncodedWord;
|
472 |
my $yourself = shift; my $s = shift; my %o = @_;
|
473 |
$o{charset} ||= $yourself->{option}->{encoding_before_decode};
|
474 |
$o{charset} = Message::MIME::Charset::name_normalize ($o{charset});
|
475 |
my ($t, %r); ## decoded-text, success?
|
476 |
if ($o{type} !~ /quoted|encoded|domain|word/) {
|
477 |
my (@s, @r);
|
478 |
$s =~ s{(([\x09\x20]*(?:\x5C[\x00-\xFF]
|
479 |
|[\x00-\x08\x0A-\x1F\x21-\x5B\x5D-\xFF])+|[\x09\x20]+$))}
|
480 |
{ push @s, $1; '' }goesx;
|
481 |
for my $i (0..$#s) {
|
482 |
if ($s[$i] =~ /^($REG{FWS})$REG{M_encoded_word}$/) {
|
483 |
my ($t, $w) = ('', $1);
|
484 |
($t, $r[$i]) = (Message::MIME::EncodedWord::_decode_eword ($2, $3, $4, $5));
|
485 |
if ($r[$i]) {
|
486 |
$s[$i] = $t;
|
487 |
if ($i == 0 || $r[$i-1] == 0) {
|
488 |
$s[$i] = $w.$s[$i];
|
489 |
}
|
490 |
}
|
491 |
} else {
|
492 |
my ($u, %q) = ($s[$i]);
|
493 |
$u =~ s/\x5C([\x00-\xFF])/$1/g unless $o{type} =~ /text/;
|
494 |
($u,%q) = Message::MIME::Charset::decode ($o{charset}, $u);
|
495 |
$s[$i] = $u if $q{success};
|
496 |
}
|
497 |
}
|
498 |
$t = join '', @s; $r{success} = 1;
|
499 |
} else {
|
500 |
($t,%r) = Message::MIME::Charset::decode ($o{charset}, $s);
|
501 |
}
|
502 |
$r{success} ? (value => $t, success => 1, language => $o{language}): ## suceess
|
503 |
(value => $s, language => $o{language}, success => 0,
|
504 |
charset => ($o{charset}=~/\*/?'':$o{charset})); ## fault
|
505 |
}
|
506 |
|
507 |
sub encode_body_string {
|
508 |
require Message::MIME::Charset;
|
509 |
my $yourself = shift; my $s = shift; my %o = @_;
|
510 |
$o{charset} ||= $yourself->{option}->{encoding_after_encode};
|
511 |
$o{charset} = Message::MIME::Charset::name_normalize ($o{charset});
|
512 |
$o{current_charset} = Message::MIME::Charset::name_normalize ($o{current_charset});
|
513 |
my ($t,%r) = Message::MIME::Charset::encode ($o{charset}, $s);
|
514 |
my @o = ();
|
515 |
if ($r{success}) { ## Convertion successed
|
516 |
$o{charset} = $r{charset} if $r{charset};
|
517 |
$o{charset} = '' if $o{charset} =~ /\*/;
|
518 |
(value => $t, @o, Message::MIME::Charset::name_minimumize ($o{charset}, $t));
|
519 |
} else { ## Fault
|
520 |
$o{current_charset} = '' if $o{current_charset} =~ /\*/;
|
521 |
(value => $t, failed => 1, @o,
|
522 |
Message::MIME::Charset::name_minimumize ($o{current_charset}, $t));
|
523 |
}
|
524 |
}
|
525 |
|
526 |
sub decode_body_string {
|
527 |
require Message::MIME::Charset;
|
528 |
my $yourself = shift; my $s = shift; my %o = @_;
|
529 |
$o{charset} ||= $yourself->{option}->{encoding_before_decode};
|
530 |
$o{charset} = Message::MIME::Charset::name_normalize ($o{charset});
|
531 |
my ($t,%r) = Message::MIME::Charset::decode ($o{charset}, $s);
|
532 |
$r{success} ? (value => $t, success => 1): ## suceess
|
533 |
(value => $s, success => 0,
|
534 |
charset => ($o{charset}=~/\*/?'':$o{charset})); ## fault
|
535 |
}
|
536 |
|
537 |
=item Message::Util::decode_quoted_string ($yourself, $quoted-string)
|
538 |
|
539 |
Returns unquoted and decoded a given C<quoted-string>
|
540 |
or a string containing one or multiple C<quoted-string>s.
|
541 |
|
542 |
=cut
|
543 |
|
544 |
sub decode_quoted_string ($$;%) {
|
545 |
my $yourself = shift;
|
546 |
my $quoted_string = shift;
|
547 |
my %option = @_;
|
548 |
$option{type} ||= 'phrase';
|
549 |
$quoted_string =~ s{$REG{M_quoted_string}|([^\x22]+)}{
|
550 |
my ($qtext, $t) = ($1, $2);
|
551 |
if (length $t) {
|
552 |
$t =~ s/$REG{WSP}+/\x20/g;
|
553 |
my %s = &{$yourself->{option}->{hook_decode_string}}
|
554 |
($yourself, $t, type => $option{type},
|
555 |
charset => $option{charset});
|
556 |
$s{value};
|
557 |
} else {
|
558 |
$qtext =~ s/\x5C([\x00-\xFF])/$1/g;
|
559 |
my %s = &{$yourself->{option}->{hook_decode_string}}
|
560 |
($yourself, $qtext, type => $option{type}.'/quoted',
|
561 |
charset => $option{charset});
|
562 |
$s{value};
|
563 |
}
|
564 |
}goex;
|
565 |
$quoted_string;
|
566 |
}
|
567 |
|
568 |
=item Message::Util::encode_qcontent ($yourself, $string)
|
569 |
|
570 |
Encodes (by C<hook_encode_string> of C<$yourself-E<gt>{option}>)
|
571 |
C<qcontent> (content of C<quoted-string>) within C<$string>.
|
572 |
|
573 |
=cut
|
574 |
|
575 |
sub encode_qcontent ($$) {
|
576 |
my $yourself = shift;
|
577 |
my $quoted_strings = shift;
|
578 |
$quoted_strings =~ s{$REG{M_quoted_string}}{
|
579 |
my ($qtext) = ($1);
|
580 |
$qtext =~ s/\x5C([\x00-\xFF])/$1/g;
|
581 |
my %s = &{$yourself->{option}->{hook_encode_string}} ($yourself, $qtext,
|
582 |
type => 'phrase/quoted');
|
583 |
$s{value} =~ s/([\x0D\x22\x5C])([\x20-\xFF])?/"\x5C$1".($2?"\x5C$2":'')/ges;
|
584 |
'"'.$s{value}.'"';
|
585 |
}goex;
|
586 |
$quoted_strings;
|
587 |
}
|
588 |
|
589 |
=item Message::Util::decode_qcontent ($yourself, $string)
|
590 |
|
591 |
Decodes (by C<hook_decode_string> of C<$yourself-E<gt>{option}>)
|
592 |
C<qcontent> (content of C<quoted-string>) within C<$string>.
|
593 |
|
594 |
=cut
|
595 |
|
596 |
sub decode_qcontent ($$) {
|
597 |
my $yourself = shift;
|
598 |
my $quoted_string = shift;
|
599 |
$quoted_string =~ s{$REG{M_quoted_string}}{
|
600 |
my ($qtext) = ($1);
|
601 |
$qtext =~ s/\x5C([\x00-\xFF])/$1/g;
|
602 |
my %s = &{$yourself->{option}->{hook_decode_string}} ($yourself, $qtext,
|
603 |
type => 'phrase/quoted');
|
604 |
$s{value} =~ s/([\x22\x5C])([\x20-\xFF])?/"\x5C$1".($2?"\x5C$2":'')/ge;
|
605 |
'"'.$s{value}.'"';
|
606 |
}goex;
|
607 |
$quoted_string;
|
608 |
}
|
609 |
|
610 |
=item @comments = Message::Util::comment_to_array ($youtself, $comments)
|
611 |
|
612 |
Replaces C<comment>s to C< > (a SP), decodes C<ccontent>s,
|
613 |
and returns them as array.
|
614 |
|
615 |
=cut
|
616 |
|
617 |
sub comment_to_array ($$) {
|
618 |
use re 'eval';
|
619 |
my $yourself = shift;
|
620 |
my $body = shift;
|
621 |
my @r = ();
|
622 |
$body =~ s{($REG{quoted_string}|$REG{domain_literal})|$REG{M_comment}}{
|
623 |
my ($o, $c) = ($1, $2);
|
624 |
if ($o) {$o}
|
625 |
else {
|
626 |
require Message::MIME::EncodedWord;
|
627 |
push @r, Message::MIME::EncodedWord::decode_ccontent ($yourself, $c);
|
628 |
' ';
|
629 |
}
|
630 |
}gex;
|
631 |
@r;
|
632 |
}
|
633 |
|
634 |
sub delete_comment_to_array ($$;%) {
|
635 |
use re 'eval';
|
636 |
my $yourself = shift;
|
637 |
my $body = shift;
|
638 |
my %option = @_;
|
639 |
my $areg = ''; $areg = '|'.$REG{angle_quoted} if $option{-use_angle_quoted};
|
640 |
my @r = ();
|
641 |
$body =~ s{($REG{quoted_string}|$REG{domain_literal}$areg)|$REG{M_comment}}{
|
642 |
my ($o, $c) = ($1, $2);
|
643 |
if ($o) {$o}
|
644 |
else {
|
645 |
require Message::MIME::EncodedWord;
|
646 |
push @r, Message::MIME::EncodedWord::decode_ccontent ($yourself, $c);
|
647 |
' ';
|
648 |
}
|
649 |
}gex;
|
650 |
($body, @r);
|
651 |
}
|
652 |
|
653 |
=item Message::Util::encode_ccontent ($yourself, $ccontent)
|
654 |
|
655 |
Encodes C<ccontent> (content of C<comment>).
|
656 |
|
657 |
=cut
|
658 |
|
659 |
sub encode_ccontent ($$) {
|
660 |
my $yourself = shift;
|
661 |
my $ccontent = shift;
|
662 |
my %f = &{$yourself->{option}->{hook_encode_string}} ($yourself,
|
663 |
$ccontent, type => 'ccontent');
|
664 |
$f{value} =~ s/([\x28\x29\x5C]|\x3D\x3F)([\x21-\x7E])?/"\x5C$1".(defined $2?"\x5C$2":'')/ge;
|
665 |
$f{value};
|
666 |
}
|
667 |
|
668 |
=item Message::Util::decode_ccontent ($yourself, $ccontent)
|
669 |
|
670 |
Decodes C<ccontent> (content of C<comment>).
|
671 |
|
672 |
=cut
|
673 |
|
674 |
sub decode_ccontent ($$) {
|
675 |
require Message::MIME::EncodedWord;
|
676 |
Message::MIME::EncodedWord::decode_ccontent ($_[0], $_[1]);
|
677 |
}
|
678 |
|
679 |
## obsoleted
|
680 |
sub sprintxf ($;\%) {
|
681 |
require Message::Util::Formatter;
|
682 |
Message::Util::Formatter->new->replace (@_);
|
683 |
}
|
684 |
|
685 |
sub decide_newline ($) {
|
686 |
my $s = shift;
|
687 |
my $nl = "\x0D\x0A";
|
688 |
my $crlf = $s =~ s/\x0D\x0A/\x0D\x0A/gs;
|
689 |
my $lfcr = $s =~ s/\x0A\x0D/\x0A\x0D/gs;
|
690 |
my $cr = $s =~ s/\x0D(?!\x0A)/\x0D/gs;
|
691 |
my $lf = $s =~ s/(?<!\x0D)\x0A/\x0A/gs;
|
692 |
if ($crlf >= $cr && $crlf >= $lf && $crlf >= $lfcr ) { $nl = "\x0D\x0A" }
|
693 |
elsif ($lfcr >= $cr && $lfcr >= $lf) { $nl = "\x0A\x0D" }
|
694 |
elsif ($cr >= $lf) { $nl = "\x0D" }
|
695 |
else { $nl = "\x0A" }
|
696 |
$nl;
|
697 |
}
|
698 |
|
699 |
=item $fqdn = Message::Util::get_host_fqdn
|
700 |
|
701 |
Returns FQDN of THIS host. If it is unable to get the FQDN,
|
702 |
returns undef.
|
703 |
|
704 |
=over 3
|
705 |
|
706 |
=item $Message::Util::OPTION{use_Net_Domain} = 1/0
|
707 |
|
708 |
Whether using Net::Domain module to get FQDN or not.
|
709 |
|
710 |
=item $Message::Util::OPTION{use_Sys_Hostname} = 1/0
|
711 |
|
712 |
Whether using Sys::Hostnamen module to get FQDN or not.
|
713 |
|
714 |
=item $Message::Util::OPTION{use_Sys_Hostname_Long} = 1/0
|
715 |
|
716 |
Whether using Sys::Hostnamen::Long module to get FQDN or not.
|
717 |
|
718 |
=back
|
719 |
|
720 |
Note that the value returned by Sys::Hostname::hostname
|
721 |
usually does not match with the FQDN. This module is prepared
|
722 |
as the last way to get. If you want not to get non-FQDN,
|
723 |
set 0 to use_Sys_Hostname. Sys::Hostname is bundled with
|
724 |
Perl. This is why its default is 1.
|
725 |
|
726 |
=cut
|
727 |
|
728 |
$OPTION{use_Net_Domain} = 1;
|
729 |
$OPTION{use_Sys_Hostname} = 1;
|
730 |
$OPTION{use_Sys_Hostname_Long} = 1;
|
731 |
$OPTION{use_cache_host_fqdn} = 1;
|
732 |
$OPTION{__cache_host_fqdn} = undef;
|
733 |
|
734 |
sub get_host_fqdn () {
|
735 |
my $f = undef;
|
736 |
return $OPTION{__cache_host_fqdn}
|
737 |
if $OPTION{use_cache_host_fqdn} && $OPTION{__cache_host_fqdn};
|
738 |
if ($OPTION{use_Net_Domain}) {
|
739 |
eval q{require Net::Domain;
|
740 |
$f = &Net::Domain::hostfqdn;
|
741 |
} or Carp::carp ("get_host_fqdn: get by Net::Domain: $@");
|
742 |
if ($f) {
|
743 |
$OPTION{__cache_host_fqdn} = $f;
|
744 |
return $f;
|
745 |
}
|
746 |
}
|
747 |
if ($OPTION{use_Sys_Hostname_Long}) {
|
748 |
eval q{require Sys::Hostname::Long;
|
749 |
$f = &Sys::Hostname::Long::hostname_long;
|
750 |
} or Carp::carp ("get_host_fqdn: get by Sys::Hostname::Long: $@");
|
751 |
if ($f) {
|
752 |
$OPTION{__cache_host_fqdn} = $f;
|
753 |
return $f;
|
754 |
}
|
755 |
}
|
756 |
if ($OPTION{use_Sys_Hostname}) {
|
757 |
eval q{require Sys::Hostname;
|
758 |
$f = &Sys::Hostname::hostname;
|
759 |
} or Carp::carp ("get_host_fqdn: get by Sys::Hostname: $@");
|
760 |
if ($f) {
|
761 |
$OPTION{__cache_host_fqdn} = $f;
|
762 |
return $f;
|
763 |
}
|
764 |
}
|
765 |
undef;
|
766 |
}
|
767 |
|
768 |
sub is_utf8 ($) {
|
769 |
my $s = shift;
|
770 |
return Encode::is_utf8 ($s) if $Encode::VERSION;
|
771 |
0;
|
772 |
}
|
773 |
|
774 |
sub enentity_html ($) {
|
775 |
my $s = shift;
|
776 |
$s =~ s/&/&/;
|
777 |
$s =~ s/</</;
|
778 |
$s =~ s/>/>/;
|
779 |
$s =~ s/"/"/;
|
780 |
$s;
|
781 |
}
|
782 |
|
783 |
sub deentity_html ($) {
|
784 |
my $s = shift;
|
785 |
$s =~ s/</</;
|
786 |
$s =~ s/>/>/;
|
787 |
$s =~ s/"/"/;
|
788 |
$s =~ s/&/&/;
|
789 |
$s;
|
790 |
}
|
791 |
|
792 |
package Message::Util::Wide;
|
793 |
use vars qw(%REG);
|
794 |
|
795 |
$REG{M_quoted_string} = qr/\x22((?:\x5C.|[^\x0D\x22\x5C])*)\x22/;
|
796 |
|
797 |
sub unquote_if_quoted_string ($) {
|
798 |
my $quoted_string = shift; my $isq = 0;
|
799 |
$quoted_string =~ s{^$REG{M_quoted_string}$}{
|
800 |
my $qtext = $1;
|
801 |
$qtext =~ s/\x5C(.)/$1/gs;
|
802 |
$isq = 1;
|
803 |
$qtext;
|
804 |
}esx;
|
805 |
wantarray? ($quoted_string, $isq): $quoted_string;
|
806 |
}
|
807 |
|
808 |
=head1 LICENSE
|
809 |
|
810 |
Copyright 2002 Wakaba <w@suika.fam.cx>.
|
811 |
|
812 |
This program is free software; you can redistribute it and/or modify
|
813 |
it under the terms of the GNU General Public License as published by
|
814 |
the Free Software Foundation; either version 2 of the License, or
|
815 |
(at your option) any later version.
|
816 |
|
817 |
This program is distributed in the hope that it will be useful,
|
818 |
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
819 |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
820 |
GNU General Public License for more details.
|
821 |
|
822 |
You should have received a copy of the GNU General Public License
|
823 |
along with this program; see the file COPYING. If not, write to
|
824 |
the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
825 |
Boston, MA 02111-1307, USA.
|
826 |
|
827 |
=cut
|
828 |
|
829 |
1;
|
830 |
# $Date: 2002/11/13 10:59:11 $
|