/[suikacvs]/messaging/manakai/lib/Message/Util.pm
Suika

Contents of /messaging/manakai/lib/Message/Util.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.27 - (show annotations) (download)
Sat Jan 4 03:14:14 2003 UTC (21 years, 4 months ago) by w
Branch: MAIN
CVS Tags: before-dis2-200411, manakai-release-0-3-2, manakai-release-0-3-1, manakai-release-0-4-0, manakai-200612, HEAD
Branch point for: branch-suikawiki-1, experimental-xml-parser-200401
Changes since 1.26: +3 -3 lines
Bug fix

1
2 =head1 NAME
3
4 Message::Util -- Utilities for Message::* Perl modules.
5
6 =head1 DESCRIPTION
7
8 Useful functions for Message::* Perl modules.
9 This module is only intended for internal use.
10 Note that there is another module, Message::Tool.
11
12 =cut
13
14 package Message::Util;
15 #require 5.6.0;
16 use strict;
17 use vars qw(%FMT2STR %OPTION %REG $VERSION);
18 $VERSION=do{my @r=(q$Revision: 1.26 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
19
20 require Carp;
21
22 =head1 REGEXPS (%Message::Util::REG)
23
24 =head2 Naming Rules
25
26 key = *(prefix) [format] token-name
27
28 prefix = 'M_' ;; With matching "(" ")"s
29 / 'S_' ;; Simple (not strict) expression
30 / 'NON_' ;; Negative character class
31 format = E<lt>specification id, such as C<http>E<gt> ;; if necessary
32 token-name = E<lt>BNF name =~ tr/-/_/E<gt>
33
34 =cut
35
36 $REG{MATCH_NONE} = qr/(?!)/;
37 $REG{MATCH_ALL} = qr/[\x00-\xFF]/;
38 ## Whitespace
39 $REG{WSP} = qr/[\x09\x20]/;
40 $REG{FWS} = qr/[\x09\x20]*/; ## not same as 2822's
41 ## Basic structure
42 if (defined $^V) { # $^V gt v6.5.0
43 $REG{comment} = qr/\x28(?:\x5C[\x00-\xFF]|[^\x28\x29\x5C]|(??{$REG{comment}}))*\x29/;
44 $REG{M_comment} = qr/\x28((?:\x5C[\x00-\xFF]|[^\x28\x29\x5C]|(??{$REG{comment}}))*)\x29/;
45 } else {
46 $REG{comment} = qr/\x28(?:\x5C(?:.|[\x0A\x0D])|[^\x28\x29\x5C]|\x28(?:\x5C(?:.|[\x0A\x0D])|[^\x28\x29\x5C])*\x29)*\x29/;
47 $REG{M_comment} = qr/\x28((?:\x5C(?:.|[\x0A\x0D])|[^\x28\x29\x5C]|\x28(?:\x5C(?:.|[\x0A\x0D])|[^\x28\x29\x5C])*\x29)*)\x29/;
48 }
49
50 $REG{quoted_string} = qr/\x22(?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\xFF])*\x22/;
51 $REG{M_quoted_string} = qr/\x22((?:\x5C[\x00-\xFF]|[^\x0D\x22\x5C])*)\x22/;
52
53 $REG{domain_literal} = qr/\x5B(?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x5A\x5E-\xFF])*\x5D/;
54 $REG{M_domain_literal} = qr/\x5B((?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x5A\x5E-\xFF])*)\x5D/;
55
56 #$REG{angle_quoted} = qr/\x3C[\x09\x20\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]*\x3E/;
57 $REG{angle_qcontent} = qr/(?:$REG{quoted_string}|$REG{domain_literal}|[^\x3C\x3E\x22\x5B])+/;
58 $REG{angle_quoted} = qr/<$REG{angle_qcontent}>|<>/;
59 $REG{M_angle_quoted} = qr/<($REG{angle_qcontent})>|<>/;
60
61
62 =head2 tokens
63
64 atext NON_atext 822.atext
65 atext_dot NON_atext_dot 822.atext / "."
66 NON_atext_dot_wsp 822.atext / "." / WSP
67 http_token NON_http_token http.token
68 NON_http_token_wsp http.token / WSP
69 attribute_char rfc2231.attribute-char
70 NON_http_attribute_char http.token AND rfc2231.attribute-char
71
72 =cut
73
74 $REG{atext} = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2F\x30-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]+/;
75 $REG{atext_dot} = qr/[\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]+/;
76 $REG{atext_dot_wsp} = qr/[\x09\x20\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]+/;
77 $REG{atext_dot8} = qr/[\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E\x80-\xFF]+/;
78 $REG{token} = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
79 $REG{http_token} = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]+/;
80 $REG{attribute_char} = qr/[\x21\x23-\x24\x26\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]+/;
81
82 $REG{NON_atext} = qr/[^\x21\x23-\x27\x2A\x2B\x2D\x2F\x30-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
83 $REG{NON_atext_wsp} = qr/[^\x09\x20\x21\x23-\x27\x2A\x2B\x2D\x2F\x30-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
84 $REG{NON_atext_dot} = qr/[^\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
85 $REG{NON_atext_dot_wsp} = qr/[^\x09\x20\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
86 $REG{NON_token} = qr/[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/;
87 $REG{NON_http_token} = qr/[^\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]/;
88 $REG{NON_http_token_wsp} = qr/[^\x09\x20\x21\x23-\x27\x2A\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]/;
89 $REG{NON_attribute_char} = qr/[^\x21\x23-\x24\x26\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7E]/;
90 $REG{NON_http_attribute_char} = qr/[^\x21\x23-\x24\x26\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]/;
91 $REG{NON_http_attribute_char_wsp} = qr/[^\x09\x20\x21\x23-\x24\x26\x2B\x2D\x2E\x30-\x39\x41-\x5A\x5E-\x7A\x7C\x7E]/;
92 ## Yes, C<attribute-char> does not appear in HTTP spec.
93
94 $REG{S_uri} = qr#[\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]+#;
95
96 $REG{NON_base64alphabet} = qr#[^A-Za-z0-9+/=]#;
97
98 $REG{dot_atom} = qr/$REG{atext}(?:$REG{FWS}\x2E$REG{FWS}$REG{atext})*/;
99 $REG{dot_atom_dot} = qr/$REG{atext_dot}(?:$REG{FWS}\x2E$REG{FWS}$REG{atext_dot})*/;
100 $REG{dot_word} = qr/(?:$REG{atext}|$REG{quoted_string})(?:$REG{FWS}\x2E$REG{FWS}(?:$REG{atext}|$REG{quoted_string}))*/;
101 $REG{dot_word_dot} = qr/(?:$REG{atext_dot}|$REG{quoted_string})(?:$REG{FWS}\x2E$REG{FWS}(?:$REG{atext_dot}|$REG{quoted_string}))*/;
102 $REG{phrase} = qr/(?:$REG{atext}|$REG{quoted_string})(?:$REG{atext}|$REG{quoted_string}|\.|$REG{FWS})*/;
103 ## RFC 822 phrase (not strict)
104
105 #$REG{domain} = qr/(?:$REG{dot_atom}|$REG{domain_literal})/;
106 $REG{domain} = qr/(?:$REG{dot_atom_dot}|$REG{domain_literal})/;
107 #$REG{addr_spec} = qr/$REG{dot_word}$REG{FWS}\x40$REG{FWS}$REG{domain}/;
108 $REG{addr_spec} = qr/$REG{dot_word_dot}$REG{FWS}\x40$REG{FWS}$REG{domain}/;
109 $REG{msg_id} = qr/<$REG{FWS}$REG{addr_spec}$REG{FWS}>/;
110
111 $REG{M_addr_spec} = qr/($REG{dot_word_dot})$REG{FWS}\x40$REG{FWS}($REG{domain})/;
112
113 $REG{date_time} = qr/(?:[A-Za-z]+$REG{FWS},$REG{FWS})?[0-9]+$REG{WSP}*[A-Za-z]+$REG{WSP}*[0-9]+$REG{WSP}+[0-9]+$REG{FWS}:$REG{WSP}*[0-9]+(?:$REG{FWS}:$REG{WSP}*[0-9]+)?$REG{FWS}(?:[A-Za-z]+|[+-]$REG{WSP}*[0-9]+)/;
114 $REG{asctime} = qr/[A-Za-z]+$REG{WSP}*[A-Za-z]+$REG{WSP}*[0-9]+$REG{WSP}+[0-9]+$REG{FWS}:$REG{WSP}*[0-9]+$REG{FWS}:$REG{WSP}*[0-9]+$REG{WSP}+[0-9]+/;
115
116 ## MIME encoded-word
117 $REG{M_encoded_word} = qr/=\x3F($REG{attribute_char})(?:\x2A($REG{attribute_char}))?\x3F($REG{attribute_char})\x3F([\x21-\x3E\x40-\x7E]+)\x3F=/;
118 $REG{S_encoded_word} = qr/=\x3F$REG{atext_dot}\x3F=/;
119 #$REG{S_encoded_word_comment} = qr/=\x3F[\x21-\x27\x2A-\x5B\x5D-\x7E]+\x3F=/;
120 ## not used anywhere
121
122 ## obsoleted
123 *FMT2STR = \%Message::Util::Formatter::FMT2STR;
124
125 =head1 STRUCTURED FIELD FUNCTIONS
126
127 =over 4
128
129 =item $nocomment:-) = Message::Util::delete_comment ($string)
130
131 Gets rid of all C<comment>s. Inserts a SP instead.
132
133 =cut
134
135 sub delete_comment ($) {
136 use re 'eval';
137 my $body = shift;
138 $body =~ s{($REG{quoted_string}|$REG{domain_literal}|$REG{angle_quoted})|$REG{comment}}{
139 my $o = $1; $o? $o : ' ';
140 }gex;
141 $body;
142 }
143
144 sub delete_wsp ($) {
145 my $body = shift;
146 $body =~ s{($REG{quoted_string}|$REG{domain_literal})|((?:$REG{token}|$REG{S_encoded_word})(?:$REG{WSP}+(?:$REG{token}|$REG{S_encoded_word}))+)|$REG{WSP}+}{
147 my ($o,$p) = ($1,$2);
148 if ($o) {$o}
149 elsif ($p) {$p=~s/$REG{WSP}+/\x20/g;$p}
150 else {''}
151 }gex;
152 $body;
153 }
154
155 sub remove_meaningless_wsp ($) {
156 my $body = shift;
157 $body =~ s{($REG{quoted_string}|$REG{domain_literal})|$REG{WSP}+}{
158 $1 || '';
159 }gex;
160 $body;
161 }
162
163 sub wsps_to_sp ($) {
164 my $body = shift;
165 $body =~ s{($REG{quoted_string}|$REG{domain_literal})|$REG{WSP}+}{
166 $1 || ' ';
167 }gex;
168 $body;
169 }
170
171 =item Message::Util::quote_ccontent ($yourself, $ccontent)
172
173 Quotes C<ccontent> (to be).
174
175 =cut
176
177 sub quote_ccontent ($;%) {
178 my $ccontent = shift;
179 my %option = @_;
180 if ($option{strict_quoted_pair}) {
181 $ccontent =~ s/([\x28\x29\x5C]|\x3D\x3F)/\x5C$1/g;
182 } else {
183 $ccontent =~ s/([\x28\x29\x5C]|\x3D\x3F)([\x21-\x7E])?/"\x5C$1".(defined $2?"\x5C$2":'')/ge;
184 }
185 $ccontent;
186 }
187
188 =item $unquoted = Message::Util::unquote_ccontent ($string)
189
190 Unquotes C<quoted-pair> in C<comment>s.
191
192 =cut
193
194 sub unquote_ccontent ($) {
195 use re 'eval';
196 my $comment = shift;
197 $comment =~ s{$REG{M_comment}}{
198 my $ctext = $1;
199 $ctext =~ s/\x5C([\x00-\xFF])/$1/g;
200 '('.$ctext.')';
201 }goex;
202 $comment;
203 }
204
205 =item $unquoted = Message::Util::unquote_quoted_string ($string)
206
207 Unquotes C<quoted-pair> in C<quoted-string>s and
208 unquotes C<quoted-string> (or gets rid of C<DQUOTE>s).
209
210 =cut
211
212 sub unquote_quoted_string ($) {
213 my $quoted_string = shift;
214 $quoted_string =~ s{$REG{M_quoted_string}}{
215 my $qtext = $1;
216 $qtext =~ s/\x5C([\x00-\xFF])/$1/g;
217 $qtext;
218 }goex;
219 $quoted_string;
220 }
221
222 =item Message::Util::unquote_if_quoted_string ($string)
223
224 Unquotes if and only if given string is A C<quoted-string>.
225 This function returns two value, (C<$unquoted-string>,
226 C<$was-quoted-string?>).
227
228 =cut
229
230 sub unquote_if_quoted_string ($) {
231 my $quoted_string = shift; my $isq = 0;
232 $quoted_string =~ s{^$REG{M_quoted_string}$}{
233 my $qtext = $1;
234 $qtext =~ s/\x5C([\x00-\xFF])/$1/gs;
235 $isq = 1;
236 $qtext;
237 }goex;
238 wantarray? ($quoted_string, $isq): $quoted_string;
239 }
240
241 sub unquote_if_angle_quoted ($) {
242 my $quoted_string = shift; my $isq = 0;
243 $quoted_string =~ s{^$REG{M_angle_quoted}$}{
244 my $qtext = $1;
245 $qtext =~ s/\x5C([\x00-\xFF])/$1/g;
246 $isq = 1;
247 $qtext;
248 }goex;
249 wantarray? ($quoted_string, $isq): $quoted_string;
250 }
251
252 sub unquote_if_domain_literal ($) {
253 my $quoted_string = shift; my $isq = 0;
254 $quoted_string =~ s{^$REG{M_domain_literal}$}{
255 my $qtext = $1;
256 $qtext =~ s/\x5C([\x00-\xFF])/$1/g;
257 $isq = 1;
258 $qtext;
259 }goex;
260 wantarray? ($quoted_string, $isq): $quoted_string;
261 }
262
263 =item $quoted = Message::Util::quote_unsafe_string ($string)
264
265 Quotes string itself by C<DQUOTES> if it contains of
266 I<unsafe> character.
267
268 Default I<unsafe> is defined as E<lt>not ( atom / "." / %x09 / %x20 ) E<gt>.
269
270 =cut
271
272 sub quote_unsafe_string ($;%) {
273 my $string = shift;
274 my %option = @_;
275 $option{unsafe} ||= 'NON_atext_dot';
276 $option{unsafe_regex} = $option{unsafe} if $option{unsafe} =~ /^\(\?-xism:/;
277 $option{unsafe_regex} ||= qr/$REG{$option{unsafe}}|$REG{WSP}$REG{WSP}|^$REG{WSP}|$REG{WSP}$|^=\x3F/;
278 my $r = qr/([\x22\x5C])([\x21-\x7E])?/;
279 $r = qr/([\x22\x5C])/ if $option{strict}; ## usefor-article
280 if ($string =~ /$option{unsafe_regex}/) {
281 $string =~ s/$r/"\x5C$1".(defined $2?"\x5C$2":'')/ge;
282 $string = '"'.$string.'"';
283 }
284 $string;
285 }
286
287 sub quote_unsafe_domain ($) {
288 my $string = shift;
289 if ($string =~ /^\[[^\[\]]+\]$/) {
290 #
291 } elsif ($string =~ /$REG{NON_atext_dot}/ || $string =~ /^\.|\.$/) {
292 $string =~ s/([\x0D\x5B-\x5D])/\x5C$1/g;
293 $string = '['.$string.']';
294 }
295 $string;
296 }
297
298 sub remove_wsp ($) {
299 my $s = shift;
300 $s =~ s{($REG{quoted_string}|$REG{domain_literal}|$REG{angle_quoted})|$REG{WSP}+}{
301 $1
302 }gex;
303 $s;
304 }
305
306 =item $encoded = Message::Util::encode_printable_string ($string)
307 =item $decoded = Message::Util::decode_printable_string ($string)
308
309 Encodes or decodes string in PrintableString, described in
310 RFC 1327, RFC 2156. These functions supports 8bit octets encoded
311 as '(ddd)' format, although RFC 2156 allows it for only 7bit
312 octets.
313
314 =cut
315
316 my %To_Printable_String = (
317 '@' => '(a)',
318 '%' => '(p)',
319 '!' => '(b)',
320 '"' => '(q)',
321 '_' => '(u)',
322 '(' => '(l)',
323 ')' => '(r)',
324 );
325 my %From_Printable_String = reverse %To_Printable_String;
326 sub encode_printable_string ($) {
327 my $s = shift;
328 $s =~ s{ ([^0-9A-Za-z\x20'+,./:=?-]) }{
329 my $c = $1;
330 unless ($To_Printable_String{$c}) {
331 $To_Printable_String{$c} = sprintf '(%03d)', ord $c;
332 }
333 $To_Printable_String{$c};
334 }gex;
335 $s;
336 }
337 sub decode_printable_string ($) {
338 my $s = shift;
339 $s =~ s{ \( ([0-9A-Za-z]+) \) }{
340 my $c = lc $1; my $p = "($c)";
341 if ($c !~ /[^0-9]/) {
342 $From_Printable_String{$p} = pack 'C', 0+$c;
343 } elsif (!defined $From_Printable_String{$p}) {
344 $From_Printable_String{$p} = $p; ## Invalid!
345 }
346 $From_Printable_String{$p};
347 }gex;
348 $s;
349 }
350
351 =item $encoded = Message::Util::encode_t61_string ($string)
352 =item $decoded = Message::Util::decode_t61_string ($string)
353
354 Encodes or decodes string in T.61String described in RFC 1327,
355 RFC 2056.
356
357 =cut
358
359 sub encode_t61_string ($) {
360 my $s = shift;
361 $s =~ s{ ([^0-9A-Za-z\x20'+,./:=?-]) }{
362 sprintf '{%03d}', ord $1;
363 }gex;
364 $s;
365 }
366 sub decode_t61_string ($) {
367 my $s = shift;
368 $s =~ s{ \{ ([0-9]+) \} }{
369 my $c = $1; my $i = 0;
370 my $r = '';
371 while (my $d = substr ($c, $i * 3, 3)) {
372 $r .= pack 'C', 0+$d; $i++;
373 }
374 $r;
375 }gex;
376 $s;
377 }
378 =item $encoded = Message::Util::encode_printable_string ($string)
379 =item $decoded = Message::Util::decode_printable_string ($string)
380
381 Encodes or decodes string in RFC 822 with restricted encoding,
382 defined by RFC 1137.
383
384 =cut
385
386 my %To_Encoded_Atom = (
387 "\x20" => '_',
388 '_' => '#u#',
389 '(' => '#l#',
390 ')' => '#r#',
391 ',' => '#m#',
392 ':' => '#c#',
393 "\x5C" => '#b#',
394 '#' => '#h#',
395 '=' => '#e#',
396 '/' => '#s#',
397 );
398 my %From_Encoded_Atom = reverse %To_Encoded_Atom;
399 sub encode_restricted_rfc822 ($) {
400 my $s = shift;
401 $s =~ s{ ([^\x21\x24-\x27\x2A\x2B\x2D\x2F\x30-\x39\x3D\x3F\x41-\x5A\x5E\x60-\x7E]) }{
402 my $c = $1; ## \x2E \x40
403 unless ($To_Encoded_Atom{$c}) {
404 $To_Encoded_Atom{$c} = sprintf '#%03d#', ord $c;
405 }
406 $To_Encoded_Atom{$c};
407 }gex;
408 $s;
409 }
410 sub decode_restricted_rfc822 ($) {
411 my $s = shift;
412 $s =~ s{ \# ([0-9A-Za-z]+) \# | _ }{
413 my $c = lc $1; my $p = "#$c#";
414 if ($c !~ /[^0-9]/) {
415 $From_Encoded_Atom{$p} = pack 'C', 0+$c;
416 } elsif (!defined $From_Encoded_Atom{$p}) {
417 $From_Encoded_Atom{$p} = $p; ## Invalid!
418 }
419 $From_Encoded_Atom{$p};
420 }gex;
421 $s;
422 }
423
424 =item $Message::Util::make_clone ($parent)
425
426 Returns clone.
427
428 =cut
429
430 sub make_clone ($) {
431 my $s = shift;
432 if (ref $s eq 'ARRAY') {
433 $s = [map {make_clone ($_)} @$s];
434 } elsif (ref $s eq 'HASH') {
435 $s = {map {make_clone ($_)} (%$s)};
436 } elsif (ref $s && ref $s ne 'CODE' && ref $s ne 'Regexp') {
437 $s = $s->clone;
438 }
439 $s;
440 }
441
442 =head1 ENCODER and DECODER
443
444 =over 4
445
446 =item Message::Util::encode_header_string ($yourself, $string, [%options])
447
448 =cut
449
450 sub encode_header_string ($$;%) {
451 require Message::MIME::Charset;
452 my $yourself = shift; my $s = shift; my %o = @_;
453 $o{charset} ||= $yourself->{option}->{encoding_after_encode};
454 $o{charset} = Message::MIME::Charset::name_normalize ($o{charset});
455 $o{current_charset} = Message::MIME::Charset::name_normalize ($o{current_charset});
456 my ($t,%r) = Message::MIME::Charset::encode ($o{charset}, $s);
457 my @o = (language => $o{language});
458 if ($r{success}) { ## Convertion succeed
459 $o{charset} = $r{charset} if $r{charset};
460 $o{charset} = '' if $o{charset} =~ /\*/;
461 (value => $t, @o, Message::MIME::Charset::name_minimumize ($o{charset}, $t));
462 } else { ## Fault
463 $o{current_charset} = '' if $o{current_charset} =~ /\*/;
464 (value => $t, failed => 1, @o,
465 Message::MIME::Charset::name_minimumize ($o{current_charset}, $t));
466 }
467 }
468
469 sub decode_header_string ($$;%) {
470 require Message::MIME::Charset;
471 require Message::MIME::EncodedWord;
472 my $yourself = shift; my $s = shift; my %o = @_;
473 $o{charset} ||= $yourself->{option}->{encoding_before_decode};
474 $o{charset} = Message::MIME::Charset::name_normalize ($o{charset});
475 my ($t, %r); ## decoded-text, success?
476 if ($o{type} !~ /quoted|encoded|domain|word/) {
477 my (@s, @r);
478 $s =~ s{(([\x09\x20]*(?:\x5C[\x00-\xFF]
479 |[\x00-\x08\x0A-\x1F\x21-\x5B\x5D-\xFF])+|[\x09\x20]+$))}
480 { push @s, $1; '' }goesx;
481 for my $i (0..$#s) {
482 if ($s[$i] =~ /^($REG{FWS})$REG{M_encoded_word}$/) {
483 my ($t, $w) = ('', $1);
484 ($t, $r[$i]) = (Message::MIME::EncodedWord::_decode_eword ($2, $3, $4, $5));
485 if ($r[$i]) {
486 $s[$i] = $t;
487 if ($i == 0 || $r[$i-1] == 0) {
488 $s[$i] = $w.$s[$i];
489 }
490 }
491 } else {
492 my ($u, %q) = ($s[$i]);
493 $u =~ s/\x5C([\x00-\xFF])/$1/g unless $o{type} =~ /text/;
494 ($u,%q) = Message::MIME::Charset::decode ($o{charset}, $u);
495 $s[$i] = $u if $q{success};
496 }
497 }
498 $t = join '', @s; $r{success} = 1;
499 } else {
500 ($t,%r) = Message::MIME::Charset::decode ($o{charset}, $s);
501 }
502 $r{success} ? (value => $t, success => 1, language => $o{language}): ## suceess
503 (value => $s, language => $o{language}, success => 0,
504 charset => ($o{charset}=~/\*/?'':$o{charset})); ## fault
505 }
506
507 sub encode_body_string {
508 require Message::MIME::Charset;
509 my $yourself = shift; my $s = shift; my %o = @_;
510 $o{charset} ||= $yourself->{option}->{encoding_after_encode};
511 $o{charset} = Message::MIME::Charset::name_normalize ($o{charset});
512 $o{current_charset} = Message::MIME::Charset::name_normalize ($o{current_charset});
513 my ($t,%r) = Message::MIME::Charset::encode ($o{charset}, $s);
514 my @o = ();
515 if ($r{success}) { ## Convertion successed
516 $o{charset} = $r{charset} if $r{charset};
517 $o{charset} = '' if $o{charset} =~ /\*/;
518 (value => $t, @o, Message::MIME::Charset::name_minimumize ($o{charset}, $t));
519 } else { ## Fault
520 $o{current_charset} = '' if $o{current_charset} =~ /\*/;
521 (value => $t, failed => 1, @o,
522 Message::MIME::Charset::name_minimumize ($o{current_charset}, $t));
523 }
524 }
525
526 sub decode_body_string {
527 require Message::MIME::Charset;
528 my $yourself = shift; my $s = shift; my %o = @_;
529 $o{charset} ||= $yourself->{option}->{encoding_before_decode};
530 $o{charset} = Message::MIME::Charset::name_normalize ($o{charset});
531 my ($t,%r) = Message::MIME::Charset::decode ($o{charset}, $s);
532 $r{success} ? (value => $t, success => 1): ## suceess
533 (value => $s, success => 0,
534 charset => ($o{charset}=~/\*/?'':$o{charset})); ## fault
535 }
536
537 =item Message::Util::decode_quoted_string ($yourself, $quoted-string)
538
539 Returns unquoted and decoded a given C<quoted-string>
540 or a string containing one or multiple C<quoted-string>s.
541
542 =cut
543
544 sub decode_quoted_string ($$;%) {
545 my $yourself = shift;
546 my $quoted_string = shift;
547 my %option = @_;
548 $option{type} ||= 'phrase';
549 $quoted_string =~ s{$REG{M_quoted_string}|([^\x22]+)}{
550 my ($qtext, $t) = ($1, $2);
551 if (length $t) {
552 $t =~ s/$REG{WSP}+/\x20/g;
553 my %s = &{$yourself->{option}->{hook_decode_string}}
554 ($yourself, $t, type => $option{type},
555 charset => $option{charset});
556 $s{value};
557 } else {
558 $qtext =~ s/\x5C([\x00-\xFF])/$1/g;
559 my %s = &{$yourself->{option}->{hook_decode_string}}
560 ($yourself, $qtext, type => $option{type}.'/quoted',
561 charset => $option{charset});
562 $s{value};
563 }
564 }goex;
565 $quoted_string;
566 }
567
568 =item Message::Util::encode_qcontent ($yourself, $string)
569
570 Encodes (by C<hook_encode_string> of C<$yourself-E<gt>{option}>)
571 C<qcontent> (content of C<quoted-string>) within C<$string>.
572
573 =cut
574
575 sub encode_qcontent ($$) {
576 my $yourself = shift;
577 my $quoted_strings = shift;
578 $quoted_strings =~ s{$REG{M_quoted_string}}{
579 my ($qtext) = ($1);
580 $qtext =~ s/\x5C([\x00-\xFF])/$1/g;
581 my %s = &{$yourself->{option}->{hook_encode_string}} ($yourself, $qtext,
582 type => 'phrase/quoted');
583 $s{value} =~ s/([\x0D\x22\x5C])([\x20-\xFF])?/"\x5C$1".($2?"\x5C$2":'')/ges;
584 '"'.$s{value}.'"';
585 }goex;
586 $quoted_strings;
587 }
588
589 =item Message::Util::decode_qcontent ($yourself, $string)
590
591 Decodes (by C<hook_decode_string> of C<$yourself-E<gt>{option}>)
592 C<qcontent> (content of C<quoted-string>) within C<$string>.
593
594 =cut
595
596 sub decode_qcontent ($$) {
597 my $yourself = shift;
598 my $quoted_string = shift;
599 $quoted_string =~ s{$REG{M_quoted_string}}{
600 my ($qtext) = ($1);
601 $qtext =~ s/\x5C([\x00-\xFF])/$1/g;
602 my %s = &{$yourself->{option}->{hook_decode_string}} ($yourself, $qtext,
603 type => 'phrase/quoted');
604 $s{value} =~ s/([\x22\x5C])([\x20-\xFF])?/"\x5C$1".($2?"\x5C$2":'')/ge;
605 '"'.$s{value}.'"';
606 }goex;
607 $quoted_string;
608 }
609
610 =item @comments = Message::Util::comment_to_array ($youtself, $comments)
611
612 Replaces C<comment>s to C< > (a SP), decodes C<ccontent>s,
613 and returns them as array.
614
615 =cut
616
617 sub comment_to_array ($$) {
618 use re 'eval';
619 my $yourself = shift;
620 my $body = shift;
621 my @r = ();
622 $body =~ s{($REG{quoted_string}|$REG{domain_literal})|$REG{M_comment}}{
623 my ($o, $c) = ($1, $2);
624 if ($o) {$o}
625 else {
626 require Message::MIME::EncodedWord;
627 push @r, Message::MIME::EncodedWord::decode_ccontent ($yourself, $c);
628 ' ';
629 }
630 }gex;
631 @r;
632 }
633
634 sub delete_comment_to_array ($$;%) {
635 use re 'eval';
636 my $yourself = shift;
637 my $body = shift;
638 my %option = @_;
639 my $areg = ''; $areg = '|'.$REG{angle_quoted} if $option{-use_angle_quoted};
640 my @r = ();
641 $body =~ s{($REG{quoted_string}|$REG{domain_literal}$areg)|$REG{M_comment}}{
642 my ($o, $c) = ($1, $2);
643 if ($o) {$o}
644 else {
645 require Message::MIME::EncodedWord;
646 push @r, Message::MIME::EncodedWord::decode_ccontent ($yourself, $c);
647 ' ';
648 }
649 }gex;
650 ($body, @r);
651 }
652
653 =item Message::Util::encode_ccontent ($yourself, $ccontent)
654
655 Encodes C<ccontent> (content of C<comment>).
656
657 =cut
658
659 sub encode_ccontent ($$) {
660 my $yourself = shift;
661 my $ccontent = shift;
662 my %f = &{$yourself->{option}->{hook_encode_string}} ($yourself,
663 $ccontent, type => 'ccontent');
664 $f{value} =~ s/([\x28\x29\x5C]|\x3D\x3F)([\x21-\x7E])?/"\x5C$1".(defined $2?"\x5C$2":'')/ge;
665 $f{value};
666 }
667
668 =item Message::Util::decode_ccontent ($yourself, $ccontent)
669
670 Decodes C<ccontent> (content of C<comment>).
671
672 =cut
673
674 sub decode_ccontent ($$) {
675 require Message::MIME::EncodedWord;
676 Message::MIME::EncodedWord::decode_ccontent ($_[0], $_[1]);
677 }
678
679 ## obsoleted
680 sub sprintxf ($;\%) {
681 require Message::Util::Formatter;
682 Message::Util::Formatter->new->replace (@_);
683 }
684
685 sub decide_newline ($) {
686 my $s = shift;
687 my $nl = "\x0D\x0A";
688 my $crlf = $s =~ s/\x0D\x0A/\x0D\x0A/gs;
689 my $lfcr = $s =~ s/\x0A\x0D/\x0A\x0D/gs;
690 my $cr = $s =~ s/\x0D(?!\x0A)/\x0D/gs;
691 my $lf = $s =~ s/(?<!\x0D)\x0A/\x0A/gs;
692 if ($crlf >= $cr && $crlf >= $lf && $crlf >= $lfcr ) { $nl = "\x0D\x0A" }
693 elsif ($lfcr >= $cr && $lfcr >= $lf) { $nl = "\x0A\x0D" }
694 elsif ($cr >= $lf) { $nl = "\x0D" }
695 else { $nl = "\x0A" }
696 $nl;
697 }
698
699 =item $fqdn = Message::Util::get_host_fqdn
700
701 Returns FQDN of THIS host. If it is unable to get the FQDN,
702 returns undef.
703
704 =over 3
705
706 =item $Message::Util::OPTION{use_Net_Domain} = 1/0
707
708 Whether using Net::Domain module to get FQDN or not.
709
710 =item $Message::Util::OPTION{use_Sys_Hostname} = 1/0
711
712 Whether using Sys::Hostnamen module to get FQDN or not.
713
714 =item $Message::Util::OPTION{use_Sys_Hostname_Long} = 1/0
715
716 Whether using Sys::Hostnamen::Long module to get FQDN or not.
717
718 =back
719
720 Note that the value returned by Sys::Hostname::hostname
721 usually does not match with the FQDN. This module is prepared
722 as the last way to get. If you want not to get non-FQDN,
723 set 0 to use_Sys_Hostname. Sys::Hostname is bundled with
724 Perl. This is why its default is 1.
725
726 =cut
727
728 $OPTION{use_Net_Domain} = 1;
729 $OPTION{use_Sys_Hostname} = 1;
730 $OPTION{use_Sys_Hostname_Long} = 1;
731 $OPTION{use_cache_host_fqdn} = 1;
732 $OPTION{__cache_host_fqdn} = undef;
733
734 sub get_host_fqdn () {
735 my $f = undef;
736 return $OPTION{__cache_host_fqdn}
737 if $OPTION{use_cache_host_fqdn} && $OPTION{__cache_host_fqdn};
738 if ($OPTION{use_Net_Domain}) {
739 eval q{require Net::Domain;
740 $f = &Net::Domain::hostfqdn;
741 } or Carp::carp ("get_host_fqdn: get by Net::Domain: $@");
742 if ($f) {
743 $OPTION{__cache_host_fqdn} = $f;
744 return $f;
745 }
746 }
747 if ($OPTION{use_Sys_Hostname_Long}) {
748 eval q{require Sys::Hostname::Long;
749 $f = &Sys::Hostname::Long::hostname_long;
750 } or Carp::carp ("get_host_fqdn: get by Sys::Hostname::Long: $@");
751 if ($f) {
752 $OPTION{__cache_host_fqdn} = $f;
753 return $f;
754 }
755 }
756 if ($OPTION{use_Sys_Hostname}) {
757 eval q{require Sys::Hostname;
758 $f = &Sys::Hostname::hostname;
759 } or Carp::carp ("get_host_fqdn: get by Sys::Hostname: $@");
760 if ($f) {
761 $OPTION{__cache_host_fqdn} = $f;
762 return $f;
763 }
764 }
765 undef;
766 }
767
768 sub is_utf8 ($) {
769 my $s = shift;
770 return Encode::is_utf8 ($s) if $Encode::VERSION;
771 0;
772 }
773
774 sub enentity_html ($) {
775 my $s = shift;
776 $s =~ s/&/&amp;/;
777 $s =~ s/</&lt;/;
778 $s =~ s/>/&gt;/;
779 $s =~ s/"/&quot;/;
780 $s;
781 }
782
783 sub deentity_html ($) {
784 my $s = shift;
785 $s =~ s/&lt;/</;
786 $s =~ s/&gt;/>/;
787 $s =~ s/&quot;/"/;
788 $s =~ s/&amp;/&/;
789 $s;
790 }
791
792 package Message::Util::Wide;
793 use vars qw(%REG);
794
795 $REG{M_quoted_string} = qr/\x22((?:\x5C.|[^\x0D\x22\x5C])*)\x22/;
796
797 sub unquote_if_quoted_string ($) {
798 my $quoted_string = shift; my $isq = 0;
799 $quoted_string =~ s{^$REG{M_quoted_string}$}{
800 my $qtext = $1;
801 $qtext =~ s/\x5C(.)/$1/gs;
802 $isq = 1;
803 $qtext;
804 }esx;
805 wantarray? ($quoted_string, $isq): $quoted_string;
806 }
807
808 =head1 LICENSE
809
810 Copyright 2002 Wakaba <w@suika.fam.cx>.
811
812 This program is free software; you can redistribute it and/or modify
813 it under the terms of the GNU General Public License as published by
814 the Free Software Foundation; either version 2 of the License, or
815 (at your option) any later version.
816
817 This program is distributed in the hope that it will be useful,
818 but WITHOUT ANY WARRANTY; without even the implied warranty of
819 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
820 GNU General Public License for more details.
821
822 You should have received a copy of the GNU General Public License
823 along with this program; see the file COPYING. If not, write to
824 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
825 Boston, MA 02111-1307, USA.
826
827 =cut
828
829 1;
830 # $Date: 2002/11/13 10:59:11 $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24