/[suikacvs]/messaging/manakai/lib/Message/Field/Token.pm
Suika

Contents of /messaging/manakai/lib/Message/Field/Token.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (show annotations) (download)
Sun Dec 29 03:04:53 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: before-dis2-200411, manakai-release-0-3-2, manakai-release-0-3-1, manakai-release-0-4-0, manakai-200612, HEAD
Branch point for: branch-suikawiki-1, experimental-xml-parser-200401
en_quoted_string, en_phrase: New

1
2 =head1 NAME
3
4 Message::Field::Token --- Message-pm: Lexical tokens used in Internet message formats
5
6 =head1 DESCRIPTION
7
8 This module provides functions for handling lexical tokens of Internet message formats,
9 such as quoted-string, comment, etc.
10
11 This module is part of Message::* Perl Modules.
12
13 =cut
14
15 package Message::Field::Token;
16 use strict;
17 our $VERSION=do{my @r=(q$Revision: 1.12 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
18
19 =head1 $phrase = Message::Field::Token::en_phrase ($string, %option)
20
21 Return given string (in internal code) as a phrase (= atom / quoted-string / encoded-word).
22
23 Options:
24
25 =over 4
26
27 =item -charset => charset (default: 'us-ascii')
28
29 Charset name (in lower cases) to be used to encode the output string.
30
31 =item -ebcdic_safe
32
33 =item -encode_encoded_word_like
34
35 =item -quoted_pair
36
37 =item -token_maxlength
38
39 =item -unsafe_char => 1*CHAR / '' (default)
40
41 See options of C<en_quoted_string>.
42
43 =item -source_charset => charset (default = *internal)
44
45 Charset name (in lower case) of given string.
46
47 =back
48
49 =cut
50
51 sub en_phrase ($;%) {
52 my $string = shift;
53 my %option = @_;
54 my @string = split /(?<=[^\x09\x20])(?=[\x09\x20])/, $string;
55 my @str = ([0, '']);
56 for my $i (0..$#string) {
57 if (Message::MIME::Charset::is_representable_in ($option{-charset}, $string[$i], \%option) && $string[$i] !~ /[\x00\x0A\x0D]/) {
58 if ($str[$#str]->[0]) { ## Preceding token should be encoded
59 push @str, [0, $string[$i]];
60 } else { ## Preceding token need not be encoded
61 $str[$#str]->[1] .= $string[$i];
62 }
63 } else { ## Should be encoded
64 if ($str[$#str]->[0]) { ## Preceding token should be encoded
65 $str[$#str]->[1] .= $string[$i];
66 } else { ## Preceding token need not be encoded
67 push @str, [1, $string[$i]];
68 }
69 }
70 }
71 my $phrase = '';
72 for my $i (0..$#str) {
73 my $ws = ''; $ws = $1 if $i != 0 && $str[$i]->[1] =~ s/^([\x09\x20])//;
74 if ($str[$i]->[0]) {
75 $str[$i]->[1] = Message::MIME::EncodedWord::encode ($str[$i]->[1], %option, -context => 'phrase', -encode_char => '\x00-\x7F');
76 } else {
77 $str[$i]->[1] = _to_quoted_string ($str[$i]->[1], \%option);
78 }
79 $phrase .= $ws . $str[$i]->[1];
80 }
81 $phrase;
82 }
83
84 =head1 $quoted_string = Message::Field::Token::en_quoted_string ($string, %option)
85
86 Quote string as a quoted-string if necessary.
87
88 Options:
89
90 =over 4
91
92 =item -charset => charset (default: 'us-ascii')
93
94 Charset name (in lower cases) to be used to encode the output string.
95
96 =item -context => 'quoted_string' (default) / 'atom' / 'token' / 'http_token' / 'attr_char' / 'http_attr_char'
97
98 Context in which given string is embeded. This value is used to check if
99 string should be quoted as a quoted-string. 'quoted_string', the default value,
100 makes string ALWAYS quoted. 'atom' makes it quoted when string contains
101 one or more characters not included in the atext (see RFC 2822).
102 Likewise, 'token' is for token of MIME, 'http_token' is for token of HTTP,
103 'attr_char' is attribute-char of MIME (RFC 2231) and 'http_attr_char'
104 is 'http_token' AND 'attr_char' (ie. safe for both HTTP and RFC 2231).
105
106 =item -ebcdic_safe
107
108 This option is only meaningful when C<-use_quoted_string> is true.
109 See L<Message::MIME::EncodedWord>::decode.
110
111 =item -encode_encoded_word_like
112
113 This option is only meaningful when C<-use_quoted_string> is true.
114 See L<Message::MIME::EncodedWord>::decode.
115
116 =item -quoted_pair => 1*CHAR / qr|(:: pattern ::)| (default: qr/([\x0D\\"]|(?<==)\?)/)
117
118 A character list or a Regexp pattern for characters to be quoted as
119 the quoted-pairs. When '', no character is quoted.
120 Quoting is performed to characters NOT encoded as encoded-words.
121
122 =item -source_charset => charset (default = *internal)
123
124 Charset name (in lower case) of given string.
125
126 =item -token_maxlength
127
128 This option is only meaningful when C<-use_quoted_string> is true.
129 See L<Message::MIME::EncodedWord>::decode.
130
131 =item -unsafe_char => 1*CHAR / '' (default)
132
133 The list of characters that when one of them is included in the string
134 it should be quoted, in addition to special characters determined by C<-context> option.
135
136 =item -use_encoded_word => 0 (default)/1
137
138 If true, characters in qcontent which unable to be represented in C<-charset> charset
139 are encoded in encoded-words.
140
141 =back
142
143 =cut
144
145 sub en_quoted_string ($;%) {
146 my ($string, %option) = @_;
147 if ($option{-use_encoded_word}) {
148 require Message::MIME::EncodedWord;
149 $option{-quoted_pair} ||= qr/([\x0D\\"]|(?<==)\?)/;
150 $string = Message::MIME::EncodedWord::encode ($string, %option, -context => 'quoted_string', -preserve_wsp => 0);
151 $option{-quoted_pair} = qr/(?!)/;
152 }
153 $string = _to_quoted_string ($string, \%option);
154 my ($s, %r) = Message::MIME::Charset::encode ($option{-charset}, $string);
155 $r{success} ? $s : $string;
156 }
157
158 sub _to_quoted_string ($$) {
159 my ($string, $option) = @_;
160 ## -- What characters should be quoted?
161 my $achar = {
162 atom => qq(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#\$%&'*+/=?^_`{|}~-),
163 token => qq(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#\$%&'*+^_`{|}~.-),
164 http_token => qq(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#\$%&'*+^_`|~.-),
165 attr_char => qq(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#\$&+^_`{|}~.-),
166 http_attr_char => qq(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#\$&+^_`|~.-),
167 quoted_string => '',
168 }->{$option->{-context} || 'quoted_string'};
169 my $echar = $option->{-unsafe_char}; $echar =~ s/([\/\\])/\\$1/g;
170 eval qq{\$achar =~ tr/$echar//d};
171 $achar =~ s/([\\\[\]])/\\$1/g;
172 $option->{-quoted_pair} ||= qr/([\x0D"\\]|(?<==)\?)/;
173 if (!ref $option->{-quoted_pair}) {
174 $option->{-quoted_pair} = quotemeta $option->{-quoted_pair};
175 $option->{-quoted_pair} = qr/([$option->{-quoted_pair}])/;
176 }
177 ## -- Quote
178 if (length ($achar) == 0 || $string =~ /[^$achar]/) {
179 $string =~ s/$option->{-quoted_pair}/\\$1/g if $option->{-quoted_pair};
180 $string = '"' . $string . '"';
181 }
182 $string;
183 }
184
185 =head1 $string = Message::Field::Token::de_phrase ($phrase, %option)
186
187 Parses given phrase or a quoted-string and return it as a string (in internal code).
188
189 Options:
190
191 =over 4
192
193 =item -charset => charset (default: 'us-ascii')
194
195 Charset name of input. The charset MUST be a superset of ASCII.
196
197 =item -use_encoded_word => 0/1 (default)
198
199 Decodes encoded-words (out of quoted-strings).
200
201 =item -use_quoted_encoded_word => 0 (default)/1
202
203 Decodes encoded-words in quoted-strings.
204
205 =back
206
207 =cut
208
209 sub de_phrase ($;%) {
210 my ($string, %option) = @_;
211 $option{-use_encoded_word} = 1 unless defined $option{-use_encoded_word};
212 require Message::MIME::EncodedWord if $option{-use_encoded_word};
213 require Message::MIME::Charset;
214 $string =~ s("((?:\\.|[^"])*)"|([^"]+)){ ## Note: broken <"> does not match with this.
215 my ($qcontent, $atom) = ($1, $2);
216 if (defined $qcontent) {
217 if ($option{-use_quoted_encoded_word}) {
218 $qcontent = Message::MIME::EncodedWord::decode($qcontent, -process_non_encoded_word => sub {
219 $_[0] =~ s/\\(.)/$1/g;
220 my ($s, %s) = Message::MIME::Charset::decode ($option{-charset} || 'us-ascii', $_[0]);
221 $s{success} ? ($s, 0) : ($_[0], 0);
222 });
223 } else {
224 $qcontent =~ s/\\(.)/$1/g;
225 my ($s, %s) = Message::MIME::Charset::decode ($option{-charset} || 'us-ascii', $qcontent);
226 $qcontent = $s if $s{success};
227 }
228 $qcontent;
229 } else { ## 1*(atom / encoded-word / FWS)
230 if ($option{-use_encoded_word}) {
231 $atom = Message::MIME::EncodedWord::decode ($atom, -process_non_encoded_word => sub {
232 my ($s, %s) = Message::MIME::Charset::decode ($option{-charset} || 'us-ascii', $_[0]);
233 $s{success} ? ($s, 0) : ($_[0], 0);
234 });
235 }
236 $atom;
237 }
238 }ges;
239 $string;
240 }
241
242 =head1 $string = Message::Field::Token::de_quoted_string ($quoted_string, %option)
243
244 An alias to C<de_phrase>.
245
246 =cut
247
248 *de_quoted_string = \&de_phrase;
249
250 =head1 LICENSE
251
252 Copyright 2002 Wakaba <w@suika.fam.cx>
253
254 This program is free software; you can redistribute it and/or modify
255 it under the terms of the GNU General Public License as published by
256 the Free Software Foundation; either version 2 of the License, or
257 (at your option) any later version.
258
259 This program is distributed in the hope that it will be useful,
260 but WITHOUT ANY WARRANTY; without even the implied warranty of
261 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
262 GNU General Public License for more details.
263
264 You should have received a copy of the GNU General Public License
265 along with this program; see the file COPYING. If not, write to
266 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
267 Boston, MA 02111-1307, USA.
268
269 =cut
270
271 1; # $Date: $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24