/[suikacvs]/messaging/manakai/lib/Message/Field/Token.pm
Suika

Contents of /messaging/manakai/lib/Message/Field/Token.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Sun Dec 29 03:04:53 2002 UTC (21 years, 11 months ago) by wakaba
Branch: MAIN
CVS Tags: before-dis2-200411, manakai-release-0-3-2, manakai-release-0-3-1, manakai-release-0-4-0, manakai-200612, HEAD
Branch point for: branch-suikawiki-1, experimental-xml-parser-200401
en_quoted_string, en_phrase: New

1 wakaba 1.1
2     =head1 NAME
3    
4     Message::Field::Token --- Message-pm: Lexical tokens used in Internet message formats
5    
6     =head1 DESCRIPTION
7    
8     This module provides functions for handling lexical tokens of Internet message formats,
9     such as quoted-string, comment, etc.
10    
11     This module is part of Message::* Perl Modules.
12    
13     =cut
14    
15     package Message::Field::Token;
16     use strict;
17     our $VERSION=do{my @r=(q$Revision: 1.12 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
18    
19     =head1 $phrase = Message::Field::Token::en_phrase ($string, %option)
20    
21     Return given string (in internal code) as a phrase (= atom / quoted-string / encoded-word).
22    
23     Options:
24    
25     =over 4
26    
27     =item -charset => charset (default: 'us-ascii')
28    
29     Charset name (in lower cases) to be used to encode the output string.
30    
31     =item -ebcdic_safe
32    
33     =item -encode_encoded_word_like
34    
35     =item -quoted_pair
36    
37     =item -token_maxlength
38    
39     =item -unsafe_char => 1*CHAR / '' (default)
40    
41     See options of C<en_quoted_string>.
42    
43     =item -source_charset => charset (default = *internal)
44    
45     Charset name (in lower case) of given string.
46    
47     =back
48    
49     =cut
50    
51     sub en_phrase ($;%) {
52     my $string = shift;
53     my %option = @_;
54     my @string = split /(?<=[^\x09\x20])(?=[\x09\x20])/, $string;
55     my @str = ([0, '']);
56     for my $i (0..$#string) {
57     if (Message::MIME::Charset::is_representable_in ($option{-charset}, $string[$i], \%option) && $string[$i] !~ /[\x00\x0A\x0D]/) {
58     if ($str[$#str]->[0]) { ## Preceding token should be encoded
59     push @str, [0, $string[$i]];
60     } else { ## Preceding token need not be encoded
61     $str[$#str]->[1] .= $string[$i];
62     }
63     } else { ## Should be encoded
64     if ($str[$#str]->[0]) { ## Preceding token should be encoded
65     $str[$#str]->[1] .= $string[$i];
66     } else { ## Preceding token need not be encoded
67     push @str, [1, $string[$i]];
68     }
69     }
70     }
71     my $phrase = '';
72     for my $i (0..$#str) {
73     my $ws = ''; $ws = $1 if $i != 0 && $str[$i]->[1] =~ s/^([\x09\x20])//;
74     if ($str[$i]->[0]) {
75     $str[$i]->[1] = Message::MIME::EncodedWord::encode ($str[$i]->[1], %option, -context => 'phrase', -encode_char => '\x00-\x7F');
76     } else {
77     $str[$i]->[1] = _to_quoted_string ($str[$i]->[1], \%option);
78     }
79     $phrase .= $ws . $str[$i]->[1];
80     }
81     $phrase;
82     }
83    
84     =head1 $quoted_string = Message::Field::Token::en_quoted_string ($string, %option)
85    
86     Quote string as a quoted-string if necessary.
87    
88     Options:
89    
90     =over 4
91    
92     =item -charset => charset (default: 'us-ascii')
93    
94     Charset name (in lower cases) to be used to encode the output string.
95    
96     =item -context => 'quoted_string' (default) / 'atom' / 'token' / 'http_token' / 'attr_char' / 'http_attr_char'
97    
98     Context in which given string is embeded. This value is used to check if
99     string should be quoted as a quoted-string. 'quoted_string', the default value,
100     makes string ALWAYS quoted. 'atom' makes it quoted when string contains
101     one or more characters not included in the atext (see RFC 2822).
102     Likewise, 'token' is for token of MIME, 'http_token' is for token of HTTP,
103     'attr_char' is attribute-char of MIME (RFC 2231) and 'http_attr_char'
104     is 'http_token' AND 'attr_char' (ie. safe for both HTTP and RFC 2231).
105    
106     =item -ebcdic_safe
107    
108     This option is only meaningful when C<-use_quoted_string> is true.
109     See L<Message::MIME::EncodedWord>::decode.
110    
111     =item -encode_encoded_word_like
112    
113     This option is only meaningful when C<-use_quoted_string> is true.
114     See L<Message::MIME::EncodedWord>::decode.
115    
116     =item -quoted_pair => 1*CHAR / qr|(:: pattern ::)| (default: qr/([\x0D\\"]|(?<==)\?)/)
117    
118     A character list or a Regexp pattern for characters to be quoted as
119     the quoted-pairs. When '', no character is quoted.
120     Quoting is performed to characters NOT encoded as encoded-words.
121    
122     =item -source_charset => charset (default = *internal)
123    
124     Charset name (in lower case) of given string.
125    
126     =item -token_maxlength
127    
128     This option is only meaningful when C<-use_quoted_string> is true.
129     See L<Message::MIME::EncodedWord>::decode.
130    
131     =item -unsafe_char => 1*CHAR / '' (default)
132    
133     The list of characters that when one of them is included in the string
134     it should be quoted, in addition to special characters determined by C<-context> option.
135    
136     =item -use_encoded_word => 0 (default)/1
137    
138     If true, characters in qcontent which unable to be represented in C<-charset> charset
139     are encoded in encoded-words.
140    
141     =back
142    
143     =cut
144    
145     sub en_quoted_string ($;%) {
146     my ($string, %option) = @_;
147     if ($option{-use_encoded_word}) {
148     require Message::MIME::EncodedWord;
149     $option{-quoted_pair} ||= qr/([\x0D\\"]|(?<==)\?)/;
150     $string = Message::MIME::EncodedWord::encode ($string, %option, -context => 'quoted_string', -preserve_wsp => 0);
151     $option{-quoted_pair} = qr/(?!)/;
152     }
153     $string = _to_quoted_string ($string, \%option);
154     my ($s, %r) = Message::MIME::Charset::encode ($option{-charset}, $string);
155     $r{success} ? $s : $string;
156     }
157    
158     sub _to_quoted_string ($$) {
159     my ($string, $option) = @_;
160     ## -- What characters should be quoted?
161     my $achar = {
162     atom => qq(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#\$%&'*+/=?^_`{|}~-),
163     token => qq(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#\$%&'*+^_`{|}~.-),
164     http_token => qq(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#\$%&'*+^_`|~.-),
165     attr_char => qq(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#\$&+^_`{|}~.-),
166     http_attr_char => qq(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!#\$&+^_`|~.-),
167     quoted_string => '',
168     }->{$option->{-context} || 'quoted_string'};
169     my $echar = $option->{-unsafe_char}; $echar =~ s/([\/\\])/\\$1/g;
170     eval qq{\$achar =~ tr/$echar//d};
171     $achar =~ s/([\\\[\]])/\\$1/g;
172     $option->{-quoted_pair} ||= qr/([\x0D"\\]|(?<==)\?)/;
173     if (!ref $option->{-quoted_pair}) {
174     $option->{-quoted_pair} = quotemeta $option->{-quoted_pair};
175     $option->{-quoted_pair} = qr/([$option->{-quoted_pair}])/;
176     }
177     ## -- Quote
178     if (length ($achar) == 0 || $string =~ /[^$achar]/) {
179     $string =~ s/$option->{-quoted_pair}/\\$1/g if $option->{-quoted_pair};
180     $string = '"' . $string . '"';
181     }
182     $string;
183     }
184    
185     =head1 $string = Message::Field::Token::de_phrase ($phrase, %option)
186    
187     Parses given phrase or a quoted-string and return it as a string (in internal code).
188    
189     Options:
190    
191     =over 4
192    
193     =item -charset => charset (default: 'us-ascii')
194    
195     Charset name of input. The charset MUST be a superset of ASCII.
196    
197     =item -use_encoded_word => 0/1 (default)
198    
199     Decodes encoded-words (out of quoted-strings).
200    
201     =item -use_quoted_encoded_word => 0 (default)/1
202    
203     Decodes encoded-words in quoted-strings.
204    
205     =back
206    
207     =cut
208    
209     sub de_phrase ($;%) {
210     my ($string, %option) = @_;
211     $option{-use_encoded_word} = 1 unless defined $option{-use_encoded_word};
212     require Message::MIME::EncodedWord if $option{-use_encoded_word};
213     require Message::MIME::Charset;
214     $string =~ s("((?:\\.|[^"])*)"|([^"]+)){ ## Note: broken <"> does not match with this.
215     my ($qcontent, $atom) = ($1, $2);
216     if (defined $qcontent) {
217     if ($option{-use_quoted_encoded_word}) {
218     $qcontent = Message::MIME::EncodedWord::decode($qcontent, -process_non_encoded_word => sub {
219     $_[0] =~ s/\\(.)/$1/g;
220     my ($s, %s) = Message::MIME::Charset::decode ($option{-charset} || 'us-ascii', $_[0]);
221     $s{success} ? ($s, 0) : ($_[0], 0);
222     });
223     } else {
224     $qcontent =~ s/\\(.)/$1/g;
225     my ($s, %s) = Message::MIME::Charset::decode ($option{-charset} || 'us-ascii', $qcontent);
226     $qcontent = $s if $s{success};
227     }
228     $qcontent;
229     } else { ## 1*(atom / encoded-word / FWS)
230     if ($option{-use_encoded_word}) {
231     $atom = Message::MIME::EncodedWord::decode ($atom, -process_non_encoded_word => sub {
232     my ($s, %s) = Message::MIME::Charset::decode ($option{-charset} || 'us-ascii', $_[0]);
233     $s{success} ? ($s, 0) : ($_[0], 0);
234     });
235     }
236     $atom;
237     }
238     }ges;
239     $string;
240     }
241    
242     =head1 $string = Message::Field::Token::de_quoted_string ($quoted_string, %option)
243    
244     An alias to C<de_phrase>.
245    
246     =cut
247    
248     *de_quoted_string = \&de_phrase;
249    
250     =head1 LICENSE
251    
252     Copyright 2002 Wakaba <w@suika.fam.cx>
253    
254     This program is free software; you can redistribute it and/or modify
255     it under the terms of the GNU General Public License as published by
256     the Free Software Foundation; either version 2 of the License, or
257     (at your option) any later version.
258    
259     This program is distributed in the hope that it will be useful,
260     but WITHOUT ANY WARRANTY; without even the implied warranty of
261     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
262     GNU General Public License for more details.
263    
264     You should have received a copy of the GNU General Public License
265     along with this program; see the file COPYING. If not, write to
266     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
267     Boston, MA 02111-1307, USA.
268    
269     =cut
270    
271     1; # $Date: $

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24