/[suikacvs]/messaging/manakai/lib/Message/Field/URI.pm
Suika

Contents of /messaging/manakai/lib/Message/Field/URI.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.2 - (hide annotations) (download)
Mon Apr 1 06:27:32 2002 UTC (22 years, 7 months ago) by wakaba
Branch: MAIN
Changes since 1.1: +17 -2 lines
2002-04-01  wakaba <w@suika.fam.cx>

	* URI.pm (uri): New method.

1 wakaba 1.1
2     =head1 NAME
3    
4     Message::Field::URI Perl module
5    
6     =head1 DESCRIPTION
7    
8     Perl module for URI field body (such as C<List-*:>, C<Content-Location:>).
9    
10     =cut
11    
12     package Message::Field::URI;
13     use strict;
14     require 5.6.0;
15     use re 'eval';
16     use vars qw(%DEFAULT %REG $VERSION);
17 wakaba 1.2 $VERSION=do{my @r=(q$Revision: 1.1 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
18 wakaba 1.1 require Message::Util;
19     require Message::MIME::EncodedWord;
20     use Carp;
21     use overload '""' => sub {shift->stringify};
22    
23     $REG{WSP} = qr/[\x09\x20]/;
24     $REG{FWS} = qr/[\x09\x20]*/;
25    
26     $REG{comment} = qr/\x28(?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x27\x2A-\x5B\x5D-\xFF]|(??{$REG{comment}}))*\x29/;
27     $REG{quoted_string} = qr/\x22(?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\xFF])*\x22/;
28     $REG{uri_literal} = qr/\x3C[\x09\x20\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]*\x3E/;
29     $REG{atext} = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
30     #$REG{atext_dot} = qr/[\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
31     $REG{phrase} = qr/(?:$REG{atext}|$REG{quoted_string})(?:$REG{atext}|$REG{quoted_string}|\.|$REG{FWS})*/;
32     $REG{phrase_c} = qr/(?:$REG{atext}|$REG{quoted_string}|$REG{comment}|\.|$REG{FWS})*/;
33    
34     ## Simple version of URI regex See RFC 2396, RFC 2732, RFC 2324.
35     $REG{escaped} = qr/%[0-9A-Fa-f][0-9A-Fa-f]/;
36     $REG{scheme} = qr/(?:[A-Za-z]|$REG{escaped})(?:[0-9A-Za-z+.-]|$REG{escaped})*/;
37     ## RFC 2324 defines escaped UTF-8 schemes:-)
38     $REG{fragment} = qr/\x23(?:[\x21\x24\x26-\x3B\x3D\x3F-\x5A\x5F\x61-\x7A\x7E]|$REG{escaped})*/;
39     $REG{S_uri_body} = qr/(?:[\x21\x24\x26-\x3B\x3D\x3F-\x5A\x5B\x5D\x5F\x61-\x7A\x7E]|$REG{escaped})+/;
40     $REG{S_absoluteURI} = qr/$REG{scheme}:$REG{S_uri_body}/;
41     $REG{S_relativeURI} = qr/$REG{S_uri_body}/;
42     $REG{S_URI_reference} = qr/(?:$REG{S_absoluteURI}|$REG{S_relativeURI})(?:$REG{fragment})?|(?:$REG{fragment})/;
43     ## RFC 2396 allows <> (empty URI), but this regex doesn't.
44    
45     $REG{M_comment} = qr/\x28((?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x27\x2A-\x5B\x5D-\xFF]|(??{$REG{comment}}))*)\x29/;
46     $REG{M_quoted_string} = qr/\x22((?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\xFF])*)\x22/;
47     $REG{M_uri_literal} = qr/\x3C([\x09\x20\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]*)\x3E/;
48     $REG{M_S_phrase_uri} = qr/($REG{phrase_c})$REG{M_uri_literal}/;
49     $REG{uri_phrase} = qr/[\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]+(?:$REG{WSP}+[\x21\x23-\x27\x29-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E][\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]*)*/;
50     #$REG{M_phrase_uri} = qr/($REG{phrase_c})<$REG{FWS}($REG{S_URI_reference})$REG{FWS}>/;
51     #$REG{M_phrase_uri_a} = qr/($REG{phrase_c})<$REG{FWS}($REG{S_absoluteURI})$REG{FWS}>/;
52     #$REG{M_phrase_uri_af} = qr/($REG{phrase_c})<$REG{FWS}($REG{S_absoluteURI}(?:$REG{fragment})?)$REG{FWS}>/;
53     #$REG{M_uri_content} = qr/$REG{M_S_phrase_uri}((?:$REG{FWS}$REG{comment})*)|($REG{S_URI_reference})($REG{WSP}(?:$REG{FWS}$REG{comment})*)?/;
54     $REG{M_uri_content} = qr/$REG{M_S_phrase_uri}((?:$REG{FWS}$REG{comment})*)|($REG{uri_phrase})($REG{WSP}(?:$REG{FWS}$REG{comment})*)?/;
55    
56     #$REG{NON_atext} = qr/[^\x21\x23-\x27\x2A\x2B\x2D\x2F\x30-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
57     #$REG{NON_atext_dot} = qr/[^\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
58     $REG{NON_atext_dot_wsp} = qr/[^\x09\x20\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
59    
60    
61     %DEFAULT = (
62     allow_absolute => 1, ## TODO: not implemented
63     allow_fragment => 1, ## TODO: not implemented
64     allow_relative => 1, ## TODO: not implemented
65     encoding_after_encode => '*default',
66     encoding_before_decode => '*default',
67     field_name => 'x-uri',
68     format => 'http', ## http, mhtml (= rfc2822, mime, news), cgi
69     hook_encode_string => #sub {shift; (value => shift, @_)},
70     \&Message::Util::encode_header_string,
71     hook_decode_string => #sub {shift; (value => shift, @_)},
72     \&Message::Util::decode_header_string,
73     output_angle_bracket => 1,
74     output_comment => 1,
75     output_display_name => 1,
76     );
77    
78     ## Initialization for both C<new> and C<parse> methods.
79     sub _initialize ($;%) {
80     my $self = shift;
81     my $fname = lc $self->{option}->{field_name};
82     my $format = $self->{option}->{format};
83     $format = 'mhtml' if $format eq 'rfc2822' || $format eq 'news'
84     || $format eq 'usefor' || $format eq 'mime';
85     if ($fname =~ /^list-/) {
86     $self->{option}->{output_display_name} = -1;
87     } elsif ($fname eq 'content-location') {
88     $self->{option}->{output_angle_bracket} = -1;
89     $self->{option}->{output_display_name} = -1;
90     ## Comments should not be used. Allowing this makes it difficult
91     ## to parse URI contains of "(" and ")".
92     #if ($format ne 'mhtml') { ## http
93     $self->{option}->{output_comment} = -1;
94     #}
95     $self->{option}->{allow_fragment} = -1;
96     } elsif ($fname eq 'link') {
97     $self->{option}->{output_display_name} = -1;
98     $self->{option}->{output_comment} = -1;
99     $self->{option}->{allow_fragment} = -1;
100     } elsif ($fname eq 'location') {
101     $self->{option}->{output_angle_bracket} = -1;
102     $self->{option}->{output_display_name} = -1;
103     $self->{option}->{output_comment} = -1;
104     if ($format ne 'cgi') { ## http
105     $self->{option}->{allow_relative} = -1;
106     $self->{option}->{allow_fragment} = -1;
107     }
108     } elsif ($fname eq 'referer') {
109     $self->{option}->{output_angle_bracket} = -1;
110     $self->{option}->{output_display_name} = -1;
111     $self->{option}->{output_comment} = -1;
112     $self->{option}->{allow_fragment} = -1;
113     } elsif ($fname eq 'uri') {
114     $self->{option}->{output_display_name} = -1;
115     $self->{option}->{output_comment} = -1;
116     } elsif ($fname eq 'content-base') {
117     $self->{option}->{output_angle_bracket} = -1;
118     $self->{option}->{output_display_name} = -1;
119     $self->{option}->{output_comment} = -1;
120     $self->{option}->{allow_relative} = -1;
121     $self->{option}->{allow_fragment} = -1;
122     }
123     }
124    
125     =head2 Message::Field::URI->new ([%option])
126    
127     Returns new Message::Field::URI. Some options can be given as hash.
128    
129     =cut
130    
131     sub new ($;%) {
132     my $class = shift;
133     my $self = bless {comment => [], option => {@_}}, $class;
134     $self->_initialize_new ();
135     for (keys %DEFAULT) {$self->{option}->{$_} ||= $DEFAULT{$_}}
136     $self;
137     }
138    
139     ## Initialization for new () method.
140     sub _initialize_new ($;%) {
141     my $self = shift;
142     $self->_initialize ();
143     }
144    
145     =head2 Message::Field::URI->parse ($field-body, [%option])
146    
147     Parses URI-type C<field-body> and returns new instance.
148     Some options can be given as hash.
149    
150     =cut
151    
152     sub parse ($$;%) {
153     my $class = shift;
154     my $body = shift;
155     my $self = bless {comment => [], option => {@_}}, $class;
156     $self->_initialize_parse ();
157     for (keys %DEFAULT) {$self->{option}->{$_} ||= $DEFAULT{$_}}
158     if ($body =~ /^$REG{M_uri_content}$/) {
159     my ($uri, $phrase_c, $comments) = ($2||$4, $1, $3||$5);
160     $uri =~ tr/\x09\x20//d;
161     $self->{uri} = $uri;
162     $phrase_c =~ s{$REG{M_comment}}{
163     my $comment = $self->_decode_ccontent ($1);
164     push @{$self->{comment}}, $comment if length $comment;
165     '';
166     }goex;
167     $phrase_c =~ s/^$REG{WSP}+//; $phrase_c =~ s/$REG{WSP}+$//;
168     $self->{display_name} = $self->_decode_quoted_string ($phrase_c);
169     $comments =~ s{$REG{M_comment}}{
170     my $comment = $self->_decode_ccontent ($1);
171     push @{$self->{comment}}, $comment if length $comment;
172     }goex;
173     }
174     $self;
175     }
176    
177     ## Initialization for parse () method.
178     sub _initialize_parse ($;%) {
179     my $self = shift;
180     $self->_initialize ();
181     }
182    
183 wakaba 1.2 =head2 $self->uri ([$newURI])
184    
185     Set/gets C<URI>. See also L<NOTE>.
186    
187     =cut
188    
189     sub uri ($;$%) {
190     my $self = shift;
191     my $dname = shift;
192     if (defined $dname) {
193     $self->{uri} = $dname;
194     }
195     $self->{uri};
196     }
197    
198 wakaba 1.1 =head2 $self->display_name ([$newname])
199    
200     Set/gets C<display-name>. Display name is prepend
201     to C<URI> as C<phrase> (C<atom>s or a C<quoted-string>).
202    
203     Note that C<display-name> is outputted only if the class
204     option C<outout_display_name> is C<1>. If its value
205     is C<-1> but C<output_comment> is C<1>, display name
206     value is outputted as C<comment>. Neither of these
207     options takes C<1> value, display name is outputted
208     nowhere.
209    
210     =cut
211    
212     sub display_name ($;$%) {
213     my $self = shift;
214     my $dname = shift;
215     if (defined $dname) {
216     $self->{display_name} = $dname;
217     }
218     $self->{display_name};
219     }
220    
221     =head2 $self->comment_add ($comment, [%option]
222    
223     Adds a C<comment>. Comments are outputed only when
224     the class option (not an option of this method!)
225     C<output_comment> is enabled (value C<1>).
226    
227     On this method, only one option, C<prepend> is available.
228     With this option, additional comment is prepend
229     to current comments. (Default value is C<-1>, append.)
230    
231     =cut
232    
233     sub comment_add ($$;%) {
234     my $self = shift;
235     my ($value, %option) = (shift, @_);
236     if ($option{prepend}) {
237     unshift @{$self->{comment}}, $value;
238     } else {
239     push @{$self->{comment}}, $value;
240     }
241     $self;
242     }
243    
244     =head2 $self->comment ()
245    
246     Returns array reference of comments. You can add/remove/change
247     array values.
248    
249     =cut
250    
251     sub comment ($) {
252     my $self = shift;
253     $self->_comment_delete_empty->{comment};
254     }
255    
256     sub _comment_delete_empty ($) {
257     my $self = shift;
258     $self->{comment} = [grep {length} @{$self->{comment}}];
259     $self;
260     }
261    
262    
263     =head2 $self->stringify ([%option])
264    
265     Returns Message::Field::URI as a string.
266    
267     =cut
268    
269     sub stringify ($;%) {
270     my $self = shift;
271     my %option = @_;
272     for (qw (allow_relative output_angle_bracket output_comment output_display_name)) {
273     $option{$_} ||= $self->{option}->{$_};
274     }
275     if ($option{allow_relative}<0 && length $self->{uri} == 0) {
276     return '';
277     }
278     my $r = '';
279     if (length $self->{display_name}) {
280     if ($option{output_display_name}>0) {
281     $r = $self->_quote_unsafe_string ($self->{display_name});
282     $r .= ' ';
283     } elsif ($option{output_comment}>0) {
284     my %f = &{$self->{option}->{hook_encode_string}}
285     ($self, $self->{display_name}, type => 'ccontent');
286     $f{value} =~ s/([\x28\x29\x5C])([\x21-\x7E])?/
287     "\x5C$1".(defined $2?"\x5C$2":'')/ge;
288     $r .= '('.$f{value}.') ';
289     }
290     }
291     if ($option{output_angle_bracket}>0) {
292     $r .= '<'.$self->{uri}.'>';
293     } else {
294     $r .= $self->{uri};
295     }
296     if ($option{output_comment}>0) {
297     $self->_comment_delete_empty ();
298     for (@{$self->{comment}}) {
299     my %f = &{$self->{option}->{hook_encode_string}}
300     ($self, $_, type => 'ccontent');
301     $f{value} =~ s/([\x28\x29\x5C])([\x21-\x7E])?/
302     "\x5C$1".(defined $2?"\x5C$2":'')/ge;
303     $r .= ' ('.$f{value}.')' if length $f{value};
304     }
305     }
306     $r;
307     }
308     sub as_string ($;%) {shift->stringify (@_)}
309    
310     =head2 $self->option ($option_name)
311    
312     Returns/set (new) value of the option.
313    
314     =cut
315    
316     sub option ($$;$) {
317     my $self = shift;
318     my ($name, $newval) = @_;
319     if ($newval) {
320     $self->{option}->{$name} = $newval;
321     }
322     $self->{option}->{$name};
323     }
324    
325     sub _quote_unsafe_string ($$;%) {
326     my $self = shift;
327     my $string = shift;
328     my %option = @_;
329     $option{unsafe} ||= 'NON_atext_dot_wsp';
330     if ($string =~ /$REG{$option{unsafe}}/ || $string =~ /$REG{WSP}$REG{WSP}/) {
331     $string =~ s/([\x22\x5C])([\x21-\x7E])?/"\x5C$1".(defined $2?"\x5C$2":'')/ge;
332     $string = '"'.$string.'"';
333     }
334     $string;
335     }
336    
337     sub _unquote_quoted_string ($$) {
338     my $self = shift;
339     my $quoted_string = shift;
340     $quoted_string =~ s{$REG{M_quoted_string}}{
341     my $qtext = $1;
342     $qtext =~ s/\x5C([\x00-\xFF])/$1/g;
343     $qtext;
344     }goex;
345     $quoted_string;
346     }
347    
348     sub _decode_quoted_string ($$) {
349     my $self = shift;
350     my $quoted_string = shift;
351     $quoted_string =~ s{$REG{M_quoted_string}|([^\x22]+)}{
352     my ($qtext,$t) = ($1, $2);
353     if ($t) {
354     $t =~ s/($REG{WSP})+/$1/g;
355     my %s = &{$self->{option}->{hook_decode_string}} ($self, $t,
356     type => 'value');
357     $s{value};
358     } else {
359     $qtext =~ s/\x5C([\x00-\xFF])/$1/g;
360     my %s = &{$self->{option}->{hook_decode_string}} ($self, $qtext,
361     type => 'value/quoted');
362     $s{value};
363     }
364     }goex;
365     $quoted_string;
366     }
367    
368     sub _decode_ccontent ($$) {
369     &Message::MIME::EncodedWord::decode_ccontent (@_[1,0]);
370     }
371    
372     =head1 NOTE
373    
374     Current version of this module does not check whether
375     URI is correct or not. In particullar, implementor
376     should be careful not to output URI that is syntactically
377     valid, but do not match to context. For example,
378     C<Location:> field defined by HTTP/1.1 [RFC2616] doesn't
379     allow relative URIs. (Interestingly, with CGI/1.1,
380     we can use relative URI as value of C<Location> field.
381    
382     There is three options related with URI type.
383     C<allow_absolute>, C<allow_relative>, and C<allow_fragment>.
384     But this options don't work as you hope.
385     These options are only reserved for future implemention.
386    
387     =head1 LICENSE
388    
389     Copyright 2002 wakaba E<lt>w@suika.fam.cxE<gt>.
390    
391     This program is free software; you can redistribute it and/or modify
392     it under the terms of the GNU General Public License as published by
393     the Free Software Foundation; either version 2 of the License, or
394     (at your option) any later version.
395    
396     This program is distributed in the hope that it will be useful,
397     but WITHOUT ANY WARRANTY; without even the implied warranty of
398     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
399     GNU General Public License for more details.
400    
401     You should have received a copy of the GNU General Public License
402     along with this program; see the file COPYING. If not, write to
403     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
404     Boston, MA 02111-1307, USA.
405    
406     =head1 CHANGE
407    
408     See F<ChangeLog>.
409 wakaba 1.2 $Date: 2002/03/31 13:11:55 $
410 wakaba 1.1
411     =cut
412    
413     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24