/[suikacvs]/messaging/manakai/lib/Message/Field/URI.pm
Suika

Contents of /messaging/manakai/lib/Message/Field/URI.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1 - (hide annotations) (download)
Sun Mar 31 13:11:55 2002 UTC (22 years, 8 months ago) by wakaba
Branch: MAIN
2002-03-31  wakaba <w@suika.fam.cx>

	* URI.pm: New module.

1 wakaba 1.1
2     =head1 NAME
3    
4     Message::Field::URI Perl module
5    
6     =head1 DESCRIPTION
7    
8     Perl module for URI field body (such as C<List-*:>, C<Content-Location:>).
9    
10     =cut
11    
12     package Message::Field::URI;
13     use strict;
14     require 5.6.0;
15     use re 'eval';
16     use vars qw(%DEFAULT %REG $VERSION);
17     $VERSION=do{my @r=(q$Revision: 1.4 $=~/\d+/g);sprintf "%d."."%02d" x $#r,@r};
18     require Message::Util;
19     require Message::MIME::EncodedWord;
20     use Carp;
21     use overload '""' => sub {shift->stringify};
22    
23     $REG{WSP} = qr/[\x09\x20]/;
24     $REG{FWS} = qr/[\x09\x20]*/;
25    
26     $REG{comment} = qr/\x28(?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x27\x2A-\x5B\x5D-\xFF]|(??{$REG{comment}}))*\x29/;
27     $REG{quoted_string} = qr/\x22(?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\xFF])*\x22/;
28     $REG{uri_literal} = qr/\x3C[\x09\x20\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]*\x3E/;
29     $REG{atext} = qr/[\x21\x23-\x27\x2A\x2B\x2D\x2F-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
30     #$REG{atext_dot} = qr/[\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
31     $REG{phrase} = qr/(?:$REG{atext}|$REG{quoted_string})(?:$REG{atext}|$REG{quoted_string}|\.|$REG{FWS})*/;
32     $REG{phrase_c} = qr/(?:$REG{atext}|$REG{quoted_string}|$REG{comment}|\.|$REG{FWS})*/;
33    
34     ## Simple version of URI regex See RFC 2396, RFC 2732, RFC 2324.
35     $REG{escaped} = qr/%[0-9A-Fa-f][0-9A-Fa-f]/;
36     $REG{scheme} = qr/(?:[A-Za-z]|$REG{escaped})(?:[0-9A-Za-z+.-]|$REG{escaped})*/;
37     ## RFC 2324 defines escaped UTF-8 schemes:-)
38     $REG{fragment} = qr/\x23(?:[\x21\x24\x26-\x3B\x3D\x3F-\x5A\x5F\x61-\x7A\x7E]|$REG{escaped})*/;
39     $REG{S_uri_body} = qr/(?:[\x21\x24\x26-\x3B\x3D\x3F-\x5A\x5B\x5D\x5F\x61-\x7A\x7E]|$REG{escaped})+/;
40     $REG{S_absoluteURI} = qr/$REG{scheme}:$REG{S_uri_body}/;
41     $REG{S_relativeURI} = qr/$REG{S_uri_body}/;
42     $REG{S_URI_reference} = qr/(?:$REG{S_absoluteURI}|$REG{S_relativeURI})(?:$REG{fragment})?|(?:$REG{fragment})/;
43     ## RFC 2396 allows <> (empty URI), but this regex doesn't.
44    
45     $REG{M_comment} = qr/\x28((?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x27\x2A-\x5B\x5D-\xFF]|(??{$REG{comment}}))*)\x29/;
46     $REG{M_quoted_string} = qr/\x22((?:\x5C[\x00-\xFF]|[\x00-\x0C\x0E-\x21\x23-\x5B\x5D-\xFF])*)\x22/;
47     $REG{M_uri_literal} = qr/\x3C([\x09\x20\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]*)\x3E/;
48     $REG{M_S_phrase_uri} = qr/($REG{phrase_c})$REG{M_uri_literal}/;
49     $REG{uri_phrase} = qr/[\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]+(?:$REG{WSP}+[\x21\x23-\x27\x29-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E][\x21\x23-\x3B\x3D\x3F-\x5B\x5D\x5F\x61-\x7A\x7E]*)*/;
50     #$REG{M_phrase_uri} = qr/($REG{phrase_c})<$REG{FWS}($REG{S_URI_reference})$REG{FWS}>/;
51     #$REG{M_phrase_uri_a} = qr/($REG{phrase_c})<$REG{FWS}($REG{S_absoluteURI})$REG{FWS}>/;
52     #$REG{M_phrase_uri_af} = qr/($REG{phrase_c})<$REG{FWS}($REG{S_absoluteURI}(?:$REG{fragment})?)$REG{FWS}>/;
53     #$REG{M_uri_content} = qr/$REG{M_S_phrase_uri}((?:$REG{FWS}$REG{comment})*)|($REG{S_URI_reference})($REG{WSP}(?:$REG{FWS}$REG{comment})*)?/;
54     $REG{M_uri_content} = qr/$REG{M_S_phrase_uri}((?:$REG{FWS}$REG{comment})*)|($REG{uri_phrase})($REG{WSP}(?:$REG{FWS}$REG{comment})*)?/;
55    
56     #$REG{NON_atext} = qr/[^\x21\x23-\x27\x2A\x2B\x2D\x2F\x30-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
57     #$REG{NON_atext_dot} = qr/[^\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
58     $REG{NON_atext_dot_wsp} = qr/[^\x09\x20\x21\x23-\x27\x2A\x2B\x2D-\x39\x3D\x3F\x41-\x5A\x5E-\x7E]/;
59    
60    
61     %DEFAULT = (
62     allow_absolute => 1, ## TODO: not implemented
63     allow_fragment => 1, ## TODO: not implemented
64     allow_relative => 1, ## TODO: not implemented
65     encoding_after_encode => '*default',
66     encoding_before_decode => '*default',
67     field_name => 'x-uri',
68     format => 'http', ## http, mhtml (= rfc2822, mime, news), cgi
69     hook_encode_string => #sub {shift; (value => shift, @_)},
70     \&Message::Util::encode_header_string,
71     hook_decode_string => #sub {shift; (value => shift, @_)},
72     \&Message::Util::decode_header_string,
73     output_angle_bracket => 1,
74     output_comment => 1,
75     output_display_name => 1,
76     );
77    
78     ## Initialization for both C<new> and C<parse> methods.
79     sub _initialize ($;%) {
80     my $self = shift;
81     my $fname = lc $self->{option}->{field_name};
82     my $format = $self->{option}->{format};
83     $format = 'mhtml' if $format eq 'rfc2822' || $format eq 'news'
84     || $format eq 'usefor' || $format eq 'mime';
85     if ($fname =~ /^list-/) {
86     $self->{option}->{output_display_name} = -1;
87     } elsif ($fname eq 'content-location') {
88     $self->{option}->{output_angle_bracket} = -1;
89     $self->{option}->{output_display_name} = -1;
90     ## Comments should not be used. Allowing this makes it difficult
91     ## to parse URI contains of "(" and ")".
92     #if ($format ne 'mhtml') { ## http
93     $self->{option}->{output_comment} = -1;
94     #}
95     $self->{option}->{allow_fragment} = -1;
96     } elsif ($fname eq 'link') {
97     $self->{option}->{output_display_name} = -1;
98     $self->{option}->{output_comment} = -1;
99     $self->{option}->{allow_fragment} = -1;
100     } elsif ($fname eq 'location') {
101     $self->{option}->{output_angle_bracket} = -1;
102     $self->{option}->{output_display_name} = -1;
103     $self->{option}->{output_comment} = -1;
104     if ($format ne 'cgi') { ## http
105     $self->{option}->{allow_relative} = -1;
106     $self->{option}->{allow_fragment} = -1;
107     }
108     } elsif ($fname eq 'referer') {
109     $self->{option}->{output_angle_bracket} = -1;
110     $self->{option}->{output_display_name} = -1;
111     $self->{option}->{output_comment} = -1;
112     $self->{option}->{allow_fragment} = -1;
113     } elsif ($fname eq 'uri') {
114     $self->{option}->{output_display_name} = -1;
115     $self->{option}->{output_comment} = -1;
116     } elsif ($fname eq 'content-base') {
117     $self->{option}->{output_angle_bracket} = -1;
118     $self->{option}->{output_display_name} = -1;
119     $self->{option}->{output_comment} = -1;
120     $self->{option}->{allow_relative} = -1;
121     $self->{option}->{allow_fragment} = -1;
122     }
123     }
124    
125     =head2 Message::Field::URI->new ([%option])
126    
127     Returns new Message::Field::URI. Some options can be given as hash.
128    
129     =cut
130    
131     sub new ($;%) {
132     my $class = shift;
133     my $self = bless {comment => [], option => {@_}}, $class;
134     $self->_initialize_new ();
135     for (keys %DEFAULT) {$self->{option}->{$_} ||= $DEFAULT{$_}}
136     $self;
137     }
138    
139     ## Initialization for new () method.
140     sub _initialize_new ($;%) {
141     my $self = shift;
142     $self->_initialize ();
143     }
144    
145     =head2 Message::Field::URI->parse ($field-body, [%option])
146    
147     Parses URI-type C<field-body> and returns new instance.
148     Some options can be given as hash.
149    
150     =cut
151    
152     sub parse ($$;%) {
153     my $class = shift;
154     my $body = shift;
155     my $self = bless {comment => [], option => {@_}}, $class;
156     $self->_initialize_parse ();
157     for (keys %DEFAULT) {$self->{option}->{$_} ||= $DEFAULT{$_}}
158     if ($body =~ /^$REG{M_uri_content}$/) {
159     my ($uri, $phrase_c, $comments) = ($2||$4, $1, $3||$5);
160     $uri =~ tr/\x09\x20//d;
161     $self->{uri} = $uri;
162     $phrase_c =~ s{$REG{M_comment}}{
163     my $comment = $self->_decode_ccontent ($1);
164     push @{$self->{comment}}, $comment if length $comment;
165     '';
166     }goex;
167     $phrase_c =~ s/^$REG{WSP}+//; $phrase_c =~ s/$REG{WSP}+$//;
168     $self->{display_name} = $self->_decode_quoted_string ($phrase_c);
169     $comments =~ s{$REG{M_comment}}{
170     my $comment = $self->_decode_ccontent ($1);
171     push @{$self->{comment}}, $comment if length $comment;
172     }goex;
173     }
174     $self;
175     }
176    
177     ## Initialization for parse () method.
178     sub _initialize_parse ($;%) {
179     my $self = shift;
180     $self->_initialize ();
181     }
182    
183     =head2 $self->display_name ([$newname])
184    
185     Set/gets C<display-name>. Display name is prepend
186     to C<URI> as C<phrase> (C<atom>s or a C<quoted-string>).
187    
188     Note that C<display-name> is outputted only if the class
189     option C<outout_display_name> is C<1>. If its value
190     is C<-1> but C<output_comment> is C<1>, display name
191     value is outputted as C<comment>. Neither of these
192     options takes C<1> value, display name is outputted
193     nowhere.
194    
195     =cut
196    
197     sub display_name ($;$%) {
198     my $self = shift;
199     my $dname = shift;
200     if (defined $dname) {
201     $self->{display_name} = $dname;
202     }
203     $self->{display_name};
204     }
205    
206     =head2 $self->comment_add ($comment, [%option]
207    
208     Adds a C<comment>. Comments are outputed only when
209     the class option (not an option of this method!)
210     C<output_comment> is enabled (value C<1>).
211    
212     On this method, only one option, C<prepend> is available.
213     With this option, additional comment is prepend
214     to current comments. (Default value is C<-1>, append.)
215    
216     =cut
217    
218     sub comment_add ($$;%) {
219     my $self = shift;
220     my ($value, %option) = (shift, @_);
221     if ($option{prepend}) {
222     unshift @{$self->{comment}}, $value;
223     } else {
224     push @{$self->{comment}}, $value;
225     }
226     $self;
227     }
228    
229     =head2 $self->comment ()
230    
231     Returns array reference of comments. You can add/remove/change
232     array values.
233    
234     =cut
235    
236     sub comment ($) {
237     my $self = shift;
238     $self->_comment_delete_empty->{comment};
239     }
240    
241     sub _comment_delete_empty ($) {
242     my $self = shift;
243     $self->{comment} = [grep {length} @{$self->{comment}}];
244     $self;
245     }
246    
247    
248     =head2 $self->stringify ([%option])
249    
250     Returns Message::Field::URI as a string.
251    
252     =cut
253    
254     sub stringify ($;%) {
255     my $self = shift;
256     my %option = @_;
257     for (qw (allow_relative output_angle_bracket output_comment output_display_name)) {
258     $option{$_} ||= $self->{option}->{$_};
259     }
260     if ($option{allow_relative}<0 && length $self->{uri} == 0) {
261     return '';
262     }
263     my $r = '';
264     if (length $self->{display_name}) {
265     if ($option{output_display_name}>0) {
266     $r = $self->_quote_unsafe_string ($self->{display_name});
267     $r .= ' ';
268     } elsif ($option{output_comment}>0) {
269     my %f = &{$self->{option}->{hook_encode_string}}
270     ($self, $self->{display_name}, type => 'ccontent');
271     $f{value} =~ s/([\x28\x29\x5C])([\x21-\x7E])?/
272     "\x5C$1".(defined $2?"\x5C$2":'')/ge;
273     $r .= '('.$f{value}.') ';
274     }
275     }
276     if ($option{output_angle_bracket}>0) {
277     $r .= '<'.$self->{uri}.'>';
278     } else {
279     $r .= $self->{uri};
280     }
281     if ($option{output_comment}>0) {
282     $self->_comment_delete_empty ();
283     for (@{$self->{comment}}) {
284     my %f = &{$self->{option}->{hook_encode_string}}
285     ($self, $_, type => 'ccontent');
286     $f{value} =~ s/([\x28\x29\x5C])([\x21-\x7E])?/
287     "\x5C$1".(defined $2?"\x5C$2":'')/ge;
288     $r .= ' ('.$f{value}.')' if length $f{value};
289     }
290     }
291     $r;
292     }
293     sub as_string ($;%) {shift->stringify (@_)}
294    
295     =head2 $self->option ($option_name)
296    
297     Returns/set (new) value of the option.
298    
299     =cut
300    
301     sub option ($$;$) {
302     my $self = shift;
303     my ($name, $newval) = @_;
304     if ($newval) {
305     $self->{option}->{$name} = $newval;
306     }
307     $self->{option}->{$name};
308     }
309    
310     sub _quote_unsafe_string ($$;%) {
311     my $self = shift;
312     my $string = shift;
313     my %option = @_;
314     $option{unsafe} ||= 'NON_atext_dot_wsp';
315     if ($string =~ /$REG{$option{unsafe}}/ || $string =~ /$REG{WSP}$REG{WSP}/) {
316     $string =~ s/([\x22\x5C])([\x21-\x7E])?/"\x5C$1".(defined $2?"\x5C$2":'')/ge;
317     $string = '"'.$string.'"';
318     }
319     $string;
320     }
321    
322     sub _unquote_quoted_string ($$) {
323     my $self = shift;
324     my $quoted_string = shift;
325     $quoted_string =~ s{$REG{M_quoted_string}}{
326     my $qtext = $1;
327     $qtext =~ s/\x5C([\x00-\xFF])/$1/g;
328     $qtext;
329     }goex;
330     $quoted_string;
331     }
332    
333     sub _decode_quoted_string ($$) {
334     my $self = shift;
335     my $quoted_string = shift;
336     $quoted_string =~ s{$REG{M_quoted_string}|([^\x22]+)}{
337     my ($qtext,$t) = ($1, $2);
338     if ($t) {
339     $t =~ s/($REG{WSP})+/$1/g;
340     my %s = &{$self->{option}->{hook_decode_string}} ($self, $t,
341     type => 'value');
342     $s{value};
343     } else {
344     $qtext =~ s/\x5C([\x00-\xFF])/$1/g;
345     my %s = &{$self->{option}->{hook_decode_string}} ($self, $qtext,
346     type => 'value/quoted');
347     $s{value};
348     }
349     }goex;
350     $quoted_string;
351     }
352    
353     sub _decode_ccontent ($$) {
354     &Message::MIME::EncodedWord::decode_ccontent (@_[1,0]);
355     }
356    
357     =head1 NOTE
358    
359     Current version of this module does not check whether
360     URI is correct or not. In particullar, implementor
361     should be careful not to output URI that is syntactically
362     valid, but do not match to context. For example,
363     C<Location:> field defined by HTTP/1.1 [RFC2616] doesn't
364     allow relative URIs. (Interestingly, with CGI/1.1,
365     we can use relative URI as value of C<Location> field.
366    
367     There is three options related with URI type.
368     C<allow_absolute>, C<allow_relative>, and C<allow_fragment>.
369     But this options don't work as you hope.
370     These options are only reserved for future implemention.
371    
372     =head1 LICENSE
373    
374     Copyright 2002 wakaba E<lt>w@suika.fam.cxE<gt>.
375    
376     This program is free software; you can redistribute it and/or modify
377     it under the terms of the GNU General Public License as published by
378     the Free Software Foundation; either version 2 of the License, or
379     (at your option) any later version.
380    
381     This program is distributed in the hope that it will be useful,
382     but WITHOUT ANY WARRANTY; without even the implied warranty of
383     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
384     GNU General Public License for more details.
385    
386     You should have received a copy of the GNU General Public License
387     along with this program; see the file COPYING. If not, write to
388     the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
389     Boston, MA 02111-1307, USA.
390    
391     =head1 CHANGE
392    
393     See F<ChangeLog>.
394     $Date: 2002/03/26 05:31:55 $
395    
396     =cut
397    
398     1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24