=head1 NAME Message::Header Perl module =head1 DESCRIPTION Perl module for RFC 822/2822 message C
. =cut package Message::Header; use strict; use vars qw($VERSION %REG); $VERSION = '1.00'; use Carp (); use overload '@{}' => sub { shift->_delete_empty_field->{field} }, '""' => sub { shift->stringify }, fallback => 1; $REG{WSP} = qr/[\x09\x20]/; $REG{FWS} = qr/[\x09\x20]*/; $REG{M_field} = qr/^([^\x3A]+):$REG{FWS}([\x00-\xFF]*)$/; $REG{M_fromline} = qr/^\x3E?From$REG{WSP}+([\x00-\xFF]*)$/; $REG{UNSAFE_field_name} = qr/[\x00-\x20\x3A\x7F-\xFF]/; =head2 options These options can be getten/set by C/C method. =head3 capitalize = 0/1 (First character of) C is capitalized when C. (Default = 1) =head3 fold_length = numeric value Length of line used to fold. (Default = 70) =head3 mail_from = 0/1 Outputs "From " line (known as Un*x From, Mail-From, and so on) when C. (Default = 0) =cut =head1 CONSTRUCTORS The following methods construct new C objects: =over 4 ## Initialize my %DEFAULT = ( capitalize => 1, fold_length => 70, #field_type => {}, format => 'mail-rfc2822', mail_from => 0, output_bcc => 0, parse_all => 0, sort => 'none', translate_underscore => 1, validate => 1, ); $DEFAULT{field_type} = { ':DEFAULT' => 'Message::Field::Unstructured', received => 'Message::Field::Received', 'x-received' => 'Message::Field::Received', 'content-type' => 'Message::Field::ContentType', 'content-disposition' => 'Message::Field::ContentDisposition', 'auto-submitted' => 'Message::Field::ValueParams', link => 'Message::Field::ValueParams', archive => 'Message::Field::ValueParams', 'x-face-type' => 'Message::Field::ValueParams', subject => 'Message::Field::Subject', 'x-nsubject' => 'Message::Field::Subject', 'list-software' => 'Message::Field::UA', 'user-agent' => 'Message::Field::UA', server => 'Message::Field::UA', ## Numeric value 'content-length' => 'Message::Field::Numval', lines => 'Message::Field::Numval', 'max-forwards' => 'Message::Field::Numval', 'mime-version' => 'Message::Field::Numval', 'x-jsmail-priority' => 'Message::Field::Numval', 'x-priority' => 'Message::Field::Numval', path => 'Message::Field::Path', }; for (qw(cancel-lock importance precedence list-id x-face x-mail-count x-msmail-priority x-priority xref)) {$DEFAULT{field_type}->{$_} = 'Message::Field::Structured'} for (qw(approved bcc cc complaints-to delivered-to disposition-notification-to envelope-to errors-to fcc from mail-followup-to mail-followup-cc mail-reply-to notice-requested-upon-delivery-to reply-to resent-bcc resent-cc resent-to resent-from resent-sender return-path return-receipt-to sender to x-approved x-beenthere x-complaints-to x-envelope-from x-envelope-sender x-envelope-to x-ml-address x-ml-command x-ml-to x-nfrom x-nto)) {$DEFAULT{field_type}->{$_} = 'Message::Field::Address'} for (qw(date date-received delivery-date expires expire-date nntp-posting-date posted reply-by resent-date x-tcup-date)) {$DEFAULT{field_type}->{$_} = 'Message::Field::Date'} for (qw(article-updates client-date content-id in-reply-to message-id references resent-message-id see-also supersedes)) {$DEFAULT{field_type}->{$_} = 'Message::Field::MsgID'} for (qw(accept accept-charset accept-encoding accept-language content-language content-transfer-encoding encrypted followup-to keywords list-archive list-digest list-help list-owner list-post list-subscribe list-unsubscribe list-url uri newsgroups x-brother x-daughter x-respect x-moe x-syster x-wife)) {$DEFAULT{field_type}->{$_} = 'Message::Field::CSV'} for (qw(content-alias content-base content-location location referer url x-home-page x-http_referer x-info x-pgp-key x-ml-url x-uri x-url x-web)) {$DEFAULT{field_type}->{$_} = 'Message::Field::URI'} ## taken from L # "Good Practice" order of HTTP message headers: # - General-Headers # - Request-Headers # - Response-Headers # - Entity-Headers # (From draft-ietf-http-v11-spec-rev-01, Nov 21, 1997) my @header_order = qw( mail-from x-envelope-from relay-version path status cache-control connection date pragma transfer-encoding upgrade trailer via accept accept-charset accept-encoding accept-language authorization expect from host if-modified-since if-match if-none-match if-range if-unmodified-since max-forwards proxy-authorization range referer te user-agent accept-ranges age location proxy-authenticate retry-after server vary warning www-authenticate mime-version allow content-base content-encoding content-language content-length content-location content-md5 content-range content-type etag expires last-modified content-style-type content-script-type link xref ); my %header_order; sub _init ($;%) { my $self = shift; my %options = @_; $self->{field} = []; $self->{option} = \%DEFAULT; my @new_fields = (); for my $name (keys %options) { if (substr ($name, 0, 1) eq '-') { $self->{option}->{substr ($name, 1)} = $options{$name}; } else { push @new_fields, ($name => $options{$name}); } } $self->add (@new_fields, -parse => $self->{option}->{parse_all}) if $#new_fields > -1; my $format = $self->{option}->{format}; if ($format =~ /^cgi/) { unshift @header_order, qw(content-type location); $self->{option}->{sort} = 'good-practice'; } elsif ($format =~ /^http/) { $self->{option}->{sort} = 'good-practice'; } # Make alternative representations of @header_order. This is used # for sorting. my $i = 1; for (@header_order) { $header_order{$_} = $i++ unless $header_order{$_}; } } =item Message::Header->new ([%initial-fields/options]) Constructs a new C object. You might pass some initial C-C pairs and/or options as parameters to the constructor. Example: $hdr = new Message::Headers Date => 'Thu, 03 Feb 1994 00:00:00 +0000', Content_Type => 'text/html', Content_Location => 'http://www.foo.example/', -format => 'mail-rfc2822' ## not to be header field ; =cut sub new ($;%) { my $class = shift; my $self = bless {}, $class; $self->_init (@_); $self; } =item Message::Header->parse ($header, [%initial-fields/options]) Parses given C
and constructs a new C object. You might pass some additional C-C pairs or/and initial options as parameters to the constructor. =cut sub parse ($$;%) { my $class = shift; my $header = shift; my $self = bless {}, $class; $self->_init (@_); $header =~ s/\x0D?\x0A$REG{WSP}/\x20/gos; ## unfold for my $field (split /\x0D?\x0A/, $header) { if ($field =~ /$REG{M_fromline}/) { my $body = $1; $body = $self->_field_body ($body, 'mail-from') if $self->{option}->{parse_all}; push @{$self->{field}}, {name => 'mail-from', body => $body}; } elsif ($field =~ /$REG{M_field}/) { my ($name, $body) = (lc $1, $2); $name =~ s/$REG{WSP}+$//; $body =~ s/$REG{WSP}+$//; $body = $self->_field_body ($body, $name) if $self->{option}->{parse_all}; push @{$self->{field}}, {name => $name, body => $body}; } } $self; } =item Message::Header->parse_array (\@header, [%initial-fields/options]) Parses given C
and constructs a new C object. Same as Cparse> but this method is given an array reference. You might pass some additional C-C pairs or/and initial options as parameters to the constructor. =cut sub parse_array ($\@;%) { my $class = shift; my $header = shift; Carp::croak "parse_array: first argument is not an array reference" unless ref $header eq 'ARRAY'; my $self = bless {}, $class; $self->_init (@_); while (1) { my $field = shift @$header; while (1) { if ($$header[0] =~ /^$REG{WSP}/) { $field .= shift @$header; } else {last} } $field =~ tr/\x0D\x0A//d; ## BUG: not safe for bar CR/LF if ($field =~ /$REG{M_fromline}/) { my $body = $1; $body = $self->_field_body ($body, 'mail-from') if $self->{option}->{parse_all}; push @{$self->{field}}, {name => 'mail-from', body => $body}; } elsif ($field =~ /$REG{M_field}/) { my ($name, $body) = (lc $1, $2); $name =~ s/$REG{WSP}+$//; $body =~ s/$REG{WSP}+$//; $body = $self->_field_body ($body, $name) if $self->{option}->{parse_all}; push @{$self->{field}}, {name => $name, body => $body}; } last if $#$header < 0; } $self; } =back =head1 METHODS =head2 $self->field ($field_name) Returns C of given C. When there are two or more Cs whose name is C, this method return all Cs as array. (On scalar context, only first one is returned.) =cut sub field ($$) { my $self = shift; my $name = lc shift; my @ret; for my $field (@{$self->{field}}) { if ($field->{name} eq $name) { unless (wantarray) { $field->{body} = $self->_field_body ($field->{body}, $name); return $field->{body}; } else { $field->{body} = $self->_field_body ($field->{body}, $name); push @ret, $field->{body}; } } } if ($#ret < 0) { return $self->add ($name); } @ret; } sub field_exist ($$) { my $self = shift; my $name = lc shift; my @ret; for my $field (@{$self->{field}}) { return 1 if ($field->{name} eq $name); } 0; } =head2 $self->field_name ($index) Returns C of $index'th C. =head2 $self->field_body ($index) Returns C of $index'th C. =cut sub field_name ($$) { my $self = shift; $self->{field}->[shift]->{name}; } sub field_body ($$) { my $self = shift; my $i = shift; $self->{field}->[$i]->{body} = $self->_field_body ($self->{field}->[$i]->{body}, $self->{field}->[$i]->{name}); $self->{field}->[$i]->{body}; } sub _field_body ($$$) { my $self = shift; my ($body, $name) = @_; unless (ref $body) { my $type = $self->{option}->{field_type}->{$name} || $self->{option}->{field_type}->{':DEFAULT'}; eval "require $type" or Carp::croak ("_field_body: $type: $@"); unless ($body) { $body = $type->new (-field_name => $name, -format => $self->{option}->{format}); } else { $body = $type->parse ($body, -field_name => $name, -format => $self->{option}->{format}); } } $body; } =head2 $self->field_name_list () Returns list of all Cs. (Even if there are two or more Cs which have same C, this method returns ALL names.) =cut sub field_name_list ($) { my $self = shift; $self->_delete_empty_field (); map {$_->{name}} @{$self->{field}}; } =head2 $self->add ($field-name, $field-body, [$name, $body, ...]) Adds an new C. It is not checked whether the field which named $field_body is already exist or not. If you don't want duplicated Cs, use C method. Instead of field name-body pair, you might pass some options. Four options are available for this method. C<-parse>: Parses and validates C, and returns C object. (When multiple Cs are added, returns only last one.) (Default: C) C<-prepend>: New fields are not appended, but prepended to current fields. (Default: C<0>) C<-translate-underscore>: Do C =~ tr/_/-/. (Default: C<1>) C<-validate>: Checks whether C is valid or not. =cut sub add ($%) { my $self = shift; my %fields = @_; my %option = %{$self->{option}}; $option{parse} = defined wantarray unless defined $option{parse}; for (grep {/^-/} keys %fields) {$option{substr ($_, 1)} = $fields{$_}} my $body; for (grep {/^[^-]/} keys %fields) { my $name = lc $_; $body = $fields{$_}; $name =~ tr/_/-/ if $option{translate_underscore}; Carp::croak "add: $name: invalid field-name" if $option{validate} && $name =~ /$REG{UNSAFE_field_name}/; $body = $self->_field_body ($body, $name) if $option{parse}; if ($option{prepend}) { unshift @{$self->{field}}, {name => $name, body => $body}; } else { push @{$self->{field}}, {name => $name, body => $body}; } } $body if $option{parse}; } =head2 $self->relace ($field_name, $field_body) Set the C named C as $field_body. If $field_name C is already exists, it is replaced by new $field_body value. If not, new C is inserted. (If there are some C named as $field_name, first one is used and the others are not changed.) =cut sub replace ($%) { my $self = shift; my %params = @_; my %option = %{$self->{option}}; $option{parse} = defined wantarray unless defined $option{parse}; for (grep {/^-/} keys %params) {$option{substr ($_, 1)} = $params{$_}} my (%new_field); for (grep {/^[^-]/} keys %params) { my $name = lc $_; $name =~ tr/_/-/ if $option{translate_underscore}; Carp::croak "replace: $name: invalid field-name" if $option{validate} && $name =~ /$REG{UNSAFE_field_name}/; $params{$_} = $self->_field_body ($params{$_}, $name) if $option{parse}; $new_field{$name} = $params{$_}; } my $body = (%new_field)[-1]; for my $field (@{$self->{field}}) { if (defined $new_field{$field->{name}}) { $field->{body} = $new_field {$field->{name}}; $new_field{$field->{name}} = undef; } } for (keys %new_field) { push @{$self->{field}}, {name => $_, body => $new_field{$_}}; } $body if $option{parse}; } =head2 $self->delete ($field-name, [$name, ...]) Deletes C named as $field_name. =cut sub delete ($@) { my $self = shift; my %delete; for (@_) {$delete{lc $_} = 1} for my $field (@{$self->{field}}) { undef $field if $delete{$field->{name}}; } } =head2 $self->count ([$field_name]) Returns the number of times the given C appears. If no $field_name is given, returns the number of fields. (Same as $#$self+1) =cut sub count ($;$) { my $self = shift; my ($name) = (lc shift); unless ($name) { $self->_delete_empty_field (); return $#{$self->{field}}+1; } my $count = 0; for my $field (@{$self->{field}}) { if ($field->{name} eq $name) { $count++; } } $count; } =head2 $self->rename ($field-name, $new-name, [$old, $new,...]) Renames C<$field-name> as C<$new-name>. =cut sub rename ($%) { my $self = shift; my %params = @_; my %option = %{$self->{option}}; for (grep {/^-/} keys %params) {$option{substr ($_, 1)} = $params{$_}} my %new_name; for (grep {/^[^-]/} keys %params) { my ($old => $new) = (lc $_ => lc $params{$_}); $new =~ tr/_/-/ if $option{translate_underscore}; Carp::croak "rename: $new: invalid field-name" if $option{validate} && $new =~ /$REG{UNSAFE_field_name}/; $new_name{$old} = $new; } for my $field (@{$self->{field}}) { if (length $new_name{$field->{name}}) { $field->{name} = $new_name{$field->{name}}; } } $self if defined wantarray; } =item $self->scan(\&doit) Apply a subroutine to each header field in turn. The callback routine is called with two parameters; the name of the field and a single value. If the header has more than one value, then the routine is called once for each value. =cut sub scan ($&) { my ($self, $sub) = @_; my $sort; $sort = \&_header_cmp if $self->{option}->{sort} eq 'good-practice'; $sort = {$a cmp $b} if $self->{option}->{sort} eq 'alphabetic'; my @field = @{$self->{field}}; if (ref $sort) { @field = sort $sort @{$self->{field}}; } for my $field (@field) { next if $field->{name} =~ /^_/; &$sub($field->{name} => $field->{body}); } } # Compare function which makes it easy to sort headers in the # recommended "Good Practice" order. ## taken from HTTP::Header sub _header_cmp { my ($na, $nb) = ($a->{name}, $b->{name}); # Unknown headers are assign a large value so that they are # sorted last. This also helps avoiding a warning from -w # about comparing undefined values. $header_order{$na} = 999 unless defined $header_order{$na}; $header_order{$nb} = 999 unless defined $header_order{$nb}; $header_order{$na} <=> $header_order{$nb} || $na cmp $nb; } =head2 $self->stringify ([%option]) Returns the C
as a string. =cut sub stringify ($;%) { my $self = shift; my %params = @_; my %option = %{$self->{option}}; for (grep {/^-/} keys %params) {$option{substr ($_, 1)} = $params{$_}} my @ret; if ($option{mail_from}) { my $fromline = $self->field ('mail-from'); push @ret, 'From '.$fromline if $fromline; } $self->scan (sub { my ($name, $body) = (@_); return unless length $name; return if $option{mail_from} && $name eq 'mail-from'; return if !$option{output_bcc} && ($name eq 'bcc' || $name eq 'resent-bcc'); my $fbody; if (ref $body) { $fbody = $body->stringify (-format => $option{format}); } else { $fbody = $body; } return unless length $fbody; $fbody =~ s/\x0D(?=[^\x09\x0A\x20])/\x0D\x20/g; $fbody =~ s/\x0A(?=[^\x09\x20])/\x0A\x20/g; $name =~ s/((?:^|-)[a-z])/uc($1)/ge if $option{capitalize}; push @ret, $name.': '.$self->fold ($fbody); }); my $ret = join ("\n", @ret); $ret? $ret."\n": ''; } *as_string = \&stringify; =head2 $self->option ($option_name, [$option_value]) Set/gets new value of the option. =cut sub option ($@) { my $self = shift; if (@_ == 1) { return $self->{option}->{ shift (@_) }; } while (my ($name, $value) = splice (@_, 0, 2)) { $name =~ s/^-//; $self->{option}->{$name} = $value; if ($name eq 'format') { for my $f (@{$self->{field}}) { if (ref $f->{body}) { $f->{body}->option (-format => $value); } } } } } sub field_type ($$;$) { my $self = shift; my $field_name = shift; my $new_field_type = shift; if ($new_field_type) { $self->{option}->{field_type}->{$field_name} = $new_field_type; } $self->{option}->{field_type}->{$field_name} || $self->{option}->{field_type}->{':DEFAULT'}; } sub _delete_empty_field ($) { my $self = shift; my @ret; for my $field (@{$self->{field}}) { push @ret, $field if $field->{name}; } $self->{field} = \@ret; $self; } sub fold ($$;$) { my $self = shift; my $string = shift; my $len = shift || $self->{option}->{fold_length}; $len = 60 if $len < 60; ## This code is taken from Mail::Header 1.43 in MailTools, ## by Graham Barr (Maintained by Mark Overmeer ). my $max = int($len - 5); # 4 for leading spcs + 1 for [\,\;] my $min = int($len * 4 / 5) - 4; my $ml = $len; if (length($string) > $ml) { #Split the line up # first bias towards splitting at a , or a ; >4/5 along the line # next split a whitespace # else we are looking at a single word and probably don't want to split my $x = ""; $x .= "$1\n " while($string =~ s/^$REG{WSP}*( [^"]{$min,$max}?[\,\;] |[^"]{1,$max}$REG{WSP} |[^\s"]*(?:"[^"]*"[^\s"]*)+$REG{WSP} |[^\s"]+$REG{WSP} ) //x); $x .= $string; $string = $x; $string =~ s/(\A$REG{WSP}+|$REG{WSP}+\Z)//sog; $string =~ s/\s+\n/\n/sog; } $string; } =head2 $self->clone () Returns a copy of Message::Header object. =cut sub clone ($) { my $self = shift; my $clone = new Message::Header; for my $name (%{$self->{option}}) { if (ref $self->{option}->{$name} eq 'HASH') { $clone->{option}->{$name} = {%{$self->{option}->{$name}}}; } elsif (ref $self->{option}->{$name} eq 'ARRAY') { $clone->{option}->{$name} = [@{$self->{option}->{$name}}]; } else { $clone->{option}->{$name} = $self->{option}->{$name}; } } for (@{$self->{field}}) { $clone->add ($_->{name}, scalar $_->{body}); } $clone; } =head1 NOTE =head2 C The header field name is not case sensitive. To make the life easier for perl users who wants to avoid quoting before the => operator, you can use '_' as a synonym for '-' in header field names (this behaviour can be suppressed by setting C option to C<0> value). =head1 EXAMPLE ## Print field list use Message::Header; my $header = Message::Header->parse ($header); for my $i (0..$#$header) { print $header->field_name ($i), "\t=> ", $header->field_body ($i), "\n"; } ## Make simple header use Message::Header; use Message::Field::Address; my $header = new Message::Header; my $from = new Message::Field::Address; $from->add ('foo@foo.example', name => 'F. Foo'); my $to = new Message::Field::Address; $to->add ('bar@bar.example', name => 'Mr. Bar'); $to->add ('hoge@foo.example', name => 'Hoge-san'); $header->add ('From' => $from); $header->add ('To' => $to); $header->add ('Subject' => 'Re: Meeting'); $header->add ('References' => ''); print $header; =head1 ACKNOWLEDGEMENTS Some of codes are taken from other modules such as HTTP::Header, Mail::Header. =head1 LICENSE Copyright 2002 wakaba Ew@suika.fam.cxE. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; see the file COPYING. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. =head1 CHANGE See F. $Date: 2002/04/05 14:56:26 $ =cut 1;