1 |
# |
2 |
# -*- Perl -*- |
3 |
# $Id: mhonarc.pl,v 1.23 2000/02/29 04:25:33 satoru Exp $ |
4 |
# Copyright (C) 1997-2000 Satoru Takabayashi , |
5 |
# 1999 NOKUBI Takatsugu All rights reserved. |
6 |
# This is free software with ABSOLUTELY NO WARRANTY. |
7 |
# |
8 |
# This program is free software; you can redistribute it and/or modify |
9 |
# it under the terms of the GNU General Public License as published by |
10 |
# the Free Software Foundation; either version 2, or (at your option)
11 |
# any later version. |
12 |
# |
13 |
# This program is distributed in the hope that it will be useful |
14 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 |
# GNU General Public License for more details. |
17 |
# |
18 |
# You should have received a copy of the GNU General Public License |
19 |
# along with this program; if not, write to the Free Software |
20 |
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA |
21 |
# 02111-1307, USA |
22 |
# |
23 |
# This file must be encoded in EUC-JP encoding |
24 |
# |
25 |
|
26 |
package mhonarc; |
27 |
use strict; |
28 |
require 'util.pl'; |
29 |
require 'gfilter.pl'; |
30 |
require 'html.pl'; |
31 |
require 'mailnews.pl'; |
32 |
|
33 |
#
# Regex source for the file name of a single MHonArc message page:
# "msg" + five digits + ".html", optionally followed by ".gz".
# filter() interpolates it into an end-anchored match on the file name.
#
my $MHONARC_MESSAGE_FILE = 'msg\d{5}\.html(?:\.gz)?';
37 |
|
38 |
# Media type string this filter module declares for itself.
# Takes no arguments; always yields the same single string.
sub mediatype () {
    return 'text/html; x-type=mhonarc';
}
41 |
|
42 |
# Status flag of this filter module; always the string 'yes'.
# NOTE(review): presumably tells the caller that the filter is usable
# without external helpers -- confirm against the framework that loads it.
sub status () {
    return 'yes';
}
45 |
|
46 |
# Recursion flag of this filter module; always 0.
# NOTE(review): presumably means the filter output is not fed to another
# filter pass -- confirm against the framework that loads it.
sub recursive () {
    return 0;
}
49 |
|
50 |
# Pre-conversion flag of this filter module; always 1.
# NOTE(review): presumably asks the caller to convert the character
# encoding before filter() runs -- confirm against the framework.
sub pre_codeconv () {
    return 1;
}
53 |
|
54 |
# Post-conversion flag of this filter module; always 0.
# NOTE(review): presumably tells the caller that no character-encoding
# conversion is needed after filter() has run -- confirm against the framework.
sub post_codeconv () {
    return 0;
}
57 |
|
58 |
# Register this module's detection rule with a magic database object.
#
#   $magic - object providing an addMagicEntry() method
#            (NOTE(review): presumably a File::MMagic instance -- confirm).
#
# The entry reads like a magic(5) rule: a string test at offset 0 for the
# "<!-- MHonArc" comment, mapped to this module's media type.
# Returns nothing.
sub add_magic ($) {
    my $magic = shift;

    my $entry = '0 string \<!--\ MHonArc text/html; x-type=mhonarc';
    $magic->addMagicEntry($entry);

    return;
}
65 |
|
66 |
# Filter entry point: reduce one MHonArc message page to indexable text.
#
# Arguments:
#   $orig_cfile   - reference to the document's file name (may be undef,
#                   in which case the name is treated as empty)
#   $contref      - reference to the document content; rewritten in place
#   $weighted_str - handed on to the HTML, mail/news and gfilter helpers
#                   (NOTE(review): presumably a reference too, since it is
#                   passed to gfilter::line_adjust_filter like $contref)
#   $headings     - handed on to html::html_filter and the debug dump
#   $fields       - handed on to html::html_filter, mailnews::mailnews_filter
#                   and the debug dump
#
# Returns undef on success, or a message string when the file name does not
# look like an MHonArc message page and the file is skipped.
sub filter ($$$$$) {
    my ($orig_cfile, $contref, $weighted_str, $headings, $fields) = @_;
    my $cfile = defined $orig_cfile ? $$orig_cfile : '';

    util::vprint("Processing MHonArc file ...\n");

    # Only per-message pages are processed; any other file name is reported
    # back to the caller as skipped.
    unless ($cfile =~ /($MHONARC_MESSAGE_FILE)$/o) {
        return "is MHonArc's index file! skipped."; # error
    }

    # mhonarc_filter() is declared ($$) and reads only these two arguments,
    # so $fields is no longer passed to it.
    mhonarc_filter($contref, $weighted_str);
    html::html_filter($contref, $weighted_str, $fields, $headings);

    # Drop leading whitespace before the mail/news filters run.
    $$contref =~ s/^\s+//;
    mailnews::uuencode_filter($contref);
    mailnews::mailnews_filter($contref, $weighted_str, $fields);
    mailnews::mailnews_citation_filter($contref, $weighted_str);

    gfilter::line_adjust_filter($contref);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($contref);
    gfilter::show_filter_debug_info($contref, $weighted_str,
                                    $fields, $headings);

    # Kept as an explicit undef (not a bare return) so the value seen by
    # existing callers is unchanged in every calling context.
    return undef;
}
94 |
|
95 |
# Assumes a normal message file as generated by MHonArc v2.1.0.
96 |
# Strip MHonArc page decoration from a message page, in place, leaving the
# header lines followed by the message body.
#
# Arguments:
#   $contref      - reference to the page content; modified in place
#   $weighted_str - accepted for the existing call interface; not used here
#
# The value of the final substitution is what the sub evaluates to, exactly
# as before; filter() ignores it.
sub mhonarc_filter ($$) {
    my ($contref, $weighted_str) = @_;

    # Cut everything from the end-of-body marker to the end of the page.
    $$contref =~ s/<!--X-MsgBody-End-->.*//s;

    # Remove the marked TopPNI and subject-header regions.  The match is
    # non-greedy so that a stray copy of an end marker later in the page
    # (e.g. inside the message body) cannot make the removal swallow the
    # headers and body in between.
    $$contref =~ s/<!--X-TopPNI-->.*?<!--X-TopPNI-End-->//s;
    $$contref =~ s/<!--X-Subject-Header-Begin-->.*?<!--X-Subject-Header-End-->//s;

    # Separate headers and the message body.
    $$contref =~ s/<!--X-Head-Body-Sep-Begin-->/\n/;

    # Join a header field that spans two or more lines onto a single line.
    # Case-insensitive, like the <LI> and <EM> removals below, so pages
    # written with lower-case tags are handled the same way.
    $$contref =~ s!^(<LI>)(.*?)(</LI>$)!$1 . lftospace($2) . $3!gemsi;

    # Remove the list-item tag that precedes each header line.
    $$contref =~ s/^<LI>//gim;

    # Remove the <EM> markup wrapped around header names.
    $$contref =~ s!</?EM>!!gi;

    # Trim leading whitespace from the whole page.
    $$contref =~ s/^\s+//;
}
117 |
|
118 |
# Return a copy of the given string in which every carriage return and
# line feed has been replaced by one space.  The argument is not modified.
sub lftospace ($) {
    my $text = shift;
    $text =~ tr/\r\n/ /;
    return $text;
}
123 |
1; |