/[suikacvs]/www/namazu/filter/gfilter.pl
Suika

Contents of /www/namazu/filter/gfilter.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Nov 30 07:56:45 2001 UTC (22 years, 5 months ago) by wakaba
Branch: MAIN, wakaba
CVS Tags: initial, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain

1 #
2 # -*- Perl -*-
3 # $Id: gfilter.pl,v 1.13.8.1 2001/07/09 09:55:39 takesako Exp $
4 # Copyright (C) 1999 Satoru Takabayashi ,
5 # This is free software with ABSOLUTELY NO WARRANTY.
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either versions 2, or (at your option)
10 # any later version.
11 #
12 # This program is distributed in the hope that it will be useful
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
20 # 02111-1307, USA
21 #
22 # This file must be encoded in EUC-JP encoding
23 #
24
25 package gfilter;
26
27 # Show debug information for filters
28 sub show_filter_debug_info ($$$$) {
29 my ($contref, $weighted_str, $fields, $headings) = @_;
30 util::dprint("-- title --\n$fields->{'title'}\n")
31 if defined $fields->{'title'};
32 util::dprint("-- content --\n$$contref\n");
33 util::dprint("-- weighted_str: --\n$$weighted_str\n");
34 util::dprint("-- headings --\n$$headings\n");
35 }
36
37 # Adjust white spaces
38 sub white_space_adjust_filter ($) {
39 my ($text) = @_;
40
41 $$text =~ s/[ \t]+/ /g;
42 $$text =~ s/\r\n/\n/g;
43 $$text =~ s/\r/\n/g;
44 $$text =~ s/\n+/\n/g;
45 $$text =~ s/^ +//gm;
46 $$text =~ s/ +$//gm;
47 $$text =~ s/ +/ /g;
48 # Control characters be into space
49 $$text =~ tr/\x00-\x09\x0b-\x1f/ /;
50 }
51
52 # get a title from a file name.
53 sub filename_to_title ($$) {
54 my ($cfile, $weighted_str) = @_;
55
56 # for MSWin32's filename using Shift_JIS [1998-09-24]
57 if (($mknmz::SYSTEM eq "MSWin32") || ($mknmz::SYSTEM eq "os2")) {
58 $cfile = codeconv::shiftjis_to_eucjp($cfile);
59 $cfile = codeconv::eucjp_han2zen_kana($cfile);
60 }
61
62 $cfile =~ m!^.*/([^/]*)$!;
63 my $filename = $1;
64
65 # get keywords from a file name.
66 # modified [1998-09-18]
67 my $tmp = $filename;
68 $tmp =~ s|/\\_\.-| |g;
69
70 my $weight = $conf::Weight{'html'}->{'title'};
71 $$weighted_str .= "\x7f$weight\x7f$tmp\x7f/$weight\x7f\n";
72
73 my $title = $filename;
74 return $title
75 }
76
77 # Remove SPACE/TAB at the beginning or ending of the line.
78 # And remove '>|#:' at the begenning of the line.
79 # Join hyphenation for English text.
80 # Remove LF if the line is ended with a Japanese character and
81 # length of the line is 40 or more longer.
82 #
83 # Original of this code was contributed by <furukawa@tcp-ip.or.jp>.
84 # [1997-09-15]
85 #
86 sub line_adjust_filter ($) {
87 my ($text) = @_;
88 return undef unless defined($$text);
89
90 my @tmp = split(/\n/, $$text);
91 for my $line (@tmp) {
92 $line .= "\n";
93 $line =~ s/^[ \>\|\#\:]+//;
94 $line =~ s/ +$//;
95 $line =~ s/\n// if (($line =~ /[\xa1-\xfe]\n*$/) &&
96 (length($line) >=40));
97 $line =~ s/(¡£|¡¢)$/$1\n/;
98 $line =~ s/([a-z])-\n/$1/; # for hyphenation.
99 }
100 $$text = join('', @tmp);
101 }
102
103 # not implimented yet.
104 sub analize_rcs_stamp()
105 {
106 }
107
108 1;
109

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24