/[suikacvs]/www/namazu/filter/msword.pl
Suika

Contents of /www/namazu/filter/msword.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Nov 30 07:56:45 2001 UTC (22 years, 5 months ago) by wakaba
Branch: MAIN, wakaba
CVS Tags: initial, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain

1 #
2 # -*- Perl -*-
3 # $Id: msword.pl,v 1.28.4.1 2001/07/13 01:14:26 knok Exp $
4 # Copyright (C) 1997-2000 Satoru Takabayashi All rights reserved.
5 # Copyright (C) 2000 Satoru Takabayashi Namazu Project All rights reserved.
6 # This is free software with ABSOLUTELY NO WARRANTY.
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2, or (at your option)
11 # any later version.
12 #
13 # This program is distributed in the hope that it will be useful
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 # 02111-1307, USA
22 #
23 # This file must be encoded in EUC-JP encoding
24 #
25
26 package msword;
27 use strict;
28 use File::Copy;
29 require 'util.pl';
30 require 'gfilter.pl';
31 require 'html.pl';
32
33 my $wordconvpath = undef;
34 my $utfconvpath = undef;
35 my $wvversionpath = undef;
36
# Report the media type this filter handles.
# Returns the single string 'application/msword'.
sub mediatype() {
    return 'application/msword';
}
40
# Probe for the external converters and report whether this filter is usable.
#
# Side effects: stores the located commands in the file-scoped variables
# $wordconvpath, $utfconvpath and $wvversionpath.
#
# Returns 'yes' when a usable converter set was found, otherwise 'no':
#   - wvHtml found, language is not "ja"        -> 'yes'
#   - wvHtml found, language is "ja"            -> 'yes' only if both
#                                                  lv and wvVersion exist
#   - wvHtml missing                            -> 'yes' only if doccat exists
sub status() {
    $wordconvpath = util::checkcmd('wvHtml');

    # No wvHtml: fall back to doccat as the converter.
    unless (defined $wordconvpath) {
        $wordconvpath = util::checkcmd('doccat');
        return defined $wordconvpath ? 'yes' : 'no';
    }

    # wvHtml alone is sufficient outside of Japanese mode.
    return 'yes' unless util::islang("ja");

    # Japanese mode additionally needs lv and wvVersion.
    $utfconvpath   = util::checkcmd('lv');
    $wvversionpath = util::checkcmd('wvVersion');
    my $have_helpers = defined($utfconvpath) && defined($wvversionpath);
    return $have_helpers ? 'yes' : 'no';
}
61
# Filter capability flag; this filter always answers 0.
# NOTE(review): presumably tells the indexer not to re-run filters on the
# output -- confirm against the filter framework.
sub recursive() {
    my $recursive = 0;
    return $recursive;
}
65
# Filter capability flag; this filter always answers 0.
# NOTE(review): presumably means no character-code conversion is wanted
# before filter() runs -- confirm against the filter framework.
sub pre_codeconv() {
    my $wanted = 0;
    return $wanted;
}
69
# Filter capability flag; this filter always answers 0.
# NOTE(review): presumably means no character-code conversion is wanted
# after filter() runs -- confirm against the filter framework.
sub post_codeconv () {
    my $wanted = 0;
    return $wanted;
}
73
# Hook taking one argument; this filter registers nothing and simply
# returns an empty list (undef in scalar context).
sub add_magic ($) {
    return ();
}
77
# Entry point of the filter: pick the conversion backend and run it.
#
# Arguments (all passed straight through to the backend):
#   $orig_cfile, $cont, $weighted_str, $headings, $fields
#
# Uses filter_wv() when the wvHtml command is available, otherwise
# filter_doccat().  Returns whatever the chosen backend returns
# (undef on success, or an error message string).
sub filter ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields) = @_;

    my $backend = util::checkcmd('wvHtml') ? \&filter_wv : \&filter_doccat;

    # Call in scalar context, exactly one result is expected.
    my $err = $backend->($orig_cfile, $cont, $weighted_str, $headings, $fields);
    return $err;
}
90
# Convert an MS Word document to indexable text using wvHtml.
#
# Arguments:
#   $orig_cfile   - reference to the original file name (may be undef);
#                   only used to derive a title when none was extracted.
#   $cont         - reference to the document content; replaced in place
#                   with the converted text.
#   $weighted_str - reference passed on to the html/gfilter helpers.
#   $headings     - passed on to the html/gfilter helpers.
#   $fields       - hash reference; 'title' is filled in if still empty.
#
# Relies on the file-scoped $wordconvpath, $utfconvpath and $wvversionpath
# having been set by status().
#
# Returns undef on success, or an error message string when running in
# Japanese mode and the document is not in word8 format.
sub filter_wv ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
      = @_;
    my $cfile = defined $orig_cfile ? $$orig_cfile : '';

    my $tmpfile  = util::tmpnam('NMZ.word');
    my $tmpfile2 = util::tmpnam('NMZ.word2');

    util::vprint("Processing ms-word file ... (using '$wordconvpath')\n");

    # Dump the content to a temporary file; the handle is released at the
    # end of the block so the converter sees the complete file.
    {
        my $fh = util::efopen("> $tmpfile");
        print $fh $$cont;
    }

    if (!util::islang("ja")) {
        # List form: no shell involved, so paths with whitespace are safe.
        system($wordconvpath, $tmpfile, $tmpfile2);
    } else {
        my $version   = "unknown";
        my $supported = undef;

        # Scope the pipe handle so it is released before any cleanup below.
        {
            my $fh_cmd = util::efopen("$wvversionpath $tmpfile |");
            while (defined(my $line = <$fh_cmd>)) {
                if ($line =~ /^Version: (word\d+),/i) {
                    $version = $1;
                    #
                    # Only word8 format is supported for Japanese.
                    #
                    if ($version =~ /^word8$/) {
                        $supported = 1;
                    }
                }
            }
        }

        unless ($supported) {
            # Do not leave the dumped content behind on the error path.
            unlink($tmpfile);
            return _("Unsupported format: ") . $version;
        }

        system($wordconvpath, $tmpfile, $tmpfile2);
        # Needs the shell for the output redirection.
        system("$utfconvpath -Iu8 -Oej $tmpfile2 > $tmpfile");
        unlink($tmpfile2);
        rename($tmpfile, $tmpfile2);
    }

    {
        my $fh = util::efopen("< $tmpfile2");
        $$cont = util::readfile($fh);

        # Exclude wvHtml's footer because it has no good index terms.
        $$cont =~ s/<!--Section Ends-->.*$//s;
    }

    unlink($tmpfile);
    unlink($tmpfile2);

    html::html_filter($cont, $weighted_str, $fields, $headings);

    gfilter::line_adjust_filter($cont);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($cont);
    $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str)
      unless $fields->{'title'};
    gfilter::show_filter_debug_info($cont, $weighted_str,
                                    $fields, $headings);
    return undef;
}
156
# Convert an MS Word document to indexable text using doccat.
#
# Arguments:
#   $orig_cfile   - reference to the original file name (may be undef);
#                   only used to derive a title when none is set.
#   $cont         - reference to the document content; replaced in place
#                   with doccat's output.
#   $weighted_str - reference passed on to the gfilter helpers.
#   $headings     - passed on to the gfilter helpers.
#   $fields       - hash reference; 'title' is filled in if still empty.
#
# Relies on the file-scoped $wordconvpath having been set by status().
#
# Returns undef.
sub filter_doccat ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
      = @_;
    my $cfile = defined $orig_cfile ? $$orig_cfile : '';

    my $tmpfile  = util::tmpnam('NMZ.word');
    my $tmpfile2 = util::tmpnam('NMZ.word2');

    util::vprint("Processing ms-word file ... (using '$wordconvpath')\n");

    # Feed doccat from the content we were handed, as filter_wv does,
    # instead of re-copying the file named by $cfile: that path may be
    # empty when $orig_cfile is undef, and the copy result was never
    # checked.  The handle is released at the end of the block.
    {
        my $fh = util::efopen("> $tmpfile2");
        print $fh $$cont;
    }

    # Needs the shell for the output redirection.
    system("$wordconvpath -o e $tmpfile2 > $tmpfile");

    {
        my $fh = util::efopen("< $tmpfile");
        $$cont = util::readfile($fh);
    }

    unlink($tmpfile);
    unlink($tmpfile2);

    gfilter::line_adjust_filter($cont);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($cont);
    $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str)
      unless $fields->{'title'};
    gfilter::show_filter_debug_info($cont, $weighted_str,
                                    $fields, $headings);
    return undef;
}
185
186 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24