1 |
#
# -*- Perl -*-
# $Id: msword.pl,v 1.28.4.1 2001/07/13 01:14:26 knok Exp $
# Copyright (C) 1997-2000 Satoru Takabayashi All rights reserved.
# Copyright (C) 2000 Satoru Takabayashi Namazu Project All rights reserved.
# This is free software with ABSOLUTELY NO WARRANTY.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA
#
# This file must be encoded in EUC-JP encoding
#
25 |
|
26 |
package msword; |
27 |
use strict; |
28 |
use File::Copy; |
29 |
require 'util.pl'; |
30 |
require 'gfilter.pl'; |
31 |
require 'html.pl'; |
32 |
|
33 |
# Paths of the external helper commands.  They start out undefined and are
# filled in by status(); the filter subs read them when building command
# lines.
my $wordconvpath  = undef;    # converter: wvHtml if found, otherwise doccat
my $utfconvpath   = undef;    # lv (only looked up when the language is "ja")
my $wvversionpath = undef;    # wvVersion (only looked up when the language is "ja")
36 |
|
37 |
# Report the media type(s) handled by this filter.
#
# Returns a one-element list containing 'application/msword'.
sub mediatype() {
    return ('application/msword');
}
40 |
|
41 |
# Probe for the external converters and report whether this filter is usable.
#
# Side effects: stores the located command paths in the file-scoped
# $wordconvpath, $utfconvpath and $wvversionpath, which the filter subs
# later interpolate into their command lines.
#
# Returns 'yes' when a usable tool chain was found, 'no' otherwise.
sub status() {
    $wordconvpath = util::checkcmd('wvHtml');

    unless (defined $wordconvpath) {
        # No wvHtml available: doccat is the only remaining converter.
        $wordconvpath = util::checkcmd('doccat');
        return defined $wordconvpath ? 'yes' : 'no';
    }

    # When the language is not "ja", wvHtml on its own is sufficient.
    return 'yes' unless util::islang("ja");

    # For "ja" the wvHtml path additionally needs lv and wvVersion
    # (see filter_wv); without both of them the filter is reported unusable.
    $utfconvpath   = util::checkcmd('lv');
    $wvversionpath = util::checkcmd('wvVersion');
    return (defined $utfconvpath && defined $wvversionpath) ? 'yes' : 'no';
}
61 |
|
62 |
# Filter capability flag: always returns 0.
# NOTE(review): presumably tells the indexer that the output of this filter
# does not need to be fed through another filter -- confirm against the caller.
sub recursive() {
    return 0;
}
65 |
|
66 |
# Filter capability flag: always returns 0.
# NOTE(review): presumably means no character-code conversion should be done
# before filter() runs (the input is a binary document) -- confirm against
# the caller.
sub pre_codeconv() {
    return 0;
}
69 |
|
70 |
# Filter capability flag: always returns 0.
# NOTE(review): presumably means no character-code conversion is needed after
# filter() has run -- confirm against the caller.
sub post_codeconv () {
    return 0;
}
73 |
|
74 |
# Hook for registering file-magic entries.  This filter registers none: the
# single argument is ignored and nothing is returned (undef in scalar
# context, the empty list in list context).
sub add_magic ($) {
    return;
}
77 |
|
78 |
# Entry point of the filter: turn an MS Word document into indexable text.
#
# Arguments (all five are handed unchanged to the selected backend):
#   $orig_cfile   - reference to the file name of the document (may be undef)
#   $cont         - reference to the document content; overwritten with the
#                   extracted text
#   $weighted_str - passed on to the gfilter/html helpers
#   $headings     - passed on to the gfilter/html helpers
#   $fields       - hash reference; its 'title' entry is filled in if empty
#
# Returns whatever the backend returns: undef on success, or an error
# message string.
sub filter ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields) = @_;

    # wvHtml is preferred whenever it is installed; doccat is the fallback.
    my $backend = util::checkcmd('wvHtml') ? \&filter_wv : \&filter_doccat;

    my $err = $backend->($orig_cfile, $cont, $weighted_str, $headings, $fields);
    return $err;
}
90 |
|
91 |
# Convert a Word document with wvHtml and post-process the result as HTML.
#
# Arguments:
#   $orig_cfile   - reference to the file name (may be undef); only used to
#                   derive a title when the document supplies none
#   $cont         - reference to the raw document; replaced by the extracted
#                   text
#   $weighted_str - passed to html::html_filter and the gfilter helpers
#   $headings     - passed to html::html_filter and the debug output
#   $fields       - hash reference; 'title' is filled in if still empty
#
# Uses the command paths stored by status() ($wordconvpath, and for "ja"
# also $wvversionpath and $utfconvpath).
#
# Returns undef on success, or an "Unsupported format: ..." message when the
# language is "ja" and wvVersion does not report a word8 document.
sub filter_wv ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields) = @_;
    my $cfile = defined $orig_cfile ? $$orig_cfile : '';

    my $tmpfile  = util::tmpnam('NMZ.word');
    my $tmpfile2 = util::tmpnam('NMZ.word2');

    util::vprint("Processing ms-word file ... (using '$wordconvpath')\n");

    # Dump the raw document to disk for the external tools.  The handle is
    # closed explicitly so the data is flushed before any of them runs.
    {
        my $fh = util::efopen("> $tmpfile");
        print $fh $$cont;
        close($fh);
    }

    my $is_ja = util::islang("ja");

    if ($is_ja) {
        # Ask wvVersion which Word format the document uses.
        my $version   = "unknown";
        my $supported = 0;
        my $fh_cmd    = util::efopen("$wvversionpath $tmpfile |");
        while (defined(my $line = <$fh_cmd>)) {
            next unless $line =~ /^Version: (word\d+),/i;
            $version = $1;
            #
            # Only word8 format is supported for Japanese.
            #
            $supported = 1 if $version eq 'word8';
        }
        close($fh_cmd);

        unless ($supported) {
            # Do not leave the dumped document behind on the error path.
            unlink($tmpfile, $tmpfile2);
            # _() is defined outside this file (presumably the message
            # translation helper).
            return _("Unsupported format: ") . $version;
        }
    }

    # Word -> HTML.
    system("$wordconvpath $tmpfile $tmpfile2");

    if ($is_ja) {
        # Re-encode the HTML with lv (-Iu8 -Oej: UTF-8 in, EUC-JP out going
        # by lv's option naming), reusing $tmpfile as scratch space, then
        # move the result back to $tmpfile2 where it is read from below.
        system("$utfconvpath -Iu8 -Oej $tmpfile2 > $tmpfile");
        unlink($tmpfile2);
        rename($tmpfile, $tmpfile2);
    }

    {
        my $fh = util::efopen("< $tmpfile2");
        $$cont = util::readfile($fh);

        # Exclude wvHtml's footer because it has no good index terms.
        $$cont =~ s/<!--Section Ends-->.*$//s;
    }

    unlink($tmpfile);
    unlink($tmpfile2);

    # Note the argument order: html_filter takes $fields before $headings.
    html::html_filter($cont, $weighted_str, $fields, $headings);

    gfilter::line_adjust_filter($cont);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($cont);

    # Fall back to a title derived from the file name.
    $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str)
        unless $fields->{'title'};

    gfilter::show_filter_debug_info($cont, $weighted_str,
                                    $fields, $headings);
    return undef;
}
156 |
|
157 |
# Convert a Word document to plain text with doccat.
#
# Arguments:
#   $orig_cfile   - reference to the file name of the document (may be undef)
#   $cont         - reference to the raw document; replaced by the extracted
#                   text
#   $weighted_str - passed to the gfilter helpers
#   $headings     - only passed to the debug output
#   $fields       - hash reference; 'title' is filled in if still empty
#
# Uses $wordconvpath as stored by status() (the doccat path when wvHtml is
# not installed).
#
# Always returns undef.
sub filter_doccat ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields) = @_;
    my $cfile = defined $orig_cfile ? $$orig_cfile : '';

    my $tmpfile  = util::tmpnam('NMZ.word');
    my $tmpfile2 = util::tmpnam('NMZ.word2');

    # Same progress message as the wvHtml backend prints.
    util::vprint("Processing ms-word file ... (using '$wordconvpath')\n");

    # doccat reads its input from a file.  Copy the original document; if
    # that is not possible (no file name given, or the copy fails) write out
    # the in-memory content instead of running doccat on a missing file.
    unless (copy($cfile, $tmpfile2)) {
        my $fh = util::efopen("> $tmpfile2");
        print $fh $$cont;
        close($fh);
    }

    # NOTE(review): "-o e" presumably selects EUC output -- confirm against
    # the doccat documentation.
    system("$wordconvpath -o e $tmpfile2 > $tmpfile");

    {
        my $fh = util::efopen("< $tmpfile");
        $$cont = util::readfile($fh);
    }

    unlink($tmpfile);
    unlink($tmpfile2);

    gfilter::line_adjust_filter($cont);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($cont);

    # Fall back to a title derived from the file name.
    $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str)
        unless $fields->{'title'};

    gfilter::show_filter_debug_info($cont, $weighted_str,
                                    $fields, $headings);
    return undef;
}
185 |
|
186 |
1; |