1 |
# |
2 |
# -*- Perl -*- |
3 |
# $Id: excel.pl,v 1.13 2000/12/15 05:37:41 knok Exp $ |
4 |
# Copyright (C) 1997-2000 Satoru Takabayashi , |
5 |
# 1999 NOKUBI Takatsugu, |
6 |
# 2000 Namazu Project All rights reserved. |
7 |
# This is free software with ABSOLUTELY NO WARRANTY. |
8 |
# |
9 |
# This program is free software; you can redistribute it and/or modify |
10 |
# it under the terms of the GNU General Public License as published by |
11 |
# the Free Software Foundation; either versions 2, or (at your option) |
12 |
# any later version. |
13 |
# |
14 |
# This program is distributed in the hope that it will be useful |
15 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 |
# GNU General Public License for more details. |
18 |
# |
19 |
# You should have received a copy of the GNU General Public License |
20 |
# along with this program; if not, write to the Free Software |
21 |
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA |
22 |
# 02111-1307, USA |
23 |
# |
24 |
# This file must be encoded in EUC-JP encoding |
25 |
# |
26 |
|
27 |
package excel; |
28 |
use strict; |
29 |
use File::Copy; |
30 |
require 'util.pl'; |
31 |
require 'gfilter.pl'; |
32 |
require 'html.pl'; |
33 |
|
34 |
# Path of the Excel converter command chosen by status():
# xlHtml when available, otherwise doccat.  Undefined until status() runs.
my $xlconvpath;

# Path of the 'lv' command; status() looks it up only when xlHtml is
# present and the language is Japanese.
my $utfconvpath;
36 |
|
37 |
# Return the media type string this filter module handles.
sub mediatype() {
    return 'application/excel';
}
40 |
|
41 |
# Report whether this filter can be used: returns 'yes' or 'no'.
#
# Side effects: records the chosen converter in $xlconvpath and, for
# Japanese with xlHtml, the path of 'lv' in $utfconvpath.
#
#   * xlHtml found, language not Japanese -> 'yes'
#   * xlHtml found, language Japanese     -> 'yes' only if 'lv' is found
#   * xlHtml missing                      -> 'yes' only if 'doccat' is found
sub status() {
    $xlconvpath = util::checkcmd('xlHtml');

    if (!defined $xlconvpath) {
        # No xlHtml: fall back to doccat as the converter.
        $xlconvpath = util::checkcmd('doccat');
        return defined $xlconvpath ? 'yes' : 'no';
    }

    # xlHtml alone is enough unless the language is Japanese.
    return 'yes' if !util::islang("ja");

    # Japanese: filter_xl pipes xlHtml's output through 'lv', so it is
    # required as well.
    $utfconvpath = util::checkcmd('lv');
    return defined $utfconvpath ? 'yes' : 'no';
}
61 |
|
62 |
# Always returns 0.
# NOTE(review): presumably a capability flag read by the indexing driver
# (whether the filter's output should be filtered again) -- confirm
# against the caller.
sub recursive() {
    return 0;
}
65 |
|
66 |
# Always returns 0.
# NOTE(review): presumably tells the caller not to run character-code
# conversion before filter() is invoked -- confirm against the caller.
sub pre_codeconv() {
    return 0;
}
69 |
|
70 |
# Always returns 0.
# NOTE(review): presumably tells the caller not to run character-code
# conversion after filter() has returned -- confirm against the caller.
sub post_codeconv () {
    return 0;
}
73 |
|
74 |
# Register the ".xls" file-name pattern with the given magic object so
# that matching files are typed as 'application/excel'.
#
#   $magic - object providing an addFileExts(pattern, mediatype) method.
#
# Returns nothing.
sub add_magic ($) {
    my $magic = shift;

    $magic->addFileExts('\\.xls$', 'application/excel');

    return;
}
80 |
|
81 |
# Entry point of the filter: convert an Excel document held in $$cont.
#
# Delegates to filter_xl() when the xlHtml command is available and to
# filter_doccat() otherwise, passing all five arguments through.
#
# Returns whatever the selected backend returns (undef when the backend
# reports no error).
sub filter ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields) = @_;

    # The availability of xlHtml is probed again here rather than taken
    # from the state left behind by status().
    my $err = util::checkcmd('xlHtml')
        ? filter_xl($orig_cfile, $cont, $weighted_str, $headings, $fields)
        : filter_doccat($orig_cfile, $cont, $weighted_str, $headings, $fields);

    return $err;
}
93 |
|
94 |
# Convert an Excel document to text by way of xlHtml.
#
#   $orig_cfile   - reference to the original file name (may be undef)
#   $cont         - reference to the document content; replaced in place
#   $weighted_str - handed on to html::html_filter and the gfilter helpers
#   $headings     - handed on to html::html_filter
#   $fields       - hash reference; 'author' and 'title' are stored in it
#
# Relies on $xlconvpath (and, for Japanese, $utfconvpath) having been set
# by status().  Always returns undef.
sub filter_xl ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
      = @_;
    my $cfile = defined $orig_cfile ? $$orig_cfile : '';

    my $tmpfile  = util::tmpnam('NMZ.excel');
    my $tmpfile2 = util::tmpnam('NMZ.excel2');

    util::vprint("Processing ms-excel file ... (using '$xlconvpath')\n");

    # Write the raw document to a temporary file for xlHtml.  The handle
    # is lexical to the bare block so that it is released before the
    # converter runs.
    {
        my $fh = util::efopen("> $tmpfile");
        print $fh $$cont;
    }

    # -m: No encoding for multibyte. It's necessary to
    # handle a Japanese Excel 5.0 or 95 document correctly.
    util::systemcmd("$xlconvpath -m $tmpfile > $tmpfile2");

    {
        my $fh = util::efopen("< $tmpfile2");
        $$cont = util::readfile($fh);
    }

    # Code conversion for Japanese document.
    if (util::islang("ja")) {
        my $encoding = "u8";    # UTF-8
        # Pattern for xlHtml version 0.2.6.
        if ($$cont =~ m!^<FONT SIZE=-1><I>Last Updated using Excel 5.0 or 95</I></FONT><br>$!m)
        {
            $encoding = "s";    # Shift_JIS
        }
        {
            my $fh = util::efopen("> $tmpfile");
            print $fh $$cont;
        }
        # NOTE(review): -Oej presumably selects EUC-JP output -- confirm
        # against the lv manual.
        util::systemcmd("$utfconvpath -I$encoding -Oej $tmpfile > $tmpfile2");
        {
            my $fh = util::efopen("< $tmpfile2");
            $$cont = util::readfile($fh);
        }
    }

    # Extract the author and exclude xlHtml's footer at once.
    # The author is stored only when the substitution really matched:
    # after a failed match $1 still holds the capture of some earlier
    # successful match, which must not be recorded as the author.
    if ($$cont =~ s!^<FONT SIZE=-1><I>Spreadsheet's Author: (.*?)</I></FONT><br>.*!!ms) {
        $fields->{'author'} = $1;
    }

    unlink($tmpfile);
    unlink($tmpfile2);

    # Title should be removed.
    # Because xlHtml generate poor <TITLE>/foo/bar/NMZ.excel.tmp</TITLE>.
    $$cont =~ s!<TITLE>.+</TITLE>!!;

    html::html_filter($cont, $weighted_str, $fields, $headings);

    gfilter::line_adjust_filter($cont);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($cont);

    # The title is always derived from the file name, because the one
    # produced by xlHtml was discarded above.
    $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str);
    gfilter::show_filter_debug_info($cont, $weighted_str,
                                    $fields, $headings);
    return undef;
}
162 |
|
163 |
# Convert an Excel document to text by way of doccat.
#
#   $orig_cfile   - reference to the original file name (may be undef)
#   $cont         - reference to the document content; replaced in place
#                   by doccat's output
#   $weighted_str - handed on to the gfilter helpers
#   $headings     - handed on to gfilter::show_filter_debug_info
#   $fields       - hash reference; 'title' is filled in if not yet set
#
# Relies on $xlconvpath having been set to the doccat path by status().
# Returns undef on success, or an error message when the original file
# cannot be copied to the temporary work file.
sub filter_doccat ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
      = @_;
    my $cfile = defined $orig_cfile ? $$orig_cfile : '';

    my $tmpfile  = util::tmpnam('NMZ.excel');
    my $tmpfile2 = util::tmpnam('NMZ.excel2');

    # doccat is run on a private copy of the original file (not on $$cont).
    # A failed copy used to go unnoticed and doccat was then run on a file
    # that did not exist; report the failure to the caller instead.
    unless (copy("$cfile", "$tmpfile2")) {
        my $reason = "$!";
        unlink($tmpfile2);
        return "Unable to copy '$cfile' to '$tmpfile2': $reason";
    }

    util::systemcmd("$xlconvpath -o e $tmpfile2 > $tmpfile");

    # Lexical handle in a bare block so it is released before the unlink.
    {
        my $fh = util::efopen("< $tmpfile");
        $$cont = util::readfile($fh);
    }

    unlink($tmpfile);
    unlink($tmpfile2);

    gfilter::line_adjust_filter($cont);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($cont);

    # Keep a title that is already present; otherwise derive one from the
    # file name.
    $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str)
      unless $fields->{'title'};
    gfilter::show_filter_debug_info($cont, $weighted_str,
                                    $fields, $headings);
    return undef;
}
191 |
|
192 |
1; |