/[suikacvs]/www/namazu/filter/excel.pl
Suika

Contents of /www/namazu/filter/excel.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.1.1.1 - (show annotations) (download) (vendor branch)
Fri Nov 30 07:56:45 2001 UTC (22 years, 5 months ago) by wakaba
Branch: MAIN, wakaba
CVS Tags: initial, HEAD
Changes since 1.1: +0 -0 lines
File MIME type: text/plain

1 #
2 # -*- Perl -*-
3 # $Id: excel.pl,v 1.13 2000/12/15 05:37:41 knok Exp $
4 # Copyright (C) 1997-2000 Satoru Takabayashi ,
5 # 1999 NOKUBI Takatsugu,
6 # 2000 Namazu Project All rights reserved.
7 # This is free software with ABSOLUTELY NO WARRANTY.
8 #
9 # This program is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2, or (at your option)
12 # any later version.
13 #
14 # This program is distributed in the hope that it will be useful
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
18 #
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
22 # 02111-1307, USA
23 #
24 # This file must be encoded in EUC-JP encoding
25 #
26
27 package excel;
28 use strict;
29 use File::Copy;
30 require 'util.pl';
31 require 'gfilter.pl';
32 require 'html.pl';
33
# File-level state shared by status() and the filter routines.
#   $xlconvpath  - value returned by util::checkcmd() for the converter
#                  command ('xlHtml', or 'doccat' when xlHtml is missing).
#   $utfconvpath - value returned by util::checkcmd('lv'); only looked up
#                  when util::islang("ja") is true.
# Both start out undefined until status() has run.
my ($xlconvpath, $utfconvpath);
36
# Report the media type handled by this filter.
#
# Returns the single string 'application/excel'.
sub mediatype() {
    return 'application/excel';
}
40
# Decide whether this filter can be used and remember which external
# commands it will run.
#
# Side effects: sets the file-level $xlconvpath (to the result of looking
# up 'xlHtml', or 'doccat' when xlHtml is not found) and, for the Japanese
# language setting with xlHtml present, $utfconvpath (lookup of 'lv').
#
# Returns the string 'yes' or 'no'.
sub status() {
    $xlconvpath = util::checkcmd('xlHtml');

    # No xlHtml: doccat alone is enough, whatever the language.
    unless (defined $xlconvpath) {
        $xlconvpath = util::checkcmd('doccat');
        return defined $xlconvpath ? 'yes' : 'no';
    }

    # xlHtml without Japanese handling needs no further tool.
    return 'yes' unless util::islang("ja");

    # Japanese: xlHtml output is additionally passed through lv
    # (see filter_xl), so lv must be available too.
    $utfconvpath = util::checkcmd('lv');
    return defined $utfconvpath ? 'yes' : 'no';
}
61
# Filter capability flag: always 0 for this filter.
# NOTE(review): presumably tells the caller that the filter output need not
# be fed through further filters - confirm against the filter API.
sub recursive() {
    return 0;
}
65
# Filter capability flag: always 0 for this filter.
# NOTE(review): presumably means no character-code conversion is wanted
# before filter() runs - confirm against the filter API.
sub pre_codeconv() {
    return 0;
}
69
# Filter capability flag: always 0 for this filter.
# NOTE(review): presumably means no character-code conversion is wanted
# after filter() runs - confirm against the filter API.
sub post_codeconv () {
    return 0;
}
73
# Register the '.xls' file-name pattern for this filter's media type.
#
# Arguments:
#   $magic - object providing an addFileExts(pattern, mediatype) method.
#
# Returns nothing.
sub add_magic ($) {
    my $magic = shift;

    # The pattern is a regex source string: a literal dot followed by
    # "xls" at the end of the name.
    $magic->addFileExts('\\.xls$', 'application/excel');

    return;
}
80
# Entry point of the Excel filter: pick a converter and delegate to it.
#
# Arguments are passed through unchanged to the selected routine:
#   $orig_cfile, $cont, $weighted_str, $headings, $fields
# (see filter_xl / filter_doccat for how each is used).
#
# Returns whatever the selected routine returns; both routines in this
# file return undef.
sub filter ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
        = @_;

    # xlHtml is preferred whenever it can be found; doccat is the fallback.
    my $converter = util::checkcmd('xlHtml') ? \&filter_xl : \&filter_doccat;

    my $err = $converter->($orig_cfile, $cont, $weighted_str, $headings, $fields);
    return $err;
}
93
# Convert an Excel document to indexable text using xlHtml.
#
# Arguments:
#   $orig_cfile   - reference to the original file name, or undef; used
#                   only to derive the title.
#   $cont         - reference to the document content; overwritten in
#                   place with the converted and filtered text.
#   $weighted_str - handed to html::html_filter and the gfilter helpers.
#   $headings     - handed to html::html_filter.
#   $fields       - hash reference; 'title' is always set here, 'author'
#                   is set when xlHtml's author footer is found.
#
# Returns undef.
#
# Uses the file-level $xlconvpath and, when util::islang("ja") is true,
# $utfconvpath; both are assigned in status().
sub filter_xl ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
        = @_;
    my $cfile = defined $orig_cfile ? $$orig_cfile : '';

    my $tmpfile  = util::tmpnam('NMZ.excel');
    my $tmpfile2 = util::tmpnam('NMZ.excel2');

    util::vprint("Processing ms-excel file ... (using '$xlconvpath')\n");

    # Write the raw document to the first temporary file.  The bare block
    # limits the lifetime of $fh - presumably so the handle is released
    # before the external command reads the file.
    {
        my $fh = util::efopen("> $tmpfile");
        print $fh $$cont;
    }

    # -m: No encoding for multibyte.  It's necessary to
    # handle a Japanese Excel 5.0 or 95 document correctly.
    util::systemcmd("$xlconvpath -m $tmpfile > $tmpfile2");

    {
        my $fh = util::efopen("< $tmpfile2");
        $$cont = util::readfile($fh);
    }

    # Code conversion for Japanese document.
    if (util::islang("ja")) {
        my $encoding = "u8";    # UTF-8
        # Pattern for xlHtml version 0.2.6.
        if ($$cont =~ m!^<FONT SIZE=-1><I>Last Updated&nbsp;using Excel 5.0 or 95</I></FONT><br>$!m)
        {
            $encoding = "s";    # Shift_JIS
        }
        {
            my $fh = util::efopen("> $tmpfile");
            print $fh $$cont;
        }
        # NOTE(review): -Oej presumably selects EUC-JP output - confirm
        # against the lv manual.
        util::systemcmd("$utfconvpath -I$encoding -Oej $tmpfile > $tmpfile2");
        {
            my $fh = util::efopen("< $tmpfile2");
            $$cont = util::readfile($fh);
        }
    }

    # Extract the author and exclude xlHtml's footer at once.
    # The author is recorded only when the substitution matched: a failed
    # match does not reset $1, so an unconditional assignment could store
    # a stale capture left over from an earlier, unrelated match.
    if ($$cont =~ s!^<FONT SIZE=-1><I>Spreadsheet's Author:&nbsp;(.*?)</I></FONT><br>.*!!ms) { # '
        $fields->{'author'} = $1;
    }

    unlink($tmpfile);
    unlink($tmpfile2);

    # The title should be removed, because xlHtml generates a poor
    # <TITLE>/foo/bar/NMZ.excel.tmp</TITLE>.
    $$cont =~ s!<TITLE>.+</TITLE>!!;

    html::html_filter($cont, $weighted_str, $fields, $headings);

    gfilter::line_adjust_filter($cont);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($cont);

    # The title is derived from the file name, replacing anything set so far.
    $fields->{'title'} = gfilter::filename_to_title($cfile, $weighted_str);
    gfilter::show_filter_debug_info($cont, $weighted_str,
                                    $fields, $headings);
    return undef;
}
162
# Convert an Excel document to indexable text using doccat.
#
# Arguments:
#   $orig_cfile   - reference to the original file name, or undef.  The
#                   file itself is copied and fed to the converter; the
#                   incoming value of $$cont is not used as input.
#   $cont         - reference that receives the converted text.
#   $weighted_str - handed to the gfilter helpers.
#   $headings     - handed to gfilter::show_filter_debug_info.
#   $fields       - hash reference; 'title' is filled in when it has no
#                   true value yet.
#
# Returns undef.
#
# Uses the file-level $xlconvpath assigned in status().
sub filter_doccat ($$$$$) {
    my ($orig_cfile, $cont, $weighted_str, $headings, $fields)
        = @_;
    my $cfile = '';
    $cfile = $$orig_cfile if defined $orig_cfile;

    my $tmpfile  = util::tmpnam('NMZ.excel');
    my $tmpfile2 = util::tmpnam('NMZ.excel2');

    # Work on a private copy of the original file.
    # NOTE(review): the result of copy() is not checked, so a failed copy
    # (for example an empty $cfile) goes unnoticed here.
    copy("$cfile", "$tmpfile2");

    # NOTE(review): "-o e" presumably selects EUC output - confirm against
    # the doccat documentation.
    util::systemcmd("$xlconvpath -o e $tmpfile2 > $tmpfile");

    # Read the converter output back; $fh lives only inside the do block.
    $$cont = do {
        my $fh = util::efopen("< $tmpfile");
        util::readfile($fh);
    };

    unlink($tmpfile, $tmpfile2);

    gfilter::line_adjust_filter($cont);
    gfilter::line_adjust_filter($weighted_str);
    gfilter::white_space_adjust_filter($cont);

    # Fall back to a title derived from the file name only when none is set.
    $fields->{'title'} ||= gfilter::filename_to_title($cfile, $weighted_str);

    gfilter::show_filter_debug_info($cont, $weighted_str,
                                    $fields, $headings);
    return undef;
}
191
192 1;

admin@suikawiki.org
ViewVC Help
Powered by ViewVC 1.1.24