// This file is part of Roxen Search
// Copyright © 2001 Roxen IS. All rights reserved.
//
// $Id: Word.pmod,v 1.11 2003/03/19 10:53:26 jonasw Exp $

// Build on the HTML filter: filter() below converts the document to HTML
// and then delegates to the inherited filter().
inherit Search.Filter.HTML;

// MIME types of the documents this filter is declared for.
constant contenttypes = ({
  "application/msword",
  "application/vnd.ms-word"
});

// Field names declared by this filter (consumed outside this file).
constant fields = ({
  "body",
  "title",
  "keywords"
});
// Converts a Microsoft Word document to HTML by running the external
// wvWare program on it, then hands the HTML to the inherited filter().
//
// uri          - forwarded unchanged to the inherited filter().
// data         - the document contents, either as a string or as an
//                object whose read() returns them.
// content_type - not used here; the converted text is always passed on
//                as "text/html".
//
// Returns the result of the inherited filter() for the converted HTML.
// Throws an error if the temporary file could not be written in full,
// and rethrows any error raised while running the converter.
Output filter(Standards.URI uri, string|Stdio.File data, string content_type)
{
  if (objectp(data))
    data = data->read();

  // The converter is given a file path on its command line, so the
  // document is spooled to a temporary file first.
  string fn = tmp_filename();
  object f = Stdio.File(fn, "wct");
  int written = f->write(data);
  f->close();
  if (written != sizeof(data)) {
    // Do not leave a partially written temporary file behind.
    if (!rm(fn))
      werror("Search: Failed to remove temporary file: %s\n", fn);
    error("Failed to write data for %O (returned %O, not %O)\n",
          fn, written, sizeof(data));
  }

  // All paths are resolved against the current working directory.
  string base = getcwd();
  string bin = combine_path(base, "modules/search/bin/wvWare");
  string cwd = combine_path(base, "modules/search/bin");
  string xml = combine_path(base, "modules/search/pike-modules/"
                            "Search.pmod/Filter.pmod/wvHtml.xml");
  // my_popen() is handed a separate directory (cwd) below, so the
  // temporary file is referred to by an absolute path.
  string doc = combine_path(base, fn);

  string text;
  mixed err = catch {
    // Wait for process exit since rm() may otherwise fail
    text = my_popen( ({ bin, "-1", "-c", "utf-8", "-x", xml, doc }), cwd, 1);
  };

  // Clean up the temporary file before propagating any converter error.
  if (!rm(fn))
    werror("Search: Failed to remove temporary file: %s\n", fn);
  if (err)
    throw(err);

  return ::filter(uri, text, "text/html", ([]), "utf-8");
}

// Returns the fixed name used when this object is formatted.
string _sprintf()
{
  return "Search.Filter.Word";
}