// This file is part of Roxen Search
// Copyright © 2001 Roxen IS. All rights reserved.
//
// $Id: Word.pmod,v 1.8 2001/11/19 13:33:15 js Exp $

// The HTML filter is inherited; filter() below delegates to its filter().
inherit Search.Filter.HTML;

// Content types declared for this filter.
// NOTE(review): presumably read by the filter registry to route documents
// here -- confirm against the code that loads Search.Filter modules.
constant contenttypes = ({
  "application/msword",
  "application/vnd.ms-word"
});

// Field names declared for this filter.
// NOTE(review): presumably the fields the resulting Output may carry -- confirm.
constant fields = ({
  "body",
  "title",
  "keywords"
});
//! Filter a Word document by converting it with the external wvWare
//! program and passing the converted text on to the inherited filter.
//!
//! @param uri
//!   Passed through unchanged to the inherited @expr{filter()@}.
//! @param data
//!   The document, either as a string or as an object whose
//!   @expr{read()@} result is used as the document.
//! @param content_type
//!   Not consulted by this implementation; the inherited filter is always
//!   called with @expr{"text/html"@}.
//!
//! @returns
//!   Whatever the inherited @expr{filter()@} returns for the converted text.
//!
//! @throws
//!   An error if the temporary file could not be written completely, or
//!   whatever error the conversion step raised. The temporary file is
//!   removed before the error is propagated.
Output filter(Standards.URI uri, string|Stdio.File data, string content_type)
{
  if(objectp(data))
    data = data->read();

  // wvWare is given a file name, so the document is written to a temporary
  // file first. tmp_filename() and my_popen() are not defined in this file
  // (presumably inherited helpers).
  string fn = tmp_filename();
  object f = Stdio.File(fn, "wct");

  string text;
  // Everything that can fail after the file has been created runs inside
  // one catch, so the temporary file is removed on every path -- including
  // a short write, which previously threw before the rm() below was reached.
  mixed err = catch {
    int r = f->write(data);
    f->close();
    if(r != sizeof(data))
      error("Failed to write data for %O (returned %O, not %O)\n",
            fn, r, sizeof(data));

    // Both paths are relative, so they resolve against the working
    // directory of the running process.
    text = my_popen(({ "modules/search/bin/wvWare",
                       "-c", "utf-8",
                       "-x", "modules/search/pike-modules/Search.pmod/Filter.pmod/wvHtml.xml",
                       fn }));
  };

  // Best-effort cleanup: a failed removal is only logged.
  if(!rm(fn))
    werror("Search: Failed to remove temporary file: %s\n", fn);

  // Propagate the original error only after the cleanup attempt.
  if(err)
    throw(err);

  // The converted text is handed over as text/html with utf-8 given as the
  // charset argument, matching the "-c utf-8" passed to wvWare above.
  return ::filter(uri, text, "text/html", ([]), "utf-8");
}

//! Name shown when this object is formatted with sprintf().
string _sprintf()
{
  return "Search.Filter.Word";
}