32e8b52001-06-28Johan Schön // This file is part of Roxen Search
7fbb5f2009-05-25Martin Stjernholm // Copyright © 2001 - 2009, Roxen IS. All rights reserved.
32e8b52001-06-28Johan Schön //
7fbb5f2009-05-25Martin Stjernholm // $Id: Word.pmod,v 1.13 2009/05/25 12:41:22 mast Exp $
32e8b52001-06-28Johan Schön 
40a44d2004-08-07Johan Schön inherit .HTML;
// MIME types declared for this filter (the two Word content types).
constant contenttypes = ({
  "application/msword",
  "application/vnd.ms-word"
});

// Field names declared for this filter.
// NOTE(review): presumably the fields the filter output may populate;
// confirm against the filter base module.
constant fields = ({
  "body",
  "title",
  "keywords"
});
32e8b52001-06-28Johan Schön 
// Converts a Word document to HTML with the external wvWare program
// and hands the result to the inherited HTML filter.
//
// uri           Passed through unchanged to the inherited filter().
// data          The document, either as a string or as an object
//               whose read() method yields the whole document.
// content_type  Not consulted by this filter.
//
// Returns the result of the inherited filter() applied to the
// converter output, declared as "text/html" in utf-8.
//
// Throws an error if the temporary file cannot be written in full,
// or rethrows whatever the conversion step raised. The temporary
// file is removed in both cases.
.Output filter(Standards.URI uri, string|Stdio.File data, string content_type)
{
  if(objectp(data))
    data = data->read();

  // The converter works on files, so spool the document to disk first.
  string fn = tmp_filename();
  object f = Stdio.File(fn, "wct");

  string text;
  mixed err = catch {
    // Writing happens inside the catch so that a failed or short
    // write still reaches the rm() below instead of leaking the file.
    int r = f->write(data);
    f->close();
    if(r != sizeof(data))
      error("Failed to write data for %O (returned %O, not %O)\n",
            fn, r, sizeof(data));

    // All paths are resolved against the current working directory.
    string root = getcwd();
    string bin = combine_path(root, "modules/search/bin/wvWare");
    string cwd = combine_path(root, "modules/search/bin");
    string xml = combine_path(root, "modules/search/pike-modules/"
                              "Search.pmod/Filter.pmod/wvHtml.xml");
    string doc = combine_path(root, fn);

    // Wait for process exit since rm() may otherwise fail
    text = my_popen( ({ bin, "-1", "-c", "utf-8", "-x", xml, doc }), cwd, 1);
  };

  // Best-effort cleanup: a failed removal is logged, not fatal.
  if(!rm(fn))
    werror("Search: Failed to remove temporary file: %s\n", fn);

  // Report the original failure only after the cleanup attempt.
  if(err)
    throw(err);

  return ::filter(uri, text, "text/html", ([]), "utf-8");
}