// This file is part of Roxen Search
// Copyright © 2001 Roxen IS. All rights reserved.
//
// $Id: Word.pmod,v 1.2 2001/07/12 22:50:40 nilsson Exp $
32e8b52001-06-28Johan Schön  // Filter for text/plain inherit Search.Filter.Base; constant contenttypes = ({ "application/msword", "application/vnd.ms-word" }); constant fields = ({ "body","title", "keywords"}); Output filter(Standards.URI uri, string|Stdio.File data, string content_type) { Output res=Output(); if(objectp(data)) data=data->read(); string s=Process.popen(sprintf("")); string fn=tmp_filename(); object f=Stdio.File(fn,"wcb");
ddc3c12001-06-29Johan Schön  f->write(data);
32e8b52001-06-28Johan Schön  f->close(); string text=Process.popen(combine_path(__FILE__, "bin/wvware/wvdump")+" "+fn+" -"); rm(fn); string md="", body=""; array a=text/"\n----------"; if(sizeof(a)>1) { md=a[0]; body=a[1]; } string field,value=""; foreach(md/"\n", string md_line) if(sscanf(md_line,"%s: %s",field,value)==2) res->fields[field]=value; res->fields->body=body; return res; } string _sprintf() { return "Search.Filter.Word"; }