Branch: Tag:

2001-08-08

2001-08-08 15:12:03 by Fredrik Noring <noring@nocrew.org>

Made the Word filter basically work.

Rev: lib/modules/Search.pmod/Filter.pmod/Word.pmod:1.4

1:   // This file is part of Roxen Search   // Copyright © 2001 Roxen IS. All rights reserved.   // - // $Id: Word.pmod,v 1.3 2001/08/07 12:34:09 js Exp $ + // $Id: Word.pmod,v 1.4 2001/08/08 15:12:03 noring Exp $    - // Filter for text/plain + inherit Search.Filter.HTML;    - inherit Search.Filter.Base; -  +    constant contenttypes = ({ "application/msword", "application/vnd.ms-word" }); - constant fields = ({ "body","title", "keywords"}); + constant fields = ({ "body", "title", "keywords"});      Output filter(Standards.URI uri, string|Stdio.File data, string content_type)   {
17:    if(objectp(data))    data=data->read();    -  string s=Process.popen(sprintf("")); -  -  string fn=tmp_filename(); -  object f=Stdio.File(fn,"wcb"); -  f->write(data); +  string fn = tmp_filename(); +  object f = Stdio.File(fn, "wct"); +  int r = f->write(data);    f->close(); -  +  if(r != sizeof(data)) +  error("Failed to write data for %O (returned %O, not %O)\n", +  fn, r, sizeof(data));    -  string text=Process.popen(combine_path(__FILE__, "../../../../bin/wvdump")+" "+fn+" -"); +  string text = my_popen(({ combine_path(__FILE__, "../../../../bin/wvWare"), +  "-c", "utf-8", +  "-x", combine_path(__FILE__, "../wvHtml.xml"), +  fn })); +     rm(fn);    -  string md="", body=""; -  array a=text/"\n----------"; -  -  if(sizeof(a)>1) -  { -  md=a[0]; -  body=a[1]; +  return ::filter(uri, text, "text/html", ([]), "utf-8");   }    -  string field,value=""; -  foreach(md/"\n", string md_line) -  if(sscanf(md_line,"%s: %s",field,value)==2) -  res->fields[field]=value; -  -  res->fields->body=body; -  -  return res; - } -  +    string _sprintf()   {    return "Search.Filter.Word";   }