// Writes one timing line to stderr for the indexing step named by label.
// usec is the elapsed time in the unit returned by gethrtime() differences
// as used by the caller; bytes, when given, is the amount of data processed.
// The throughput figure is only printed when a byte count was supplied and
// the elapsed time is positive, so a zero-length interval can never cause a
// float division by zero.
private void report_index_timing(string label, int usec, void|int bytes)
{
  if( !zero_type(bytes) && usec > 0 )
    werror("%s: %5dms (%4.1fMb/s)\n", label, usec/1000,
           (bytes/1024.0/1024.0)/(usec/1000000.0) );
  else
    werror("%s: %5dms\n", label, usec/1000 );
}

//! Runs the filter registered for content_type over data and stores the
//! result in db under uri / language.
//!
//! The steps are, in order: filter the data, remove any previously stored
//! document for uri, tokenize and insert every non-empty field, insert the
//! anchor words for every linked URI, and store the metadata fields.
//! Timing information for the filter, remove and "body" steps is written
//! to stderr.
//!
//! Throws an error if no filter is registered for content_type.
//!
//! Returns the links reported by the filter.
array(Standards.URI) index_document(Search.Database.Base db,
                                    string|Standards.URI uri,
                                    string|Stdio.File data,
                                    string content_type,
                                    void|string language)
{
  Search.Filter.Base filter=Search.get_filter(content_type);
  if(!filter)
    error("No indexer for content type "+content_type);

  int h = gethrtime();
  Search.Filter.Base.Output filteroutput=
    filter->filter(uri, data, content_type);
  int elapsed = gethrtime()-h;
  // A byte count is only meaningful when the data was handed over as a
  // string; for a file object only the elapsed time is reported.
  if( stringp(data) )
    report_index_timing("filter ", elapsed, strlen(data));
  else
    report_index_timing("filter ", elapsed);

  h = gethrtime();
  db->remove_document( uri, language );
  report_index_timing("remove ", gethrtime()-h);

  // Tokenize and normalize all the non-anchor fields
  foreach(indices(filteroutput->fields), string field)
  {
    string text = filteroutput->fields[field];
    if( !strlen(text) )
      continue;

    // Only the "body" field is timed.
    int timed = (field == "body");

    h = gethrtime();
    array words=Search.Utils.tokenize(Search.Utils.normalize(text));
    if( timed )
      report_index_timing("tokenize", gethrtime()-h, strlen(text));

    h = gethrtime();
    db->insert_words(uri, language, field, words);
    if( timed )
      report_index_timing("insert ", gethrtime()-h, strlen(text));
  }

  // Tokenize any anchor fields. The low four bits of the hash of the
  // source URI are passed along with the anchor words.
  int source_hash=hash((string)uri)&0xf;
  foreach(indices(filteroutput->uri_anchors || ({ })), string link_uri)
  {
    array(string) words=
      Search.Utils.tokenize(Search.Utils.normalize
                            (filteroutput->uri_anchors[link_uri]));
    db->insert_words(link_uri, 0, "anchor", words, source_hash);
  }

  // Only these four fields are stored as metadata.
  mapping md = (["title":1, "keywords": 1, "description": 1, "body": 1 ])
    & filteroutput->fields;
  db->set_metadata(uri, language, md);

  return filteroutput->links;
}
//! Runs the filter registered for content_type over data and returns the
//! links it reports. Nothing is written to db by this function.
//!
//! Throws an error if no filter is registered for content_type.
array(Standards.URI) extract_links(Search.Database.Base db,
                                   string|Standards.URI uri,
                                   string|Stdio.File data,
                                   string content_type)
{
  Search.Filter.Base filter=Search.get_filter(content_type);
  if(!filter)
    // error() rather than throw() of a bare string, so the failure carries
    // a backtrace and matches index_document().
    error("No indexer for content type "+content_type);

  Search.Filter.Base.Output filteroutput=
    filter->filter(uri, data, content_type);
  return filteroutput->links;
}
//! Removes the document stored under uri (and language, if given) from db
//! by forwarding the call to db->remove_document().
void remove_document(Search.Database.Base db,
                     string|Standards.URI uri,
                     void|string language)
{
  db->remove_document(uri, language);
}
6751ae2001-05-25Johan Schön 
//! Fetches uri over HTTP and indexes the response with index_document(),
//! using the response's "content-type" header as the content type.
//!
//! Throws an error if the fetch fails.
//!
//! Returns whatever index_document() returns.
array(Standards.URI) test_index(Search.Database.Base db, string uri)
{
  object request=Protocols.HTTP.get_url(uri);
  // get_url() yields 0 when the request fails; report that explicitly
  // instead of failing on an attempt to index 0.
  if(!request)
    error("Failed to fetch "+uri+"\n");

  // NOTE(review): the header value is passed on unmodified; if it carries
  // parameters such as a charset, confirm that Search.get_filter() copes.
  return index_document(db, uri, request->data(),
                        request->headers["content-type"]);
}