Branch: Tag:

2001-06-05

2001-06-05 15:00:59 by Per Hedbor <ph@opera.com>

Use the new Unicode.* functions

Rev: lib/modules/Search.pmod/Indexer.pmod:1.8
Rev: lib/modules/Search.pmod/Utils.pmod:1.3

11:    string f;    if( strlen(f = fields[field] ) )    { -  array words=Search.Utils.tokenize(Search.Utils.normalize(f)); +  array words=Search.Utils.tokenize_and_normalize( f );    db->insert_words(uri, language, field, words );    }    }
21:    foreach(indices(uri_anchors|| ({ })), string link_uri)    {    array(string) words= -  Search.Utils.tokenize(Search.Utils.normalize(uri_anchors[link_uri])); +  Search.Utils.tokenize_and_normalize(uri_anchors[link_uri]);    db->insert_words(link_uri, 0, "anchor", words, source_hash);    } -  -  h = gethrtime(); -  int source_hash=hash((string)uri)&0xf; -  foreach(indices(filteroutput->uri_anchors || ({ })), string link_uri) -  { -  array(string) words= -  Search.Utils.tokenize(Search.Utils.normalize -  (filteroutput->uri_anchors[link_uri])); -  db->insert_words(link_uri, 0, "anchor", words, source_hash); +    } - } +          array(Standards.URI) filter_and_extract_links(Search.Database.Base db,
58:   {    db->remove_document(uri, language);   } -  - array(Standards.URI) test_index(Search.Database.Base db, string uri) - { -  object request=Protocols.HTTP.get_url(uri); -  -  return filter_and_index_document(db, uri, 0, request->data(), -  request->headers["content-type"]); - } +