pike.git / lib / modules / Search.pmod / module.pmod

version» Context lines:

pike.git/lib/modules/Search.pmod/module.pmod:1:   // Roxen Whitefish main pike module   // - // Copyright © 2000, Roxen IS. + // Copyright © 2000,2001 Roxen IS.    - #include "types.h" +     - private mapping filters=([]); +     - void create() + private mapping filters; +  + // void create() + // { + // foreach(values(Search.Filter), program filter) + // { + // Search.Filter.Base tmp=filter(); + // foreach(tmp->contenttypes || ({ }), string mime) + // filters[mime]=tmp; + // } + // } +  + private void get_filters()   { -  foreach(values(Search.Filter), program filter) -  { -  Search.Filter.Base tmp=filter(); -  foreach(tmp->contenttypes || ({ }), string mime) -  filters[mime]=tmp; +  filters=([]); +  foreach(values(Search.Filter), object filter) +  foreach(filter->contenttypes || ({ }), string mime) +  filters[mime]=filter;   } - } +       Search.Filter.Base get_filter(string mime_type)   { -  +  if(!filters) +  get_filters();    if(!filters[mime_type]) return 0;    return filters[mime_type];   }    - array(string) get_filter_mime_types() + mapping(string:Search.Filter.Base) get_filter_mime_types()   { -  return indices(filters); +  if(!filters) +  get_filters(); +  return filters;   }    - array(mapping) splitter(array(string) text, array(int) context, -  function(string:string) post_normalization, -  function(mapping:int) ranking) - { -  if(sizeof(text)!=sizeof(context)) -  return 0; +     -  array(mapping) result=({}); -  for(int i=0; i<sizeof(text); i++) -  { -  array words=text[i]/" "; -  int inc=0, oldinc; -  foreach(words, string word) -  { -  oldinc=inc; -  inc+=sizeof(word)+1; -  word=post_normalization(word); -  if(!sizeof(word)) continue; -  mapping n_word=([ "word":word, -  "type":context[i], -  // "offset":offset[i]+oldinc, -  // This might be destroyed by pre_normalization -  ]); -  n_word->rank=ranking(n_word); -  result+=({ n_word }); -  } -  } +     -  return result; - } -  -  - // ---------- Anchor database ------------- -  - class Anchor_database { -  -  void add(string page, string href, string text) { -  } -  -  array(string) get_texts(string page) { -  return ({}); -  } -  - } -  -  +    // --- Page Ranking Algorithms ------------      float entropy(array(string) page_words) {    mapping(string:int) words=([]);    foreach(page_words, string word)    words[word]=1;    return (float)sizeof(words)/(float)sizeof(page_words);   }       - private constant rank_list = ([ -  T_TITLE : 1, -  T_KEYWORDS : 2, -  T_EXT_A : 3, -  T_H1 : 4, -  T_H2 : 5, -  T_H3 : 6, -  T_DESC : 7, -  T_H4 : 8, -  T_TH : 9, -  T_B : 10, -  T_I : 11, -  T_A : 12, -  T_NONE : 13, -  T_H5 : 14, -  T_H6 : 15 ]); -  - int rank(mapping word) - { -  return rank_list[word->type]; - } -  +    // A normal page has an entropy value around 0.5, so the result x should probably be   // remapped to abs(x-0.5) or even 1-abs(x-0.5)