// This file is part of Roxen Search
// Copyright © 2001 Roxen IS. All rights reserved.
//
// $Id: Query.pmod,v 1.25 2002/03/25 12:54:32 js Exp $

//! Builds the blob callback that is handed to the _WhiteFish query
//! functions.
//!
//! @param db
//!   The database whose get_blob() is called for every request.
//! @param words
//!   The words of the query; each one gets its own request counter,
//!   starting at zero.
//!
//! @returns
//!   A function taking a word and an integer (the integer is ignored).
//!   Each call forwards to @expr{db->get_blob()@} with the word, the
//!   number of times that word has been requested before, and a cache
//!   mapping shared by all calls made through this feeder.
static function(string,int:string) blobfeeder(Search.Database.Base db,
                                              array words)
{
  // Number of blobs already requested, per word.
  mapping next_index = ([ ]);
  foreach (words, mixed w)
    next_index[w] = 0;

  // Handed unchanged to every get_blob() call from this feeder.
  mapping(string:mapping(int:string)) cache = ([ ]);

  return lambda(string word, int unused)
         {
           // Post-increment spelled out: use the current count, then bump it.
           int n = next_index[word];
           next_index[word] = n + 1;
           return db->get_blob(word, n, cache);
         };
}
//! Removes duplicate strings from an array, keeping the first
//! occurrence of each string and the original relative order.
//!
//! @param a
//!   The strings to filter.
//!
//! @returns
//!   A new array with every distinct string from @[a] exactly once.
static array(string) uniq_preserve_order(array(string) a)
{
  array(string) result = ({});
  // Membership is tracked in a multiset so each lookup is a hash probe
  // instead of a linear scan over the result built so far.
  multiset(string) seen = (<>);
  foreach (a, string s)
    if (!seen[s]) {
      seen[s] = 1;
      result += ({ s });
    }
  return result;
}

//! Thin wrapper around @expr{_WhiteFish.do_query_or()@}.
//!
//! @param db
//!   The database the blobs are fetched from (through blobfeeder()).
//! @param words
//!   The words to query for.
//! @param ranking
//!   Supplies the field_ranking, proximity_ranking and cutoff
//!   arguments of the underlying call.
//!
//! @returns
//!   The ResultSet produced by @expr{_WhiteFish.do_query_or()@}.
Search.ResultSet do_query_or(Search.Database.Base db,
                             array(string) words,
                             Search.RankingProfile ranking)
{
  return _WhiteFish.do_query_or(words,
                                ranking->field_ranking,
                                ranking->proximity_ranking,
                                ranking->cutoff,
                                blobfeeder(db, words));
}
//! Thin wrapper around @expr{_WhiteFish.do_query_and()@}.
//!
//! @param db
//!   The database the blobs are fetched from (through blobfeeder()).
//! @param words
//!   The words to query for.
//! @param ranking
//!   Supplies the field_ranking, proximity_ranking and cutoff
//!   arguments of the underlying call.
//!
//! @returns
//!   The ResultSet produced by @expr{_WhiteFish.do_query_and()@}.
Search.ResultSet do_query_and(Search.Database.Base db,
                              array(string) words,
                              Search.RankingProfile ranking)
{
  return _WhiteFish.do_query_and(words,
                                 ranking->field_ranking,
                                 ranking->proximity_ranking,
                                 ranking->cutoff,
                                 blobfeeder(db, words));
}
//! Thin wrapper around @expr{_WhiteFish.do_query_phrase()@}.
//!
//! @param db
//!   The database the blobs are fetched from (through blobfeeder()).
//! @param words
//!   The words of the phrase.
//! @param ranking
//!   Only its field_ranking is passed on to the underlying call.
//!
//! @returns
//!   The ResultSet produced by @expr{_WhiteFish.do_query_phrase()@}.
Search.ResultSet do_query_phrase(Search.Database.Base db,
                                 array(string) words,
                                 Search.RankingProfile ranking)
{
  // The cutoff argument is commented out in this call, unlike in
  // do_query_or() and do_query_and(). The comment sits on its own line
  // so that it cannot swallow the arguments that follow it.
  return _WhiteFish.do_query_phrase(words,
                                    ranking->field_ranking,
                                    // ranking->cutoff,
                                    blobfeeder(db, words));
}
//! Parses a query string and evaluates it against a search database.
//!
//! @param db
//!   The search database.
//! @param parser
//!   The parser whose @expr{parse()@} turns @[query] into a parse tree.
//! @param query
//!   The query string entered by user.
//! @param defaultRanking
//!   Used when searching in the field "any:". For any other field a
//!   copy of this profile is used, with its field_ranking replaced so
//!   that only the requested field has a non-zero rank.
//! @param stop_words
//!   Optional. When given and non-empty, the parse tree is passed
//!   through Search.Grammar.remove_stop_words() with these words
//!   before it is optimized.
//!
//! @returns
//!   An array with two elements:
//!   @array
//!     @elem Search.ResultSet 0
//!       The ResultSet containing the hits.
//!     @elem array(string) 1
//!       All wanted words in the query. (I.e. not the words that were
//!       preceded by minus.) Words matched only through glob patterns
//!       are not added to this list.
//!   @endarray
//!
//! @throws
//!   The string returned by Search.Grammar.validate() when it reports
//!   a problem, or a string if the evaluation stack does not end up
//!   with exactly one result set.
//!
array(Search.ResultSet|array(string)) execute(Search.Database.Base db,
                                              Search.Grammar.AbstractParser parser,
                                              string query,
                                              Search.RankingProfile defaultRanking,
                                              array(string)|void stop_words)
{
  Search.Grammar.ParseNode q = parser->parse(query);
  if (stop_words && sizeof(stop_words))
    q = Search.Grammar.remove_stop_words(q, stop_words);

  q = Search.Grammar.optimize(q);

  // The query was a null query, so return an empty resultset.
  if (!q)
    return ({ Search.ResultSet(), ({}) });

  // Named so that it does not shadow the global error() function in
  // this scope. The value is thrown as-is (a plain string).
  string validation_error = Search.Grammar.validate(q);
  if (validation_error)
    throw (validation_error);

  return class {
    static Search.RankingProfile defaultRanking;
    static Search.Database.Base db;

    // Used when search is limited to another field than "any:".
    static Search.RankingProfile specialRanking;

    static void create(Search.Database.Base _db,
                       Search.RankingProfile _defaultRanking)
    {
      db = _db;
      defaultRanking = _defaultRanking;
      specialRanking = defaultRanking->copy();
    }

    // Splits a word list into ({ plain words, glob patterns }). A word
    // counts as a glob pattern if it contains "*" or "?".
    static array(array(string)) split_words(array(string) words)
    {
      array a = ({}), b = ({});
      foreach (words, string word)
        if (has_value(word, "*") || has_value(word, "?"))
          b += ({ word });
        else
          a += ({ word });
      return ({ a, b });
    }

    static constant ParseNode = Search.Grammar.ParseNode;

    // The wanted (non-minus) words collected while evaluating the tree.
    static array(array(string)|string) words = ({ });

    // Evaluation stack; index 0 is the top.
    static array(Search.ResultSet) stack = ({ });

    static void push(Search.ResultSet r)
    {
      stack = ({ r }) + stack;
    }

    static Search.ResultSet pop()
    {
      if (!sizeof(stack))
        error("Very bad!");
      Search.ResultSet r = stack[0];
      stack = stack[1 .. ];
      return r;
    }

    // Replaces the two topmost result sets with their intersection.
    static void intersect_top()
    {
      Search.ResultSet r2 = pop();
      Search.ResultSet r1 = pop();
      push(r1 & r2);
    }

    // Replaces the two topmost result sets with their union.
    static void unite_top()
    {
      Search.ResultSet r2 = pop();
      Search.ResultSet r1 = pop();
      push(r1 | r2);
    }

    // Evaluates the whole tree and returns ({ hits, wanted words }).
    array(Search.ResultSet|array(string)) execute(ParseNode q)
    {
      exec(q);
      if (sizeof(stack) != 1)
        throw ("Stack should have exactly one item!");
      return ({ pop(), words });
    }

    // Evaluates one node, leaving its result set on top of the stack.
    void exec(ParseNode q)
    {
      // Passed as the second argument to every db->expand_word_glob() call.
      int max_globs = 100;

      switch (q->op) {
        case "and":
          {
            int first = 1;
            foreach (q->children, ParseNode child)
              if (child->op != "date") {
                exec(child);
                if (!first)
                  intersect_top();
                else
                  first = 0;
              }
            // ( DATE: limitations not implemented yet... )
            //
            // foreach (q->children, ParseNode child)
            //   if (child->op == "date")
            //     exec(child);
          }
          break;

        case "or":
          {
            int first = 1;
            foreach (q->children, ParseNode child) {
              exec(child);
              if (!first)
                unite_top();
              else
                first = 0;
            }
          }
          break;

        case "date":
          // NOT IMPLEMENTED YET
          break;

        case "text":
          {
            Search.RankingProfile ranking = defaultRanking;

            if (q->field != "any")
            {
              ranking = specialRanking;
              int fieldID = db->get_field_id(q->field, 1);
              // A zero id is treated as "no such field" unless the
              // field is "body".
              if (!fieldID && q->field != "body")
              {
                // There was no such field, so we push an empty ResultSet !
                push(Search.ResultSet());
                break;
              }
              // Rank only the requested field.
              ranking->field_ranking = allocate(65);
              ranking->field_ranking[fieldID] = 1;
            }

            [array plusWords, array plusWordGlobs] = split_words(q->plusWords);
            [array ordinaryWords, array ordinaryWordGlobs] = split_words(q->words);
            [array minusWords, array minusWordGlobs] = split_words(q->minusWords);

            // werror("[%-10s] plus: %-15s ordinary: %-15s minus: %-15s\n",
            //        q->field, q->plusWords*", ", q->words*", ",
            //        q->minusWords*", ");

            int hasPlus = sizeof(q->plusWords) || sizeof(q->plusPhrases);
            int hasOrdinary = sizeof(q->words) || sizeof(q->phrases);
            int hasMinus = sizeof(q->minusWords) || sizeof(q->minusPhrases);

            // Must-have terms: everything is intersected.
            if (hasPlus)
            {
              int first = 1;
              if (sizeof(plusWords))
              {
                words += plusWords;
                push(do_query_and(db, plusWords, ranking));
                first = 0;
              }
              foreach (plusWordGlobs, string plusWordGlob)
              {
                push(do_query_or(db,
                                 db->expand_word_glob(plusWordGlob, max_globs),
                                 ranking));
                if (!first)
                  intersect_top();
                first = 0;
              }
              foreach (q->plusPhrases, array(string) ph)
              {
                words += ph;
                push(do_query_phrase(db, ph, ranking));
                if (!first)
                  intersect_top();
                first = 0;
              }
            }

            // May-have terms: everything is united.
            if (hasOrdinary)
            {
              int first = 1;
              if (sizeof(ordinaryWords))
              {
                words += ordinaryWords;
                push(do_query_or(db, ordinaryWords, ranking));
                first = 0;
              }
              foreach (ordinaryWordGlobs, string ordinaryWordGlob)
              {
                push(do_query_or(db,
                                 db->expand_word_glob(ordinaryWordGlob, max_globs),
                                 ranking));
                if (!first)
                  unite_top();
                first = 0;
              }
              foreach (q->phrases, array(string) ph)
              {
                words += ph;
                push(do_query_phrase(db, ph, ranking));
                if (!first)
                  unite_top();
                first = 0;
              }
            }

            if (hasPlus && hasOrdinary)
            {
              Search.ResultSet r2 = pop();
              Search.ResultSet r1 = pop();
              // If a document contains must-have words AND ALSO may-have
              // words, its ranking is increased.
              push(r1->add_ranking(r2));
            }

            // Unwanted terms are united and then subtracted. Nothing is
            // pushed for a node that has only minus terms.
            if ((hasPlus || hasOrdinary) && hasMinus)
            {
              int first = 1;
              // Only the glob-free words are looked up literally; the
              // glob patterns are expanded in the loop below, the same
              // way the plus and ordinary branches do it.
              if (sizeof(minusWords))
              {
                push(do_query_or(db, minusWords, ranking));
                first = 0;
              }
              foreach (minusWordGlobs, string minusWordGlob)
              {
                push(do_query_or(db,
                                 db->expand_word_glob(minusWordGlob, max_globs),
                                 ranking));
                if (!first)
                  unite_top();
                first = 0;
              }
              foreach (q->minusPhrases, array(string) ph)
              {
                push(do_query_phrase(db, ph, ranking));
                if (!first)
                  unite_top();
                first = 0;
              }
              Search.ResultSet r2 = pop();
              Search.ResultSet r1 = pop();
              push(r1 - r2);
            }
          }
          break;

        default:
          error("Unknown type of ParseNode!");
      } // switch (q->op)
    }

  } (db, defaultRanking)->execute(q);
}