87e9262001-06-22Martin Nilsson // This file is part of Roxen Search // Copyright © 2001 Roxen IS. All rights reserved. //
f70d322001-09-26Johan Schön // $Id: Query.pmod,v 1.23 2001/09/25 22:02:37 js Exp $
87e9262001-06-22Martin Nilsson 
//! Builds the blob-fetching callback that is handed to the _WhiteFish
//! query functions.
//!
//! @param db
//!   The database whose @expr{get_blob()@} is called by the callback.
//! @param words
//!   The words that will be looked up. Each one gets its own request
//!   counter, starting at zero.
//!
//! @returns
//!   A function taking a word and an integer (the integer is ignored).
//!   Every call forwards the word, the number of times that word has
//!   been requested so far, and a cache mapping shared by all calls of
//!   this callback to @expr{db->get_blob()@}, and returns its result.
static function(string,int:string) blobfeeder(Search.Database.Base db, array words)
{
  // Cache shared by every get_blob() call made through the returned closure.
  mapping(string:mapping(int:string)) blobcache = ([ ]);

  // Per-word request counter; every known word starts at 0.
  mapping state = ([ ]);
  foreach (words, mixed w)
    state[w] = 0;

  return lambda(string word, int unused)
  {
    // Post-increment: pass the current count, then bump it for next time.
    int request_no = state[word]++;
    return db->get_blob(word, request_no, blobcache);
  };
}
//! Removes duplicate strings from @[a] while keeping the order of the
//! first occurrences.
//!
//! @param a
//!   The strings to filter.
//!
//! @returns
//!   A new array holding each distinct string of @[a] exactly once, in
//!   the order in which it first appeared.
static array(string) uniq_preserve_order(array(string) a)
{
  // Membership is tracked in a multiset so each lookup is a hash probe
  // instead of a linear search through the result built so far.
  multiset(string) seen = (< >);
  array(string) result = ({ });
  foreach (a, string s)
    if (!seen[s]) {
      seen[s] = 1;
      result += ({ s });
    }
  return result;
}
965c0b2001-05-29Johan Schön 
//! Runs @expr{_WhiteFish.do_query_or()@} for @[words].
//!
//! @param db
//!   The database the blobs are fetched from.
//! @param words
//!   The words to search for.
//! @param ranking
//!   Supplies the @expr{field_ranking@}, @expr{proximity_ranking@} and
//!   @expr{cutoff@} arguments.
//!
//! @returns
//!   The result set produced by @expr{_WhiteFish.do_query_or()@}.
Search.ResultSet do_query_or(Search.Database.Base db,
                             array(string) words,
                             Search.RankingProfile ranking)
{
  return _WhiteFish.do_query_or(words,
                                ranking->field_ranking,
                                ranking->proximity_ranking,
                                ranking->cutoff,
                                blobfeeder(db, words));
}
//! Runs @expr{_WhiteFish.do_query_and()@} for @[words].
//!
//! @param db
//!   The database the blobs are fetched from.
//! @param words
//!   The words to search for.
//! @param ranking
//!   Supplies the @expr{field_ranking@}, @expr{proximity_ranking@} and
//!   @expr{cutoff@} arguments.
//!
//! @returns
//!   The result set produced by @expr{_WhiteFish.do_query_and()@}.
Search.ResultSet do_query_and(Search.Database.Base db,
                              array(string) words,
                              Search.RankingProfile ranking)
{
  return _WhiteFish.do_query_and(words,
                                 ranking->field_ranking,
                                 ranking->proximity_ranking,
                                 ranking->cutoff,
                                 blobfeeder(db, words));
}
//! Runs @expr{_WhiteFish.do_query_phrase()@} for the phrase made up of
//! @[words].
//!
//! @param db
//!   The database the blobs are fetched from.
//! @param words
//!   The words of the phrase.
//! @param ranking
//!   Supplies the @expr{field_ranking@} argument. Unlike the and/or
//!   variants, no cutoff is passed on (that argument is commented out).
//!
//! @returns
//!   The result set produced by @expr{_WhiteFish.do_query_phrase()@}.
Search.ResultSet do_query_phrase(Search.Database.Base db,
                                 array(string) words,
                                 Search.RankingProfile ranking)
{
  return _WhiteFish.do_query_phrase(words,
                                    ranking->field_ranking,
                                    // ranking->cutoff,
                                    blobfeeder(db, words));
}
//! Parses @[query], evaluates it against @[db] and returns the hits.
//!
//! @param db
//!   The search database.
//! @param parser
//!   The parser whose @expr{parse()@} turns @[query] into a parse tree.
//! @param query
//!   The query string entered by user.
//! @param defaultRanking
//!   Used when searching in the field "any:".
//! @param stop_words
//!   Optional. When given and non-empty, the parse tree is passed
//!   through @expr{Search.Grammar.remove_stop_words()@} with these words
//!   before it is optimized.
//!
//! @returns
//!   An array with two elements:
//!   @array
//!     @elem Search.ResultSet 0
//!       The ResultSet containing the hits.
//!     @elem array(string) 1
//!       All wanted words in the query. (I.e. not the words that were
//!       preceded by minus.)
//!   @endarray
//!
//! @throws
//!   The string returned by @expr{Search.Grammar.validate()@} is thrown
//!   as-is when the query does not validate.
//!
array(Search.ResultSet|array(string)) execute(Search.Database.Base db,
                                              Search.Grammar.AbstractParser parser,
                                              string query,
                                              Search.RankingProfile defaultRanking,
                                              array(string)|void stop_words)
{
  Search.Grammar.ParseNode q = parser->parse(query);
  if (stop_words && sizeof(stop_words))
    q = Search.Grammar.remove_stop_words(q, stop_words);
  q = Search.Grammar.optimize(q);

  if (!q)                                  // The query was a null query
    return ({ Search.ResultSet(), ({}) }); // so return an empty resultset

  // Deliberately not called "error": the class below calls the global
  // error() function, and a local with that name would shadow it.
  string validation_error = Search.Grammar.validate(q);
  if (validation_error)
    throw (validation_error);

  // The query is evaluated by a small stack machine: exec() walks the
  // parse tree and leaves exactly one ResultSet per evaluated node on
  // the stack.
  return class {
    static Search.RankingProfile defaultRanking;
    static Search.Database.Base db;

    // Used when search is limited to another field than "any:".
    static Search.RankingProfile specialRanking;

    static void create(Search.Database.Base _db,
                       Search.RankingProfile _defaultRanking) {
      db = _db;
      defaultRanking = _defaultRanking;
      // Work on a copy, since exec() overwrites its field_ranking.
      specialRanking = defaultRanking->copy();
    }

    static constant ParseNode = Search.Grammar.ParseNode;

    // All wanted (plus and ordinary) words seen so far.
    static array(array(string)|string) words = ({ });

    // Result stack; index 0 is the top.
    static array(Search.ResultSet) stack = ({ });

    static void push(Search.ResultSet r) {
      stack = ({ r }) + stack;
    }

    static Search.ResultSet pop() {
      if (!sizeof(stack))
        error("Very bad!");
      Search.ResultSet r = stack[0];
      stack = stack[1 .. ];
      return r;
    }

    // Evaluates the whole tree and returns ({ hits, wanted words }).
    array(Search.ResultSet|array(string)) execute(ParseNode q) {
      exec(q);
      if (sizeof(stack) != 1)
        throw ("Stack should have exactly one item!");
      return ({ pop(), words });
    }

    // Evaluates one node of the parse tree.
    void exec(ParseNode q) {
      switch (q->op) {
        case "and":
          {
            int first = 1;
            foreach (q->children, ParseNode child)
              if (child->op != "date") {
                exec(child);
                if (!first) {
                  Search.ResultSet r2 = pop();
                  Search.ResultSet r1 = pop();
                  push(r1 & r2);
                }
                else
                  first = 0;
              }
            // ( DATE: limitations not implemented yet... )
            //
            // foreach (q->children, ParseNode child)
            //   if (child->op == "date")
            //     exec(child);
          }
          break;

        case "or":
          {
            int first = 1;
            foreach (q->children, ParseNode child)
              // "date" nodes push nothing, so combining after one would
              // pop results that were never pushed. Skip them, exactly
              // as the "and" case does.
              if (child->op != "date") {
                exec(child);
                if (!first) {
                  Search.ResultSet r2 = pop();
                  Search.ResultSet r1 = pop();
                  push(r1 | r2);
                }
                else
                  first = 0;
              }
          }
          break;

        case "date":
          // NOT IMPLEMENTED YET
          break;

        case "text":
          {
            Search.RankingProfile ranking = defaultRanking;

            if (q->field != "any") {
              ranking = specialRanking;
              int fieldID = db->get_field_id(q->field, 1);
              if (!fieldID && q->field != "body") {
                // There was no such field, so we push an empty ResultSet !
                push(Search.ResultSet());
                break;
              }
              // Rank hits in the requested field only.
              ranking->field_ranking = allocate(65);
              ranking->field_ranking[fieldID] = 1;
            }

            int hasPlus = sizeof(q->plusWords) || sizeof(q->plusPhrases);
            int hasOrdinary = sizeof(q->words) || sizeof(q->phrases);
            int hasMinus = sizeof(q->minusWords) || sizeof(q->minusPhrases);
            // NOTE(review): when the node has neither plus nor ordinary
            // words, nothing is pushed here; presumably
            // Search.Grammar.validate() rejects such queries - confirm.

            if (hasPlus) {
              // Must-have words and phrases are intersected.
              int first = 1;
              if (sizeof(q->plusWords)) {
                words += q->plusWords;
                push(do_query_and(db, q->plusWords, ranking));
                first = 0;
              }
              foreach (q->plusPhrases, array(string) ph) {
                words += ph;
                push(do_query_phrase(db, ph, ranking));
                if (!first) {
                  Search.ResultSet r2 = pop();
                  Search.ResultSet r1 = pop();
                  push(r1 & r2);
                }
                first = 0;
              }
            }

            if (hasOrdinary) {
              // May-have words and phrases are united.
              int first = 1;
              if (sizeof(q->words)) {
                words += q->words;
                push(do_query_or(db, q->words, ranking));
                first = 0;
              }
              foreach (q->phrases, array(string) ph) {
                words += ph;
                push(do_query_phrase(db, ph, ranking));
                if (!first) {
                  Search.ResultSet r2 = pop();
                  Search.ResultSet r1 = pop();
                  push(r1 | r2);
                }
                first = 0;
              }
            }

            if (hasPlus && hasOrdinary) {
              Search.ResultSet r2 = pop();
              Search.ResultSet r1 = pop();
              // If a document contains must-have words AND ALSO may-have
              // words, its ranking is increased.
              push(r1->add_ranking(r2));
            }

            if ((hasPlus || hasOrdinary) && hasMinus) {
              // Unite everything unwanted, then subtract it from the
              // hits gathered above. Minus words are not added to words.
              int first = 1;
              if (sizeof(q->minusWords)) {
                push(do_query_or(db, q->minusWords, ranking));
                first = 0;
              }
              foreach (q->minusPhrases, array(string) ph) {
                push(do_query_phrase(db, ph, ranking));
                if (!first) {
                  Search.ResultSet r2 = pop();
                  Search.ResultSet r1 = pop();
                  push(r1 | r2);
                }
                first = 0;
              }
              Search.ResultSet r2 = pop();
              Search.ResultSet r1 = pop();
              push(r1 - r2);
            }
          }
          break;

        default:
          error("Unknown type of ParseNode!");
      } // switch (q->op)
    }

  } (db, defaultRanking)->execute(q);
}