0844382001-06-10Per Hedbor import "..";
11aeec2001-05-25Johan Schön 
//! Creates a callback that fetches successive blobs for individual words.
//!
//! @param db
//!   Object whose @expr{get_blob(word, n)@} method is called by the
//!   returned function.
//! @param word_ids
//!   The word ids for which a per-word call counter is set up, each
//!   starting at zero.
//! @returns
//!   A function that, for a given word id, returns
//!   @expr{db->get_blob(word, n)@} where @expr{n@} is the number of times
//!   the function has previously been called with that same word id.
static function(int:string) blobfeeder(Search.Database.Base db, array word_ids)
{
  // One counter per word id; every counter starts out as 0.
  mapping fetch_count = ([]);
  foreach (word_ids, mixed id)
    fetch_count[id] = 0;

  return lambda(int word)
         {
           // Post-increment: the current counter value is passed on, and
           // the counter is advanced for the next call.
           return db->get_blob(word, fetch_count[word]++);
         };
}
//! Removes duplicate strings from an array while keeping the order of
//! the remaining elements.
//!
//! @param a
//!   The strings to filter. May be empty.
//! @returns
//!   A new array containing the first occurrence of every distinct
//!   string in @[a], in their original relative order.
static array(string) uniq_preserve_order(array(string) a)
{
  // Membership is tracked in a multiset so that each lookup is a hash
  // probe instead of a linear scan over the result built so far.
  multiset(string) seen = (<>);
  array(string) result = ({});
  foreach (a, string s)
    if (!seen[s]) {
      seen[s] = 1;
      result += ({ s });
    }
  return result;
}
965c0b2001-05-29Johan Schön 
//! Runs @expr{_WhiteFish.do_query_or@} for a list of words.
//!
//! Duplicate words are removed (first occurrence kept) before each word
//! is converted to an id with @expr{db->hash_word@}.
//!
//! @param db
//!   Database used for hashing the words and for feeding blobs.
//! @param words
//!   The words to query for.
//! @param ranking
//!   Supplies @expr{field_ranking@}, @expr{proximity_ranking@} and
//!   @expr{cutoff@} for the query.
//! @returns
//!   The result set produced by @expr{_WhiteFish.do_query_or@}.
Search.ResultSet do_query_or(Search.Database.Base db,
                             array(string) words,
                             Search.RankingProfile ranking)
{
  array(string) distinct_words = uniq_preserve_order(words);
  array(int) ids = map(distinct_words, db->hash_word);

  Search.ResultSet hits =
    _WhiteFish.do_query_or(ids,
                           ranking->field_ranking,
                           ranking->proximity_ranking,
                           ranking->cutoff,
                           blobfeeder(db, ids));

  // Debug trace: logs the words as given by the caller, not the
  // deduplicated list.
  werror("do_query_or(%{ %O %}) => %d hits\n", words, hits->size());
  return hits;
}
//! Runs @expr{_WhiteFish.do_query_and@} for a list of words.
//!
//! Duplicate words are removed (first occurrence kept) before each word
//! is converted to an id with @expr{db->hash_word@}.
//!
//! @param db
//!   Database used for hashing the words and for feeding blobs.
//! @param words
//!   The words to query for.
//! @param ranking
//!   Supplies @expr{field_ranking@}, @expr{proximity_ranking@} and
//!   @expr{cutoff@} for the query.
//! @returns
//!   The result set produced by @expr{_WhiteFish.do_query_and@}.
Search.ResultSet do_query_and(Search.Database.Base db,
                              array(string) words,
                              Search.RankingProfile ranking)
{
  array(string) distinct_words = uniq_preserve_order(words);
  array(int) ids = map(distinct_words, db->hash_word);

  Search.ResultSet hits =
    _WhiteFish.do_query_and(ids,
                            ranking->field_ranking,
                            ranking->proximity_ranking,
                            ranking->cutoff,
                            blobfeeder(db, ids));

  // Debug trace: logs the words as given by the caller, not the
  // deduplicated list.
  werror("do_query_and(%{ %O %}) => %d hits\n", words, hits->size());
  return hits;
}
//! Runs @expr{_WhiteFish.do_query_phrase@} for a sequence of words.
//!
//! Unlike the and/or queries, the words are hashed as given, without
//! removing duplicates.
//!
//! @param db
//!   Database used for hashing the words and for feeding blobs.
//! @param words
//!   The words of the phrase, in order.
//! @param ranking
//!   Supplies @expr{field_ranking@} for the query.
//! @returns
//!   The result set produced by @expr{_WhiteFish.do_query_phrase@}.
Search.ResultSet do_query_phrase(Search.Database.Base db,
                                 array(string) words,
                                 Search.RankingProfile ranking)
{
  array(int) ids = map(words, db->hash_word);

  Search.ResultSet hits =
    _WhiteFish.do_query_phrase(ids,
                               ranking->field_ranking,
                               //    ranking->cutoff,
                               blobfeeder(db, ids));

  werror("do_query_phrase(%{ %O %}) => %d hits\n", words, hits->size());
  return hits;
}
//! Parses, optimizes, validates and executes a search query.
//!
//! @param db
//!   The search database.
//! @param parser
//!   The parser used to turn @[query] into a parse tree.
//! @param query
//!   The query string entered by user.
//! @param defaultRanking
//!   Used when searching in the field "any:".
//! @returns
//!   The result set of the query. An empty result set is returned when
//!   the optimized query is a null query.
//! @throws
//!   The string returned by @expr{Search.Grammar.validate@} if the query
//!   does not validate, or an error if the evaluation stack does not end
//!   up holding exactly one result.
Search.ResultSet execute(Search.Database.Base db,
                         Search.Grammar.AbstractParser parser,
                         string query,
                         Search.RankingProfile defaultRanking)
{
  Search.Grammar.ParseNode q = parser->parse(query);
  q = Search.Grammar.optimize(q);

  if (!q)                             // The query was a null query
    return Search.ResultSet();        // so return an empty resultset

  // Deliberately not named "error", so that it cannot shadow the global
  // error() function that the nested class below calls.
  string validation_error = Search.Grammar.validate(q);
  if (validation_error)
    throw (validation_error);

  werror("Search.Query.execute:\n%s\n", q->print());

  // The parse tree is evaluated by a small stack machine: every executed
  // node leaves its result on the stack, and the operators combine the
  // two topmost results.
  return class {
    static Search.RankingProfile defaultRanking;
    static Search.Database.Base db;

    // Used when search is limited to another field than "any:".
    static Search.RankingProfile specialRanking;

    static void create(Search.Database.Base _db,
                       Search.RankingProfile _defaultRanking)
    {
      db = _db;
      defaultRanking = _defaultRanking;
      specialRanking = defaultRanking->copy();
    }

    static constant ParseNode = Search.Grammar.ParseNode;

    // Element 0 is the top of the stack.
    static array(Search.ResultSet) stack = ({ });

    static void push(Search.ResultSet r)
    {
      werror("---PUSH\n");
      stack = ({ r }) + stack;
    }

    static Search.ResultSet pop()
    {
      werror("---POP\n");
      if (!sizeof(stack))
        error("Very bad!");
      Search.ResultSet r = stack[0];
      stack = stack[1 .. ];
      return r;
    }

    // Evaluates the whole tree and returns the single remaining result.
    Search.ResultSet execute(ParseNode q)
    {
      exec(q);
      if (sizeof(stack) != 1)
        throw ("Stack should have exactly one item!");
      return pop();
    }

    // Evaluates one node, leaving its result on the stack.
    void exec(ParseNode q)
    {
      werror("EXEC %s\n", q->op);
      switch (q->op) {
        case "and":
          {
            int first = 1;
            foreach (q->children, ParseNode child)
              if (child->op != "date") {
                exec(child);
                if (!first) {
                  Search.ResultSet r2 = pop();
                  Search.ResultSet r1 = pop();
                  push(r1 & r2);
                }
                else
                  first = 0;
              }
            // ( DATE: limitations not implemented yet... )
            //
            // foreach (q->children, ParseNode child)
            //   if (child->op == "date")
            //     exec(child);
          }
          break;

        case "or":
          {
            int first = 1;
            foreach (q->children, ParseNode child)
              // "date" nodes push nothing (not implemented), so they are
              // skipped here just as in the "and" case; executing them
              // would make the pops below unbalance the stack.
              if (child->op != "date") {
                exec(child);
                if (!first) {
                  Search.ResultSet r2 = pop();
                  Search.ResultSet r1 = pop();
                  push(r1 | r2);
                }
                else
                  first = 0;
              }
          }
          break;

        case "date":
          // NOT IMPLEMENTED YET
          break;

        case "text":
          {
            Search.RankingProfile ranking = defaultRanking;

            if (q->field != "any") {
              ranking = specialRanking;
              int fieldID = db->get_field_id(q->field, 1);
              if (!fieldID && q->field != "body") {
                // There was no such field, so we push an empty ResultSet !
                push(Search.ResultSet());
                break;
              }
              // Only the requested field keeps its ranking; all other
              // entries are reset to zero.
              ranking->field_ranking = allocate(66);
              ranking->field_ranking[fieldID] =
                defaultRanking->field_ranking[fieldID];
              // ranking->field_ranking[fieldID] = 1;
            }

            int hasPlus = sizeof(q->plusWords) || sizeof(q->plusPhrases);
            int hasOrdinary = sizeof(q->words) || sizeof(q->phrases);
            int hasMinus = sizeof(q->minusWords) || sizeof(q->minusPhrases);

            if (hasPlus) {
              // Must-have words and phrases are intersected.
              int first = 1;
              if (sizeof(q->plusWords)) {
                push(do_query_and(db, q->plusWords, ranking));
                first = 0;
              }
              foreach (q->plusPhrases, array(string) ph) {
                push(do_query_phrase(db, ph, ranking));
                if (!first) {
                  Search.ResultSet r2 = pop();
                  Search.ResultSet r1 = pop();
                  push(r1 & r2);
                }
                first = 0;
              }
            }

            if (hasOrdinary) {
              // May-have words and phrases are united.
              int first = 1;
              if (sizeof(q->words)) {
                push(do_query_or(db, q->words, ranking));
                first = 0;
              }
              foreach (q->phrases, array(string) ph) {
                push(do_query_phrase(db, ph, ranking));
                if (!first) {
                  Search.ResultSet r2 = pop();
                  Search.ResultSet r1 = pop();
                  push(r1 | r2);
                }
                first = 0;
              }
            }

            if (hasPlus && hasOrdinary) {
              Search.ResultSet r2 = pop();
              Search.ResultSet r1 = pop();
              // If a document contains must-have words AND ALSO may-have
              // words, its ranking is increased.
              push(r1->add_ranking(r2));
            }

            if ((hasPlus || hasOrdinary) && hasMinus) {
              // Everything matching a minus word or phrase is removed
              // from the result built so far.
              int first = 1;
              if (sizeof(q->minusWords)) {
                push(do_query_or(db, q->minusWords, ranking));
                first = 0;
              }
              foreach (q->minusPhrases, array(string) ph) {
                push(do_query_phrase(db, ph, ranking));
                if (!first) {
                  Search.ResultSet r2 = pop();
                  Search.ResultSet r1 = pop();
                  push(r1 | r2);
                }
                first = 0;
              }
              Search.ResultSet r2 = pop();
              Search.ResultSet r1 = pop();
              push(r1 - r2);
            }
          }
          break;

        default:
          error("Unknown type of ParseNode!");
      } // switch (q->op)
    }

  } (db, defaultRanking)->execute(q);
}