40a44d | 2004-08-07 | Johan Schön | |
|
87e926 | 2001-06-22 | Martin Nilsson | |
|
bcdefe | 2004-08-19 | Fredrik Noring | |
|
87e926 | 2001-06-22 | Martin Nilsson | |
|
//! Builds the blob-fetching callback handed to the @[_WhiteFish] query
//! functions.
//!
//! The returned function takes a word (and a second integer argument that
//! it ignores) and returns @expr{db->get_blob(word, n, cache)@}, where
//! @expr{n@} starts at 0 for every word and is incremented on each call
//! for that word. The cache mapping is created once per feeder and passed
//! to every @expr{get_blob@} call.
static function(string,int:string) blobfeeder(Search.Database.Base db,
                                              array words)
{
  // One counter per word, all starting at zero.
  mapping next_blob = mkmapping(words, allocate(sizeof(words)));

  // Shared between all get_blob() calls made through this feeder.
  mapping(string:mapping(int:string)) cache = ([ ]);

  function(string,int:string) feeder =
    lambda(string word, int unused)
    {
      // Post-increment: use the current counter value, then advance it.
      int n = next_blob[word]++;
      return db->get_blob(word, n, cache);
    };

  return feeder;
}
|
//! Returns the strings of @[a] with duplicates removed. The first
//! occurrence of every string is kept and the relative order of the kept
//! strings is the same as in @[a].
static array(string) uniq_preserve_order(array(string) a) {
  // Membership is tracked in a multiset so each lookup is a hash probe
  // instead of a linear scan over the result built so far.
  multiset(string) seen = (<>);
  array(string) result = ({});
  foreach (a, string s)
    if (!seen[s]) {
      seen[s] = 1;
      result += ({ s });
    }
  return result;
}
|
965c0b | 2001-05-29 | Johan Schön | |
|
//! Runs @[_WhiteFish.do_query_or] for @[words], passing the field
//! ranking, proximity ranking and cutoff from @[ranking] and a fresh
//! @[blobfeeder] bound to @[db], and returns its result.
Search.ResultSet do_query_or(Search.Database.Base db,
                             array(string) words,
                             Search.RankingProfile ranking)
{
  function(string,int:string) feeder = blobfeeder(db, words);

  return _WhiteFish.do_query_or(words,
                                ranking->field_ranking,
                                ranking->proximity_ranking,
                                ranking->cutoff,
                                feeder);
}
|
//! Runs @[_WhiteFish.do_query_and] for @[words], passing the field
//! ranking, proximity ranking and cutoff from @[ranking] and a fresh
//! @[blobfeeder] bound to @[db], and returns its result.
Search.ResultSet do_query_and(Search.Database.Base db,
                              array(string) words,
                              Search.RankingProfile ranking)
{
  function(string,int:string) feeder = blobfeeder(db, words);

  return _WhiteFish.do_query_and(words,
                                 ranking->field_ranking,
                                 ranking->proximity_ranking,
                                 ranking->cutoff,
                                 feeder);
}
|
//! Runs @[_WhiteFish.do_query_phrase] for the phrase @[words], passing
//! the field ranking from @[ranking] and a fresh @[blobfeeder] bound to
//! @[db], and returns its result. Unlike the and/or variants, no
//! proximity ranking or cutoff is passed.
Search.ResultSet do_query_phrase(Search.Database.Base db,
                                 array(string) words,
                                 Search.RankingProfile ranking)
{
  function(string,int:string) feeder = blobfeeder(db, words);

  return _WhiteFish.do_query_phrase(words,
                                    ranking->field_ranking,
                                    feeder);
}
|
//! Ordering that can be requested for a result set. The values start at
//! 1, so an omitted (zero) order argument is distinguishable from every
//! member.
enum search_order
{
  RELEVANCE = 1,  // sort on the ranking computed by the query
  DATE_ASC  = 2,  // sort on document date
  DATE_DESC = 3,  // sort on document date, opposite direction to DATE_ASC
  NONE      = 4,  // leave the result set unsorted
};
//! Sorts @[resultset] according to @[order] and returns the sorted set.
//!
//! The ordering rules are the same ones @[execute] applies to its result:
//! @ul
//!   @item
//!     @[RELEVANCE] (also used when @[order] is zero): @expr{sort()@}.
//!   @item
//!     @[DATE_ASC] / @[DATE_DESC]: the set is finalized and re-ranked with
//!     @expr{db->get_global_dateset()@}, then @expr{sort_rev()@} or
//!     @expr{sort()@} respectively.
//!   @item
//!     @[NONE]: the set is returned untouched.
//! @endul
//!
//! @returns
//!   The sorted result set. For the date orders this is the new object
//!   returned by @expr{add_ranking()@}, not necessarily @[resultset].
static Search.ResultSet sort_resultset(Search.ResultSet resultset,
                                       search_order order,
                                       Search.Database.Base db)
{
  if (!order)
    order = RELEVANCE;

  switch (order)
  {
    case RELEVANCE:
      resultset->sort();
      break;

    case DATE_ASC:
    case DATE_DESC:
      // Replace the query ranking with the global date set before sorting.
      resultset = resultset->finalize()->add_ranking(db->get_global_dateset());
      if (order == DATE_DESC)
        resultset->sort();
      else
        resultset->sort_rev();
      break;

    case NONE:
      break;
  }

  return resultset;
}
|
788675 | 2001-05-31 | David Norlin | |
|
195e68 | 2001-06-12 | David Norlin | |
//! Parses @[query] with @[parser], evaluates the resulting parse tree
//! against @[db] and returns the matching documents.
//!
//! @param db
//!   Database the query is run against.
//! @param parser
//!   Parser whose @expr{parse()@} turns @[query] into a parse tree.
//! @param query
//!   The query string.
//! @param ranking
//!   Ranking profile used for text nodes that search the "any" field. A
//!   copy of it, with its field ranking overwritten, is used for text
//!   nodes restricted to one field.
//! @param stop_words
//!   If given and non-empty, passed to
//!   @[Search.Grammar.remove_stop_words] before the tree is optimized.
//! @param order
//!   Requested sort order; zero/omitted means @[RELEVANCE].
//!
//! @returns
//!   @expr{({ result_set, words })@}. @expr{words@} collects the non-glob
//!   plus words, non-glob ordinary words and the words of plus and
//!   ordinary phrases that were searched for; minus words and glob
//!   expansions are not included. Deleted documents are removed from the
//!   result set before sorting.
//!
//! @throws
//!   The string returned by @[Search.Grammar.validate] is thrown as-is
//!   (a plain string, not a backtrace array) if the query does not
//!   validate.
array(Search.ResultSet|array(string)) execute(Search.Database.Base db,
                                              Search.Grammar.AbstractParser parser,
                                              string query,
                                              Search.RankingProfile ranking,
                                              void|array(string) stop_words,
                                              void|search_order order)
{
  Search.Grammar.ParseNode q = parser->parse(query);

  if (stop_words && sizeof(stop_words))
    Search.Grammar.remove_stop_words(q, stop_words);

  q = Search.Grammar.optimize(q);

  // Nothing left to evaluate: return an empty result and no words.
  if (!q)
    return ({ Search.ResultSet(), ({}) });

  // Deliberately not named "error": the evaluator class below calls the
  // error() efun and a local of that name would be confusable with it.
  string validation_error = Search.Grammar.validate(q);
  if (validation_error)
    throw (validation_error);

  // Stack-based evaluator for the parse tree. Every node pushes exactly
  // one result set; composite nodes merge the sets of their children.
  array(Search.ResultSet|array(string)) res = class {
    static constant ParseNode = Search.Grammar.ParseNode;

    // Limit passed to db->expand_word_glob() for every glob word.
    static constant max_globs = 100;

    static Search.RankingProfile defaultRanking;
    // Private copy of the default profile; its field_ranking is
    // overwritten for every field-restricted text node.
    static Search.RankingProfile specialRanking;
    static Search.Database.Base db;

    // Words searched for, returned alongside the result set.
    static array(array(string)|string) words = ({ });

    static ADT.Stack stack = ADT.Stack();
    static function(Search.ResultSet:void) push;
    static function(void:Search.ResultSet) pop;

    static void create(Search.Database.Base _db,
                       Search.RankingProfile _defaultRanking)
    {
      db = _db;
      defaultRanking = _defaultRanking;
      specialRanking = defaultRanking->copy();
      pop = stack->pop;
      push = stack->push;
    }

    // Splits words into ({ plain words, words containing "*" or "?" }).
    static array(array(string)) split_words(array(string) words)
    {
      array a=({}),b=({});
      foreach(words, string word)
        if(has_value(word, "*") || has_value(word, "?"))
          b+=({ word });
        else
          a+=({ word });
      return ({ a, b });
    }

    // Replaces the two topmost result sets with their intersection.
    static void merge_and()
    {
      Search.ResultSet r2 = pop();
      Search.ResultSet r1 = pop();
      push(r1 & r2);
    }

    // Replaces the two topmost result sets with their union.
    static void merge_or()
    {
      Search.ResultSet r2 = pop();
      Search.ResultSet r1 = pop();
      push(r1 | r2);
    }

    // Evaluates the tree rooted at q and returns ({ result set, words }).
    array(Search.ResultSet|array(string)) execute(ParseNode q) {
      exec(q);
      if (sizeof(stack) != 1)
        error("Stack should have exactly one item!\n");
      return ({ pop(), words });
    }

    // Evaluates one node, leaving its result set on top of the stack.
    void exec(ParseNode q) {
      switch (q->op) {
        case "and":
        {
          int first = 1;
          foreach (q->children, ParseNode child)
          {
            exec(child);
            if (!first)
              merge_and();
            first = 0;
          }
        }
        break;

        case "or":
        {
          int first = 1;
          foreach (q->children, ParseNode child)
          {
            exec(child);
            if (!first)
              merge_or();
            first = 0;
          }
        }
        break;

        case "date":
        {
          _WhiteFish.DateSet global_dateset = db->get_global_dateset();
          if(!sizeof(global_dateset))
          {
            push(global_dateset);
            break;
          }

          // Resolve q->date to a [t_low, t_high] interval in unix time,
          // trying a full day first, then "YYYY-MM", then "YYYY".
          // Parse failures are swallowed and leave the bounds at zero.
          int t_low, t_high;
          catch {
            t_low = t_high = Calendar.ISO.dwim_day(q->date)->unix_time();
            t_high += 24*60*60-1;
          };
          if(!t_low && sscanf(q->date, "%4d-%2d", int y, int m) == 2)
            catch {
              Calendar.ISO.Month month = Calendar.ISO.Month(y, m);
              t_low = month->unix_time();
              t_high = month->next()->unix_time()-1;
            };
          if(!t_low && sscanf(q->date, "%4d", int y))
            catch {
              Calendar.ISO.Year year = Calendar.ISO.Year(y);
              t_low = year->unix_time();
              t_high = year->next()->unix_time()-1;
            };
          if(!t_low || !t_high)
          {
            // Unparsable date: matches nothing.
            push(_WhiteFish.DateSet());
            break;
          }

          // NOTE(review): the +/-1 adjustments suggest between(),
          // before() and after() take exclusive bounds - confirm against
          // _WhiteFish.DateSet.
          _WhiteFish.DateSet restriction;
          switch(q->operator[1])
          {
            case "=":
              restriction =
                global_dateset->between(t_low-1, t_high+1)->finalize();
              break;
            case "<>":
            case "!=":
              restriction =
                global_dateset->not_between(t_low, t_high)->finalize();
              break;
            case "<=":
              restriction = global_dateset->before(t_high+1)->finalize();
              break;
            case ">=":
              restriction = global_dateset->after(t_low-1)->finalize();
              break;
            case "<":
              restriction = global_dateset->before(t_low)->finalize();
              break;
            case ">":
              restriction = global_dateset->after(t_high)->finalize();
              break;
          }
          // Unknown operator: fall back to an empty set.
          push(restriction || _WhiteFish.DateSet());
        }
        break;

        case "text":
        {
          Search.RankingProfile ranking = defaultRanking;

          if (q->field != "any")
          {
            ranking = specialRanking;
            int fieldID = db->get_field_id(q->field, 1);
            // A zero id is only accepted for the "body" field; any other
            // field with id zero yields an empty result.
            if (!fieldID && q->field != "body")
            {
              push(Search.ResultSet());
              break;
            }
            // Rank only the requested field.
            // NOTE(review): 65 is presumably the number of field slots
            // the ranking code expects - confirm.
            ranking->field_ranking = allocate(65);
            ranking->field_ranking[fieldID] = 1;
          }

          [array plusWords, array plusWordGlobs] = split_words(q->plusWords);
          [array ordinaryWords, array ordinaryWordGlobs] = split_words(q->words);
          [array minusWords, array minusWordGlobs] = split_words(q->minusWords);

          int hasPlus = sizeof(q->plusWords) || sizeof(q->plusPhrases);
          int hasOrdinary = sizeof(q->words) || sizeof(q->phrases);
          int hasMinus = sizeof(q->minusWords) || sizeof(q->minusPhrases);

          // Plus terms: every word, glob and phrase must match (AND).
          if(hasPlus)
          {
            int first = 1;
            if(sizeof(plusWords))
            {
              words += plusWords;
              push(do_query_and(db, plusWords, ranking));
              first = 0;
            }
            foreach(plusWordGlobs, string plusWordGlob)
            {
              push(do_query_or(db, db->expand_word_glob(plusWordGlob, max_globs), ranking));
              if (!first)
                merge_and();
              first = 0;
            }
            foreach (q->plusPhrases, array(string) ph)
            {
              words += ph;
              push(do_query_phrase(db, ph, ranking));
              if (!first)
                merge_and();
              first = 0;
            }
          }

          // Ordinary terms: any word, glob or phrase may match (OR).
          if(hasOrdinary)
          {
            int first = 1;
            if (sizeof(ordinaryWords))
            {
              words += ordinaryWords;
              push(do_query_or(db, ordinaryWords, ranking));
              first = 0;
            }
            foreach(ordinaryWordGlobs, string ordinaryWordGlob)
            {
              push(do_query_or(db, db->expand_word_glob(ordinaryWordGlob, max_globs), ranking));
              if (!first)
                merge_or();
              first = 0;
            }
            foreach (q->phrases, array(string) ph)
            {
              words += ph;
              push(do_query_phrase(db, ph, ranking));
              if(!first)
                merge_or();
              first = 0;
            }
          }

          // With both kinds present the plus set decides membership and
          // the ordinary set only contributes ranking.
          if(hasPlus && hasOrdinary)
          {
            Search.ResultSet r2 = pop();
            Search.ResultSet r1 = pop();
            push(r1->add_ranking(r2));
          }

          // Minus terms are only meaningful when there is something to
          // subtract from.
          if((hasPlus || hasOrdinary) && hasMinus)
          {
            int first = 1;
            // Only the non-glob words are queried literally here; the
            // glob words are expanded in the loop below, matching how
            // the plus and ordinary branches treat their words.
            if (sizeof(minusWords))
            {
              push(do_query_or(db, minusWords, ranking));
              first = 0;
            }
            foreach(minusWordGlobs, string minusWordGlob)
            {
              push(do_query_or(db, db->expand_word_glob(minusWordGlob, max_globs), ranking));
              if(!first)
                merge_or();
              first = 0;
            }
            foreach (q->minusPhrases, array(string) ph)
            {
              push(do_query_phrase(db, ph, ranking));
              if (!first)
                merge_or();
              first = 0;
            }
            Search.ResultSet r2 = pop();
            Search.ResultSet r1 = pop();
            push(r1 - r2);
          }
        }
        break;

        default:
          error("Unknown type of ParseNode!\n");
      }
    }

  } (db, ranking)->execute(q);

  res[0] -= db->get_deleted_documents();

  if(!order)
    order = RELEVANCE;

  switch(order)
  {
    case RELEVANCE:
      res[0]->sort();
      break;

    case DATE_ASC:
    case DATE_DESC:
      // Replace the query ranking with the global date set, then sort.
      res[0] = res[0]->finalize()->add_ranking(db->get_global_dateset());
      if(order==DATE_DESC)
        res[0]->sort();
      else
        res[0]->sort_rev();
      break;

    case NONE:
      break;
  }

  return res;
}
|