eb01b4 | 2010-10-26 | Martin Stjernholm | | #pike __REAL_VERSION__
|
084438 | 2001-06-10 | Per Hedbor | |
|
ff1796 | 2014-08-15 | Martin Nilsson | | protected inherit .AbstractParser;
protected inherit .Lexer;
|
40a44d | 2004-08-07 | Johan Schön | | import ".";
|
070ae4 | 2001-06-01 | David Norlin | |
|
6ec057 | 2001-05-31 | David Norlin | | #include "debug.h"
|
40a44d | 2004-08-07 | Johan Schön | |
|
6ec057 | 2001-05-31 | David Norlin | |
|
352471 | 2015-05-26 | Martin Nilsson | |
|
6ec057 | 2001-05-31 | David Norlin | |
|
352471 | 2015-05-26 | Martin Nilsson | |
|
6ec057 | 2001-05-31 | David Norlin | |
|
352471 | 2015-05-26 | Martin Nilsson | |
|
6ec057 | 2001-05-31 | David Norlin | |
|
40a44d | 2004-08-07 | Johan Schön | |
|
6ec057 | 2001-05-31 | David Norlin | |
|
40a44d | 2004-08-07 | Johan Schön | |
|
6ec057 | 2001-05-31 | David Norlin | |
|
352471 | 2015-05-26 | Martin Nilsson | |
|
6ec057 | 2001-05-31 | David Norlin | |
|
352471 | 2015-05-26 | Martin Nilsson | |
|
6ec057 | 2001-05-31 | David Norlin | |
|
352471 | 2015-05-26 | Martin Nilsson | |
|
6ec057 | 2001-05-31 | David Norlin | |
|
// Remaining input tokens; parse() fills this from tokenize(), and
// peek()/advance() read and consume from the front.
ff1796 | 2014-08-15 | Martin Nilsson | | protected array(array(Token|string)) tokens;
// Stack of field names with the current field at index 0. parse() resets
// it to ({ "any" }); parseExpr2() pushes and pops around a "field:" scope.
protected array(string) fieldstack;
|
40a44d | 2004-08-07 | Johan Schön | |
|
// Parser options, set by create(). Keys read in this file: "fields",
// "implicit" and "auto-glob".
070ae4 | 2001-06-01 | David Norlin | | mapping(string:mixed) options;
|
6ec057 | 2001-05-31 | David Norlin | |
|
// Returns the token `lookahead` positions ahead without consuming it
// (position 0 when the argument is omitted). Positions at or beyond the
// end of the token list are clamped to the last token.
protected array(Token|string) peek(void|int lookahead) {
  int last = sizeof(tokens) - 1;
  return tokens[lookahead > last ? last : lookahead];
}
|
// Returns the current token and steps past it. The final remaining token
// is never removed, so repeated calls keep returning it.
protected array advance() {
  array current = tokens[0];
  if (sizeof(tokens) >= 2) {
    tokens = tokens[1..];
  }
  return current;
}
|
// Returns true when the token at `offset` is a text token whose
// lower-cased value is a member of options["fields"] and the token after
// it is a colon, i.e. the input continues with "<field>:".
protected int lookingAtFieldStart(void|int offset) {
  multiset(string) known = options["fields"];
  array(Token|string) candidate = peek(offset);

  if (candidate[0] != TOKEN_TEXT)
    return 0;
  if (!known[lower_case(candidate[1])])
    return 0;
  return peek(offset + 1)[0] == TOKEN_COLON;
}
|
// Returns true when the token at `offset` is the text "date" (compared
// case-insensitively) and the following token is one of the comparison
// operators =, <=, >=, !=, < or >.
protected int lookingAtDateStart(void|int offset) {
  array(Token|string) candidate = peek(offset);

  if (candidate[0] != TOKEN_TEXT)
    return 0;
  if (lower_case(candidate[1]) != "date")
    return 0;

  multiset comparisons = (< TOKEN_EQUAL, TOKEN_LESSEQUAL, TOKEN_GREATEREQUAL,
                            TOKEN_NOTEQUAL, TOKEN_LESS, TOKEN_GREATER >);
  return comparisons[peek(offset + 1)[0]];
}
|
// Constructor. Uses `opt` itself (not a copy) as the option mapping, or
// ([ "implicit" : "or" ]) when no mapping is given. If the mapping has no
// "fields" entry, it is filled in from getDefaultFields().
protected void create(mapping(string:mixed)|void opt) {
  if (opt)
    options = opt;
  else
    options = ([ "implicit" : "or" ]);

  if (!options->fields) {
    options->fields = getDefaultFields();
  }
}
|
40a44d | 2004-08-07 | Johan Schön | |
|
// Parses the query string `q` and returns the resulting parse tree.
// Parser state is reset on every call: the field stack starts out as
// ({ "any" }) and the token list is rebuilt from `q`.
ParseNode parse(string q) {
  fieldstack = ({ "any" });
  tokens = tokenize(q);

  ParseNode root = parseQuery();
  return root;
}
|
// query: expr0 { 'or' expr0 }
//
// Collects expr0 results under an OrNode. An explicit OR token is consumed
// and another operand is parsed. Otherwise parsing continues with an
// implicit OR only when options->implicit is "or" and the next token is
// neither the end of input nor a right parenthesis. When the OrNode ends
// up with exactly one child, that child is returned instead.
protected ParseNode parseQuery() {
  ParseNode alternatives = OrNode();
  multiset terminators = (< TOKEN_END, TOKEN_RPAREN >);

  while (1) {
    alternatives->addChild(parseExpr0());

    if (peek()[0] == TOKEN_OR) {
      advance();
      continue;
    }
    if (terminators[peek()[0]] || options->implicit != "or")
      break;
  }

  return sizeof(alternatives->children) == 1
    ? alternatives->children[0]
    : alternatives;
}
|
// expr0: expr1 { 'and' expr1 }
//
// Collects expr1 results under an AndNode. An explicit AND token is
// consumed and another operand is parsed. Otherwise parsing continues with
// an implicit AND only when options->implicit is "and" and the next token
// is not end of input, a right parenthesis or OR. When the AndNode ends up
// with exactly one child, that child is returned instead.
protected ParseNode parseExpr0() {
  ParseNode conjunction = AndNode();
  multiset terminators = (< TOKEN_END, TOKEN_RPAREN, TOKEN_OR >);

  while (1) {
    conjunction->addChild(parseExpr1());

    if (peek()[0] == TOKEN_AND) {
      advance();
      continue;
    }
    if (terminators[peek()[0]] || options->implicit != "and")
      break;
  }

  return sizeof(conjunction->children) == 1
    ? conjunction->children[0]
    : conjunction;
}
|
// expr1: expr2
//
// Pass-through level of the grammar; delegates directly to parseExpr2().
protected ParseNode parseExpr1() {
  ParseNode node = parseExpr2();
  return node;
}
|
// expr2: field ':' expr3
//      | 'date' comparison-operator date
//      | '(' query ')'
//      | expr3
//
// A field prefix makes that field current (top of fieldstack) while the
// following expr3 is parsed. The closing parenthesis of a group is
// optional: it is consumed when present and silently ignored when missing.
protected ParseNode parseExpr2() {
  if (lookingAtFieldStart()) {
    string field = advance()[1];   // the field name
    advance();                     // the ':'
    fieldstack = ({ field }) + fieldstack;
    ParseNode scoped = parseExpr3();
    fieldstack = fieldstack[1..];
    return scoped;
  }

  if (lookingAtDateStart()) {
    advance();                     // the "date" keyword
    return parseDate(advance());   // the comparison operator token
  }

  if (peek()[0] != TOKEN_LPAREN)
    return parseExpr3();

  advance();                       // the '('
  ParseNode grouped = parseQuery();
  if (peek()[0] == TOKEN_RPAREN)
    advance();
  return grouped;
}
|
// expr3: expr4 { 'or' expr4 }
//
// Returns 0 immediately if the input is at the start of a field or date
// expression. Otherwise collects expr4 results under an OrNode. An OR
// token that is followed by a field or date expression is left unconsumed
// and ends the loop. When the OrNode ends up with exactly one child, that
// child is returned instead.
protected ParseNode parseExpr3() {
  if (lookingAtFieldStart())
    return 0;
  if (lookingAtDateStart())
    return 0;

  ParseNode alternatives = OrNode();
  while (1) {
    alternatives->addChild(parseExpr4());

    if (peek()[0] != TOKEN_OR)
      break;
    if (lookingAtFieldStart(1) || lookingAtDateStart(1))
      break;
    advance();
  }

  return sizeof(alternatives->children) == 1
    ? alternatives->children[0]
    : alternatives;
}
|
// expr4: expr5 { 'and' expr5 }
//
// Collects expr5 results under an AndNode. An AND token that is followed
// by a field expression, a date expression or a left parenthesis is left
// unconsumed and ends the loop. When the AndNode ends up with exactly one
// child, that child is returned instead.
protected ParseNode parseExpr4() {
  ParseNode conjunction = AndNode();

  while (1) {
    conjunction->addChild(parseExpr5());

    if (peek()[0] != TOKEN_AND)
      break;
    if (lookingAtFieldStart(1)
        || lookingAtDateStart(1)
        || peek(1)[0] == TOKEN_LPAREN)
      break;
    advance();
  }

  return sizeof(conjunction->children) == 1
    ? conjunction->children[0]
    : conjunction;
}
|
// expr5: sequence of optionally '+'/'-' prefixed text terms
//
// Gathers consecutive terms into a TextNode for the current field
// (fieldstack[0]). A term that is itself written as "field:text" gets its
// own TextNode for that field. Those per-field nodes and the main text
// node are added, in that order and only if non-empty, to an OrNode when
// options->implicit is "or" and to an AndNode otherwise.
//
// Returns the combining node when it has more than one child, the single
// child when it has exactly one, and 0 when nothing was collected.
protected ParseNode parseExpr5() {
  ParseNode text = TextNode();
  ParseNode res;
  text->field = fieldstack[0];

  if (options->implicit == "or") {
    res = OrNode();
  } else {
    res = AndNode();
  }

  // True when at least one of the node's word or phrase lists is non-empty.
  function(object:int) hasTerms = lambda(object t) {
    return sizeof(t->words)
      || sizeof(t->phrases)
      || sizeof(t->plusWords)
      || sizeof(t->plusPhrases)
      || sizeof(t->minusWords)
      || sizeof(t->minusPhrases);
  };

  for (;;) {
    // Optional explicit prefix; with implicit "and" an unprefixed term is
    // treated as if it had been written with '+'.
    int prefix = 0;
    if (peek()[0] == TOKEN_MINUS) {
      advance();
      prefix = '-';
    }
    else if (peek()[0] == TOKEN_PLUS) {
      advance();
      prefix = '+';
    }
    if (!prefix && options["implicit"] == "and")
      prefix = '+';

    // Skip anything that is neither text nor end of input.
    while (!(< TOKEN_TEXT, TOKEN_END >) [ peek()[0] ])
      advance();

    if (lookingAtFieldStart()) {
      // "field:text" term: collect it into a node of its own.
      ParseNode tmp = TextNode();
      tmp->field = peek()[1];
      advance();   // the field name
      advance();   // the ':'
      while (!(< TOKEN_TEXT, TOKEN_END >) [ peek()[0] ])
        advance();
      parseExpr6(prefix, tmp);
      if (hasTerms(tmp))
        res->addChild(tmp);
    } else {
      parseExpr6(prefix, text);
    }

    // Stop at anything a higher grammar level has to handle. TOKEN_OR is
    // part of this set, so the separate OR handling that used to follow
    // this test could never be reached and has been removed.
    if ( (< TOKEN_END,
            TOKEN_RPAREN,
            TOKEN_AND,
            TOKEN_OR >) [ peek()[0] ]
         || lookingAtFieldStart()
         || lookingAtDateStart()
         || (peek()[0] == TOKEN_LPAREN))
      break;
  }

  if (hasTerms(text))
    res->addChild(text);
  if (sizeof(res->children) > 1) return res;
  if (sizeof(res->children) == 1) return res->children[0];
  return 0;
}
|
// expr6: a single text token
//
// If the current token is text, consumes it, splits it into normalized
// words and records the result in `node`:
//   - exactly one word: stored as a word, wrapped in "*...*" first when
//     options["auto-glob"] is set and the word contains no glob character;
//   - more than one word: stored as one phrase with all '*' and '?'
//     characters stripped from its words.
// `prefix` selects the target lists: '+' -> plusWords/plusPhrases,
// '-' -> minusWords/minusPhrases, anything else -> words/phrases.
// Does nothing (and consumes nothing) when the current token is not text.
protected void parseExpr6(int prefix, TextNode node) {
  if (peek()[0] != TOKEN_TEXT)
    return;

  string text = peek()[1];
  advance();

  // The glob characters are swapped for digit-only placeholders before
  // splitting and swapped back afterwards, presumably so that the word
  // splitter does not treat them as separators.
  string star = "86196759014593256";
  string questionmark = "76196758925470133";
  text = replace(text, ({ "*", "?" }), ({ star, questionmark }));

  array(string) words = Unicode.split_words_and_normalize(text);
  // Guard before first use; previously this test came after sizeof(words)
  // had already been evaluated, so it could not protect anything.
  if (!words)
    return;

  words = map(words, replace, ({ star, questionmark }), ({ "*", "?" }));

  // In multi-word input, drop words that consist solely of glob characters.
  if (sizeof(words) > 1)
    words = filter(words,
                   lambda(string w) { return (w - "*" - "?") == "" ? 0 : 1; });

  if (sizeof(words) == 1) {
    if (options["auto-glob"] &&
        !has_value(words[0], "*") &&
        !has_value(words[0], "?")) {
      words[0] = "*" + words[0] + "*";
    }
    switch (prefix) {
      case '+': node->plusWords += words;  break;
      case '-': node->minusWords += words; break;
      default:  node->words += words;      break;
    }
  } else if (sizeof(words) > 1) {
    // Phrases are stored without any glob characters.
    words = map(words, lambda(string w) { return w - "*" - "?"; } );
    switch (prefix) {
      case '+': node->plusPhrases += ({ words });  break;
      case '-': node->minusPhrases += ({ words }); break;
      default:  node->phrases += ({ words });      break;
    }
  }
}
|
// Builds a DateNode for a date comparison. `operator` is the comparison
// operator token, already consumed by the caller.
//
// Element 2 of each following token (presumably its raw source text; TODO
// confirm against the lexer) is appended to the node's date string for as
// long as the token is a minus, a colon, or a text token that does not
// start a field expression. Any other token ends the date and is left
// unconsumed.
protected ParseNode parseDate(array operator) {
  DateNode n = DateNode();
  n->date = "";
  n->operator = operator;

  while (1) {
    mixed type = peek()[0];

    if (type == TOKEN_TEXT) {
      if (lookingAtFieldStart())
        break;
    } else if (type != TOKEN_MINUS && type != TOKEN_COLON) {
      break;
    }

    n->date += peek()[2];
    advance();
  }

  return n;
}
|