|
|
|
|
|
|
|
#pike __REAL_VERSION__ |
|
|
|
#pragma strict_types |
|
|
|
|
|
|
|
// Error flags. Each one is a distinct bit so that several of them can
// be or:ed together in the lr_error member and in return values.

//! Set by parse() when the input ends in a state that cannot accept it.
constant ERROR_EOF                = 0x01;

//! Set by parse() when a token has no action in the current state.
constant ERROR_SYNTAX             = 0x02;

//! Returned by repair() when conflicts remain in a state.
constant ERROR_CONFLICTS          = 0x04;

//! Set by parse() when a named action is not found in the action object.
constant ERROR_MISSING_ACTION     = 0x08;

//! Set by parse() when a named action exists but is not a function.
constant ERROR_BAD_ACTION_TYPE    = 0x10;

//! Set by parse() when a rule has a named action but no action object
//! was supplied.
constant ERROR_NO_OBJECT          = 0x20;

//! Set by parse() when the scanner argument is not callable.
constant ERROR_NO_SCANNER         = 0x40;

//! Set while building states when a non-terminal has no rules.
constant ERROR_MISSING_DEFINITION = 0x80;
|
|
|
|
|
|
|
|
//! Priority and associativity of a terminal or a rule.
class Priority
{
  //! Priority value; repair() compares these numerically, larger wins.
  int value;

  //! Associativity. repair() treats a value >= 0 as grounds for
  //! dropping a reduction and a value <= 0 as grounds for dropping a
  //! shift when the priorities are equal.
  int assoc;

  //! @param p
  //!   Initial priority value.
  //! @param a
  //!   Initial associativity.
  void create(int p, int a)
  {
    assoc = a;
    value = p;
  }
}
|
|
|
|
//! One production of the grammar.
class Rule
{
  //! The non-terminal this rule reduces to.
  int nonterminal;

  //! Right-hand side of the rule. String elements are counted as
  //! tokens by create(); integer elements are treated as
  //! non-terminals by the state builder.
  array(string|int) symbols;

  //! Action to run on reduction: either a function, or the name of a
  //! function that parse() looks up in its action object.
  function|string action;

  //! 1 if @[symbols] contains at least one string element, else 0.
  int has_tokens = 0;

  //! Count of right-hand side symbols not yet known to be nullable.
  //! Starts at the length of @[symbols] and is decremented by
  //! Parser.add_rule().
  int num_nonnullables = 0;

  //! Sequence number, assigned by Parser.add_rule().
  int number = 0;

  //! Priority and associativity of the rule, if any; assigned by
  //! Parser.add_rule() from the priorities of the rule's tokens.
  Priority pri;

  //! @param nt
  //!   Non-terminal the rule reduces to.
  //! @param r
  //!   Symbol sequence making up the right-hand side.
  //! @param a
  //!   Optional action (function, or name of one).
  protected void create(int nt, array(string|int) r, function|string|void a)
  {
    nonterminal = nt;
    symbols = r;
    action = a;

    num_nonnullables = sizeof(r);

    // Any string element at all marks the rule as containing tokens.
    has_tokens = sizeof(filter(r, stringp)) ? 1 : 0;
  }
}
|
|
|
//! Severity of a message passed to an error handler's report function.
enum SeverityLevel {
  NOTICE  = 0,
  WARNING = 1,
  ERROR   = 2,
};
|
|
//! Default error handler: writes messages to the standard error stream.
class ErrorHandler
{
  //! Verbosity. A message is written only when its level is greater
  //! than the negated verbosity, so:
  //! @int
  //!   @value -1
  //!     Only ERROR messages.
  //!   @value 0
  //!     WARNING and ERROR messages.
  //!   @value 1
  //!     All messages, including NOTICE.
  //! @endint
  optional int(-1..1) verbose = 1;

  // Prefix printed for each severity level.
  protected constant severity_kind = ([
    NOTICE:  "Notice",
    WARNING: "Warning",
    ERROR:   "Error",
  ]);

  //! Write a message to stderr as "<severity>: <subsystem>: <text>".
  //!
  //! @param level
  //!   Severity of the message.
  //! @param subsystem
  //!   Name of the part of the parser that produced the message.
  //! @param msg
  //!   sprintf-style format string.
  //! @param args
  //!   Arguments for @[msg].
  void report(SeverityLevel level, string subsystem, string msg,
              mixed ... args)
  {
    // Filtered out by the current verbosity.
    if (level <= -verbose) return;

    string format = "%s: %s: " + msg + "\n";
    werror([string(0..255)]format,
           severity_kind[level], subsystem, @args);
  }

  //! @param verbosity
  //!   Optional initial value for @[verbose]; the default is kept
  //!   when the argument is left out.
  protected void create(int(-1..1)|void verbosity)
  {
    if (undefinedp(verbosity)) return;
    verbose = verbosity;
  }
}
|
|
|
|
|
|
|
|
|
|
|
|
class Parser |
{ |
|
//! The grammar itself: each non-terminal mapped to the rules that
//! define it, in the order they were added with add_rule().
mapping(int : array(Rule)) grammar = ([]);

//! Priority/associativity registered per terminal by set_priority()
//! and set_associativity().
protected mapping(string : Priority) operator_priority = ([]);

//! Symbols found to be nullable by add_rule().
protected multiset(int|string) nullable = (< >);

#if 0
protected mapping(mixed : multiset(Rule)) derives = ([]);

protected mapping(mixed : multiset(Rule)) begins = ([]);
#endif /* 0 */

//! For each not-yet-nullable symbol, the token-free rules that use it.
//! Maintained by add_rule() to propagate nullability.
protected mapping(int : multiset(Rule)) used_by = ([]);

//! State the parser starts in; set at the end of compile().
Kernel start_state;

//! Error flags (ERROR_* constants or:ed together).
int lr_error = 0;

//! Number handed to the next rule added with add_rule().
protected int next_rule_number = 1;

//! States created so far, keyed by a string built from their sorted
//! item ids.
mapping(string:Kernel) known_states = ([]);

//! Function that receives all diagnostics; defaults to the report
//! function of a fresh ErrorHandler.
function(SeverityLevel, string, string, mixed ...:void) error_handler =
  ErrorHandler()->report;
|
//! Forward a diagnostic to the current error handler, installing a
//! default ErrorHandler first if none is set.
//!
//! @param level
//!   Severity of the message.
//! @param subsystem
//!   Name of the reporting subsystem.
//! @param msg
//!   sprintf-style format string.
//! @param args
//!   Arguments for @[msg].
void report(SeverityLevel level, string subsystem, string msg,
            mixed ... args)
{
  // `||` short-circuits, so the default handler is only created
  // when it is actually needed.
  error_handler = error_handler || ErrorHandler()->report;
  error_handler(level, subsystem, msg, @args);
}
|
|
|
|
|
|
|
|
//! A rule together with a position in its right-hand side.
protected class Item
{
  //! The rule.
  Rule r;

  //! Number of right-hand side symbols that lie before the position.
  int offset;

  //! State reached by moving past the symbol at @[offset]; assigned
  //! by Kernel.do_goto().
  Kernel next_state;

  //! First item in the same state that has the same symbol at its
  //! position, or 0 if this item is that first one. Assigned by
  //! compile().
  Item master_item;

  //! Look-ahead set for this item.
  multiset(string) direct_lookahead = (<>);

  //! Copy of @[direct_lookahead] taken by compile() after the shift
  //! pass; consulted when a cycle is detected.
  multiset(string) error_lookahead = (<>);

  //! Items whose look-ahead is merged into this one's.
  multiset(Item) relation = (<>);

  //! Depth marker used by the look-ahead traversal.
  int counter;

  //! Spare bookkeeping integer for the compile phase.
  int number;

  //! Identifies the item: the rule's number plus @[offset].
  int item_id;

  //! Renders the item as "lhs:<tab>before · after", followed by the
  //! look-ahead set in braces when it is non-empty.
  protected string _sprintf()
  {
    string head = symbol_to_string(r->nonterminal) + ":\t";
    string before = "";
    string after = "";

    // Split the right-hand side around the position in one pass.
    foreach (r->symbols; int pos; int|string symbol) {
      string piece = symbol_to_string(symbol) + " ";
      if (pos < offset) {
        before += piece;
      } else {
        after += piece;
      }
    }

    string text = head + before + "· " + after;

    if (sizeof(direct_lookahead)) {
      text += "\t{ " +
        map(indices(direct_lookahead), symbol_to_string) * ", " +
        " }";
    }
    return text;
  }
}
|
|
//! One state of the parser: a set of items plus the tables derived
//! from them.
protected class Kernel {

  //! Rules that already have an offset-0 item in this state.
  multiset(Rule) rules = (<>);

  //! All items of the state, in insertion order.
  array(Item) items = ({});

  //! Lookup from item id to item.
  mapping(int:Item) item_id_to_item = ([]);

  //! Items grouped by the symbol found at their position. Keys are
  //! non-terminals (int) as well as terminals (string), since
  //! add_item() stores whatever symbol the rule has there.
  mapping(int|string : multiset(Item)) symbol_items = ([]);

  //! Action table filled in by compile(): a Kernel value means go to
  //! that state, a Rule value means reduce by that rule.
  mapping(int|string : Kernel|Rule) action = ([]);

  //! Non-terminals for which closure() has already been started.
  multiset closure_set = (<>);

  //! Add an item to the state and index it by id and by the symbol
  //! at its position (if it has one).
  //!
  //! @param i
  //!   Item to add.
  void add_item(Item i)
  {
    items += ({ i });
    item_id_to_item[i->item_id] = i;

    // Items with the position at the very end have no symbol to index.
    if (i->offset >= sizeof(i->r->symbols)) return;

    int|string symbol = i->r->symbols[i->offset];
    multiset(Item) bucket = symbol_items[symbol];

    if (bucket) {
      bucket[i] = 1;
    } else {
      symbol_items[symbol] = (< i >);
    }
  }

  //! Add an offset-0 item for every rule of @[nonterminal], and
  //! recursively for any non-terminal such a rule starts with.
  //! Reports an error and flags ERROR_MISSING_DEFINITION when the
  //! non-terminal has no rules.
  //!
  //! @param nonterminal
  //!   Non-terminal to expand.
  void closure(int nonterminal)
  {
    closure_set[nonterminal] = 1;

    array(Rule) definitions = grammar[nonterminal];
    if (!definitions) {
      report(ERROR, "closure",
             "Definition missing for non-terminal %s",
             symbol_to_string(nonterminal));
      lr_error |= ERROR_MISSING_DEFINITION;
      return;
    }

    foreach (definitions, Rule r) {
      if (rules[r]) continue;

      Item new_item = Item();

      new_item->r = r;
      new_item->item_id = r->number;

      // Mark before recursing so that the rule is not added twice.
      rules[r] = 1;

      add_item(new_item);

      if (sizeof(r->symbols) && intp(r->symbols[0]) &&
          !closure_set[r->symbols[0]]) {
        closure([int]r->symbols[0]);
      }
    }
  }

  //! @returns
  //!   The set of symbols that appear at the position of some item
  //!   in this state.
  multiset(int|string) goto_set()
  {
    multiset(int|string) set = (<>);

    foreach (items, Item i) {
      if (i->offset != sizeof(i->r->symbols)) {
        set[i->r->symbols[i->offset]] = 1;
      }
    }

    report(NOTICE, "goto_set", "=> (< %s >)",
           map(indices(set), symbol_to_string) * ", ");
    return set;
  }

  //! Compute the state reached from this one on @[symbol]. A state
  //! with the same item ids is reused if one is already known;
  //! otherwise a new state is built and queued on s_q. The
  //! next_state of every item that has @[symbol] at its position is
  //! set to the result.
  //!
  //! @param symbol
  //!   Symbol to move past.
  //!
  //! @returns
  //!   The target state, or 0 if no item of this state has
  //!   @[symbol] at its position.
  Kernel do_goto(int|string symbol)
  {
    report(NOTICE, "do_goto",
           "Performing GOTO on <%s>",
           symbol_to_string(symbol));

    multiset(Item) moving = symbol_items[symbol];
    if (!moving) {
      report(WARNING, "do_goto",
             "do_goto() on unknown symbol <%s>",
             symbol_to_string(symbol));
      return 0;
    }

    // The ids of the items in the target state are one higher than
    // the ids of the items being advanced.
    array(int) item_ids = [array(int)]map(sort(indices(moving)->item_id),
                                          [function(int,int...:int)]`+, 1);
    string kernel_hash = sprintf("%@4c", item_ids);

    Kernel new_state = known_states[kernel_hash];

    if (!new_state) {
      known_states[kernel_hash] = new_state = Kernel();

      foreach (indices(moving), Item i) {
        int|string lookahead;

        Item new_item = Item();
        Rule r;
        int offset = i->offset;

        new_item->offset = ++offset;
        new_item->r = r = i->r;
        new_item->item_id = r->number + offset;

        new_state->add_item(new_item);

        if ((offset != sizeof(r->symbols)) &&
            intp(lookahead = r->symbols[offset]) &&
            !new_state->closure_set[lookahead]) {
          new_state->closure([int]lookahead);
        }
      }

      s_q->push(new_state);
    }

    report(NOTICE, "do_goto",
           "GOTO on %s generated state:\n%s",
           symbol_to_string(symbol),
           state_to_string(new_state));

    foreach (indices(moving), Item i) {
      i->next_state = new_state;
    }

    return new_state;
  }

  //! Renders the state as its items, one per line.
  protected string _sprintf()
  {
    return sprintf("%{%s\n%}", items);
  }
}
|
|
//! Growable queue of states. Entries are never removed from @[arr],
//! so every state pushed so far stays reachable by index.
protected class StateQueue {

  //! Index of the next state that next() will hand out.
  int head;

  //! Index of the first unused slot in @[arr].
  int tail;

  //! Backing storage.
  array(Kernel) arr = allocate(64);

  //! Append a state, doubling the storage when it is full.
  //!
  //! @param state
  //!   State to append.
  //!
  //! @returns
  //!   The same @[state].
  Kernel push(Kernel state)
  {
    if (tail == sizeof(arr)) {
      arr += allocate(tail);
    }
    arr[tail] = state;
    tail++;

    return state;
  }

  //! @returns
  //!   The next state not yet handed out, or 0 if there is none.
  Kernel next()
  {
    if (head != tail) {
      return arr[head++];
    }
    return 0;
  }
}
|
|
|
|
|
|
|
|
|
//! Default symbol formatter.
//!
//! @param symbol
//!   Symbol to format.
//!
//! @returns
//!   "nonterminal" followed by the number for integers, and the
//!   string wrapped in double quotes for strings.
protected string builtin_symbol_to_string(int|string symbol)
{
  if (!intp(symbol)) {
    return "\"" + symbol + "\"";
  }
  return "nonterminal" + symbol;
}

//! Formatter currently used for symbols in diagnostics; changed with
//! set_symbol_to_string().
protected function(int|string : string) symbol_to_string =
  builtin_symbol_to_string;
|
|
|
|
|
//! Format a rule as "lhs:<tab>sym sym ... ", or with "/* empty */"
//! in place of the symbols when the right-hand side is empty.
//!
//! @param r
//!   Rule to format.
string rule_to_string(Rule r)
{
  array(string) parts = ({ symbol_to_string(r->nonterminal), ":\t" });

  if (!sizeof(r->symbols)) {
    return parts * "" + "/* empty */";
  }

  foreach (r->symbols, int|string symbol) {
    parts += ({ symbol_to_string(symbol), " " });
  }
  return parts * "";
}
|
|
|
|
|
//! Format an item using its own string rendering.
//!
//! @param i
//!   Item to format.
string item_to_string(Item i)
{
  string text = sprintf("%s", i);
  return text;
}
|
|
|
|
|
//! Format a state using its own string rendering.
//!
//! @param state
//!   State to format.
string state_to_string(Kernel state)
{
  string text = sprintf("%s", state);
  return text;
}
|
|
//! Renders the whole grammar: each non-terminal followed by one
//! "<tab>: symbols" line per rule and a blank line.
protected string _sprintf()
{
  string out = "";

  foreach (indices(grammar), int nonterminal) {
    out += symbol_to_string(nonterminal);

    foreach (grammar[nonterminal], Rule r) {
      out += "\t: ";
      if (!sizeof(r->symbols)) {
        out += "/* empty */";
      } else {
        foreach (r->symbols, int|string symbol) {
          out += symbol_to_string(symbol) + " ";
        }
      }
      out += "\n";
    }
    out += "\n";
  }
  return out;
}
|
//! @returns
//!   The grammar rendered as text; same result as casting the
//!   parser to string.
string cast_to_string()
{
  string text = _sprintf();
  return text;
}
|
|
|
|
|
//! Cast handler. Only casting to string is supported.
//!
//! @param type
//!   Name of the requested type.
//!
//! @returns
//!   The grammar rendered as text for "string", otherwise UNDEFINED.
protected mixed cast(string type)
{
  switch (type) {
  case "string":
    return _sprintf();
  }
  return UNDEFINED;
}
|
|
|
|
|
|
|
|
|
//! Set the priority of a terminal. An existing entry keeps its
//! associativity; a new entry gets associativity 0.
//!
//! @param terminal
//!   Terminal to change.
//! @param pri_val
//!   New priority value.
void set_priority(string terminal, int pri_val)
{
  Priority existing = operator_priority[terminal];

  if (!existing) {
    operator_priority[terminal] = Priority(pri_val, 0);
    return;
  }
  existing->value = pri_val;
}
|
|
|
|
|
|
|
|
//! Set the associativity of a terminal. An existing entry keeps its
//! priority value; a new entry gets priority 0.
//!
//! @param terminal
//!   Terminal to change.
//! @param assoc
//!   New associativity.
void set_associativity(string terminal, int assoc)
{
  Priority existing = operator_priority[terminal];

  if (!existing) {
    operator_priority[terminal] = Priority(0, assoc);
    return;
  }
  existing->assoc = assoc;
}
|
|
|
|
|
|
|
|
//! Install the function used to format symbols in diagnostics.
//!
//! @param s_to_s
//!   Formatter to use; leaving it out (or passing 0) restores the
//!   built-in formatter.
void set_symbol_to_string(void|function(int|string:string) s_to_s)
{
  if (s_to_s) {
    symbol_to_string = s_to_s;
  } else {
    symbol_to_string = builtin_symbol_to_string;
  }
}
|
|
|
|
|
|
//! Install the function that receives diagnostics.
//!
//! @param handler
//!   Handler to use; leaving it out (or passing 0) installs the
//!   report function of a fresh ErrorHandler.
void set_error_handler(void|function(SeverityLevel, string, string, mixed ...: void) handler)
{
  if (handler) {
    error_handler = handler;
  } else {
    error_handler = ErrorHandler()->report;
  }
}
|
|
|
|
|
//! Add a rule to the grammar. The rule is numbered, filed under its
//! non-terminal, and then either used to update the set of nullable
//! symbols (rules without tokens) or given a priority from its
//! tokens (rules with tokens).
//!
//! @param r
//!   Rule to add.
void add_rule(Rule r)
{
  report(NOTICE, "add_rule", "Adding rule: %s", rule_to_string(r));

  // Reserve one number per position in the rule, so that
  // rule number + offset is unique for every item.
  r->number = next_rule_number;
  next_rule_number += sizeof(r->symbols) + 1;

  grammar[r->nonterminal] = (grammar[r->nonterminal] || ({})) + ({ r });

  if (r->has_tokens) {
    // The last token that has a priority decides the rule's priority.
    foreach (r->symbols, int|string symbol) {
      Priority token_pri = operator_priority[symbol];
      if (token_pri) {
        r->pri = token_pri;
      }
    }
  } else {
    // Count the symbols that still stop this rule from being
    // nullable and register the rule as a user of each of them.
    foreach (r->symbols, int|string symbol) {
      if (nullable[symbol]) {
        r->num_nonnullables--;
        continue;
      }

      multiset(Rule) users = used_by[symbol];
      if (!users) {
        used_by[symbol] = (< r >);
      } else if (users[r]) {
        // Same symbol seen again in this rule; count it only once.
        r->num_nonnullables--;
      } else {
        users[r] = 1;
      }
    }

    if (!(r->num_nonnullables)) {
      // The rule is nullable, so its non-terminal is too. Propagate
      // that to every rule that was waiting on the non-terminal.
      ADT.Stack new_nullables = ADT.Stack(1024);

      new_nullables->push(r->nonterminal);

      while (sizeof(new_nullables)) {
        int fresh = [int]new_nullables->pop();
        report(NOTICE, "add_rule", "Nulling symbol %s",
               symbol_to_string(fresh));
        nullable[fresh] = 1;

        multiset(Rule) waiting = used_by[fresh];
        if (waiting) {
          foreach (indices(waiting), Rule r2) {
            if (!(--r2->num_nonnullables)) {
              new_nullables->push(r2->nonterminal);
            }
          }
          used_by[fresh] = 0;
        }
      }
    }
  }

#if 0
  int|string symbol;

  foreach (r->symbols, symbol) {
    if (!stringp(symbol)) {
      multiset set = begins[symbol];

      r->prefix_nonterminals |= (< symbol >);

      if (set) {
        set[r] = 1;
      } else {
        begins[symbol] = (< r >);
      }

      if (grammar[symbol]) {
        foreach (grammar[symbol], Rule r2) {
          r->prefix_nonterminals |= r2->prefix_nonterminals;
          r->prefix_tokens |= r2->prefix_tokens;

          foreach (indices(r2->prefix_nonterminals), mixed s2) {
            set = begins[s2];

            if (set) {
              set[r] = 1;
            } else {
              begins[s2] = (< r >);
            }
          }
        }
      }
      if (!nullable[symbol]) {
        break;
      }
    } else {
      r->prefix_tokens[symbol] = 1;
      break;
    }
  }

  if (begins[r->nonterminal]) {
    foreach (indices(begins[r->nonterminal]), Rule r2) {
      r2->prefix_nonterminals |= r->prefix_nonterminals;
      r2->prefix_tokens |= r->prefix_tokens;

      foreach (values(r->prefix_nonterminals), symbol) {
        multiset set = begins[symbol];

        if (set) {
          set[r2] = 1;
        } else {
          begins[symbol] = (< r2 >);
        }
      }
    }
  }
#endif /* 0 */
}
|
|
|
//! Build the initial state from the rules of non-terminal 0 and
//! register it in known_states.
//!
//! @returns
//!   The new state.
protected Kernel first_state()
{
  Kernel state = Kernel();

  array(int) first_state_item_ids = sort(grammar[0]->number);
  known_states[sprintf("%@4c", first_state_item_ids)] = state;

  foreach (grammar[0], Rule r) {
    // closure() below may already have added this rule.
    if (state->rules[r]) continue;

    Item i = Item();

    i->r = r;
    i->item_id = r->number;

    state->add_item(i);
    state->rules[r] = 1;

    if (sizeof(r->symbols) && intp(r->symbols[0])) {
      state->closure([int]r->symbols[0]);
    }
  }
  return state;
}
|
|
|
//! Queue holding every state built so far; filled by compile() and
//! Kernel.do_goto().
StateQueue s_q;

//! Stack of the items currently being visited by traverse_items().
protected ADT.Stack item_stack;

//! Depth-first traversal over the relation graph that merges the
//! look-ahead of every reachable item into @[i], and gives all items
//! of a cycle the same look-ahead set.
//!
//! The counter field of an item is the visit marker: 0 means not yet
//! visited, a small positive number is the stack depth while the item
//! is being processed, and 0x7fffffff means finished.
//!
//! @param i
//!   Item to start from.
//! @param conflict_func
//!   Called once for every cycle found. The argument is 1 when no
//!   item of the cycle has any error_lookahead, else 0.
protected void traverse_items(Item i,
                              function(int:void) conflict_func)
{
  item_stack->push(i);

  int depth = sizeof(item_stack);
  i->counter = depth;

  foreach (indices(i->relation), Item i2) {
    if (!i2->counter) {
      traverse_items(i2, conflict_func);
    }
    // Remember the shallowest item reachable from here.
    if (i->counter > i2->counter) {
      i->counter = i2->counter;
    }

    i->direct_lookahead |= i2->direct_lookahead;
  }

  // If nothing shallower was reached, this item is the root of its
  // component: unwind the stack down to it and finish every member.
  if (i->counter == depth) {
    int cyclic = 0;
    int empty_cycle = 1;
    Item i2;

    while ((i2 = [object(Item)]item_stack->pop()) != i) {
      i2->counter = 0x7fffffff;

      i2->direct_lookahead = i->direct_lookahead;

      cyclic = 1;
      empty_cycle &= !(sizeof(i2->error_lookahead));
    }
    i->counter = 0x7fffffff;

    if (cyclic) {
      report(NOTICE, "traverse_items", "Cyclic item\n%s",
             item_to_string(i));
      conflict_func(empty_cycle && !(sizeof(i->error_lookahead)));
    }
  }
}

// Shared driver for the two passes below: mark which items take part
// in the traversal, then traverse from each one not yet visited.
protected void traverse_all_items(function(int:void) conflict_func)
{
  item_stack = ADT.Stack(131072);

  // Only the first item per symbol in a state (no master_item) that
  // has a non-terminal at its position takes part; all other items
  // are marked as already finished.
  for (int index = 0; index < s_q->tail; index++) {
    foreach (s_q->arr[index]->items, Item i) {
      if ((i->offset != sizeof(i->r->symbols)) &&
          (intp(i->r->symbols[i->offset])) &&
          (!i->master_item)) {
        i->counter = 0;
      } else {
        i->counter = 0x7fffffff;
      }
    }
  }

  for (int index = 0; index < s_q->tail; index++) {
    foreach (s_q->arr[index]->items, Item i) {
      if (!i->counter) {
        traverse_items(i, conflict_func);
      }
    }
  }
}

//! Called for each cycle found during the shift pass. Does nothing.
//!
//! @param empty
//!   See traverse_items().
protected void shift_conflict(int empty)
{
  empty;
}

//! Run the look-ahead traversal for the shift pass.
protected void handle_shift_conflicts()
{
  traverse_all_items(shift_conflict);
}

//! Called for each cycle found during the follow pass. Does nothing.
//!
//! @param empty
//!   See traverse_items().
protected void follow_conflict(int empty)
{
  empty;
}

//! Run the look-ahead traversal for the follow pass.
protected void handle_follow_conflicts()
{
  traverse_all_items(follow_conflict);
}
|
//! Follow an item through successive states to the end of its rule,
//! adding @[current_item] to the relation of the items passed on the
//! way back, for as long as the rest of the rule is nullable.
//!
//! @param state
//!   State to look in.
//! @param item_id
//!   Id of the item to find in @[state].
//! @param current_item
//!   Item to add to the relations.
//!
//! @returns
//!   Non-zero if everything from this item's position to the end of
//!   the rule is nullable; 0 otherwise, or if the item is not found.
protected int go_through(Kernel state, int item_id,
                         Item current_item)
{
  Item i = state->item_id_to_item[item_id];

  if (!i) {
    report(ERROR, "go_through",
           "Item %d not found in state\n"
           "%s\n"
           "Backtrace:\n%s",
           item_id,
           state_to_string(state),
           describe_backtrace(backtrace()));
    return 0;
  }

  Item master = i->master_item || i;

  // End of the rule reached.
  if (i->offset >= sizeof(i->r->symbols)) {
    master->relation[current_item] = 1;
    return 1;
  }

  // Something later in the rule is not nullable.
  if (!go_through(i->next_state, item_id + 1, current_item)) {
    return 0;
  }

  if ((master->offset < sizeof(master->r->symbols)) &&
      (intp(master->r->symbols[master->offset])) &&
      (master != current_item)) {
    master->relation[current_item] = 1;
  }
  return nullable[i->r->symbols[i->offset]];
}
|
//! Try to resolve the conflicts in a state, one look-ahead symbol at
//! a time.
//!
//! If every action involved has a priority, the priorities and the
//! associativity decide. Otherwise a shift wins over reductions, and
//! among reductions only, the rule with the lowest number wins.
//! Losing reductions have the symbol removed from their look-ahead;
//! losing shifts have their next_state cleared.
//!
//! @param state
//!   State to repair.
//! @param conflicts
//!   Look-ahead symbols that have more than one action in @[state].
//!
//! @returns
//!   0 if all conflicts were resolved, else ERROR_CONFLICTS.
protected int repair(Kernel state, multiset(int|string) conflicts)
{
  multiset(int|string) conflict_set = (<>);

  report(NOTICE, "repair",
         "Repairing conflict in state:\n%s\n"
         "Conflicts on (< %s >)",
         state_to_string(state),
         map(indices(conflicts), symbol_to_string) * ", ");

  foreach (indices(conflicts), int|string symbol) {
    int reduce_count = 0;
    int shift_count = 0;
    int only_operators = 1;
    Priority shift_pri, reduce_pri, pri;
    Rule min_rule = 0;

    // Pass 1: count the actions on this symbol, find the highest
    // reduce priority and the shift priority, and remember the
    // reducing rule with the lowest number.
    foreach (state->items, Item i) {
      if (i->offset == sizeof(i->r->symbols)) {
        if (i->direct_lookahead[symbol]) {
          reduce_count++;
          if (pri = i->r->pri) {
            if (!reduce_pri || (pri->value > reduce_pri->value)) {
              reduce_pri = pri;
            }
          } else {
            only_operators = 0;
          }

          if ((!min_rule) || (i->r->number < min_rule->number)) {
            min_rule = i->r;
          }
        }
      } else if (!intp(i->r->symbols[i->offset])) {
        if (i->r->symbols[i->offset] == symbol) {
          shift_count++;

          if (operator_priority[symbol]) {
            shift_pri = operator_priority[symbol];
          } else {
            only_operators = 0;
          }
        }
      }
    }

    int reduce_rest = 0;
    int shift_rest = 0;

    if (only_operators) {
      // Pass 2a: everything has a priority. Either side may be
      // missing entirely (shift_pri is unset for a pure
      // reduce/reduce conflict), so only compare what exists.
      if (!shift_pri ||
          (reduce_pri && (reduce_pri->value > shift_pri->value))) {
        pri = reduce_pri;
      } else {
        pri = shift_pri;
      }

      foreach (state->items, Item i) {
        if (i->offset == sizeof(i->r->symbols)) {
          if (i->direct_lookahead[symbol]) {
            Priority new_pri;
            if ((new_pri = i->r->pri)->value < pri->value) {
              report(NOTICE, "repair",
                     "Ignoring reduction of item\n%s\n"
                     "on lookahead %s (Priority %d < %d)",
                     item_to_string(i),
                     symbol_to_string(symbol),
                     new_pri->value, pri->value);
              i->direct_lookahead[symbol] = 0;
              if (!sizeof(indices(i->direct_lookahead))) {
                i->direct_lookahead = (<>);
              }
            } else if (shift_pri && (pri->assoc >= 0) &&
                       (shift_pri->value == pri->value)) {
              report(NOTICE, "repair",
                     "Ignoring reduction of item\n%s\n"
                     "on lookahead %s (Right associative)",
                     item_to_string(i),
                     symbol_to_string(symbol));
              i->direct_lookahead[symbol] = 0;
              if (!sizeof(indices(i->direct_lookahead))) {
                i->direct_lookahead = (<>);
              }
            } else {
              report(NOTICE, "repair",
                     "Kept item\n%s\n"
                     "on lookahead %s",
                     item_to_string(i),
                     symbol_to_string(symbol));
              reduce_rest++;
            }
          }
        } else if (i->r->symbols[i->offset] == symbol) {
          // A shift on this symbol exists, so shift_pri is set here.
          if (shift_pri->value < pri->value) {
            report(NOTICE, "repair",
                   "Ignoring shift on item\n%s\n"
                   "on lookahead %s (Priority %d < %d)",
                   item_to_string(i),
                   symbol_to_string(symbol),
                   shift_pri->value, pri->value);
            i->direct_lookahead = (<>);
            i->next_state = 0;
          } else if (reduce_pri && (pri->assoc <= 0) &&
                     (reduce_pri->value == pri->value)) {
            report(NOTICE, "repair",
                   "Ignoring shift on item\n%s\n"
                   "on lookahead %s (Left associative)",
                   item_to_string(i),
                   symbol_to_string(symbol));
            i->direct_lookahead = (<>);
            i->next_state = 0;
          } else {
            report(NOTICE, "repair",
                   "Kept item\n%s\n"
                   "on lookahead %s",
                   item_to_string(i),
                   symbol_to_string(symbol));
            shift_rest++;
          }
        }
      }
    } else {
      if (shift_count) {
        // Pass 2b: no usable priorities and a shift is possible;
        // the shift wins over every reduction.
        foreach (state->items, Item i) {
          if (i->offset == sizeof(i->r->symbols)) {
            if (i->direct_lookahead[symbol]) {
              report(NOTICE, "repair",
                     "Ignoring reduction on item\n%s\n"
                     "on lookahead %s (can shift)",
                     item_to_string(i),
                     symbol_to_string(symbol));
              i->direct_lookahead[symbol] = 0;
              if (!sizeof(indices(i->direct_lookahead))) {
                i->direct_lookahead = (<>);
              }
            }
          } else {
            if (i->r->symbols[i->offset] == symbol) {
              report(NOTICE, "repair",
                     "Kept item\n%s\n"
                     "on lookahead (shift)%s",
                     item_to_string(i),
                     symbol_to_string(symbol));
              shift_rest++;
            }
          }
        }
      } else {
        // Pass 2c: reductions only; keep the rule with the lowest
        // number.
        foreach (state->items, Item i) {
          if (i->r == min_rule) {
            report(NOTICE, "repair",
                   "Kept item\n%s\n"
                   "on lookahead %s (first rule)",
                   item_to_string(i),
                   symbol_to_string(symbol));
            reduce_rest++;
          } else {
            report(NOTICE, "repair",
                   "Ignoring reduction on item\n%s\n"
                   "on lookahead %s (not first rule)",
                   item_to_string(i),
                   symbol_to_string(symbol));
            i->direct_lookahead[symbol] = 0;
            if (!sizeof(indices(i->direct_lookahead))) {
              i->direct_lookahead = (<>);
            }
          }
        }
      }
    }

    // Whatever is left after the passes above decides whether the
    // symbol is still in conflict.
    int conflict_free = 0;

    if (reduce_rest > 1) {
      if (shift_rest) {
        report(ERROR, "repair",
               "Shift-Reduce-Reduce conflict on lookahead %s",
               symbol_to_string(symbol));
      } else {
        report(ERROR, "repair",
               "Reduce-Reduce conflict on lookahead %s",
               symbol_to_string(symbol));
      }
    } else if (reduce_rest) {
      if (shift_rest) {
        report(ERROR, "repair",
               "Shift-Reduce conflict on lookahead %s",
               symbol_to_string(symbol));
      } else {
        // A single reduction remains.
        conflict_free = 1;
      }
    } else {
      // No reduction remains.
      conflict_free = 1;
    }

    if (conflict_free) {
      if (reduce_count > 1) {
        if (shift_count) {
          report(only_operators?NOTICE:WARNING, "repair",
                 "Repaired Shift-Reduce-Reduce conflict on %s",
                 symbol_to_string(symbol));
        } else {
          report(only_operators?NOTICE:WARNING, "repair",
                 "Repaired Reduce-Reduce conflict on %s",
                 symbol_to_string(symbol));
        }
      } else if (reduce_count) {
        if (shift_count) {
          report(only_operators?NOTICE:WARNING, "repair",
                 "Repaired Shift-Reduce conflict on %s",
                 symbol_to_string(symbol));
        } else {
          report(NOTICE, "repair",
                 "No conflict on symbol %s (Plain REDUCE)",
                 symbol_to_string(symbol));
        }
      } else {
        report(NOTICE, "repair",
               "No conflict on symbol %s (SHIFT)",
               symbol_to_string(symbol));
      }
    } else {
      conflict_set[symbol] = 1;
    }
  }

  if (sizeof(indices(conflict_set))) {
    report(ERROR, "repair",
           "Still conflicts remaining in state\n%s\n"
           "on symbols (< %s >)",
           state_to_string(state),
           map(indices(conflict_set), symbol_to_string) * ", ");
    return ERROR_CONFLICTS;
  } else {
    report(WARNING, "repair",
           "All conflicts removed!");
    return 0;
  }
}
|
// LR_GAUGE(X, BLOCK) runs BLOCK. When LR_PROFILE is defined it also
// reports the time BLOCK took, labelled X.
// The block is a macro argument, so it must not contain commas
// outside parentheses.
#ifdef LR_PROFILE
#define LR_GAUGE(X, BLOCK) \
  report(NOTICE, "compile", X ": %f\n", gauge BLOCK)
#else /* !LR_PROFILE */
#define LR_GAUGE(X, BLOCK) do BLOCK while(0)
#endif /* LR_PROFILE */

//! Build the states and action tables for the grammar added so far.
//!
//! @returns
//!   0 if no conflicts remain, otherwise ERROR_CONFLICTS.
int compile()
{
  // NOTE(review): this local shadows the lr_error member. Flags set
  // on the member while building states (see Kernel.closure()) are
  // therefore not part of the return value.
  int lr_error = 0;
  int state_no = 0;
  Kernel state;
  multiset(int|string) symbols, conflicts;

  s_q = StateQueue();
  s_q->push(first_state());

  // Build all states: do_goto() queues every new state it creates,
  // so the loop ends when no unseen state is left.
  LR_GAUGE("LR0", {
      while (state = s_q->next()) {

        report(NOTICE, "compile", "Compiling state %d:\n%s", state_no++,
               state_to_string(state) + "\n");

        foreach (indices(state->goto_set()), int|string symbol) {
          state->do_goto(symbol);
        }
      }
    });

  report(NOTICE, "compile", "Nullable nonterminals: (< %s >)\n",
         map(indices(nullable), symbol_to_string) * ", ");

  // In each state, the first item with a given symbol at its
  // position becomes the master of the later ones.
  LR_GAUGE("Master items", {
      for (int index = 0; index < s_q->tail; index++) {
        mapping(int|string : Item) master_item =([]);

        foreach (s_q->arr[index]->items, Item i) {
          if (i->offset < sizeof(i->r->symbols)) {
            int|string symbol = i->r->symbols[i->offset];

            if (!(i->master_item = master_item[symbol])) {
              master_item[symbol] = i;
            }
          }
        }
      }
    });

  // Initial look-ahead for every master item with a non-terminal at
  // its position: the terminals that can be shifted in the next
  // state, plus a relation to the items there that are at a
  // nullable non-terminal.
  LR_GAUGE("LA sets", {
      for (int index = 0; index < s_q->tail; index++) {
        foreach (s_q->arr[index]->items, Item i) {
          if ((!i->master_item) && (i->offset != sizeof(i->r->symbols)) &&
              (intp(i->r->symbols[i->offset]))) {
            foreach (i->next_state->items, Item i2) {
              int|string symbol;

              if (!i2->master_item) {
                if (i2->offset != sizeof(i2->r->symbols)) {
                  if (intp(symbol = i2->r->symbols[i2->offset])) {
                    if (nullable[symbol]) {
                      i->relation[i2] = 1;
                    }
                  } else {
                    i->direct_lookahead[symbol] = 1;
                  }
                }
              }
            }
          }
        }
      }
    });

  LR_GAUGE("Handle shift", {
      handle_shift_conflicts();
    });

  // Keep a copy of the look-ahead as it stands after the shift pass.
  LR_GAUGE("Check shift", {
      for (int index = 0; index < s_q->tail; index++) {
        foreach (s_q->arr[index]->items, Item i) {
          if ((!i->master_item) &&
              (i->offset != sizeof(i->r->symbols)) &&
              (intp(i->r->symbols[i->offset]))) {
            i->error_lookahead = copy_value(i->direct_lookahead);
          }
        }
      }
    });

  // For every master item at a non-terminal, find the offset-0 items
  // for that non-terminal in the same state and follow each of them
  // to the end of its rule, recording the relations on the way.
  LR_GAUGE("Lookback sets", {
      for (int index = 0; index < s_q->tail; index++) {
        array(Item) items = s_q->arr[index]->items;

        mapping(int:array(Item)) lookup = ([]);
        foreach (items, Item i) {
          if (!i->offset) {
            if (!lookup[i->r->nonterminal]) {
              lookup[i->r->nonterminal] = ({ i });
            } else {
              lookup[i->r->nonterminal] += ({ i });
            }
          }
        }
        foreach (items, Item transition) {
          int|string symbol;

          if ((!transition->master_item) &&
              (transition->offset != sizeof(transition->r->symbols)) &&
              (intp(symbol = transition->r->symbols[transition->offset]))) {

            if (!lookup[symbol]) {
              report(WARNING, "compile",
                     "No item for symbol <%s>\n"
                     "in state:\n"
                     "%s",
                     symbol_to_string(symbol),
                     state_to_string(s_q->arr[index]));
              continue;
            }

            foreach (lookup[symbol], Item i) {
              if (sizeof(i->r->symbols)) {
                if (go_through(i->next_state, i->item_id + 1, transition)) {
                  Item master = i;
                  if (i->master_item) {
                    master = i->master_item;
                  }

                  if ((master->offset != sizeof(master->r->symbols)) &&
                      (intp(master->r->symbols[master->offset]))) {
                    if (master != transition) {
                      master->relation[transition] = 1;
                    }
                  }
                }
              } else {
                i->relation[transition] = 1;
              }
            }
          }
        }
      }
    });

  LR_GAUGE("Handle follow", {
      handle_follow_conflicts();
    });

  // Items at the end of their rule get the union of the look-ahead
  // of everything in their relation.
  LR_GAUGE("Compute LA", {
      for (int index = 0; index < s_q->tail; index++) {
        foreach (s_q->arr[index]->items, Item i) {
          if (i->offset == sizeof(i->r->symbols)) {
            i->direct_lookahead=`|(i->direct_lookahead,
                                   @indices(i->relation)->direct_lookahead);
          }
        }
      }
    });

  // A symbol is in conflict in a state when two reductions, or a
  // reduction and a shift, want it.
  LR_GAUGE("Check conflicts", {
      for (int index = 0; index < s_q->tail; index++) {
        Kernel state = s_q->arr[index];

        conflicts = (<>);
        symbols = (<>);

        foreach (state->items, Item i) {
          if (i->offset == sizeof(i->r->symbols)) {
            conflicts |= i->direct_lookahead & symbols;
            symbols |= i->direct_lookahead;
          } else if (!i->master_item) {
            string|int symbol;

            if (!intp(symbol = i->r->symbols[i->offset])) {
              if (symbols[symbol]) {
                conflicts[symbol] = 1;
              } else {
                symbols[symbol] = 1;
              }
            }
          }
        }
        if (sizeof(conflicts)) {
          // Accumulate, so that a later state that repairs cleanly
          // does not erase a failure from an earlier one.
          lr_error |= repair(state, conflicts);
        } else {
          report(NOTICE, "compile", "No conflicts in state:\n%s",
                 state_to_string(s_q->arr[index]));
        }
      }
    });

  // Fill in the action table of every state: go to next_state for
  // items that have one, reduce on the look-ahead for the others.
  LR_GAUGE("Compile actions", {
      for (int index = 0; index < s_q->tail; index++) {
        Kernel state = s_q->arr[index];

        state->action = ([]);

        foreach (state->items, Item i) {
          if (i->next_state) {
            state->action[i->r->symbols[i->offset]] = i->next_state;
          } else {
            foreach (indices(i->direct_lookahead), int|string symbol) {
              state->action[symbol] = i->r;
            }
          }
        }
      }
      start_state = s_q->arr[0];
    });

#ifdef LR_PROFILE
  report(NOTICE, "compile", "DONE\n");
#endif /* LR_PROFILE */

  return lr_error;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
//! Parse the input delivered by @[scanner] using the tables built by
//! compile().
//!
//! @param scanner
//!   Function, or object with a call operator, that returns the next
//!   token on each call: either the token name as a string, or an
//!   array of the token name and its value. The empty string marks
//!   the end of the input.
//! @param action_object
//!   Object in which actions given by name are looked up.
//!
//! @returns
//!   The value left on the value stack when the end of the input is
//!   accepted. 0 if the scanner is not callable, or if the stack is
//!   empty at an unexpected end of input.
//!
//! Errors are reported through report() and flagged in lr_error,
//! which is reset to 0 at the start of every call.
mixed parse(object|function(void:string|array(string|mixed)) scanner,
            void|object action_object)
{
  ADT.Stack value_stack = ADT.Stack(4096);
  ADT.Stack state_stack = ADT.Stack(4096);
  Kernel state = start_state;

  string input;
  mixed value;

  lr_error = 0;

  if (!functionp(scanner) &&
      !(objectp(scanner) && functionp(scanner->`()))) {
    report(ERROR, "parse", "parser->parse(): scanner not set!\n");
    lr_error = ERROR_NO_SCANNER;
    return 0;
  }

  while (1) {
    mixed a;

    // Fetch the next token.
    value = scanner();

    if (arrayp(value)) {
      input = ([array(string)]value)[0];
      value = ([array(mixed)]value)[1];
    } else {
      input = [string]value;
    }

    while(1) {
      // Reduce for as long as the action on this token is a rule.
      while (object_program(a = state->action[input]) == Rule) {
        Rule r = [object(Rule)]a;

        report(NOTICE, "parse", "Reducing according to rule\n%s\n",
               rule_to_string(r));

        // One-shot loop; `break` leaves it once the action has run.
        do {
          if (r->action) {
            string|function func = 0;

            if (stringp(func = r->action)) {
              if (action_object) {
                func = [string|function]action_object[r->action];
                if (!functionp(func)) {
                  if (!func) {
                    report(ERROR, "parse",
                           "Missing action \"%s\" in object",
                           r->action);
                    lr_error |= ERROR_MISSING_ACTION;
                  } else {
                    report(ERROR, "parse",
                           "Bad type (%s) for action \"%s\" in object",
                           typeof(func), r->action);
                    lr_error |= ERROR_BAD_ACTION_TYPE;
                    func = 0;
                  }
                }
              } else {
                report(ERROR, "parse", "Missing object for action \"%s\"",
                       r->action);
                lr_error |= ERROR_NO_OBJECT;
                func = 0;
              }
            }
            if (func) {
              if (sizeof(r->symbols)) {
                value_stack->push(([function(mixed ...:mixed)]func)
                                  (@[array(mixed)]value_stack->
                                   pop(sizeof(r->symbols))));
                state = ([array(Kernel)]state_stack->pop(sizeof(r->symbols)))[0];
              } else {
                // Call the resolved function; r->action itself may
                // be just the name of it.
                value_stack->push(([function(mixed ...:mixed)]func)());
              }
              break;
            }
          }

          // No usable action: keep the value of the first symbol, or
          // push 0 for an empty rule.
          if (sizeof(r->symbols)) {
            if (sizeof(r->symbols) > 1) {
              value_stack->quick_pop(sizeof(r->symbols) - 1);
            }
            state = ([array(Kernel)]state_stack->pop(sizeof(r->symbols)))[0];
          } else {
            value_stack->push(0);
          }
        } while(0);

        state_stack->push(state);
        state = [object(Kernel)]state->action[r->nonterminal];
      }

      if (a) {
        // Shift, or accept at the end of the input.
        if (input == "") {
          return value_stack->pop();
        }

        report(NOTICE, "parse",
               "Shifting \"%s\", value \"%O\"", input, value);
        value_stack->push(value);
        state_stack->push(state);
        state = [object(Kernel)]a;
      } else {
        // No action for this token in this state.
        if (input == "") {
          // Unexpected end of input: discard stacked values one at
          // a time and retry in the state below.
          lr_error |= ERROR_EOF;

          if (sizeof(value_stack) != 1) {
            if (sizeof(value_stack)) {
              report(ERROR, "parse", "Bad state at EOF -- Throwing \"%O\"",
                     value_stack->pop());
              state = [object(Kernel)]state_stack->pop();
              continue;
            } else {
              report(ERROR, "parse", "Empty stack at EOF!");
              return 0;
            }
          } else {
            report(ERROR, "parse", "Bad state at EOF");
            return value_stack->pop();
          }
        } else {
          // Unexpected token: report it and go on with the next one.
          lr_error |= ERROR_SYNTAX;

          report(ERROR, "parse", "Bad input: %O(%O)", input, value);
        }
      }
      break;
    }
  }
}
} |
|
|