pike.git / lib / modules / Parser.pmod / LR.pmod / module.pmod

version» Context lines:

pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:1:   /* -  * $Id: module.pmod,v 1.2 2002/05/22 12:33:28 nilsson Exp $ +  * $Id: module.pmod,v 1.3 2002/05/22 16:16:35 grubba Exp $    *    * A BNF-grammar in Pike.    * Compiles to a LALR(1) state-machine.    *    * Henrik Grubbström 1996-11-24    */      #pike __REAL_VERSION__      /*! LALR(1) parser generator    */    -  + // #pragma strict_types +    /*    * Defines    */      /* Errors during parsing */   /* Unexpected EOF */   constant ERROR_EOF= 1;   /* Syntax error in input */   constant ERROR_SYNTAX= 2;   /* Shift-Reduce or Reduce-Reduce */
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:163:   //!   //! @pre{   //! {add_rule, set_priority, set_associativity}*   //! set_symbol_to_string   //! compile   //! {parse}*   //! @}   class Parser   {    //! The grammar itself. -  mapping(int|string : array(Rule)) grammar = ([]); +  mapping(int : array(Rule)) grammar = ([]);       /* Priority table for terminal symbols */    static mapping(string : Priority) operator_priority = ([]);       static multiset(mixed) nullable = (< >);      #if 0    static mapping(mixed : multiset(Rule)) derives = ([]);       /* Maps from symbol to which rules may start with that symbol */
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:244:       //! Depth counter (used when compiling).    int counter;       //! Item identification number (used when compiling).    int number;       //! Used to identify the item.    //! Equal to r->number + offset.    int item_id; +  +  static string _sprintf() +  { +  array(string) res = ({ symbol_to_string(r->nonterminal), ":\t" }); +  +  if (offset) { +  foreach(r->symbols[0..offset-1], int|string symbol) { +  res += ({ symbol_to_string(symbol), " " });    } -  +  } +  res += ({ "· " }); +  if (offset != sizeof(r->symbols)) { +  foreach(r->symbols[offset..], int|string symbol) { +  res += ({ symbol_to_string(symbol), " " }); +  } +  } +  if (sizeof(indices(direct_lookahead))) { +  res += ({ "\t{ ", +  map(indices(direct_lookahead), symbol_to_string) * ", ", +  " }" }); +  } +  return res * ""; +  } +  }       //! Implements an LR(1) state    static class Kernel {       //! Used to check if a rule already has been added when doing closures.    multiset(Rule) rules = (<>);       //! Contains the items in this state.    array(Item) items = ({});   
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:320:       // Not needed, since 0 is the default.    // new_item->offset = 0;    // rules[r] is set by the post-increment above.    rules[r] = 1;       add_item(new_item);       if (sizeof(r->symbols) && intp(r->symbols[0]) &&    !closure_set[r->symbols[0]]) { -  closure(r->symbols[0]); +  closure([int]r->symbols[0]);    }    }    }    } else {    werror("Error: Definition missing for non-terminal %s\n",    symbol_to_string(nonterminal));    lr_error |= ERROR_MISSING_DEFINITION;    }    }   
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:365:    Kernel do_goto(int|string symbol)    {    multiset(Item) items;       if (verbose) {    werror("Performing GOTO on <%s>\n", symbol_to_string(symbol));    }       items = symbol_items[symbol];    if (items) { -  array(int) item_ids = Array.map(sort(indices(items)->item_id), +  array(int) item_ids = map(sort(indices(items)->item_id),    `+, 1);    string kernel_hash = sprintf("%@4c", item_ids);       Kernel new_state = known_states[kernel_hash];       if (!new_state) {    known_states[kernel_hash] = new_state = Kernel();       foreach (indices(items), Item i) {    int|string lookahead;
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:390:       new_item->offset = ++offset;    new_item->r = r = i->r;    new_item->item_id = r->number + offset;       new_state->add_item(new_item);       if ((offset != sizeof(r->symbols)) &&    intp(lookahead = r->symbols[offset]) &&    !new_state->closure_set[lookahead]) { -  new_state->closure(lookahead); +  new_state->closure([int]lookahead);    }    }       s_q->push(new_state);    } else {    // werror("Known state\n");    }    /* DEBUG */       if (verbose) {
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:418:    if (items) {    foreach (indices(items), Item i) {    i->next_state = new_state;    }    }    } else {    werror("WARNING: do_goto() on unknown symbol <%s>\n",    symbol_to_string(symbol));    }    } +  +  static string _sprintf() +  { +  return sprintf("%{%s\n%}", items);    } -  +  }       //! This is a queue, which keeps the elements even after they are retrieved.    static class State_queue {       //! Index of the head of the queue.    int head;       //! Index of the tail of the queue.    int tail;   
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:447:    {    if (tail == sizeof(arr)) {    arr += allocate(tail);    }    arr[tail++] = state;       return state;    }       //! Return the next state from the queue. -  int|Kernel next() +  Kernel next()    {    if (head == tail) {    return 0;    } else {    return arr[head++];    }    }    }       /*
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:500:    }    return res;    }       //! Pretty-prints an item to a string.    //!    //! @param i    //! Item to pretty-print.    string item_to_string(Item i)    { -  array(string) res = ({ symbol_to_string(i->r->nonterminal), ":\t" }); -  -  if (i->offset) { -  foreach(i->r->symbols[0..i->offset-1], int|string symbol) { -  res += ({ symbol_to_string(symbol), " " }); +  return sprintf("%s", i);    } -  } -  res += ({ "· " }); -  if (i->offset != sizeof(i->r->symbols)) { -  foreach(i->r->symbols[i->offset..], int|string symbol) { -  res += ({ symbol_to_string(symbol), " " }); -  } -  } -  if (sizeof(indices(i->direct_lookahead))) { -  res += ({ "\t{ ", -  map(indices(i->direct_lookahead), symbol_to_string) * ", ", -  " }" }); -  } -  return res * ""; -  } +        //! Pretty-prints a state to a string.    //!    //! @param state    //! State to pretty-print.    string state_to_string(Kernel state)    { -  return (map(state->items, item_to_string) * "\n"); +  return sprintf("%s", state);    }       //! Pretty-prints the current grammar to a string. -  string cast_to_string() +  static string _sprintf()    {    array(string) res = ({});       foreach (indices(grammar), int nonterminal) {    res += ({ symbol_to_string(nonterminal) });    foreach (grammar[nonterminal], Rule r) {    res += ({ "\t: " });    if (sizeof(r->symbols)) {    foreach (r->symbols, int|string symbol) {    res += ({ symbol_to_string(symbol), " " });
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:553:    } else {    res += ({ "/* empty */" });    }    res += ({ "\n" });    }    res += ({ "\n" });    }    return (res * "");    }    +  string cast_to_string() +  { +  return _sprintf(); +  } +     //! Implements casting.    //!    //! @param type    //! Type to cast to.    mixed cast(string type)    {    if (type == "string") { -  return(cast_to_string()); +  return(_sprintf());    }    throw(({ sprintf("Cast to %s not supported\n", type), backtrace() }));    }       /* Here come the functions that actually do some work */       //! Sets the priority of a terminal.    //!    //! @param terminal    //! Terminal to set the priority for.
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:671:    used_by[symbol] = (< r >);    }    }    }       if (!(r->num_nonnullables)) {    /* This rule was nullable */    new_nullables->push(r->nonterminal);       while (new_nullables->ptr) { -  symbol = new_nullables->pop(); +  symbol = [int]new_nullables->pop();    if (verbose) {    werror("Nulling symbol %s\n",    symbol_to_string(symbol));    }    nullable[symbol] = 1;    if (used_by[symbol]) {    foreach (indices(used_by[symbol]), Rule r2) {    if (!(--r2->num_nonnullables)) {    new_nullables->push(r2->nonterminal);    }
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:783:    i->r = r;    // Not needed since 0 is the default.    // i->offset = 0;    i->item_id = r->number;       state->add_item(i);    state->rules[r] = 1; /* Since this is an item with offset 0 */       if ((sizeof(r->symbols)) &&    (intp(r->symbols[0]))) { -  state->closure(r->symbols[0]); +  state->closure([int]r->symbols[0]);    }    }    }    return state;    }       //! Contains all states used. -  //! In the queue-part are the states that remain to be compiled. +  //! In the queue section are the states that remain to be compiled.    State_queue s_q;       static ADT.Stack item_stack;       static void traverse_items(Item i,    function(int:void) conflict_func)    {    int depth;       item_stack->push(i);
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:821:    }       i->direct_lookahead |= i2->direct_lookahead;    }       if (i->number == depth) {    int cyclic = 0;    int empty_cycle = 1;    Item i2;    -  while ((i2 = item_stack->pop()) != i) { +  while ((i2 = [object(Item)]item_stack->pop()) != i) {       i2->number = 0x7fffffff;       i2->direct_lookahead = i->direct_lookahead;       cyclic = 1;    empty_cycle &= !(sizeof(i2->error_lookahead));    }    i->count = 0x7fffffff;   
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:1242:    map(indices(conflict_set), symbol_to_string) * ", ");    return (ERROR_CONFLICTS);    } else {    if (verbose) {    werror("All conflicts removed!\n");    }    return (0);    }    }    + #ifdef LR_PROFILE + #define LR_GAUGE(X, BLOCK) werror(X ": %f\n", gauge BLOCK) + #else /* !LR_PROFILE */ + #define LR_GAUGE(X, BLOCK) do BLOCK while(0) + #endif /* LR_PROFILE */ +     //! Compiles the grammar into a parser, so that parse() can be called.    int compile()    {    int lr_error = 0; /* No error yet */    int state_no = 0; /* DEBUG INFO */    Kernel state;    multiset(int|string) symbols, conflicts;       s_q = State_queue();    s_q->push(first_state());       /* First make LR(0) states */    - #ifdef LR_PROFILE -  werror("LR0: %f\n", gauge { - #endif /* LR_PROFILE */ -  +  LR_GAUGE("LR0", {    while (state = s_q->next()) {       if (verbose) {    werror("Compiling state %d:\n%s", state_no++,    state_to_string(state) + "\n");    }       /* Probably better implemented as a stack */    foreach (indices(state->goto_set()), int|string symbol) {    state->do_goto(symbol);    }    } - #ifdef LR_PROFILE +     }); - #endif /* LR_PROFILE */ +        /* Compute nullables */    /* Done during add_rule */    if (verbose) {    werror("Nullable nonterminals: (< %s >)\n",    map(indices(nullable), symbol_to_string) * ", ");    }    - #ifdef LR_PROFILE -  werror("Master items: %f\n", gauge { - #endif /* LR_PROFILE */ -  +  LR_GAUGE("Master items", {    /* Mark Transition and Reduction master items */    for (int index = 0; index < s_q->tail; index++) {    mapping(int|string : Item) master_item =([]);       foreach (s_q->arr[index]->items, Item i) {    if (i->offset < sizeof(i->r->symbols)) {    /* This is not a reduction item, which represent themselves */    int|string symbol = i->r->symbols[i->offset];       if (!(i->master_item = master_item[symbol])) {    master_item[symbol] = i;    }    }    }    } -  - #ifdef LR_PROFILE +     }); - #endif /* LR_PROFILE */ +        /* Probably OK so far */    - #ifdef LR_PROFILE -  werror("LA sets: %f\n", gauge { - #endif /* LR_PROFILE */ -  +  LR_GAUGE("LA sets", {    /* Calculate look-ahead sets (DR and relation) */    for (int index = 0; index < s_q->tail; index++) {    foreach (s_q->arr[index]->items, Item i) {    if ((!i->master_item) && (i->offset != sizeof(i->r->symbols)) &&    (intp(i->r->symbols[i->offset]))) {    /* This is a non-terminal master item */    foreach (i->next_state->items, Item i2) {    int|string symbol;       if (!i2->master_item) {
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:1339:    } else {    /* Add the string to the direct look-ahead set (DR) */    i->direct_lookahead[symbol] = 1;    }    }    }    }    }    }    } -  - #ifdef LR_PROFILE +     }); - #endif /* LR_PROFILE */ +     - #ifdef LR_PROFILE -  werror("Handle shift: %f\n", gauge { - #endif /* LR_PROFILE */ -  +  LR_GAUGE("Handle shift", {    /* Handle SHIFT-conflicts */    handle_shift_conflicts(); -  - #ifdef LR_PROFILE +     }); - #endif /* LR_PROFILE */ +     - #ifdef LR_PROFILE -  werror("Check shift: %f\n", gauge { - #endif /* LR_PROFILE */ -  +  LR_GAUGE("Check shift", {    /* Check the shift sets */    /* (Is this needed?)    * Yes - initializes error_lookahead    */    for (int index = 0; index < s_q->tail; index++) {    foreach (s_q->arr[index]->items, Item i) {    if ((!i->master_item) &&    (i->offset != sizeof(i->r->symbols)) &&    (intp(i->r->symbols[i->offset]))) {    i->error_lookahead = copy_value(i->direct_lookahead);    }    }    } -  - #ifdef LR_PROFILE +     }); - #endif /* LR_PROFILE */ +     - #ifdef LR_PROFILE -  werror("Lookback sets: %f\n", gauge { - #endif /* LR_PROFILE */ -  +  LR_GAUGE("Lookback sets", {    /* Compute lookback-sets */    for (int index = 0; index < s_q->tail; index++) {    array(Item) items = s_q->arr[index]->items;    // Set up a lookup table to speedup lookups later.    mapping(int:array(Item)) lookup = ([]);    foreach (items, Item i) {    if (!i->offset) {    if (!lookup[i->r->nonterminal]) {    lookup[i->r->nonterminal] = ({ i });    } else {
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:1441:    }    }    }    } else {    i->relation[transition] = 1;    }    }    }    }    } -  - #ifdef LR_PROFILE +     }); - #endif /* LR_PROFILE */ +     - #ifdef LR_PROFILE -  werror("Handle follow: %f\n", gauge { - #endif /* LR_PROFILE */ -  +  LR_GAUGE("Handle follow", {    /* Handle follow-conflicts */    handle_follow_conflicts(); -  - #ifdef LR_PROFILE +     }); - #endif /* LR_PROFILE */ +     - #ifdef LR_PROFILE -  werror("Compute LA: %f\n", gauge { - #endif /* LR_PROFILE */ -  +  LR_GAUGE("Compute LA", {    /* Compute the lookahead (LA) */    for (int index = 0; index < s_q->tail; index++) {    foreach (s_q->arr[index]->items, Item i) {    if (i->offset == sizeof(i->r->symbols)) {    /* Reduction item (always a master item) */       /* Calculate Look-ahead for all items in look-back set */       i->direct_lookahead=`|(i->direct_lookahead,    @indices(i->relation)->direct_lookahead);    }    }    } -  - #ifdef LR_PROFILE +     }); - #endif /* LR_PROFILE */ +        /* Probably OK from this point onward */    - #ifdef LR_PROFILE -  werror("Check conflicts: %f\n", gauge { - #endif /* LR_PROFILE */ -  +  LR_GAUGE("Check conflicts", {    /* Check for conflicts */    for (int index = 0; index < s_q->tail; index++) {    Kernel state = s_q->arr[index];       conflicts = (<>);    symbols = (<>);       foreach (state->items, Item i) {    if (i->offset == sizeof(i->r->symbols)) {    /* Reduction */
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:1525:    /* Repair conflicts */    // int ov = verbose;    // verbose = 1;    lr_error = repair(state, conflicts);    // verbose = ov;    } else if (verbose) {    werror("No conflicts in state:\n%s\n",    state_to_string(s_q->arr[index]));    }    } -  - #ifdef LR_PROFILE +     }); - #endif /* LR_PROFILE */ +     - #ifdef LR_PROFILE -  werror("Compile actions: %f\n", gauge { - #endif /* LR_PROFILE */ -  +  LR_GAUGE("Compile actions", {    /* Compile action tables */    for (int index = 0; index < s_q->tail; index++) {    Kernel state = s_q->arr[index];       state->action = ([]);       foreach (state->items, Item i) {    if (i->next_state) {    /* SHIFT */    state->action[i->r->symbols[i->offset]] = i->next_state;    } else {    foreach (indices(i->direct_lookahead), int|string symbol) {    state->action[symbol] = i->r;    }    }    }    }    start_state = s_q->arr[0]; -  - #ifdef LR_PROFILE +     }); - #endif /* LR_PROFILE */ +       #ifdef LR_PROFILE    werror("DONE\n");   #endif /* LR_PROFILE */       return (lr_error);    }       //! Parse the input according to the compiled grammar.    //! The last value reduced is returned.
pike.git/lib/modules/Parser.pmod/LR.pmod/module.pmod:1605:    if (!functionp(scanner) &&    !(objectp(scanner) && functionp(scanner->`()))) {    werror("parser->parse(): scanner not set!\n");    lr_error = ERROR_NO_SCANNER;    return(0);    }       value = scanner();       if (arrayp(value)) { -  input = value[0]; -  value = value[1]; +  input = ([array(string)]value)[0]; +  value = ([array(mixed)]value)[1];    } else { -  input = value; +  input = [string]value;    }       while (1) {    mixed a = state->action[input];       if (object_program(a) == Rule) { -  +  Rule r = [object(Rule)]a;       if (verbose) {    werror("Reducing according to rule\n%s\n", -  rule_to_string(a)); +  rule_to_string(r));    }    -  if (a->action) { +  if (r->action) {    /* REDUCE */ -  string|function (mixed ...:mixed) func = 0; +  string|function func = 0;    -  if (stringp(func = a->action)) { +  if (stringp(func = r->action)) {    if (action_object) { -  func = action_object[a->action]; +  func = action_object[r->action];    if (!functionp(func)) {    if (!func) {    werror("Missing action \"%s\" in object\n", -  a->action); +  r->action);    lr_error |= ERROR_MISSING_ACTION;    } else {    werror("Bad type (%s) for action \"%s\" in object\n", -  typeof(func), a->action); +  typeof(func), r->action);    lr_error |= ERROR_BAD_ACTION_TYPE;    func = 0;    }    }    } else {    werror("Missing object for action \"%s\"\n", -  a->action); +  r->action);    lr_error |= ERROR_NO_OBJECT;    func = 0;    }    }    if (func) { -  if (sizeof(a->symbols)) { -  value_stack->push(func(@value_stack->pop(sizeof(a->symbols)))); -  state = state_stack->pop(sizeof(a->symbols))[0]; +  if (sizeof(r->symbols)) { +  value_stack->push(([function]func)(@value_stack->pop(sizeof(r->symbols)))); +  state = ([array(Kernel)]state_stack->pop(sizeof(r->symbols)))[0];    } else { -  value_stack->push(a->action()); +  value_stack->push(r->action());    }    } else {    // Default action. -  if (sizeof(a->symbols)) { +  if (sizeof(r->symbols)) {   #if 0 -  value_stack->push(value_stack->pop(sizeof(a->symbols))[0]); +  value_stack->push(value_stack->pop(sizeof(r->symbols))[0]);   #else /* !0 */ -  if (sizeof(a->symbols) > 1) { -  value_stack->quick_pop(sizeof(a->symbols) - 1); +  if (sizeof(r->symbols) > 1) { +  value_stack->quick_pop(sizeof(r->symbols) - 1);    }   #endif /* 0 */ -  state = state_stack->pop(sizeof(a->symbols))[0]; +  state = ([array(Kernel)]state_stack->pop(sizeof(r->symbols)))[0];    } else {    value_stack->push(0);    }    }    } else {    // Default action. -  if (sizeof(a->symbols)) { +  if (sizeof(r->symbols)) {   #if 0 -  value_stack->push(value_stack->pop(sizeof(a->symbols))[0]); +  value_stack->push(value_stack->pop(sizeof(r->symbols))[0]);   #else /* !0 */ -  if (sizeof(a->symbols) > 1) { -  value_stack->quick_pop(sizeof(a->symbols) - 1); +  if (sizeof(r->symbols) > 1) { +  value_stack->quick_pop(sizeof(r->symbols) - 1);    }   #endif /* 0 */ -  state = state_stack->pop(sizeof(a->symbols))[0]; +  state = ([array(Kernel)]state_stack->pop(sizeof(r->symbols)))[0];    } else {    value_stack->push(0);    }    }       state_stack->push(state); -  state = state->action[a->nonterminal]; /* Goto */ +  state = [object(Kernel)]state->action[r->nonterminal]; /* Goto */    } else if (a) {    /* SHIFT or ACCEPT */    if (input == "") {    /* Only the final state is allowed to shift on ""(EOF) */    /* ACCEPT */    return(value_stack->pop());    }    /* SHIFT */    if (verbose) {    werror("Shifting \"%s\", value \"%O\"\n", input, value);    }    value_stack->push(value);    state_stack->push(state); -  state = a; +  state = [object(Kernel)]a;       value = scanner();       if (arrayp(value)) { -  input = value[0]; -  value = value[1]; +  input = ([array(string)]value)[0]; +  value = ([array(mixed)]value)[1];    } else { -  input = value; +  input = [string]value;    }    } else {    /* ERROR */    if (input = "") {    /* At end of file */    lr_error |= ERROR_EOF;       if (value_stack->ptr != 1) {    if (value_stack->ptr) {    werror("Error: Bad state at EOF -- Throwing \"%O\"\n",    value_stack->pop()); -  state=state_stack->pop(); +  state = [object(Kernel)]state_stack->pop();    } else {    werror("Error: Empty stack at EOF!\n");    return (0);    }    } else {    werror("Error: Bad state at EOF\n");    return(value_stack->pop());    }    } else {    lr_error |= ERROR_SYNTAX;       werror("Error: Bad input: \""+input+"\"(\""+value+"\")\n");       value = scanner();       if (arrayp(value)) { -  input = value[0]; -  value = value[1]; +  input = ([array(string)]value)[0]; +  value = ([array(mixed)]value)[1];    } else { -  input = value; +  input = [string]value;    }    }    }    }    }   }