Branch: Tag:

2010-10-10

2010-10-10 21:56:33 by Martin Stjernholm <mast@lysator.liu.se>

Optimize single-char plus just like single-char star.

This avoids recursion when matching expressions like "x+", in the same way as is already done for "x*".

2:   || This file is part of Pike. For copyright information see COPYRIGHT.   || Pike is distributed under GPL, LGPL and MPL. See the file COPYING   || for more information. - || $Id: pike_regexp.c,v 1.28 2010/08/16 19:00:19 mast Exp $ + || $Id$   */      /*
129:    * times. */   #define WORDSTART 11 /* node matching a start of a word */   #define WORDEND 12 /* node matching an end of a word */ + #define KPLUS 13 /* node Match this (simple) thing 1 or more +  * times. */   #define OPEN 20 /* no Mark this point in input as start of    * #n. */    /* OPEN+1 is number 1, etc. */
148:    * BACK Normal "nxt" pointers all implicitly point forward; BACK    * exists to make loop structures possible.    * -  * STAR complex '*', are implemented as circular BRANCH structures +  * STAR,KPLUS Complex cases are implemented as circular BRANCH structures    * using BACK. Simple cases (one character per match) are -  * implemented with STAR for speed and to minimize recursive -  * plunges. +  * implemented with STAR or KPLUS for speed and to minimize +  * recursive plunges.    *    * OPEN,CLOSE ...are numbered at compile time.    */
203:    * Flags to be passed up and down.    */   #define HASWIDTH 01 /* Known never to match null string. */ - #define SIMPLE 02 /* Simple enough to be STAR operand. */ + #define SIMPLE 02 /* Simple enough to be STAR or KPLUS operand. */   #define SPSTART 04 /* Starts with * */   #define WORST 0 /* Worst case. */   
470:   }      /* -  - regpiece - something followed by possible [*] +  - regpiece - something followed by possible [*] or [+]    *    * Note that the branching code sequence used for * is somewhat optimized:    * they use the same NOTHING node as both the endmarker for their branch
515:    }    else if(op == PLUS)    { +  if (flags & SIMPLE) +  { +  reginsert(KPLUS, ret); +  } +  else +  {    /* Emit a+ as (a&) where & means "self" /Fredrik Hubinette */    char *tmp;    tmp=regnode(BACK);
524:    regtail(ret, regnode(BRANCH));    regtail(ret, regnode(NOTHING));    } +  }       regparse++;    if (ISMULT(*regparse))
973:    }    }    break; +  case KPLUS:    case STAR:{    register char nextch;    register ptrdiff_t no;
1254:    p = "STAR";    break;    +  case KPLUS: +  p = "KPLUS"; +  break; +     default:    if(OP(op) >= OPEN && OP(op) < OPEN+NSUBEXP)    {