pike.git / src / modules / Regexp / pike_regexp.c

version» Context lines:

pike.git/src/modules/Regexp/pike_regexp.c:1:   /*   || This file is part of Pike. For copyright information see COPYRIGHT.   || Pike is distributed under GPL, LGPL and MPL. See the file COPYING   || for more information. - || $Id: pike_regexp.c,v 1.28 2010/08/16 19:00:19 mast Exp $ + || $Id$   */      /*    * regexp.c - regular expression matching    *    * DESCRIPTION    *    * Underneath the reformatting and comment blocks which were added to    * make it consistent with the rest of the code, you will find a    * modified version of Henry Spencer's regular expression library.
pike.git/src/modules/Regexp/pike_regexp.c:122:    * string. */   #define BRANCH 6 /* node Match this alternative, or the    * nxt... */   #define BACK 7 /* no Match "", "nxt" ptr points backward. */   #define EXACTLY 8 /* str Match this string. */   #define NOTHING 9 /* no Match empty string. */   #define STAR 10 /* node Match this (simple) thing 0 or more    * times. */   #define WORDSTART 11 /* node matching a start of a word */   #define WORDEND 12 /* node matching an end of a word */ + #define KPLUS 13 /* node Match this (simple) thing 1 or more +  * times. */   #define OPEN 20 /* no Mark this point in input as start of    * #n. */    /* OPEN+1 is number 1, etc. */   #define CLOSE (OPEN+NSUBEXP) /* no Analogous to OPEN. */      /*    * Opcode notes:    *    * BRANCH The set of branches constituting a single choice are hooked    * together with their "nxt" pointers, since precedence prevents    * anything being concatenated to any individual branch. The    * "nxt" pointer of the last BRANCH in a choice points to the    * thing following the whole choice. This is also where the    * final "nxt" pointer of each individual branch points; each    * branch starts with the operand node of a BRANCH node.    *    * BACK Normal "nxt" pointers all implicitly point forward; BACK    * exists to make loop structures possible.    * -  * STAR complex '*', are implemented as circular BRANCH structures +  * STAR,KPLUS Complex cases are implemented as circular BRANCH structures    * using BACK. Simple cases (one character per match) are -  * implemented with STAR for speed and to minimize recursive -  * plunges. +  * implemented with STAR or KPLUS for speed and to minimize +  * recursive plunges.    *    * OPEN,CLOSE ...are numbered at compile time.    */      /*    * A node is one char of opcode followed by two chars of "nxt" pointer.    * "Nxt" pointers are stored as two 8-bit pieces, high order first. 
The    * value is a positive offset from the opcode of the node containing it.    * An operand, if any, simply follows the node. (Note that much of the    * code generation knows about this implicit relationship.)
pike.git/src/modules/Regexp/pike_regexp.c:196:   #define UCHARAT(p) ((int)*(unsigned char *)(p))   #else   #define UCHARAT(p) ((int)*(p)&CHARBITS)   #endif   #define ISWORDPART(c) ( isalnum(c) || (c) == '_' )      /*    * Flags to be passed up and down.    */   #define HASWIDTH 01 /* Known never to match null string. */ - #define SIMPLE 02 /* Simple enough to be STAR operand. */ + #define SIMPLE 02 /* Simple enough to be STAR or KPLUS operand. */   #define SPSTART 04 /* Starts with * */   #define WORST 0 /* Worst case. */      /*    * Global work variables for regcomp().    */   static short *regparse; /* Input-scan pointer. */   static int regnpar; /* () count. */   static char regdummy;   static char *regcode; /* Code-emit pointer; &regdummy = don't. */
pike.git/src/modules/Regexp/pike_regexp.c:463:    regtail(chain, latest);    chain = latest;    }    if (chain == (char *)NULL) /* Loop ran zero times. */    regnode(NOTHING);       return (ret);   }      /* -  - regpiece - something followed by possible [*] +  - regpiece - something followed by possible [*] or [+]    *    * Note that the branching code sequence used for * is somewhat optimized:    * it uses the same NOTHING node as both the endmarker for its branch    * list and the body of the last branch. It might seem that this node could    * be dispensed with entirely, but the endmarker role is not redundant.    */   static char *regpiece(int *flagp)   {    register char *ret;    register short op;
pike.git/src/modules/Regexp/pike_regexp.c:508:    /* Emit x* as (x&|), where & means "self". */    reginsert(BRANCH, ret); /* Either x */    regoptail(ret, regnode(BACK)); /* and loop */    regoptail(ret, ret); /* back */    regtail(ret, regnode(BRANCH)); /* or */    regtail(ret, regnode(NOTHING)); /* null. */    }    }    else if(op == PLUS)    { +  if (flags & SIMPLE) +  { +  reginsert(KPLUS, ret); +  } +  else +  {    /* Emit a+ as (a&) where & means "self" /Fredrik Hubinette */    char *tmp;    tmp=regnode(BACK);    reginsert(BRANCH, tmp);    regtail(ret, tmp);    regoptail(tmp, ret);    regtail(ret, regnode(BRANCH));    regtail(ret, regnode(NOTHING));    } -  +  }       regparse++;    if (ISMULT(*regparse))    FAIL("nested * or +");       return (ret);   }         /*
pike.git/src/modules/Regexp/pike_regexp.c:966:    if (regmatch(OPERAND(scan)))    return (1);    reginput = save;    scan = regnext(scan);    } while (scan != (char *)NULL && OP(scan) == BRANCH);    return (0);    /* NOTREACHED */    }    }    break; +  case KPLUS:    case STAR:{    register char nextch;    register ptrdiff_t no;    register char *save;    register ptrdiff_t minimum;       /*    * Lookahead to avoid useless match attempts when we know    * what character comes next.    */
pike.git/src/modules/Regexp/pike_regexp.c:1247:    p = "BACK";    break;    case END:    p = "END";    break;       case STAR:    p = "STAR";    break;    +  case KPLUS: +  p = "KPLUS"; +  break; +     default:    if(OP(op) >= OPEN && OP(op) < OPEN+NSUBEXP)    {    sprintf(buf + strlen(buf), "OPEN%d", OP(op) - OPEN);    p = (char *)NULL;    break;    }    if(OP(op) >= CLOSE && OP(op) < CLOSE+NSUBEXP)    {    sprintf(buf + strlen(buf), "CLOSE%d", OP(op) - CLOSE);