pike.git / src / modules / Regexp / pike_regexp.c

version» Context lines:

pike.git/src/modules/Regexp/pike_regexp.c:92:    * potentially expensive (at present, the only such thing detected is * or +    * at the start of the r.e., which can involve a lot of backup). Regmlen is    * supplied because the test in regexec() needs it and regcomp() is computing    * it anyway.    */      /*    * Structure for regexp "program". This is essentially a linear encoding    * of a nondeterministic finite-state machine (aka syntax charts or    * "railroad normal form" in parsing technology). Each node is an opcode -  * plus a "nxt" pointer, possibly plus an operand. "Nxt" pointers of -  * all nodes except BRANCH implement concatenation; a "nxt" pointer with +  * plus a "next" pointer, possibly plus an operand. "Next" pointers of +  * all nodes except BRANCH implement concatenation; a "next" pointer with    * a BRANCH on both ends of it is connecting two alternatives. (Here we    * have one of the subtle syntax dependencies: an individual BRANCH (as    * opposed to a collection of them) is never concatenated with anything    * because of operator precedence.) The operand of some types of node is    * a literal string; for others, it is a node leading into a sub-FSM. In    * particular, the operand of a BRANCH node is the first node of the branch.    * (NB this is *not* a tree structure: the tail of the branch connects    * to the thing following the set of BRANCHes.) The opcodes are:    */    - /* definition number opnd? meaning */ - #define END 0 /* no End of program. */ - #define BOL 1 /* no Match "" at beginning of line. */ - #define EOL 2 /* no Match "" at end of line. */ - #define ANY 3 /* no Match any one character. */ - #define ANYOF 4 /* str Match any character in this string. */ - #define ANYBUT 5 /* str Match any character not in this + /* definition number opnd? meaning */ + #define END 0 /* no End of program. */ + #define BOL 1 /* no Match "" at beginning of line. */ + #define EOL 2 /* no Match "" at end of line. */ + #define ANY 3 /* no Match any one character. */ + #define ANYOF 4 /* str Match any character in this string. */ + #define ANYBUT 5 /* str Match any character not in this    * string. */ - #define BRANCH 6 /* node Match this alternative, or the -  * nxt... */ - #define BACK 7 /* no Match "", "nxt" ptr points backward. */ - #define EXACTLY 8 /* str Match this string. */ - #define NOTHING 9 /* no Match empty string. */ - #define STAR 10 /* node Match this (simple) thing 0 or more + #define BRANCH 6 /* node Match this alternative, or the +  * next... */ + #define BACK 7 /* no Match "", "next" ptr points backward. */ + #define EXACTLY 8 /* str Match this string. */ + #define NOTHING 9 /* no Match empty string. */ + #define STAR 10 /* node Match this (simple) thing 0 or more    * times. */   #define WORDSTART 11 /* node matching a start of a word */ - #define WORDEND 12 /* node matching an end of a word */ - #define KPLUS 13 /* node Match this (simple) thing 1 or more + #define WORDEND 12 /* node matching an end of a word */ + #define KPLUS 13 /* node Match this (simple) thing 1 or more    * times. */ - #define OPEN 20 /* no Mark this point in input as start of + #define OPEN 20 /* no Mark this point in input as start of    * #n. */    /* OPEN+1 is number 1, etc. */   #define CLOSE (OPEN+NSUBEXP) /* no Analogous to OPEN. */      /*    * Opcode notes:    *    * BRANCH The set of branches constituting a single choice are hooked -  * together with their "nxt" pointers, since precedence prevents +  * together with their "next" pointers, since precedence prevents    * anything being concatenated to any individual branch. The -  * "nxt" pointer of the last BRANCH in a choice points to the +  * "next" pointer of the last BRANCH in a choice points to the    * thing following the whole choice. This is also where the -  * final "nxt" pointer of each individual branch points; each +  * final "next" pointer of each individual branch points; each    * branch starts with the operand node of a BRANCH node.    * -  * BACK Normal "nxt" pointers all implicitly point forward; BACK +  * BACK Normal "next" pointers all implicitly point forward; BACK    * exists to make loop structures possible.    *    * STAR,KPLUS Complex cases are implemented as circular BRANCH structures    * using BACK. Simple cases (one character per match) are    * implemented with STAR or KPLUS for speed and to minimize    * recursive plunges.    *    * OPEN,CLOSE ...are numbered at compile time.    */      /* -  * A node is one char of opcode followed by two chars of "nxt" pointer. -  * "Nxt" pointers are stored as two 8-bit pieces, high order first. The +  * A node is one char of opcode followed by two chars of "next" pointer. +  * "Next" pointers are stored as two 8-bit pieces, high order first. The    * value is a positive offset from the opcode of the node containing it.    * An operand, if any, simply follows the node. (Note that much of the    * code generation knows about this implicit relationship.)    * -  * Using two bytes for the "nxt" pointer is vast overkill for most things, +  * Using two bytes for the "next" pointer is vast overkill for most things,    * but allows patterns to get big without disasters.    */   #define OP(p) (*(p))   #define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))   #define OPERAND(p) ((p) + 3)      /*    * Utility definitions.    */   
pike.git/src/modules/Regexp/pike_regexp.c:675:    register char *ret;    register char *ptr;       ret = regcode;    if (ret == &regdummy) {    regsize += 3;    return (ret);    }    ptr = ret;    *ptr++ = op; -  *ptr++ = '\0'; /* Null "nxt" pointer. */ +  *ptr++ = '\0'; /* Null "next" pointer. */    *ptr++ = '\0';    regcode = ptr;       return (ret);   }      /*    - regc - emit (if appropriate) a byte of code    */   static void regc(char b)
pike.git/src/modules/Regexp/pike_regexp.c:865:    * Conceptually the strategy is simple: check to see whether the current    * node matches, call self recursively to see whether the rest matches,    * and then act accordingly. In practice we make some effort to avoid    * recursion, in particular by going through "ordinary" nodes (that don't    * need to know whether the rest of the match failed) by a loop instead of    * by recursion.    */   static int regmatch(char *prog)   {    register char *scan; /* Current node. */ -  char *next; /* nxt node. */ +  char *next; /* next node. */       check_c_stack (4 * sizeof (void *));       scan = prog;   #ifdef PIKE_DEBUG    if (scan != NULL && regnarrate)    fprintf(stderr, "%s(\n", regprop(scan));   #endif    while (scan != NULL) {   #ifdef PIKE_DEBUG
pike.git/src/modules/Regexp/pike_regexp.c:1073:    default: /* Oh dear. Called inappropriately. */    regerror("internal foulup");    return(0); /* Best compromise. */    break;    }    /* NOREACHED */   }         /* -  - regnext - dig the "nxt" pointer out of a node +  - regnext - dig the "next" pointer out of a node    */   static char *regnext(register char *p)   {    register int offset;       if (p == &regdummy)    return (NULL);       offset = NEXT(p);    if (offset == 0)