tlibregexp: revert regexp fix - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 6d08a0f548c3d3eda199ee91a83aacd1f895718c
 (DIR) parent 2deda14e4268e7e8af4910d453db73e210d3eb58
 (HTM) Author: Russ Cox <rsc@swtch.com>
       Date:   Fri,  7 Dec 2007 17:33:41 -0500
       
       libregexp: revert regexp fix
       
       Diffstat:
         M src/libregexp/regaux.c              |     105 ++++++++++++++++---------------
         M src/libregexp/regcomp.c             |      10 +++-------
         M src/libregexp/regcomp.h             |       6 +++---
         M src/libregexp/regexec.c             |      48 ++++++++++++++++++-------------
         M src/libregexp/rregexec.c            |      61 +++++++++++++------------------
       
       5 files changed, 113 insertions(+), 117 deletions(-)
       ---
 (DIR) diff --git a/src/libregexp/regaux.c b/src/libregexp/regaux.c
       t@@ -23,89 +23,90 @@ _renewmatch(Resub *mp, int ms, Resublist *sp)
        }
        
        /*
       - * Add ip to the list [lp, elp], but only if it is not there already.
       - * These work lists are stored and processed in increasing
       - * order of sp[0], so if the ip is there already, the one that's
       - * there already is a more left match and takes priority.
       + * Note optimization in _renewthread:
       + *         *lp must be pending when _renewthread called; if *l has been looked
       + *                at already, the optimization is a bug.
         */
       -static Relist*
       -_renewthread1(Relist *lp,        /* Relist to add to */
       -        Relist *elp,                /* limit pointer for Relist */
       +extern Relist*
       +_renewthread(Relist *lp,        /* _relist to add to */
                Reinst *ip,                /* instruction to add */
                int ms,
                Resublist *sep)                /* pointers to subexpressions */
        {
                Relist *p;
        
       -        for(p=lp; p->inst; p++)
       -                if(p->inst == ip)
       +        for(p=lp; p->inst; p++){
       +                if(p->inst == ip){
       +                        if(sep->m[0].s.sp < p->se.m[0].s.sp){
       +                                if(ms > 1)
       +                                        p->se = *sep;
       +                                else
       +                                        p->se.m[0] = sep->m[0];
       +                        }
                                return 0;
       -        
       -        if(p == elp)        /* refuse to overflow buffer */
       -                return elp;
       -
       +                }
       +        }
                p->inst = ip;
                if(ms > 1)
                        p->se = *sep;
                else
                        p->se.m[0] = sep->m[0];
       -        (p+1)->inst = 0;
       +        (++p)->inst = 0;
                return p;
        }
        
       -extern int
       -_renewthread(Relist *lp, Relist *elp, Reinst *ip, int ms, Resublist *sep)
       -{
       -        Relist *ap;
       -
       -        ap = _renewthread1(lp, elp, ip, ms, sep);
       -        if(ap == 0)
       -                return 0;
       -        if(ap == elp)
       -                return -1;
       -
       -        /*
       -         * Added ip to list at ap.  
       -         * Expand any ORs right now, so that entire
       -         * work list ends up being sorted by increasing m[0].sp.
       -         */
       -        for(; ap->inst; ap++){
       -                if(ap->inst->type == OR){
       -                        if(_renewthread1(lp, elp, ap->inst->u1.right, ms, &ap->se) == elp)
       -                                return -1;
       -                        if(_renewthread1(lp, elp, ap->inst->u2.next, ms, &ap->se) == elp)
       -                                return -1;
       -                }
       -        }
       -        return 0;
       -}
       -
        /*
         * same as renewthread, but called with
         * initial empty start pointer.
         */
       -extern int
       +extern Relist*
        _renewemptythread(Relist *lp,        /* _relist to add to */
       -        Relist *elp,
                Reinst *ip,                /* instruction to add */
                int ms,
                char *sp)                /* pointers to subexpressions */
        {
       -        Resublist sep;
       -        
       +        Relist *p;
       +
       +        for(p=lp; p->inst; p++){
       +                if(p->inst == ip){
       +                        if(sp < p->se.m[0].s.sp) {
       +                                if(ms > 1)
       +                                        memset(&p->se, 0, sizeof(p->se));
       +                                p->se.m[0].s.sp = sp;
       +                        }
       +                        return 0;
       +                }
       +        }
       +        p->inst = ip;
                if(ms > 1)
       -                memset(&sep, 0, sizeof sep);
       -        sep.m[0].s.sp = sp;
       -        sep.m[0].e.ep = 0;
       -        return _renewthread(lp, elp, ip, ms, &sep);
       +                memset(&p->se, 0, sizeof(p->se));
       +        p->se.m[0].s.sp = sp;
       +        (++p)->inst = 0;
       +        return p;
        }
        
       -extern int
       +extern Relist*
        _rrenewemptythread(Relist *lp,        /* _relist to add to */
       -        Relist *elp,
                Reinst *ip,                /* instruction to add */
                int ms,
                Rune *rsp)                /* pointers to subexpressions */
        {
       -        return _renewemptythread(lp, elp, ip, ms, (char*)rsp);
       +        Relist *p;
       +
       +        for(p=lp; p->inst; p++){
       +                if(p->inst == ip){
       +                        if(rsp < p->se.m[0].s.rsp) {
       +                                if(ms > 1)
       +                                        memset(&p->se, 0, sizeof(p->se));
       +                                p->se.m[0].s.rsp = rsp;
       +                        }
       +                        return 0;
       +                }
       +        }
       +        p->inst = ip;
       +        if(ms > 1)
       +                memset(&p->se, 0, sizeof(p->se));
       +        p->se.m[0].s.rsp = rsp;
       +        (++p)->inst = 0;
       +        return p;
        }
 (DIR) diff --git a/src/libregexp/regcomp.c b/src/libregexp/regcomp.c
       t@@ -232,7 +232,7 @@ optimize(Reprog *pp)
                int size;
                Reprog *npp;
                Reclass *cl;
       -        int diff, proglen;
       +        int diff;
        
                /*
                 *  get rid of NOOP chains
       t@@ -249,13 +249,10 @@ optimize(Reprog *pp)
                 *  necessary.  Reallocate to the actual space used
                 *  and then relocate the code.
                 */
       -        proglen = freep - pp->firstinst;
       -        size = sizeof(Reprog) + proglen*sizeof(Reinst);
       +        size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
                npp = realloc(pp, size);
       -        if(npp==0 || npp==pp){
       -                pp->proglen = proglen;
       +        if(npp==0 || npp==pp)
                        return pp;
       -        }
                diff = (char *)npp - (char *)pp;
                freep = (Reinst *)((char *)freep + diff);
                for(inst=npp->firstinst; inst<freep; inst++){
       t@@ -276,7 +273,6 @@ optimize(Reprog *pp)
                        *(char**)(void*)&inst->u2.left += diff;
                }
                *(char**)(void*)&npp->startinst += diff;
       -        npp->proglen = proglen;
                return npp;
        }
        
 (DIR) diff --git a/src/libregexp/regcomp.h b/src/libregexp/regcomp.h
       t@@ -68,7 +68,7 @@ struct        Reljunk
                Rune*        reol;
        };
        
       -extern int        _renewthread(Relist*, Relist*, Reinst*, int, Resublist*);
       +extern Relist*        _renewthread(Relist*, Reinst*, int, Resublist*);
        extern void        _renewmatch(Resub*, int, Resublist*);
       -extern int        _renewemptythread(Relist*, Relist*, Reinst*, int, char*);
       -extern int        _rrenewemptythread(Relist*, Relist*, Reinst*, int, Rune*);
       +extern Relist*        _renewemptythread(Relist*, Reinst*, int, char*);
       +extern Relist*        _rrenewemptythread(Relist*, Reinst*, int, Rune*);
 (DIR) diff --git a/src/libregexp/regexec.c b/src/libregexp/regexec.c
       t@@ -2,6 +2,7 @@
        #include "regexp9.h"
        #include "regcomp.h"
        
       +
        /*
         *  return        0 if no match
         *                >0 if a match
       t@@ -12,14 +13,16 @@ regexec1(Reprog *progp,        /* program to run */
                char *bol,        /* string to run machine on */
                Resub *mp,        /* subexpression elements */
                int ms,                /* number of elements at mp */
       -        Reljunk *j)
       +        Reljunk *j
       +)
        {
                int flag=0;
                Reinst *inst;
                Relist *tlp;
                char *s;
       -        int i, checkstart, n;
       +        int i, checkstart;
                Rune r, *rp, *ep;
       +        int n;
                Relist* tl;                /* This list, next list */
                Relist* nl;
                Relist* tle;                /* ends of this and next list */
       t@@ -45,7 +48,7 @@ regexec1(Reprog *progp,        /* program to run */
                                switch(j->starttype) {
                                case RUNE:
                                        p = utfrune(s, j->startchar);
       -                                if(p == 0 || (j->eol && p >= j->eol))
       +                                if(p == 0 || s == j->eol)
                                                return match;
                                        s = p;
                                        break;
       t@@ -53,7 +56,7 @@ regexec1(Reprog *progp,        /* program to run */
                                        if(s == bol)
                                                break;
                                        p = utfrune(s, '\n');
       -                                if(p == 0 || (j->eol && p+1 >= j->eol))
       +                                if(p == 0 || s == j->eol)
                                                return match;
                                        s = p+1;
                                        break;
       t@@ -74,16 +77,17 @@ regexec1(Reprog *progp,        /* program to run */
        
                        /* Add first instruction to current list */
                        if(match == 0)
       -                        _renewemptythread(tl, tle, progp->startinst, ms, s);
       +                        _renewemptythread(tl, progp->startinst, ms, s);
        
                        /* Execute machine until current list is empty */
       -                for(tlp=tl; tlp->inst; tlp++){
       +                for(tlp=tl; tlp->inst; tlp++){        /* assignment = */
                                for(inst = tlp->inst; ; inst = inst->u2.next){
                                        switch(inst->type){
                                        case RUNE:        /* regular character */
       -                                        if(inst->u1.r == r)
       -                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
       +                                        if(inst->u1.r == r){
       +                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
                                                                return -1;
       +                                        }
                                                break;
                                        case LBRA:
                                                tlp->se.m[inst->u1.subid].s.sp = s;
       t@@ -93,11 +97,11 @@ regexec1(Reprog *progp,        /* program to run */
                                                continue;
                                        case ANY:
                                                if(r != '\n')
       -                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
       +                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
                                                                return -1;
                                                break;
                                        case ANYNL:
       -                                        if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
       +                                        if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
                                                                return -1;
                                                break;
                                        case BOL:
       t@@ -112,7 +116,7 @@ regexec1(Reprog *progp,        /* program to run */
                                                ep = inst->u1.cp->end;
                                                for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
                                                        if(r >= rp[0] && r <= rp[1]){
       -                                                        if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
       +                                                        if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
                                                                        return -1;
                                                                break;
                                                        }
       t@@ -123,12 +127,15 @@ regexec1(Reprog *progp,        /* program to run */
                                                        if(r >= rp[0] && r <= rp[1])
                                                                break;
                                                if(rp == ep)
       -                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
       +                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
                                                                return -1;
                                                break;
                                        case OR:
       -                                        /* expanded during renewthread; just a place holder */
       -                                        break;
       +                                        /* evaluate right choice later */
       +                                        if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
       +                                                return -1;
       +                                        /* efficiency: advance and re-evaluate */
       +                                        continue;
                                        case END:        /* Match! */
                                                match = 1;
                                                tlp->se.m[0].e.ep = s;
       t@@ -158,18 +165,19 @@ regexec2(Reprog *progp,        /* program to run */
                int rv;
                Relist *relist0, *relist1;
        
       -        relist0 = malloc((progp->proglen+1)*sizeof(Relist));
       +        /* mark space */
       +        relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
                if(relist0 == nil)
                        return -1;
       -        relist1 = malloc((progp->proglen+1)*sizeof(Relist));
       +        relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
                if(relist1 == nil){
                        free(relist1);
                        return -1;
                }
                j->relist[0] = relist0;
                j->relist[1] = relist1;
       -        j->reliste[0] = relist0 + progp->proglen;
       -        j->reliste[1] = relist1 + progp->proglen;
       +        j->reliste[0] = relist0 + BIGLISTSIZE - 2;
       +        j->reliste[1] = relist1 + BIGLISTSIZE - 2;
        
                rv = regexec1(progp, bol, mp, ms, j);
                free(relist0);
       t@@ -210,8 +218,8 @@ regexec(Reprog *progp,        /* program to run */
                /* mark space */
                j.relist[0] = relist0;
                j.relist[1] = relist1;
       -        j.reliste[0] = relist0 + nelem(relist0) - 1;
       -        j.reliste[1] = relist1 + nelem(relist1) - 1;
       +        j.reliste[0] = relist0 + nelem(relist0) - 2;
       +        j.reliste[1] = relist1 + nelem(relist1) - 2;
        
                rv = regexec1(progp, bol, mp, ms, &j);
                if(rv >= 0)
 (DIR) diff --git a/src/libregexp/rregexec.c b/src/libregexp/rregexec.c
       t@@ -9,9 +9,9 @@
         */
        static int
        rregexec1(Reprog *progp,        /* program to run */
       -        Rune *bol,        /* string to run machine on */
       -        Resub *mp,        /* subexpression elements */
       -        int ms,                /* number of elements at mp */
       +        Rune *bol,                /* string to run machine on */
       +        Resub *mp,                /* subexpression elements */
       +        int ms,                        /* number of elements at mp */
                Reljunk *j)
        {
                int flag=0;
       t@@ -28,7 +28,7 @@ rregexec1(Reprog *progp,        /* program to run */
                Rune *p;
        
                match = 0;
       -        checkstart = j->starttype;
       +        checkstart = j->startchar;
                if(mp)
                        for(i=0; i<ms; i++) {
                                mp[i].s.rsp = 0;
       t@@ -46,7 +46,7 @@ rregexec1(Reprog *progp,        /* program to run */
                                switch(j->starttype) {
                                case RUNE:
                                        p = runestrchr(s, j->startchar);
       -                                if(p == 0 || (j->reol && p >= j->reol))
       +                                if(p == 0 || p == j->reol)
                                                return match;
                                        s = p;
                                        break;
       t@@ -54,7 +54,7 @@ rregexec1(Reprog *progp,        /* program to run */
                                        if(s == bol)
                                                break;
                                        p = runestrchr(s, '\n');
       -                                if(p == 0 || (j->reol && p+1 >= j->reol))
       +                                if(p == 0 || s == j->reol)
                                                return match;
                                        s = p+1;
                                        break;
       t@@ -71,16 +71,15 @@ rregexec1(Reprog *progp,        /* program to run */
                        nl->inst = 0;
        
                        /* Add first instruction to current list */
       -                if(match == 0)
       -                        _rrenewemptythread(tl, tle, progp->startinst, ms, s);
       +                _rrenewemptythread(tl, progp->startinst, ms, s);
        
                        /* Execute machine until current list is empty */
                        for(tlp=tl; tlp->inst; tlp++){
       -                        for(inst = tlp->inst; ; inst = inst->u2.next){
       +                        for(inst=tlp->inst; ; inst = inst->u2.next){
                                        switch(inst->type){
                                        case RUNE:        /* regular character */
                                                if(inst->u1.r == r)
       -                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
       +                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
                                                                return -1;
                                                break;
                                        case LBRA:
       t@@ -91,11 +90,11 @@ rregexec1(Reprog *progp,        /* program to run */
                                                continue;
                                        case ANY:
                                                if(r != '\n')
       -                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
       +                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
                                                                return -1;
                                                break;
                                        case ANYNL:
       -                                        if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
       +                                        if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
                                                                return -1;
                                                break;
                                        case BOL:
       t@@ -110,7 +109,7 @@ rregexec1(Reprog *progp,        /* program to run */
                                                ep = inst->u1.cp->end;
                                                for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
                                                        if(r >= rp[0] && r <= rp[1]){
       -                                                        if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
       +                                                        if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
                                                                        return -1;
                                                                break;
                                                        }
       t@@ -121,12 +120,15 @@ rregexec1(Reprog *progp,        /* program to run */
                                                        if(r >= rp[0] && r <= rp[1])
                                                                break;
                                                if(rp == ep)
       -                                                if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
       +                                                if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
                                                                return -1;
                                                break;
                                        case OR:
       -                                        /* expanded during renewthread; just a place holder */
       -                                        break;
       +                                        /* evaluate right choice later */
       +                                        if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
       +                                                return -1;
       +                                        /* efficiency: advance and re-evaluate */
       +                                        continue;
                                        case END:        /* Match! */
                                                match = 1;
                                                tlp->se.m[0].e.rep = s;
       t@@ -139,7 +141,7 @@ rregexec1(Reprog *progp,        /* program to run */
                        }
                        if(s == j->reol)
                                break;
       -                checkstart = j->starttype && nl->inst==0;
       +                checkstart = j->startchar && nl->inst==0;
                        s++;
                }while(r);
                return match;
       t@@ -153,26 +155,15 @@ rregexec2(Reprog *progp,        /* program to run */
                Reljunk *j
        )
        {
       -        int rv;
       -        Relist *relist0, *relist1;
       +        Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
        
       -        relist0 = malloc((progp->proglen+1)*sizeof(Relist));
       -        if(relist0 == nil)
       -                return -1;
       -        relist1 = malloc((progp->proglen+1)*sizeof(Relist));
       -        if(relist1 == nil){
       -                free(relist1);
       -                return -1;
       -        }
       +        /* mark space */
                j->relist[0] = relist0;
                j->relist[1] = relist1;
       -        j->reliste[0] = relist0 + progp->proglen;
       -        j->reliste[1] = relist1 + progp->proglen;
       +        j->reliste[0] = relist0 + nelem(relist0) - 2;
       +        j->reliste[1] = relist1 + nelem(relist1) - 2;
        
       -        rv = rregexec1(progp, bol, mp, ms, j);
       -        free(relist0);
       -        free(relist1);
       -        return rv;
       +        return rregexec1(progp, bol, mp, ms, j);
        }
        
        extern int
       t@@ -208,8 +199,8 @@ rregexec(Reprog *progp,        /* program to run */
                /* mark space */
                j.relist[0] = relist0;
                j.relist[1] = relist1;
       -        j.reliste[0] = relist0 + nelem(relist0) - 1;
       -        j.reliste[1] = relist1 + nelem(relist1) - 1;
       +        j.reliste[0] = relist0 + nelem(relist0) - 2;
       +        j.reliste[1] = relist1 + nelem(relist1) - 2;
        
                rv = rregexec1(progp, bol, mp, ms, &j);
                if(rv >= 0)