tsam: regexp fix (see libregexp change) - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 608a09284ecf721781b2145ea62cce82f4f44712
 (DIR) parent 6f16e7fc1b35c2e99c9dc069e6ec81a9ac07ca21
 (HTM) Author: Russ Cox <rsc@swtch.com>
       Date:   Fri,  7 Dec 2007 15:33:58 -0500
       
       sam: regexp fix (see libregexp change)
       
       Diffstat:
         M src/cmd/sam/regexp.c                |     108 +++++++++++++++++--------------
       
       1 file changed, 59 insertions(+), 49 deletions(-)
       ---
 (DIR) diff --git a/src/cmd/sam/regexp.c b/src/cmd/sam/regexp.c
       t@@ -527,26 +527,56 @@ classmatch(int classno, int c, int negate)
        }
        
        /*
       - * Note optimization in addinst:
       - *         *l must be pending when addinst called; if *l has been looked
       - *                at already, the optimization is a bug.
       + * Add inst to the list [l, l+NLIST), but only if it is not there already.
       + * These work lists are stored and processed in increasing
       + * order of sep->p[0], so if the inst is there already, the one 
       + * there already is a more left match and takes priority.
       + * (This works for backward searches too, because there
       + * is no explicit comparison.)
         */
       -int
       -addinst(Ilist *l, Inst *inst, Rangeset *sep)
       +Ilist*
       +addinst1(Ilist *l, Inst *inst, Rangeset *sep)
        {
                Ilist *p;
        
       -        for(p = l; p->inst; p++){
       -                if(p->inst==inst){
       -                        if((sep)->p[0].p1 < p->se.p[0].p1)
       -                                p->se= *sep;        /* this would be bug */
       -                        return 0;        /* It's already there */
       -                }
       -        }
       +        for(p = l; p->inst; p++)
       +                if(p->inst==inst)
       +                        return 0;
       +        
       +        if(p == l+NLIST)
       +                return l+NLIST;
       +        
                p->inst = inst;
       -        p->se= *sep;
       +        p->se = *sep;
                (p+1)->inst = 0;
       -        return 1;
       +        return p;
       +}
       +
       +int
       +addinst(Ilist *l, Inst *inst, Rangeset *sep)
       +{
       +        Ilist *ap;
       +        
       +        ap = addinst1(l, inst, sep);
       +        if(ap == 0)
       +                return 0;
       +        if(ap == l+NLIST)
       +                return -1;
       +        
       +        /*
       +         * Added inst to list at ap.
       +         * Expand any ORs right now, so that entire 
       +         * work list ends up being sorted by increasing sep->p[0].
       +         */
       +        for(; ap->inst; ap++){
       +                if(ap->inst->type == OR){
       +                        if(addinst1(l, ap->inst->left, &ap->se) == l+NLIST)
       +                                return -1;
       +                        if(addinst1(l, ap->inst->right, &ap->se) == l+NLIST)
       +                                return -1;
       +                }
       +        }
       +        return 0;
        }
        
        int
       t@@ -556,13 +586,13 @@ execute(File *f, Posn startp, Posn eof)
                Inst *inst;
                Ilist *tlp;
                Posn p = startp;
       -        int nnl = 0, ntl;
                int c;
                int wrapped = 0;
                int startchar = startinst->type<OPERATOR? startinst->type : 0;
        
                list[0][0].inst = list[1][0].inst = 0;
                sel.p[0].p1 = -1;
       +        nl = list[0];
                /* Execute machine once for each character */
                for(;;p++){
                doloop:
       t@@ -581,21 +611,18 @@ execute(File *f, Posn startp, Posn eof)
                                default:
                                        goto Return;
                                }
       -                }else if(((wrapped && p>=startp) || sel.p[0].p1>0) && nnl==0)
       +                }else if(((wrapped && p>=startp) || sel.p[0].p1>0) && nl->inst==0)
                                break;
                        /* fast check for first char */
       -                if(startchar && nnl==0 && c!=startchar)
       +                if(startchar && nl->inst==0 && c!=startchar)
                                continue;
                        tl = list[flag];
                        nl = list[flag^=1];
                        nl->inst = 0;
       -                ntl = nnl;
       -                nnl = 0;
                        if(sel.p[0].p1<0 && (!wrapped || p<startp || startp==eof)){
                                /* Add first instruction to this list */
                                sempty.p[0].p1 = p;
       -                        if(addinst(tl, startinst, &sempty))
       -                        if(++ntl >= NLIST)
       +                        if(addinst(tl, startinst, &sempty) < 0)
                Overflow:
                                        error(Eoverflow);
                        }
       t@@ -606,8 +633,7 @@ execute(File *f, Posn startp, Posn eof)
                                default:        /* regular character */
                                        if(inst->type==c){
                Addinst:
       -                                        if(addinst(nl, inst->next, &tlp->se))
       -                                        if(++nnl >= NLIST)
       +                                        if(addinst(nl, inst->next, &tlp->se) < 0)
                                                        goto Overflow;
                                        }
                                        break;
       t@@ -645,13 +671,8 @@ execute(File *f, Posn startp, Posn eof)
                                                goto Addinst;
                                        break;
                                case OR:
       -                                /* evaluate right choice later */
       -                                if(addinst(tl, inst->right, &tlp->se))
       -                                if(++ntl >= NLIST)
       -                                        goto Overflow;
       -                                /* efficiency: advance and re-evaluate */
       -                                inst = inst->left;
       -                                goto Switchstmt;
       +                                /* already expanded; just a placeholder */
       +                                break;
                                case END:        /* Match! */
                                        tlp->se.p[0].p2 = p;
                                        newmatch(&tlp->se);
       t@@ -681,13 +702,13 @@ bexecute(File *f, Posn startp)
                Inst *inst;
                Ilist *tlp;
                Posn p = startp;
       -        int nnl = 0, ntl;
                int c;
                int wrapped = 0;
                int startchar = bstartinst->type<OPERATOR? bstartinst->type : 0;
        
                list[0][0].inst = list[1][0].inst = 0;
                sel.p[0].p1= -1;
       +        nl = list[0];
                /* Execute machine once for each character, including terminal NUL */
                for(;;--p){
                doloop:
       t@@ -706,22 +727,18 @@ bexecute(File *f, Posn startp)
                                default:
                                        goto Return;
                                }
       -                }else if(((wrapped && p<=startp) || sel.p[0].p1>0) && nnl==0)
       +                }else if(((wrapped && p<=startp) || sel.p[0].p1>0) && nl->inst==0)
                                break;
                        /* fast check for first char */
       -                if(startchar && nnl==0 && c!=startchar)
       +                if(startchar && nl->inst==0 && c!=startchar)
                                continue;
                        tl = list[flag];
                        nl = list[flag^=1];
                        nl->inst = 0;
       -                ntl = nnl;
       -                nnl = 0;
                        if(sel.p[0].p1<0 && (!wrapped || p>startp)){
                                /* Add first instruction to this list */
       -                        /* the minus is so the optimizations in addinst work */
       -                        sempty.p[0].p1 = -p;
       -                        if(addinst(tl, bstartinst, &sempty))
       -                        if(++ntl >= NLIST)
       +                        sempty.p[0].p1 = p;
       +                        if(addinst(tl, bstartinst, &sempty) < 0)
                Overflow:
                                        error(Eoverflow);
                        }
       t@@ -732,8 +749,7 @@ bexecute(File *f, Posn startp)
                                default:        /* regular character */
                                        if(inst->type == c){
                Addinst:
       -                                        if(addinst(nl, inst->next, &tlp->se))
       -                                        if(++nnl >= NLIST)
       +                                        if(addinst(nl, inst->next, &tlp->se) < 0)
                                                        goto Overflow;
                                        }
                                        break;
       t@@ -771,15 +787,9 @@ bexecute(File *f, Posn startp)
                                                goto Addinst;
                                        break;
                                case OR:
       -                                /* evaluate right choice later */
       -                                if(addinst(tl, inst->right, &tlp->se))
       -                                if(++ntl >= NLIST)
       -                                        goto Overflow;
       -                                /* efficiency: advance and re-evaluate */
       -                                inst = inst->left;
       -                                goto Switchstmt;
       +                                /* already expanded; just a placeholder */
       +                                break;
                                case END:        /* Match! */
       -                                tlp->se.p[0].p1 = -tlp->se.p[0].p1; /* minus sign */
                                        tlp->se.p[0].p2 = p;
                                        bnewmatch(&tlp->se);
                                        break;