regexec.c - 9base - revived minimalist port of Plan 9 userland to Unix
 (HTM) git clone git://git.suckless.org/9base
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       regexec.c (5029B)
       ---
            1 #include "lib9.h"
            2 #include "regexp9.h"
            3 #include "regcomp.h"
            4 
            5 
            6 /*
            7  *  return        0 if no match
            8  *                >0 if a match
            9  *                <0 if we ran out of _relist space
           10  */
           11 static int
           12 regexec1(Reprog *progp,        /* program to run */
           13         char *bol,        /* string to run machine on */
           14         Resub *mp,        /* subexpression elements */
           15         int ms,                /* number of elements at mp */
           16         Reljunk *j
           17 )
           18 {
           19         int flag=0;
           20         Reinst *inst;
           21         Relist *tlp;
           22         char *s;
           23         int i, checkstart;
           24         Rune r, *rp, *ep;
           25         int n;
           26         Relist* tl;                /* This list, next list */
           27         Relist* nl;
           28         Relist* tle;                /* ends of this and next list */
           29         Relist* nle;
           30         int match;
           31         char *p;
           32 
           33         match = 0;
           34         checkstart = j->starttype;
           35         if(mp)
           36                 for(i=0; i<ms; i++) {
           37                         mp[i].s.sp = 0;
           38                         mp[i].e.ep = 0;
           39                 }
           40         j->relist[0][0].inst = 0;
           41         j->relist[1][0].inst = 0;
           42 
           43         /* Execute machine once for each character, including terminal NUL */
           44         s = j->starts;
           45         do{
           46                 /* fast check for first char */
           47                 if(checkstart) {
           48                         switch(j->starttype) {
           49                         case RUNE:
           50                                 p = utfrune(s, j->startchar);
           51                                 if(p == 0 || s == j->eol)
           52                                         return match;
           53                                 s = p;
           54                                 break;
           55                         case BOL:
           56                                 if(s == bol)
           57                                         break;
           58                                 p = utfrune(s, '\n');
           59                                 if(p == 0 || s == j->eol)
           60                                         return match;
           61                                 s = p+1;
           62                                 break;
           63                         }
           64                 }
           65                 r = *(uchar*)s;
           66                 if(r < Runeself)
           67                         n = 1;
           68                 else
           69                         n = chartorune(&r, s);
           70 
           71                 /* switch run lists */
           72                 tl = j->relist[flag];
           73                 tle = j->reliste[flag];
           74                 nl = j->relist[flag^=1];
           75                 nle = j->reliste[flag];
           76                 nl->inst = 0;
           77 
           78                 /* Add first instruction to current list */
           79                 if(match == 0)
           80                         _renewemptythread(tl, progp->startinst, ms, s);
           81 
           82                 /* Execute machine until current list is empty */
           83                 for(tlp=tl; tlp->inst; tlp++){        /* assignment = */
           84                         for(inst = tlp->inst; ; inst = inst->u2.next){
           85                                 switch(inst->type){
           86                                 case RUNE:        /* regular character */
           87                                         if(inst->u1.r == r){
           88                                                 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
           89                                                         return -1;
           90                                         }
           91                                         break;
           92                                 case LBRA:
           93                                         tlp->se.m[inst->u1.subid].s.sp = s;
           94                                         continue;
           95                                 case RBRA:
           96                                         tlp->se.m[inst->u1.subid].e.ep = s;
           97                                         continue;
           98                                 case ANY:
           99                                         if(r != '\n')
          100                                                 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
          101                                                         return -1;
          102                                         break;
          103                                 case ANYNL:
          104                                         if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
          105                                                         return -1;
          106                                         break;
          107                                 case BOL:
          108                                         if(s == bol || *(s-1) == '\n')
          109                                                 continue;
          110                                         break;
          111                                 case EOL:
          112                                         if(s == j->eol || r == 0 || r == '\n')
          113                                                 continue;
          114                                         break;
          115                                 case CCLASS:
          116                                         ep = inst->u1.cp->end;
          117                                         for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
          118                                                 if(r >= rp[0] && r <= rp[1]){
          119                                                         if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
          120                                                                 return -1;
          121                                                         break;
          122                                                 }
          123                                         break;
          124                                 case NCCLASS:
          125                                         ep = inst->u1.cp->end;
          126                                         for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
          127                                                 if(r >= rp[0] && r <= rp[1])
          128                                                         break;
          129                                         if(rp == ep)
          130                                                 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
          131                                                         return -1;
          132                                         break;
          133                                 case OR:
          134                                         /* evaluate right choice later */
          135                                         if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
          136                                                 return -1;
          137                                         /* efficiency: advance and re-evaluate */
          138                                         continue;
          139                                 case END:        /* Match! */
          140                                         match = 1;
          141                                         tlp->se.m[0].e.ep = s;
          142                                         if(mp != 0)
          143                                                 _renewmatch(mp, ms, &tlp->se);
          144                                         break;
          145                                 }
          146                                 break;
          147                         }
          148                 }
          149                 if(s == j->eol)
          150                         break;
          151                 checkstart = j->starttype && nl->inst==0;
          152                 s += n;
          153         }while(r);
          154         return match;
          155 }
          156 
          157 static int
          158 regexec2(Reprog *progp,        /* program to run */
          159         char *bol,        /* string to run machine on */
          160         Resub *mp,        /* subexpression elements */
          161         int ms,                /* number of elements at mp */
          162         Reljunk *j
          163 )
          164 {
          165         int rv;
          166         Relist *relist0, *relist1;
          167 
          168         /* mark space */
          169         relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
          170         if(relist0 == nil)
          171                 return -1;
          172         relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
          173         if(relist1 == nil){
          174                 free(relist1);
          175                 return -1;
          176         }
          177         j->relist[0] = relist0;
          178         j->relist[1] = relist1;
          179         j->reliste[0] = relist0 + BIGLISTSIZE - 2;
          180         j->reliste[1] = relist1 + BIGLISTSIZE - 2;
          181 
          182         rv = regexec1(progp, bol, mp, ms, j);
          183         free(relist0);
          184         free(relist1);
          185         return rv;
          186 }
          187 
          188 extern int
          189 regexec(Reprog *progp,        /* program to run */
          190         char *bol,        /* string to run machine on */
          191         Resub *mp,        /* subexpression elements */
          192         int ms)                /* number of elements at mp */
          193 {
          194         Reljunk j;
          195         Relist relist0[LISTSIZE], relist1[LISTSIZE];
          196         int rv;
          197 
          198         /*
          199           *  use user-specified starting/ending location if specified
          200          */
          201         j.starts = bol;
          202         j.eol = 0;
          203         if(mp && ms>0){
          204                 if(mp->s.sp)
          205                         j.starts = mp->s.sp;
          206                 if(mp->e.ep)
          207                         j.eol = mp->e.ep;
          208         }
          209         j.starttype = 0;
          210         j.startchar = 0;
          211         if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
          212                 j.starttype = RUNE;
          213                 j.startchar = progp->startinst->u1.r;
          214         }
          215         if(progp->startinst->type == BOL)
          216                 j.starttype = BOL;
          217 
          218         /* mark space */
          219         j.relist[0] = relist0;
          220         j.relist[1] = relist1;
          221         j.reliste[0] = relist0 + nelem(relist0) - 2;
          222         j.reliste[1] = relist1 + nelem(relist1) - 2;
          223 
          224         rv = regexec1(progp, bol, mp, ms, &j);
          225         if(rv >= 0)
          226                 return rv;
          227         rv = regexec2(progp, bol, mp, ms, &j);
          228         if(rv >= 0)
          229                 return rv;
          230         return -1;
          231 }