rregexec.c - 9base - revived minimalist port of Plan 9 userland to Unix
 (HTM) git clone git://git.suckless.org/9base
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       rregexec.c (4729B)
       ---
            1 #include "lib9.h"
            2 #include "regexp9.h"
            3 #include "regcomp.h"
            4 
            5 /*
            6  *  return        0 if no match
            7  *                >0 if a match
            8  *                <0 if we ran out of _relist space
            9  */
           10 static int
           11 rregexec1(Reprog *progp,        /* program to run */
           12         Rune *bol,                /* string to run machine on */
           13         Resub *mp,                /* subexpression elements */
           14         int ms,                        /* number of elements at mp */
           15         Reljunk *j)
           16 {
           17         int flag=0;
           18         Reinst *inst;
           19         Relist *tlp;
           20         Rune *s;
           21         int i, checkstart;
           22         Rune r, *rp, *ep;
           23         Relist* tl;                /* This list, next list */
           24         Relist* nl;
           25         Relist* tle;                /* ends of this and next list */
           26         Relist* nle;
           27         int match;
           28         Rune *p;
           29 
           30         match = 0;
           31         checkstart = j->startchar;
           32         if(mp)
           33                 for(i=0; i<ms; i++) {
           34                         mp[i].s.rsp = 0;
           35                         mp[i].e.rep = 0;
           36                 }
           37         j->relist[0][0].inst = 0;
           38         j->relist[1][0].inst = 0;
           39 
           40         /* Execute machine once for each character, including terminal NUL */
           41         s = j->rstarts;
           42         do{
           43 
           44                 /* fast check for first char */
           45                 if(checkstart) {
           46                         switch(j->starttype) {
           47                         case RUNE:
           48                                 p = runestrchr(s, j->startchar);
           49                                 if(p == 0 || p == j->reol)
           50                                         return match;
           51                                 s = p;
           52                                 break;
           53                         case BOL:
           54                                 if(s == bol)
           55                                         break;
           56                                 p = runestrchr(s, '\n');
           57                                 if(p == 0 || s == j->reol)
           58                                         return match;
           59                                 s = p+1;
           60                                 break;
           61                         }
           62                 }
           63 
           64                 r = *s;
           65 
           66                 /* switch run lists */
           67                 tl = j->relist[flag];
           68                 tle = j->reliste[flag];
           69                 nl = j->relist[flag^=1];
           70                 nle = j->reliste[flag];
           71                 nl->inst = 0;
           72 
           73                 /* Add first instruction to current list */
           74                 _rrenewemptythread(tl, progp->startinst, ms, s);
           75 
           76                 /* Execute machine until current list is empty */
           77                 for(tlp=tl; tlp->inst; tlp++){
           78                         for(inst=tlp->inst; ; inst = inst->u2.next){
           79                                 switch(inst->type){
           80                                 case RUNE:        /* regular character */
           81                                         if(inst->u1.r == r)
           82                                                 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
           83                                                         return -1;
           84                                         break;
           85                                 case LBRA:
           86                                         tlp->se.m[inst->u1.subid].s.rsp = s;
           87                                         continue;
           88                                 case RBRA:
           89                                         tlp->se.m[inst->u1.subid].e.rep = s;
           90                                         continue;
           91                                 case ANY:
           92                                         if(r != '\n')
           93                                                 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
           94                                                         return -1;
           95                                         break;
           96                                 case ANYNL:
           97                                         if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
           98                                                         return -1;
           99                                         break;
          100                                 case BOL:
          101                                         if(s == bol || *(s-1) == '\n')
          102                                                 continue;
          103                                         break;
          104                                 case EOL:
          105                                         if(s == j->reol || r == 0 || r == '\n')
          106                                                 continue;
          107                                         break;
          108                                 case CCLASS:
          109                                         ep = inst->u1.cp->end;
          110                                         for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
          111                                                 if(r >= rp[0] && r <= rp[1]){
          112                                                         if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
          113                                                                 return -1;
          114                                                         break;
          115                                                 }
          116                                         break;
          117                                 case NCCLASS:
          118                                         ep = inst->u1.cp->end;
          119                                         for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
          120                                                 if(r >= rp[0] && r <= rp[1])
          121                                                         break;
          122                                         if(rp == ep)
          123                                                 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
          124                                                         return -1;
          125                                         break;
          126                                 case OR:
          127                                         /* evaluate right choice later */
          128                                         if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
          129                                                 return -1;
          130                                         /* efficiency: advance and re-evaluate */
          131                                         continue;
          132                                 case END:        /* Match! */
          133                                         match = 1;
          134                                         tlp->se.m[0].e.rep = s;
          135                                         if(mp != 0)
          136                                                 _renewmatch(mp, ms, &tlp->se);
          137                                         break;
          138                                 }
          139                                 break;
          140                         }
          141                 }
          142                 if(s == j->reol)
          143                         break;
          144                 checkstart = j->startchar && nl->inst==0;
          145                 s++;
          146         }while(r);
          147         return match;
          148 }
          149 
          150 static int
          151 rregexec2(Reprog *progp,        /* program to run */
          152         Rune *bol,        /* string to run machine on */
          153         Resub *mp,        /* subexpression elements */
          154         int ms,                /* number of elements at mp */
          155         Reljunk *j
          156 )
          157 {
          158         Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
          159 
          160         /* mark space */
          161         j->relist[0] = relist0;
          162         j->relist[1] = relist1;
          163         j->reliste[0] = relist0 + nelem(relist0) - 2;
          164         j->reliste[1] = relist1 + nelem(relist1) - 2;
          165 
          166         return rregexec1(progp, bol, mp, ms, j);
          167 }
          168 
          169 extern int
          170 rregexec(Reprog *progp,        /* program to run */
          171         Rune *bol,        /* string to run machine on */
          172         Resub *mp,        /* subexpression elements */
          173         int ms)                /* number of elements at mp */
          174 {
          175         Reljunk j;
          176         Relist relist0[LISTSIZE], relist1[LISTSIZE];
          177         int rv;
          178 
          179         /*
          180           *  use user-specified starting/ending location if specified
          181          */
          182         j.rstarts = bol;
          183         j.reol = 0;
          184         if(mp && ms>0){
          185                 if(mp->s.sp)
          186                         j.rstarts = mp->s.rsp;
          187                 if(mp->e.ep)
          188                         j.reol = mp->e.rep;
          189         }
          190         j.starttype = 0;
          191         j.startchar = 0;
          192         if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
          193                 j.starttype = RUNE;
          194                 j.startchar = progp->startinst->u1.r;
          195         }
          196         if(progp->startinst->type == BOL)
          197                 j.starttype = BOL;
          198 
          199         /* mark space */
          200         j.relist[0] = relist0;
          201         j.relist[1] = relist1;
          202         j.reliste[0] = relist0 + nelem(relist0) - 2;
          203         j.reliste[1] = relist1 + nelem(relist1) - 2;
          204 
          205         rv = rregexec1(progp, bol, mp, ms, &j);
          206         if(rv >= 0)
          207                 return rv;
          208         rv = rregexec2(progp, bol, mp, ms, &j);
          209         if(rv >= 0)
          210                 return rv;
          211         return -1;
          212 }