trset.c - neatvi - [fork] simple vi-type editor with UTF-8 support
 (HTM) git clone git://src.adamsgaard.dk/neatvi
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
       ---
       trset.c (3355B)
       ---
            1 #include <stdlib.h>
            2 #include <stdio.h>
            3 #include <string.h>
            4 #include "regex.h"
            5 #include "vi.h"
            6 
            7 /* regular expression set */
            8 struct rset {
            9         regex_t regex;                /* the combined regular expression */
           10         int n;                        /* number of regular expressions in this set */
           11         int *grp;                /* the group assigned to each subgroup */
           12         int *setgrpcnt;                /* number of groups in each regular expression */
           13         int grpcnt;                /* group count */
           14 };
           15 
           16 static int re_groupcount(char *s)
           17 {
           18         int n = 0;        /* number of groups */
           19         int brk = 0;        /* one if inside a bracket expression */
           20         int brk2 = 0;        /* nested bracket type: ':', '*', or '=' */
           21         while (*s) {
           22                 if (!brk) {
           23                         if (s[0] == '(')
           24                                 n++;
           25                         if (s[0] == '\\' && s[1]) {
           26                                 s++;
           27                         } else if (s[0] == '[' && s[1] && s[2]) {
           28                                 s += s[1] == '^' ? 2 : 1;
           29                                 brk = 1;
           30                         }
           31                 } else {
           32                         if (!brk2) {
           33                                 if (s[0] == ']')
           34                                         brk = 0;
           35                                 if (s[0] == '[' && (s[1] == ':' || s[1] == '*' || s[1] == '=')) {
           36                                         brk2 = s[1];
           37                                         s++;
           38                                 }
           39                         } else if (s[0] == brk2 && s[1] == ']') {
           40                                 brk2 = 0;
           41                                 s++;
           42                         }
           43                 }
           44                 s++;
           45         }
           46         return n;
           47 }
           48 
           49 struct rset *rset_make(int n, char **re, int flg)
           50 {
           51         struct rset *rs = malloc(sizeof(*rs));
           52         struct sbuf *sb = sbuf_make();
           53         int regex_flg = REG_EXTENDED | (flg & RE_ICASE ? REG_ICASE : 0);
           54         int i;
           55         memset(rs, 0, sizeof(*rs));
           56         rs->grp = malloc((n + 1) * sizeof(rs->grp[0]));
           57         rs->setgrpcnt = malloc((n + 1) * sizeof(rs->setgrpcnt[0]));
           58         rs->grpcnt = 2;
           59         rs->n = n;
           60         sbuf_chr(sb, '(');
           61         for (i = 0; i < n; i++) {
           62                 if (!re[i]) {
           63                         rs->grp[i] = -1;
           64                         rs->setgrpcnt[i] = 0;
           65                         continue;
           66                 }
           67                 if (sbuf_len(sb) > 1)
           68                         sbuf_chr(sb, '|');
           69                 sbuf_chr(sb, '(');
           70                 sbuf_str(sb, re[i]);
           71                 sbuf_chr(sb, ')');
           72                 rs->grp[i] = rs->grpcnt;
           73                 rs->setgrpcnt[i] = re_groupcount(re[i]);
           74                 rs->grpcnt += 1 + rs->setgrpcnt[i];
           75         }
           76         rs->grp[n] = rs->grpcnt;
           77         sbuf_chr(sb, ')');
           78         if (regcomp(&rs->regex, sbuf_buf(sb), regex_flg)) {
           79                 free(rs->grp);
           80                 free(rs->setgrpcnt);
           81                 free(rs);
           82                 sbuf_free(sb);
           83                 return NULL;
           84         }
           85         sbuf_free(sb);
           86         return rs;
           87 }
           88 
           89 /* return the index of the matching regular expression or -1 if none matches */
           90 int rset_find(struct rset *rs, char *s, int n, int *grps, int flg)
           91 {
           92         regmatch_t *subs;
           93         int found, i, set = -1;
           94         int regex_flg = 0;
           95         if (rs->grpcnt <= 2)
           96                 return -1;
           97         if (flg & RE_NOTBOL)
           98                 regex_flg |= REG_NOTBOL;
           99         if (flg & RE_NOTEOL)
          100                 regex_flg |= REG_NOTEOL;
          101         subs = malloc(rs->grpcnt * sizeof(subs[0]));
          102         found = !regexec(&rs->regex, s, rs->grpcnt, subs, regex_flg);
          103         for (i = 0; found && i < rs->n; i++)
          104                 if (rs->grp[i] >= 0 && subs[rs->grp[i]].rm_so >= 0)
          105                         set = i;
          106         if (found && set >= 0) {
          107                 for (i = 0; i < n; i++) {
          108                         int grp = rs->grp[set] + i;
          109                         if (i < rs->setgrpcnt[set] + 1) {
          110                                 grps[i * 2] = subs[grp].rm_so;
          111                                 grps[i * 2 + 1] = subs[grp].rm_eo;
          112                         } else {
          113                                 grps[i * 2 + 0] = -1;
          114                                 grps[i * 2 + 1] = -1;
          115                         }
          116                 }
          117         }
          118         free(subs);
          119         return set;
          120 }
          121 
          122 void rset_free(struct rset *rs)
          123 {
          124         regfree(&rs->regex);
          125         free(rs->setgrpcnt);
          126         free(rs->grp);
          127         free(rs);
          128 }
          129 
          130 /* read a regular expression enclosed in a delimiter */
          131 char *re_read(char **src)
          132 {
          133         struct sbuf *sbuf;
          134         char *s = *src;
          135         int delim = (unsigned char) *s++;
          136         if (!delim)
          137                 return NULL;
          138         sbuf = sbuf_make();
          139         while (*s && *s != delim) {
          140                 if (s[0] == '\\' && s[1])
          141                         if (*(++s) != delim)
          142                                 sbuf_chr(sbuf, '\\');
          143                 sbuf_chr(sbuf, (unsigned char) *s++);
          144         }
          145         *src = *s ? s + 1 : s;
          146         return sbuf_done(sbuf);
          147 }