trset: use rset struct instead of regex.h - neatvi - [fork] simple vi-type editor with UTF-8 support
 (HTM) git clone git://src.adamsgaard.dk/neatvi
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
       ---
 (DIR) commit 4b67186ed7bb5e6e255f78c351151bf72e51919d
 (DIR) parent 3efb0c5dc5d7be165d3a80ecdd144e895fed6aa6
 (HTM) Author: Ali Gholami Rudi <ali@rudi.ir>
       Date:   Thu, 14 May 2015 11:12:15 +0430
       
       rset: use rset struct instead of regex.h
       
       Diffstat:
         M dir.c                               |      25 +++++++++----------------
         M ex.c                                |      35 +++++++++++++++++--------------
         M rset.c                              |      12 +++++++++---
         M uc.c                                |      10 ++++++++++
         M vi.c                                |      26 +++++++++++++-------------
         M vi.h                                |       7 ++++++-
       
       6 files changed, 66 insertions(+), 49 deletions(-)
       ---
 (DIR) diff --git a/dir.c b/dir.c
       t@@ -1,7 +1,6 @@
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>
       -#include <regex.h>
        #include "vi.h"
        
        #define CR2L                "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىييپچژکگی‌‍؛،»«؟"
       t@@ -33,32 +32,26 @@ static struct rset *dir_rslr;        /* pattern of marks for left-to-right strings */
        static struct rset *dir_rsrl;        /* pattern of marks for right-to-left strings */
        static struct rset *dir_rsctx;        /* direction context patterns */
        
       -static int uc_off(char *s, int off)
       -{
       -        char *e = s + off;
       -        int i;
       -        for (i = 0; s < e && *s; i++)
       -                s = uc_next(s);
       -        return i;
       -}
       -
        static int dir_match(char **chrs, int beg, int end, int ctx, int *rec,
                        int *r_beg, int *r_end, int *c_beg, int *c_end, int *dir)
        {
                int subs[16 * 2];
                struct rset *rs = ctx < 0 ? dir_rsrl : dir_rslr;
                struct sbuf *str = sbuf_make();
       +        int flg = (beg ? RE_NOTBOL : 0) | (chrs[end][0] ? RE_NOTEOL : 0);
                int found;
                sbuf_mem(str, chrs[beg], chrs[end] - chrs[beg]);
       -        found = rset_find(rs, sbuf_buf(str), LEN(subs) / 2, subs, 0);
       +        found = rset_find(rs, sbuf_buf(str), LEN(subs) / 2, subs, flg);
                if (found >= 0 && r_beg && r_end && c_beg && c_end) {
                        struct dmark *dm = &dmarks[found];
                        char *s = sbuf_buf(str);
                        int grp = dm->grp;
                        *r_beg = beg + uc_off(s, subs[0]);
                        *r_end = beg + uc_off(s, subs[1]);
       -                *c_beg = subs[grp * 2 + 0] >= 0 ? beg + uc_off(s, subs[grp * 2 + 0]) : *r_beg;
       -                *c_end = subs[grp * 2 + 1] >= 0 ? beg + uc_off(s, subs[grp * 2 + 1]) : *r_end;
       +                *c_beg = subs[grp * 2 + 0] >= 0 ?
       +                        beg + uc_off(s, subs[grp * 2 + 0]) : *r_beg;
       +                *c_end = subs[grp * 2 + 1] >= 0 ?
       +                        beg + uc_off(s, subs[grp * 2 + 1]) : *r_end;
                        *dir = dm->dir;
                        *rec = grp > 0;
                }
       t@@ -134,11 +127,11 @@ void dir_init(void)
                        relr[i] = dmarks[i].ctx >= 0 ? dmarks[i].pat : NULL;
                        rerl[i] = dmarks[i].ctx <= 0 ? dmarks[i].pat : NULL;
                }
       -        dir_rslr = rset_make(LEN(dmarks), relr);
       -        dir_rsrl = rset_make(LEN(dmarks), rerl);
       +        dir_rslr = rset_make(LEN(dmarks), relr, 0);
       +        dir_rsrl = rset_make(LEN(dmarks), rerl, 0);
                for (i = 0; i < LEN(dcontexts); i++)
                        ctx[i] = dcontexts[i].pat;
       -        dir_rsctx = rset_make(LEN(dcontexts), ctx);
       +        dir_rsctx = rset_make(LEN(dcontexts), ctx, 0);
        }
        
        void dir_done(void)
 (DIR) diff --git a/ex.c b/ex.c
       t@@ -1,6 +1,5 @@
        #include <ctype.h>
        #include <fcntl.h>
       -#include <regex.h>
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>
       t@@ -96,28 +95,32 @@ static char *ex_arg(char *s, char *arg)
        
        static int ex_search(char *pat)
        {
       -        struct sbuf *kwd;
       +        struct sbuf *kw;
                int dir = *pat == '/' ? 1 : -1;
                char *b = pat;
                char *e = b;
       +        char *re_kw[1];
                int i = xrow;
       -        regex_t re;
       -        kwd = sbuf_make();
       +        struct rset *re;
       +        kw = sbuf_make();
                while (*++e) {
                        if (*e == *pat)
                                break;
       -                sbuf_chr(kwd, (unsigned char) *e);
       +                sbuf_chr(kw, (unsigned char) *e);
                        if (*e == '\\' && e[1])
                                e++;
                }
       -        regcomp(&re, sbuf_buf(kwd), 0);
       +        re_kw[0] = sbuf_buf(kw);
       +        re = rset_make(1, re_kw, 0);
       +        sbuf_free(kw);
       +        if (!re)
       +                return i;
                while (i >= 0 && i < lbuf_len(xb)) {
       -                if (!regexec(&re, lbuf_get(xb, i), 0, NULL, 0))
       +                if (rset_find(re, lbuf_get(xb, i), 0, NULL, 0) >= 0)
                                break;
                        i += dir;
                }
       -        regfree(&re);
       -        sbuf_free(kwd);
       +        rset_free(re);
                return i;
        }
        
       t@@ -388,8 +391,8 @@ static char *readuntil(char **src, int delim)
        static void ec_substitute(char *ec)
        {
                char loc[EXLEN], arg[EXLEN];
       -        regmatch_t subs[16];
       -        regex_t re;
       +        struct rset *re;
       +        int offs[32];
                int beg, end;
                char *pat, *rep;
                char *s = arg;
       t@@ -402,20 +405,20 @@ static void ec_substitute(char *ec)
                delim = (unsigned char) *s++;
                pat = readuntil(&s, delim);
                rep = readuntil(&s, delim);
       -        regcomp(&re, pat, 0);
       +        re = rset_make(1, &pat, 0);
                for (i = beg; i < end; i++) {
                        char *ln = lbuf_get(xb, i);
       -                if (!regexec(&re, ln, LEN(subs), subs, 0)) {
       +                if (rset_find(re, ln, LEN(offs) / 2, offs, 0)) {
                                struct sbuf *r = sbuf_make();
       -                        sbuf_mem(r, ln, subs[0].rm_so);
       +                        sbuf_mem(r, ln, offs[0]);
                                sbuf_str(r, rep);
       -                        sbuf_str(r, ln + subs[0].rm_eo);
       +                        sbuf_str(r, ln + offs[1]);
                                lbuf_put(xb, i, sbuf_buf(r));
                                lbuf_rm(xb, i + 1, i + 2);
                                sbuf_free(r);
                        }
                }
       -        regfree(&re);
       +        rset_free(re);
                free(pat);
                free(rep);
        }
 (DIR) diff --git a/rset.c b/rset.c
       t@@ -26,10 +26,11 @@ static int re_groupcount(char *s)
                return n;
        }
        
       -struct rset *rset_make(int n, char **re)
       +struct rset *rset_make(int n, char **re, int flg)
        {
                struct rset *rs = malloc(sizeof(*rs));
                struct sbuf *sb = sbuf_make();
       +        int regex_flg = REG_EXTENDED | (flg & RE_ICASE ? REG_ICASE : 0);
                int i;
                memset(rs, 0, sizeof(*rs));
                rs->grp = malloc((n + 1) * sizeof(rs->grp[0]));
       t@@ -54,7 +55,7 @@ struct rset *rset_make(int n, char **re)
                }
                rs->grp[n] = rs->grpcnt;
                sbuf_chr(sb, ')');
       -        if (regcomp(&rs->regex, sbuf_buf(sb), REG_EXTENDED)) {
       +        if (regcomp(&rs->regex, sbuf_buf(sb), regex_flg)) {
                        free(rs->grp);
                        free(rs->setgrpcnt);
                        free(rs);
       t@@ -70,10 +71,15 @@ int rset_find(struct rset *rs, char *s, int n, int *grps, int flg)
        {
                regmatch_t *subs;
                int found, i, set = -1;
       +        int regex_flg = 0;
                if (rs->grpcnt <= 2)
                        return -1;
       +        if (flg & RE_NOTBOL)
       +                regex_flg |= REG_NOTBOL;
       +        if (flg & RE_NOTEOL)
       +                regex_flg |= REG_NOTEOL;
                subs = malloc(rs->grpcnt * sizeof(subs[0]));
       -        found = !regexec(&rs->regex, s, rs->grpcnt, subs, 0);
       +        found = !regexec(&rs->regex, s, rs->grpcnt, subs, regex_flg);
                for (i = 0; found && i < rs->n; i++)
                        if (rs->grp[i] >= 0 && subs[rs->grp[i]].rm_so >= 0)
                                set = i;
 (DIR) diff --git a/uc.c b/uc.c
       t@@ -105,6 +105,16 @@ char *uc_chr(char *s, int off)
                return s && (off < 0 || i == off) ? s : "";
        }
        
       +/* the number of characters between s and s + off*/
       +int uc_off(char *s, int off)
       +{
       +        char *e = s + off;
       +        int i;
       +        for (i = 0; s < e && *s; i++)
       +                s = uc_next(s);
       +        return i;
       +}
       +
        char *uc_sub(char *s, int beg, int end)
        {
                char *sbeg = uc_chr(s, beg);
 (DIR) diff --git a/vi.c b/vi.c
       t@@ -7,7 +7,6 @@
         */
        #include <ctype.h>
        #include <fcntl.h>
       -#include <regex.h>
        #include <stdio.h>
        #include <stdlib.h>
        #include <string.h>
       t@@ -146,29 +145,30 @@ static void lbuf_findchar(struct lbuf *lb, int *row, int *col, char *cs, int cmd
        
        static int lbuf_search(struct lbuf *lb, char *kw, int dir, int *r, int *c, int *len)
        {
       -        regmatch_t subs[1];
       -        regex_t re;
       +        int offs[2];
                int found = 0;
                int row = *r, col = *c;
                int i;
       -        if (regcomp(&re, kw, REG_EXTENDED))
       +        struct rset *re = rset_make(1, &kw, 0);
       +        if (!re)
                        return 1;
                for (i = row; !found && i >= 0 && i < lbuf_len(lb); i += dir) {
                        char *s = lbuf_get(lb, i);
       -                int off = dir > 0 && row == i ? col + 1 : 0;
       -                while (!regexec(&re, s + off, LEN(subs), subs, 0)) {
       -                        if (dir < 0 && row == i && off + subs[0].rm_so >= col)
       +                int off = dir > 0 && row == i ? uc_chr(s, col + 1) - s : 0;
       +                int flg = off ? RE_NOTBOL : 0;
       +                while (rset_find(re, s + off, 1, offs, flg) >= 0) {
       +                        if (dir < 0 && row == i && off + offs[0] >= col)
                                        break;
                                found = 1;
       -                        *c = off + subs[0].rm_so;
       +                        *c = uc_off(s, off + offs[0]);
                                *r = i;
       -                        *len = subs[0].rm_eo - subs[0].rm_so;
       -                        off += subs[0].rm_eo;
       +                        *len = offs[1] - offs[0];
       +                        off += offs[1];
                                if (dir > 0)
                                        break;
                        }
                }
       -        regfree(&re);
       +        rset_free(re);
                return !found;
        }
        
       t@@ -177,8 +177,8 @@ static int vi_search(int cmd, int cnt, int *row, int *col)
                int r = *row;
                int c = *col;
                int failed = 0;
       -        int i, len;
       -        int dir;
       +        int len = 0;
       +        int i, dir;
                if (cmd == '/' || cmd == '?') {
                        char sign[4] = {cmd};
                        char *kw;
 (DIR) diff --git a/vi.h b/vi.h
       t@@ -37,7 +37,11 @@ int sbuf_len(struct sbuf *sb);
        void sbuf_cut(struct sbuf *s, int len);
        
        /* regular expression sets */
       -struct rset *rset_make(int n, char **pat);
       +#define RE_ICASE                1
       +#define RE_NOTBOL                2
       +#define RE_NOTEOL                4
       +
       +struct rset *rset_make(int n, char **pat, int flg);
        int rset_find(struct rset *re, char *s, int n, int *grps, int flg);
        void rset_free(struct rset *re);
        
       t@@ -69,6 +73,7 @@ int uc_wid(char *s);
        int uc_slen(char *s);
        int uc_code(char *s);
        char *uc_chr(char *s, int off);
       +int uc_off(char *s, int off);
        char *uc_sub(char *s, int beg, int end);
        char *uc_dup(char *s);
        int uc_isspace(char *s);