dir: use regular expressions for specifying text direction - neatvi - [fork] simple vi-type editor with UTF-8 support
(HTM) git clone git://src.adamsgaard.dk/neatvi
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
---
(DIR) commit 7feca5e3a5de42c3ee1d3a0a0b13dc27c93280ed
(DIR) parent dee1eb07aca89fd5aa037ff4736f74ba9f0e8c5c
(HTM) Author: Ali Gholami Rudi <ali@rudi.ir>
Date: Mon, 11 May 2015 16:41:17 +0430
dir: use regular expressions for specifying text direction
Diffstat:
M Makefile | 2 +-
A dir.c | 148 +++++++++++++++++++++++++++++++
M ren.c | 144 ++++++++++---------------------
A reset.c | 100 +++++++++++++++++++++++++++++++
M vi.c | 37 ++++++++++++++++++++++++++-----
M vi.h | 14 ++++++++++++--
6 files changed, 336 insertions(+), 109 deletions(-)
---
(DIR) diff --git a/Makefile b/Makefile
@@ -2,7 +2,7 @@ CC = cc
CFLAGS = -Wall -O2
LDFLAGS =
-OBJS = vi.o ex.o lbuf.o sbuf.o ren.o reg.o led.o uc.o term.o
+OBJS = vi.o ex.o lbuf.o sbuf.o ren.o dir.o reg.o led.o uc.o term.o reset.o
all: vi
%.o: %.c
(DIR) diff --git a/dir.c b/dir.c
@@ -0,0 +1,148 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <regex.h>
+#include "vi.h"
+
+#define CR2L "ءآأؤإئابةتثجحخدذرزسشصضطظعغـفقكلمنهوىييپچژکگی؛،»«؟"
+#define CNEUT "-!\"#$%&'()*+,./:;<=>?@^_`{|}~ "
+
+static struct dcontext {
+ int dir;
+ char *pat;
+} dcontexts[] = {
+ {-1, "^[" CR2L "]"},
+ {+1, "^[a-zA-Z_0-9]"},
+};
+
+/* direction marks */
+static struct dmark {
+ int ctx; /* the direction context for this mark; 0 means any */
+ int dir; /* the direction of matched text */
+ int grp; /* the nested subgroup; 0 means no groups */
+ char *pat; /* the pattern */
+} dmarks[] = {
+ {+0, +1, 0, "$([^$]+)\\$"},
+ {+0, +1, 1, "\\\\\\*\\[([^]]+)\\]"},
+ {+1, -1, 0, "[" CR2L "][" CNEUT CR2L "]*[" CR2L "]"},
+ {-1, +1, 0, "[a-zA-Z0-9_][^" CR2L "\\\\`$']*[a-zA-Z0-9_]"},
+};
+
+static struct reset *dir_rslr;
+static struct reset *dir_rsrl;
+static struct reset *dir_rsctx;
+
+static int uc_off(char *s, int off)
+{
+ char *e = s + off;
+ int i;
+ for (i = 0; s < e && *s; i++)
+ s = uc_next(s);
+ return i;
+}
+
+static int dir_match(char **chrs, int beg, int end, int ctx, int *rec,
+ int *r_beg, int *r_end, int *c_beg, int *c_end, int *dir)
+{
+ int subs[16 * 2];
+ struct reset *rs = ctx < 0 ? dir_rsrl : dir_rslr;
+ struct sbuf *str = sbuf_make();
+ int found;
+ sbuf_mem(str, chrs[beg], chrs[end] - chrs[beg]);
+ found = reset_find(rs, sbuf_buf(str), LEN(subs) / 2, subs, 0);
+ if (found >= 0 && r_beg && r_end && c_beg && c_end) {
+ struct dmark *dm = &dmarks[found];
+ char *s = sbuf_buf(str);
+ int grp = dm->grp;
+ *r_beg = beg + uc_off(s, subs[0]);
+ *r_end = beg + uc_off(s, subs[1]);
+ *c_beg = subs[grp * 2 + 0] >= 0 ? beg + uc_off(s, subs[grp * 2 + 0]) : *r_beg;
+ *c_end = subs[grp * 2 + 1] >= 0 ? beg + uc_off(s, subs[grp * 2 + 1]) : *r_end;
+ *dir = dm->dir;
+ *rec = grp > 0;
+ }
+ sbuf_free(str);
+ return found < 0;
+}
+
+static void dir_reverse(int *ord, int beg, int end)
+{
+ end--;
+ while (beg < end) {
+ int tmp = ord[beg];
+ ord[beg] = ord[end];
+ ord[end] = tmp;
+ beg++;
+ end--;
+ }
+}
+
+/* reorder the characters based on direction marks and characters */
+static void dir_fix(char **chrs, int *ord, int dir, int beg, int end)
+{
+ int r_beg, r_end, c_beg, c_end;
+ int c_dir, c_rec;
+ while (beg < end && !dir_match(chrs, beg, end, dir, &c_rec,
+ &r_beg, &r_end, &c_beg, &c_end, &c_dir)) {
+ if (dir < 0)
+ dir_reverse(ord, r_beg, r_end);
+ if (c_dir < 0)
+ dir_reverse(ord, c_beg, c_end);
+ if (c_beg == r_beg)
+ c_beg++;
+ if (c_rec)
+ dir_fix(chrs, ord, c_dir, c_beg, c_end);
+ beg = r_end;
+ }
+}
+
+int dir_context(char *s)
+{
+ int found;
+ if (xdir == 'L')
+ return +1;
+ if (xdir == 'R')
+ return -1;
+ found = reset_find(dir_rsctx, s ? s : "", 0, NULL, 0);
+ if (found >= 0)
+ return dcontexts[found].dir;
+ return xdir == 'r' ? +1 : -1;
+}
+
+/* reorder the characters in s */
+void dir_reorder(char *s, int *ord)
+{
+ int n;
+ char **chrs = uc_chop(s, &n);
+ int dir = dir_context(s);
+ if (n && chrs[n - 1][0] == '\n') {
+ ord[n - 1] = n - 1;
+ n--;
+ }
+ dir_fix(chrs, ord, dir, 0, n);
+ free(chrs);
+}
+
+void dir_init(void)
+{
+ char *relr[128];
+ char *rerl[128];
+ char *ctx[128];
+ int i;
+ for (i = 0; i < LEN(dmarks); i++) {
+ relr[i] = dmarks[i].ctx >= 0 ? dmarks[i].pat : NULL;
+ rerl[i] = dmarks[i].ctx <= 0 ? dmarks[i].pat : NULL;
+ }
+ dir_rslr = reset_make(LEN(dmarks), relr);
+ dir_rsrl = reset_make(LEN(dmarks), rerl);
+ for (i = 0; i < LEN(dcontexts); i++)
+ ctx[i] = dcontexts[i].pat;
+ dir_rsctx = reset_make(LEN(dcontexts), ctx);
+}
+
+void dir_done(void)
+{
+ reset_free(dir_rslr);
+ reset_free(dir_rsrl);
+ reset_free(dir_rsctx);
+}
(DIR) diff --git a/ren.c b/ren.c
@@ -1,65 +1,10 @@
/* rendering strings */
-/*
- * Overview:
- * + ren_translate() replaces the characters if necessary.
- * + ren_position() specifies the position of characters on the screen.
- * + ren_reorder() is called by ren_position() and changes the order of characters.
- * + ren_highlight() performs syntax highlighting.
- */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "vi.h"
-static int bidi_maximalregion(char *s, int n, int dir, char **chrs, int idx, int *beg, int *end)
-{
- while (idx < n && uc_dir(chrs[idx]) * dir >= 0)
- idx++;
- *beg = idx;
- *end = idx;
- while (idx < n && uc_dir(chrs[idx]) * dir <= 0) {
- if (uc_dir(chrs[idx]) * dir < 0)
- *end = idx + 1;
- idx++;
- }
- return *beg >= *end;
-}
-
-static void bidi_reverse(int *ord, int beg, int end)
-{
- end--;
- while (beg < end) {
- int tmp = ord[beg];
- ord[beg] = ord[end];
- ord[end] = tmp;
- beg++;
- end--;
- }
-}
-
-int ren_dir(char *s)
-{
- if (xdir == 'R')
- return -1;
- if (xdir == 'l')
- return *s && uc_dir(s) < 0 ? -1 : +1;
- if (xdir == 'r')
- return *s && uc_dir(s) > 0 ? +1 : -1;
- return +1;
-}
-
-/* reorder the characters in s */
-static void ren_reorder(char *s, int *ord)
-{
- int beg = 0, end = 0, n;
- char **chrs = uc_chop(s, &n);
- int dir = ren_dir(s);
- while (!bidi_maximalregion(s, n, dir, chrs, end, &beg, &end))
- bidi_reverse(ord, beg, end);
- free(chrs);
-}
-
/* specify the screen position of the characters in s */
static int *ren_position(char *s, int *beg, int *end)
{
@@ -67,11 +12,11 @@ static int *ren_position(char *s, int *beg, int *end)
char **chrs = uc_chop(s, &n);
int *off, *pos;
int diff = 0;
- int dir = ren_dir(s);
+ int dir = dir_context(s);
pos = malloc(n * sizeof(pos[0]));
for (i = 0; i < n; i++)
pos[i] = i;
- ren_reorder(s, pos);
+ dir_reorder(s, pos);
off = malloc(n * sizeof(off[0]));
for (i = 0; i < n; i++)
off[pos[i]] = i;
@@ -124,7 +69,7 @@ char *ren_all(char *s0, int wid)
int i;
s1 = ren_translate(s0 ? s0 : "");
chrs = uc_chop(s1, &n);
- pos = ren_position(s1, NULL, NULL);
+ pos = ren_position(s0, NULL, NULL);
for (i = 0; i < n; i++)
if (w <= pos[i])
w = pos[i] + 1;
@@ -192,7 +137,7 @@ int ren_off(char *s, int p)
int n = uc_slen(s);
int *pos = ren_position(s, NULL, NULL);
int i;
- if (ren_dir(s) >= 0)
+ if (dir_context(s) >= 0)
p = pos_prev(pos, n, p, 1);
else
p = pos_next(pos, n, p, 1);
@@ -206,7 +151,7 @@ int ren_off(char *s, int p)
/* adjust cursor position */
int ren_cursor(char *s, int p)
{
- int dir = ren_dir(s ? s : "");
+ int dir = dir_context(s ? s : "");
int n, next;
int beg, end;
int *pos;
@@ -232,11 +177,11 @@ int ren_next(char *s, int p, int dir)
{
int n = uc_slen(s);
int *pos = ren_position(s, NULL, NULL);
- if (ren_dir(s ? s : "") >= 0)
+ if (dir_context(s ? s : "") >= 0)
p = pos_prev(pos, n, p, 1);
else
p = pos_next(pos, n, p, 1);
- if (dir * ren_dir(s ? s : "") >= 0)
+ if (dir * dir_context(s ? s : "") >= 0)
p = pos_next(pos, n, p, 0);
else
p = pos_prev(pos, n, p, 0);
@@ -249,58 +194,57 @@ int ren_eol(char *s, int dir)
int beg, end;
int *pos = ren_position(s, &beg, &end);
free(pos);
- return dir * ren_dir(s) >= 0 ? end : beg;
+ return dir * dir_context(s) >= 0 ? end : beg;
}
/* compare two visual positions */
int ren_cmp(char *s, int pos1, int pos2)
{
- return ren_dir(s ? s : "") >= 0 ? pos1 - pos2 : pos2 - pos1;
+ return dir_context(s ? s : "") >= 0 ? pos1 - pos2 : pos2 - pos1;
}
-/*
- * insertion offset before or after the given visual position
- *
- * When pre is nonzero, the return value indicates an offset of s,
- * which, if a character is inserted at that position, it appears
- * just before the character at pos. If pre is zero, the inserted
- * character should appear just after the character at pos.
- */
-int ren_insertionoffset(char *s, int pos, int pre)
+static void swap(int *i1, int *i2)
+{
+ int t = *i1;
+ *i1 = *i2;
+ *i2 = t;
+}
+
+/* the region specified by two visual positions */
+int ren_region(char *s, int c1, int c2, int *l1, int *l2, int closed)
{
int *ord; /* ord[i]: the order of the i-th char on the screen */
- int *map; /* map[i]: the char appearing i-th on the screen */
+ int o1, o2;
+ int beg, end;
int n = uc_slen(s);
- int oprev, o, onext; /* the offset the of previous, current, and next positions */
- int cord; /* the order of the current position on the screen */
int i;
+ if (c1 == c2 && !closed) {
+ *l1 = ren_off(s, c1);
+ *l2 = ren_off(s, c2);
+ return 0;
+ }
ord = malloc(n * sizeof(ord[0]));
for (i = 0; i < n; i++)
ord[i] = i;
- ren_reorder(s, ord);
- map = malloc(n * sizeof(map[0]));
- for (i = 0; i < n; i++)
- map[ord[i]] = i;
- if (uc_chr(s, n - 1)[0] == '\n')
- n--;
- o = ren_off(s, pos);
- cord = ord[o];
- oprev = cord > 0 ? map[cord - 1] : -1;
- onext = cord < n - 1 ? map[cord + 1] : -1;
- free(map);
+ dir_reorder(s, ord);
+
+ if (ren_cmp(s, c1, c2) > 0)
+ swap(&c1, &c2);
+ if (!closed)
+ c2 = ren_next(s, c2, -1);
+ beg = ren_off(s, c1);
+ end = ren_off(s, c2);
+ if (end < beg)
+ swap(&beg, &end);
+ o1 = ord[beg];
+ o2 = ord[end];
+ if (o2 < o1)
+ swap(&o1, &o2);
+ for (i = beg; i <= end; i++)
+ if (ord[i] < o1 || ord[i] > o2)
+ break;
+ *l1 = beg;
+ *l2 = i;
free(ord);
- if (oprev < 0 && onext < 0)
- return pre ? o : o + 1;
- if (pre) {
- if (oprev >= 0)
- return oprev < o ? o : o + 1;
- else
- return onext > o ? o : o + 1;
- } else {
- if (onext >= 0)
- return onext > o ? o + 1 : o;
- else
- return oprev < o ? o + 1 : o;
- }
return 0;
}
(DIR) diff --git a/reset.c b/reset.c
@@ -0,0 +1,100 @@
+#include <regex.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include "vi.h"
+
+struct reset {
+ regex_t regex; /* the combined regular expression */
+ int n; /* number of regular expressions in this set */
+ int *grp; /* the group assigned to each subgroup */
+ int *setgrpcnt; /* number of groups in each regular expression */
+ int grpcnt; /* group count */
+};
+
+static int re_groupcount(char *s)
+{
+ int n = 0;
+ while (*s) {
+ if (s[0] == '(')
+ n++;
+ if (s[0] == '\\' && s[1])
+ s++;
+ s++;
+ }
+ return n;
+}
+
+struct reset *reset_make(int n, char **re)
+{
+ struct reset *rs = malloc(sizeof(*rs));
+ struct sbuf *sb = sbuf_make();
+ int i;
+ memset(rs, 0, sizeof(*rs));
+ rs->grp = malloc((n + 1) * sizeof(rs->grp[0]));
+ rs->setgrpcnt = malloc((n + 1) * sizeof(rs->setgrpcnt[0]));
+ rs->grpcnt = 2;
+ rs->n = n;
+ sbuf_chr(sb, '(');
+ for (i = 0; i < n; i++) {
+ if (!re[i]) {
+ rs->grp[i] = -1;
+ rs->setgrpcnt[i] = 0;
+ continue;
+ }
+ if (sbuf_len(sb) > 1)
+ sbuf_chr(sb, '|');
+ sbuf_chr(sb, '(');
+ sbuf_str(sb, re[i]);
+ sbuf_chr(sb, ')');
+ rs->grp[i] = rs->grpcnt;
+ rs->setgrpcnt[i] = re_groupcount(re[i]);
+ rs->grpcnt += 1 + rs->setgrpcnt[i];
+ }
+ rs->grp[n] = rs->grpcnt;
+ sbuf_chr(sb, ')');
+ if (regcomp(&rs->regex, sbuf_buf(sb), REG_EXTENDED)) {
+ free(rs->grp);
+ free(rs->setgrpcnt);
+ free(rs);
+ sbuf_free(sb);
+ return NULL;
+ }
+ sbuf_free(sb);
+ return rs;
+}
+
+int reset_find(struct reset *rs, char *s, int n, int *grps, int flg)
+{
+ regmatch_t *subs;
+ int found, i, set = -1;
+ if (rs->grpcnt <= 2)
+ return -1;
+ subs = malloc(rs->grpcnt * sizeof(subs[0]));
+ found = !regexec(&rs->regex, s, rs->grpcnt, subs, 0);
+ for (i = 0; found && i < rs->n; i++)
+ if (rs->grp[i] >= 0 && subs[rs->grp[i]].rm_so >= 0)
+ set = i;
+ if (found && set >= 0) {
+ for (i = 0; i < n; i++) {
+ int grp = rs->grp[set] + i;
+ if (i < rs->setgrpcnt[set] + 1) {
+ grps[i * 2] = subs[grp].rm_so;
+ grps[i * 2 + 1] = subs[grp].rm_eo;
+ } else {
+ grps[i * 2 + 0] = -1;
+ grps[i * 2 + 1] = -1;
+ }
+ }
+ }
+ free(subs);
+ return set;
+}
+
+void reset_free(struct reset *rs)
+{
+ regfree(&rs->regex);
+ free(rs->setgrpcnt);
+ free(rs->grp);
+ free(rs);
+}
(DIR) diff --git a/vi.c b/vi.c
@@ -259,7 +259,7 @@ static int vi_motion(int *row, int *col, int pre1, int pre2)
int c = vi_read();
int pre = (pre1 ? pre1 : 1) * (pre2 ? pre2 : 1);
char *ln = lbuf_get(xb, *row);
- int dir = ren_dir(ln ? ln : "");
+ int dir = dir_context(ln ? ln : "");
int i;
switch (c) {
case ' ':
@@ -371,14 +371,37 @@ static char *lbuf_region(struct lbuf *lb, int r1, int l1, int r2, int l2)
return sbuf_done(sb);
}
+/* insertion offset before or after the given visual position */
+static int vi_insertionoffset(char *s, int c1, int before)
+{
+ int l1, l2, c2;
+ c2 = ren_next(s, c1, before ? -1 : +1);
+ l2 = c2 >= 0 ? ren_off(s, c2) : 0;
+ if (c1 == c2 || c2 < 0 || uc_chr(s, l2)[0] == '\n') {
+ c2 = ren_next(s, c1, before ? +1 : -1);
+ l1 = ren_off(s, c1);
+ l2 = c2 >= 0 ? ren_off(s, c2) : 0;
+ if (c1 == c2 || c2 < 0 || uc_chr(s, l2)[0] == '\n')
+ return before ? l1 : l1 + 1;
+ if (before)
+ return l1 < l2 ? l1 : l1 + 1;
+ else
+ return l2 < l1 ? l1 + 1 : l1;
+ }
+ ren_region(s, c1, c2, &l1, &l2, 0);
+ return l1 < l2 ? l2 : l1;
+}
+
static void vi_commandregion(int *r1, int *r2, int *c1, int *c2, int *l1, int *l2, int closed)
{
if (*r2 < *r1 || (*r2 == *r1 && ren_cmp(lbuf_get(xb, *r1), *c1, *c2) > 0)) {
swap(r1, r2);
swap(c1, c2);
}
- *l1 = lbuf_get(xb, *r1) ? ren_insertionoffset(lbuf_get(xb, *r1), *c1, 1) : 0;
- *l2 = lbuf_get(xb, *r2) ? ren_insertionoffset(lbuf_get(xb, *r2), *c2, !closed) : 0;
+ *l1 = lbuf_get(xb, *r1) ? vi_insertionoffset(lbuf_get(xb, *r1), *c1, 1) : 0;
+ *l2 = lbuf_get(xb, *r2) ? vi_insertionoffset(lbuf_get(xb, *r2), *c2, !closed) : 0;
+ if (*r1 == *r2 && lbuf_get(xb, *r1))
+ ren_region(lbuf_get(xb, *r1), *c1, *c2, l1, l2, closed);
if (*r1 == *r2 && *l2 < *l1)
swap(l1, l2);
}
@@ -492,9 +515,9 @@ static void vc_insert(int cmd)
if (cmd == 'o' || cmd == 'O')
ln = NULL;
if (cmd == 'i' || cmd == 'I')
- off = ln ? ren_insertionoffset(ln, xcol, 1) : 0;
+ off = ln ? vi_insertionoffset(ln, xcol, 1) : 0;
if (cmd == 'a' || cmd == 'A')
- off = ln ? ren_insertionoffset(ln, xcol, 0) : 0;
+ off = ln ? vi_insertionoffset(ln, xcol, 0) : 0;
pref = ln ? uc_sub(ln, 0, off) : uc_dup("");
post = ln ? uc_sub(ln, off, -1) : uc_dup("\n");
rep = led_input(pref, post, &row, &col);
@@ -523,7 +546,7 @@ static void vc_put(int cmd, int cnt)
if (!buf)
return;
ln = lnmode ? NULL : lbuf_get(xb, xrow);
- off = ln ? ren_insertionoffset(ln, xcol, cmd == 'P') : 0;
+ off = ln ? vi_insertionoffset(ln, xcol, cmd == 'P') : 0;
if (cmd == 'p' && !ln)
xrow++;
sb = sbuf_make();
@@ -762,6 +785,7 @@ int main(int argc, char *argv[])
if (argv[i][1] == 'v')
visual = 1;
}
+ dir_init();
if (i < argc) {
snprintf(ecmd, PATHLEN, "e %s", argv[i]);
ex_command(ecmd);
@@ -772,5 +796,6 @@ int main(int argc, char *argv[])
ex();
lbuf_free(xb);
reg_done();
+ dir_done();
return 0;
}
(DIR) diff --git a/vi.h b/vi.h
@@ -36,17 +36,27 @@ void sbuf_printf(struct sbuf *sbuf, char *s, ...);
int sbuf_len(struct sbuf *sb);
void sbuf_cut(struct sbuf *s, int len);
+/* regular expression sets */
+struct reset *reset_make(int n, char **pat);
+int reset_find(struct reset *re, char *s, int n, int *grps, int flg);
+void reset_free(struct reset *re);
+
/* rendering lines */
char *ren_all(char *s, int wid);
int ren_cursor(char *s, int pos);
int ren_next(char *s, int p, int dir);
int ren_eol(char *s, int dir);
-int ren_dir(char *s);
int ren_pos(char *s, int off);
int ren_off(char *s, int pos);
int ren_last(char *s);
int ren_cmp(char *s, int pos1, int pos2);
-int ren_insertionoffset(char *s, int pos, int pre);
+int ren_region(char *s, int c1, int c2, int *l1, int *l2, int closed);
+
+/* text direction */
+int dir_context(char *s);
+void dir_reorder(char *s, int *ord);
+void dir_init(void);
+void dir_done(void);
/* string registers */
char *reg_get(int c, int *lnmode);