support utf8 input and output - sob - simple output bar
(HTM) git clone git://git.codemadness.org/sob
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 517a1cac03d09213419cabd41dc481c08ad16c9d
(DIR) parent d52ae758f565c6a59c5ddb96995b2e364c507c4f
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Fri, 3 Oct 2014 23:49:15 +0000
support utf8 input and output
Diffstat:
M sob.c | 374 +++++++++++++++++++++----------
1 file changed, 255 insertions(+), 119 deletions(-)
---
(DIR) diff --git a/sob.c b/sob.c
@@ -22,9 +22,11 @@ char *argv0;
#define MIN(A, B) ((A) < (B) ? (A) : (B))
struct line {
- char line[BUFSIZ];
- size_t len;
- size_t pos;
+ char line[BUFSIZ]; /* static line buffer */
+ size_t bytesiz; /* length in bytes */
+ size_t utflen; /* length in characters */
+ size_t bytepos; /* index position (in bytes) */
+ size_t utfpos; /* pos in characters */
};
static void line_clear(void);
@@ -36,14 +38,17 @@ static void line_cursor_next(void);
static void line_cursor_prev(void);
static void line_cursor_wordprev(void);
static void line_cursor_wordnext(void);
-static void line_delcharback(void);
+static void line_delcharprev(void);
static void line_delcharnext(void);
static void line_deltoend(void);
-static void line_delwordback(void);
+static void line_delwordprev(void);
static void line_delwordcursor(void);
static void line_draw(void);
static void line_exit(void);
-static void line_getwordpos(size_t *, size_t *);
+static void line_getwordpos(size_t, size_t, size_t *, size_t *, size_t *,
+ size_t *);
+static void line_getwordposprev(size_t, size_t, size_t *, size_t *);
+static void line_getwordposnext(size_t, size_t, size_t *, size_t *);
static void line_inserttext(const char *);
static void line_newline(void);
static void line_out(void);
@@ -51,7 +56,7 @@ static void line_prompt(void);
static int line_promptlen(void);
static int line_pipeto(char **);
static void line_set(const char *);
-static void line_wordpipeto(char **);
+static int line_wordpipeto(char **);
static int pipe_readline(int, int, char *, char *, size_t);
static int pipe_cmd(char *[], char *, char *, size_t);
@@ -64,6 +69,12 @@ static void setup(void);
static void sighandler(int);
static void usage(void);
+static int nonspace(int c);
+static size_t utf8len(const char *);
+static size_t utfprevn(const char *, size_t , size_t);
+static size_t utfnextn(const char *, size_t , size_t);
+static void utfuntilchar(size_t *, size_t *, int (*)(int), int);
+
static struct termios ttystate, ttysave;
static struct line line;
@@ -74,35 +85,123 @@ static FILE * lineoutfp = NULL;
#include "config.h"
+static int
+nonspace(int c)
+{
+ return !isspace(c);
+}
+
+static size_t
+utf8len(const char *s)
+{
+ size_t i;
+
+ for(i = 0; *s; s++) {
+ if((*s & 0xc0) != 0x80)
+ i++;
+ }
+ return i;
+}
+
+/* returns amount of bytes needed to go to previous utf char
+ * p is index in bytes. */
+static size_t
+utfprevn(const char *s, size_t p, size_t n)
+{
+ size_t i;
+
+ for(i = 0; p > 0; p--) {
+ i++;
+ if((s[p - 1] & 0xc0) != 0x80 && !--n)
+ return i;
+ }
+ return 0;
+}
+
+/* returns amount of bytes needed to go to next utf char
+ * p is index in bytes. */
+static size_t
+utfnextn(const char *s, size_t p, size_t n)
+{
+ size_t i;
+
+ for(i = 0; s[p]; p++) {
+ i++;
+ if((s[p + 1] & 0xc0) != 0x80 && !--n)
+ return i;
+ }
+ return 0;
+}
+
+/* b is byte start pos, u is utf pos, f is filter function,
+ * dir is -1 or +1 for prev or next */
+static void
+utfuntilchar(size_t *b, size_t *u, int (*f)(int), int dir)
+{
+ size_t n;
+
+ if(dir > 0) {
+ while(*u < line.utflen && *b < line.bytesiz) {
+ if(f(line.line[*b]))
+ break;
+ if((n = utfnextn(line.line, *b, 1)) == 0)
+ break;
+ *b += n;
+ (*u)++;
+ }
+
+ } else {
+ while(*u > 0) {
+ if(f(line.line[*b - 1]))
+ break;
+ if((n = utfprevn(line.line, *b, 1)) == 0)
+ break;
+ *b -= n;
+ (*u)--;
+ }
+ }
+}
+
static void
line_inserttext(const char *s)
{
- size_t len;
+ size_t siz, len;
- len = strlen(s);
- if(line.pos + len + 1 > sizeof(line.line))
+ siz = strlen(s);
+ if(line.bytepos + siz + 1 > sizeof(line.line))
return;
+ len = utf8len(s);
/* append */
- if(line.pos == line.len) {
- memmove(&line.line[line.pos], s, len);
+ if(line.bytepos == line.bytesiz) {
+ memmove(&line.line[line.bytepos], s, siz);
} else {
/* insert */
- memmove(&line.line[line.pos + len], &line.line[line.pos],
- line.len - line.pos);
- memcpy(&line.line[line.pos], s, len);
+ memmove(&line.line[line.bytepos + siz], &line.line[line.bytepos],
+ line.bytesiz - line.bytepos);
+ memcpy(&line.line[line.bytepos], s, siz);
}
- line.len += len;
- line.pos += len;
- line.line[line.len + 1] = '\0';
+ line.bytepos += siz;
+ line.bytesiz += siz;
+ line.line[line.bytesiz + 1] = '\0';
+ line.utflen = utf8len(line.line);
+ line.utfpos += len;
line_draw();
}
static void
line_set(const char *s)
{
+ char *p;
+
strlcpy(line.line, s, sizeof(line.line));
- line.len = strlen(line.line);
- line.pos = line.len;
+ /* remove linefeed and return from string */
+ if((p = strpbrk(line.line, "\r\n")))
+ *p = '\0';
+
+ line.bytesiz = strlen(line.line);
+ line.bytepos = line.bytesiz;
+ line.utflen = utf8len(line.line);
+ line.utfpos = line.utflen;
}
/* like mksh, toggle counting of escape codes in prompt with "\x01" */
@@ -115,7 +214,7 @@ line_promptlen(void)
for(i = 0; prompt[i]; i++) {
if(prompt[i] == 1)
t = !t;
- else if(!t)
+ else if(!t && (prompt[i] & 0xc0) != 0x80)
n++;
}
return n;
@@ -135,16 +234,10 @@ line_prompt(void)
static void
line_draw(void)
{
- size_t n;
-
- /* clear */
- fprintf(outfp, "\x1b[2J\x1b[H");
-
+ fprintf(outfp, "\x1b[2J\x1b[H"); /* clear */
line_prompt();
- for(n = 0; line.line[n] && n < line.len; n++)
- fputc(line.line[n], outfp);
-
- line_cursor_move(line.pos);
+ fwrite(line.line, 1, line.bytesiz, outfp);
+ line_cursor_move(line.utfpos);
}
static void
@@ -169,143 +262,158 @@ line_cursor_move(size_t newpos)
}
fprintf(outfp, "\x1b[%lu;%luH", y + 1, x + 1);
fflush(outfp);
- line.pos = newpos;
}
static void
line_cursor_wordprev(void)
{
- size_t s, e;
-
- line_getwordpos(&s, &e);
- if(s == line.pos) {
- while(s > 0 && isspace(line.line[s - 1]))
- s--;
- }
- line_cursor_move(s);
+ line_getwordposprev(line.bytepos, line.utfpos, &line.bytepos, &line.utfpos);
+ line_cursor_move(line.utfpos);
}
static void
line_cursor_wordnext(void)
{
- size_t s, e;
-
- line_getwordpos(&s, &e);
- if(e == line.pos) {
- while(e < line.len && line.line[e] && isspace(line.line[e]))
- e++;
- }
- line_cursor_move(e);
+ line_getwordposnext(line.bytepos, line.utfpos, &line.bytepos, &line.utfpos);
+ line_cursor_move(line.utfpos);
}
static void
line_cursor_begin(void)
{
- line_cursor_move(0);
+ line.utfpos = 0;
+ line.bytepos = 0;
+ line_cursor_move(line.utfpos);
}
static void
line_cursor_prev(void)
{
- if(line.pos > 0)
- line_cursor_move(line.pos - 1);
+ if(line.utfpos <= 0)
+ return;
+
+ line.bytepos -= utfprevn(line.line, line.bytepos, 1);
+ line.utfpos--;
+ line_cursor_move(line.utfpos);
}
static void
line_cursor_next(void)
{
- if(line.pos < line.len)
- line_cursor_move(line.pos + 1);
+ if(line.utfpos >= line.utflen)
+ return;
+
+ line.bytepos += utfnextn(line.line, line.bytepos, 1);
+ line.utfpos++;
+ line_cursor_move(line.utfpos);
}
static void
line_cursor_end(void)
{
- line_cursor_move(line.len);
+ line.bytepos = line.bytesiz;
+ line.utfpos = line.utflen;
+ line_cursor_move(line.utfpos);
}
static void
line_clear(void)
{
line_cursor_begin();
- line.line[0] = '\0';
- line.len = 0;
+ line_set("");
line_draw();
}
static void
line_delcharnext(void)
{
- if(line.pos == line.len || line.len <= 0)
+ size_t siz;
+
+ if(line.utfpos == line.utflen || line.utflen <= 0)
return;
- memmove(&line.line[line.pos], &line.line[line.pos + 1],
- line.len - line.pos - 1);
- line.len--;
- line.line[line.len] = '\0';
+ siz = utfnextn(line.line, line.bytepos, 1);
+ memmove(&line.line[line.bytepos], &line.line[line.bytepos + siz],
+ line.bytesiz - line.bytepos - siz);
+
+ line.bytesiz -= siz;
+ line.line[line.bytesiz] = '\0';
line_draw();
}
static void
-line_delcharback(void)
+line_delcharprev(void)
{
- if(line.pos <= 0 || line.len <= 0)
+ size_t siz;
+
+ if(line.utfpos <= 0 || line.utflen <= 0)
return;
- memmove(&line.line[line.pos - 1], &line.line[line.pos],
- line.len - line.pos);
- line.len--;
- line.line[line.len] = '\0';
- line_cursor_prev();
+ siz = utfprevn(line.line, line.bytepos, 1);
+
+ memmove(&line.line[line.bytepos - siz], &line.line[line.bytepos],
+ line.bytesiz - line.bytepos);
+ line.utflen--;
+ line.utfpos--;
+ line.bytepos -= siz;
+ line.bytesiz -= siz;
+ line.line[line.bytesiz] = '\0';
line_draw();
}
static void
line_deltoend(void)
{
- line.line[line.pos] = '\0';
- line.len = line.pos;
- line_cursor_end();
+ line.line[line.bytepos] = '\0';
+ line.bytesiz = line.bytepos;
+ line.utflen = utf8len(line.line);
+ line.utfpos = line.utflen;
line_draw();
}
static void
line_delwordcursor(void)
{
- size_t len, s, e;
+ size_t len, siz, bs, be, us, ue;
+
+ line_getwordpos(line.bytepos, line.utfpos, &bs, &be, &us, &ue);
+
+ siz = be - bs;
+ len = ue - us;
- line_getwordpos(&s, &e);
+ memmove(&line.line[bs], &line.line[be], line.bytesiz - be);
+
+ line.bytesiz -= siz;
+ line.bytepos -= siz;
+ line.utfpos -= len;
+ line.utflen -= len;
+ line.line[line.bytesiz] = '\0';
- memmove(&line.line[s], &line.line[e], line.len - e);
- len = e - s;
- line.len -= len;
- line.pos = s;
- line.line[line.len] = '\0';
line_draw();
}
static void
-line_delwordback(void)
+line_delwordprev(void)
{
- size_t i, len;
+ size_t bs, us, siz, len;
- if(line.pos <= 0 || line.len <= 0)
+ if(line.utfpos <= 0 || line.utflen <= 0)
return;
- i = line.pos;
- while(i > 0 && isspace(line.line[i - 1]))
- i--;
- while(i > 0 && !isspace(line.line[i - 1]))
- i--;
-
- len = line.len - line.pos;
- if(len > 0)
- memmove(&line.line[i], &line.line[line.pos],
- line.len - line.pos);
- len = line.pos - i;
- line.pos = i;
- line.len -= len;
- line.line[line.len] = '\0';
+ line_getwordposprev(line.bytepos, line.utfpos, &bs, &us);
+
+ siz = line.bytepos - bs;
+ memmove(&line.line[bs], &line.line[line.bytepos],
+ line.bytesiz - line.bytepos);
+
+ len = line.utfpos - us;
+
+ line.bytesiz -= siz;
+ line.bytepos -= siz;
+ line.utfpos -= len;
+ line.utflen -= len;
+ line.line[line.bytesiz] = '\0';
+
line_draw();
}
@@ -319,38 +427,65 @@ line_newline(void)
static void
line_exit(void)
{
- line_out();
+ fprintf(outfp, "\n");
+ fflush(outfp);
isrunning = 0;
}
static void
-line_getwordpos(size_t *start, size_t *end)
+line_getwordpos(size_t b, size_t u, size_t *bs, size_t *be,
+ size_t *us, size_t *ue)
{
- size_t i;
+ size_t tb = b, tu = u;
- i = line.pos;
- while(i > 0 && !isspace(line.line[i - 1]))
- i--;
- if(start)
- *start = i;
- i = line.pos;
- while(line.line[i] && i < line.len && !isspace(line.line[i]))
- i++;
- if(end)
- *end = i;
+ utfuntilchar(&b, &u, isspace, -1);
+ if(bs)
+ *bs = b;
+ if(us)
+ *us = u;
+
+ /* seek from original specified position */
+ utfuntilchar(&tb, &tu, isspace, +1);
+ if(be)
+ *be = tb;
+ if(ue)
+ *ue = tu;
+}
+
+static void
+line_getwordposprev(size_t sb, size_t su, size_t *b, size_t *u)
+{
+ utfuntilchar(&sb, &su, nonspace, -1);
+ utfuntilchar(&sb, &su, isspace, -1);
+ if(b)
+ *b = sb;
+ if(u)
+ *u = su;
+}
+
+static void
+line_getwordposnext(size_t sb, size_t su, size_t *b, size_t *u)
+{
+ utfuntilchar(&sb, &su, nonspace, +1);
+ utfuntilchar(&sb, &su, isspace, +1);
+ if(b)
+ *b = sb;
+ if(u)
+ *u = su;
}
static void
line_copywordcursor(char *buf, size_t bufsiz)
{
- size_t s, e, len;
+ size_t bs, be, len;
+
+ line_getwordpos(line.bytepos, line.utfpos, &bs, &be, NULL, NULL);
+ len = be - bs;
- line_getwordpos(&s, &e);
- len = e - s;
/* truncate */
if(len + 1 > bufsiz)
len = bufsiz - 1;
- memcpy(buf, &line.line[s], len);
+ memcpy(buf, &line.line[bs], len);
buf[len + 1] = '\0';
}
@@ -375,7 +510,7 @@ pipe_readline(int fd_in, int fd_out, char *writestr, char *outbuf,
}
memset(&tv, 0, sizeof(tv));
tv.tv_sec = 0;
- tv.tv_usec = 200;
+ tv.tv_usec = 50000; /* 50 ms */
if((r = select(maxfd + 1, haswritten ? &fdr : NULL,
haswritten ? NULL : &fdw, NULL, &tv)) == -1)
@@ -468,21 +603,19 @@ static int
line_pipeto(char **cmd)
{
char buf[BUFSIZ];
- size_t len;
if(pipe_cmd(cmd, line.line, buf, sizeof(buf)) == -1)
return -1;
if(buf[0] == '\0')
return -1;
- len = strlcpy(line.line, buf, sizeof(line.line));
- line.len = len;
+ line_set(buf);
line_cursor_end();
line_draw();
return 0;
}
/* pipe word under cursor and replace it */
-static void
+static int
line_wordpipeto(char **cmd)
{
char wordbuf[BUFSIZ], outbuf[BUFSIZ];
@@ -493,13 +626,15 @@ line_wordpipeto(char **cmd)
if(pipe_cmd((char**)cmd, wordbuf, outbuf,
sizeof(outbuf)) == -1)
- return;
+ return -1;
if(outbuf[0] == '\0')
- return;
+ return -1;
line_delwordcursor();
line_inserttext(outbuf);
line_draw();
+
+ return 0;
}
static void
@@ -619,7 +754,7 @@ run(void)
memset(&tv, 0, sizeof(tv));
tv.tv_sec = 0;
- tv.tv_usec = 50000; /* 50 ms */
+ tv.tv_usec = 32000; /* 32 ms */
errno = 0;
if((r = select(STDIN_FILENO + 1, &fdr, NULL, NULL, &tv)) == -1) {
@@ -667,6 +802,7 @@ main(int argc, char **argv)
lineoutfp = stdout;
outfp = stderr;
+
setlocale(LC_ALL, "");
setup();
run();