support utf8 input and output - sob - simple output bar
 (HTM) git clone git://git.codemadness.org/sob
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 517a1cac03d09213419cabd41dc481c08ad16c9d
 (DIR) parent d52ae758f565c6a59c5ddb96995b2e364c507c4f
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Fri,  3 Oct 2014 23:49:15 +0000
       
       support utf8 input and output
       
       Diffstat:
         M sob.c                               |     374 +++++++++++++++++++++----------
       
       1 file changed, 255 insertions(+), 119 deletions(-)
       ---
 (DIR) diff --git a/sob.c b/sob.c
       @@ -22,9 +22,11 @@ char *argv0;
        #define MIN(A, B) ((A) < (B) ? (A) : (B))
        
        struct line {
       -        char line[BUFSIZ];
       -        size_t len;
       -        size_t pos;
       +        char line[BUFSIZ]; /* static line buffer */
       +        size_t bytesiz;    /* length in bytes */
       +        size_t utflen;     /* length in characters */
       +        size_t bytepos;    /* index position (in bytes) */
       +        size_t utfpos;     /* pos in characters */
        };
        
        static void   line_clear(void);
       @@ -36,14 +38,17 @@ static void   line_cursor_next(void);
        static void   line_cursor_prev(void);
        static void   line_cursor_wordprev(void);
        static void   line_cursor_wordnext(void);
       -static void   line_delcharback(void);
       +static void   line_delcharprev(void);
        static void   line_delcharnext(void);
        static void   line_deltoend(void);
       -static void   line_delwordback(void);
       +static void   line_delwordprev(void);
        static void   line_delwordcursor(void);
        static void   line_draw(void);
        static void   line_exit(void);
       -static void   line_getwordpos(size_t *, size_t *);
       +static void   line_getwordpos(size_t, size_t, size_t *, size_t *, size_t *,
       +                              size_t *);
       +static void   line_getwordposprev(size_t, size_t, size_t *, size_t *);
       +static void   line_getwordposnext(size_t, size_t, size_t *, size_t *);
        static void   line_inserttext(const char *);
        static void   line_newline(void);
        static void   line_out(void);
       @@ -51,7 +56,7 @@ static void   line_prompt(void);
        static int    line_promptlen(void);
        static int    line_pipeto(char **);
        static void   line_set(const char *);
       -static void   line_wordpipeto(char **);
       +static int    line_wordpipeto(char **);
        static int    pipe_readline(int, int, char *, char *, size_t);
        static int    pipe_cmd(char *[], char *, char *, size_t);
        
       @@ -64,6 +69,12 @@ static void   setup(void);
        static void   sighandler(int);
        static void   usage(void);
        
       +static int    nonspace(int c);
       +static size_t utf8len(const char *);
       +static size_t utfprevn(const char *, size_t , size_t);
       +static size_t utfnextn(const char *, size_t , size_t);
       +static void   utfuntilchar(size_t *, size_t *, int (*)(int), int);
       +
        static struct termios ttystate, ttysave;
        
        static struct line line;
       @@ -74,35 +85,123 @@ static FILE * lineoutfp = NULL;
        
        #include "config.h"
        
       +static int
       +nonspace(int c)
       +{
       +        return !isspace(c);
       +}
       +
       +static size_t
       +utf8len(const char *s)
       +{
       +        size_t i;
       +
       +        for(i = 0; *s; s++) {
       +                if((*s & 0xc0) != 0x80)
       +                        i++;
       +        }
       +        return i;
       +}
       +
       +/* returns amount of bytes needed to go to previous utf char
       + * p is index in bytes. */
       +static size_t
       +utfprevn(const char *s, size_t p, size_t n)
       +{
       +        size_t i;
       +
       +        for(i = 0; p > 0; p--) {
       +                i++;
       +                if((s[p - 1] & 0xc0) != 0x80 && !--n)
       +                        return i;
       +        }
       +        return 0;
       +}
       +
       +/* returns amount of bytes needed to go to next utf char
       + * p is index in bytes. */
       +static size_t
       +utfnextn(const char *s, size_t p, size_t n)
       +{
       +        size_t i;
       +
       +        for(i = 0; s[p]; p++) {
       +                i++;
       +                if((s[p + 1] & 0xc0) != 0x80 && !--n)
       +                        return i;
       +        }
       +        return 0;
       +}
       +
       +/* b is byte start pos, u is utf pos, f is filter function,
       + * dir is -1 or +1 for prev or next */
       +static void
       +utfuntilchar(size_t *b, size_t *u, int (*f)(int), int dir)
       +{
       +        size_t n;
       +
       +        if(dir > 0) {
       +                while(*u < line.utflen && *b < line.bytesiz) {
       +                        if(f(line.line[*b]))
       +                                break;
       +                        if((n = utfnextn(line.line, *b, 1)) == 0)
       +                                break;
       +                        *b += n;
       +                        (*u)++;
       +                }
       +
       +        } else {
       +                while(*u > 0) {
       +                        if(f(line.line[*b - 1]))
       +                                break;
       +                        if((n = utfprevn(line.line, *b, 1)) == 0)
       +                                break;
       +                        *b -= n;
       +                        (*u)--;
       +                }
       +        }
       +}
       +
        static void
        line_inserttext(const char *s)
        {
       -        size_t len;
       +        size_t siz, len;
        
       -        len = strlen(s);
       -        if(line.pos + len + 1 > sizeof(line.line))
       +        siz = strlen(s);
       +        if(line.bytepos + siz + 1 > sizeof(line.line))
                        return;
       +        len = utf8len(s);
                /* append */
       -        if(line.pos == line.len) {
       -                memmove(&line.line[line.pos], s, len);
       +        if(line.bytepos == line.bytesiz) {
       +                memmove(&line.line[line.bytepos], s, siz);
                } else {
                        /* insert */
       -                memmove(&line.line[line.pos + len], &line.line[line.pos],
       -                        line.len - line.pos);
       -                memcpy(&line.line[line.pos], s, len);
       +                memmove(&line.line[line.bytepos + siz], &line.line[line.bytepos],
       +                        line.bytesiz - line.bytepos);
       +                memcpy(&line.line[line.bytepos], s, siz);
                }
       -        line.len += len;
       -        line.pos += len;
       -        line.line[line.len + 1] = '\0';
       +        line.bytepos += siz;
       +        line.bytesiz += siz;
       +        line.line[line.bytesiz + 1] = '\0';
       +        line.utflen = utf8len(line.line);
       +        line.utfpos += len;
                line_draw();
        }
        
        static void
        line_set(const char *s)
        {
       +        char *p;
       +
                strlcpy(line.line, s, sizeof(line.line));
       -        line.len = strlen(line.line);
       -        line.pos = line.len;
       +        /* remove linefeed and return from string */
       +        if((p = strpbrk(line.line, "\r\n")))
       +                *p = '\0';
       +
       +        line.bytesiz = strlen(line.line);
       +        line.bytepos = line.bytesiz;
       +        line.utflen = utf8len(line.line);
       +        line.utfpos = line.utflen;
        }
        
        /* like mksh, toggle counting of escape codes in prompt with "\x01" */
       @@ -115,7 +214,7 @@ line_promptlen(void)
                for(i = 0; prompt[i]; i++) {
                        if(prompt[i] == 1)
                                t = !t;
       -                else if(!t)
       +                else if(!t && (prompt[i] & 0xc0) != 0x80)
                                n++;
                }
                return n;
       @@ -135,16 +234,10 @@ line_prompt(void)
        static void
        line_draw(void)
        {
       -        size_t n;
       -
       -        /* clear */
       -        fprintf(outfp, "\x1b[2J\x1b[H");
       -
       +        fprintf(outfp, "\x1b[2J\x1b[H"); /* clear */
                line_prompt();
       -        for(n = 0; line.line[n] && n < line.len; n++)
       -                fputc(line.line[n], outfp);
       -
       -        line_cursor_move(line.pos);
       +        fwrite(line.line, 1, line.bytesiz, outfp);
       +        line_cursor_move(line.utfpos);
        }
        
        static void
       @@ -169,143 +262,158 @@ line_cursor_move(size_t newpos)
                }
                fprintf(outfp, "\x1b[%lu;%luH", y + 1, x + 1);
                fflush(outfp);
       -        line.pos = newpos;
        }
        
        static void
        line_cursor_wordprev(void)
        {
       -        size_t s, e;
       -
       -        line_getwordpos(&s, &e);
       -        if(s == line.pos) {
       -                while(s > 0 && isspace(line.line[s - 1]))
       -                        s--;
       -        }
       -        line_cursor_move(s);
       +        line_getwordposprev(line.bytepos, line.utfpos, &line.bytepos, &line.utfpos);
       +        line_cursor_move(line.utfpos);
        }
        
        static void
        line_cursor_wordnext(void)
        {
       -        size_t s, e;
       -
       -        line_getwordpos(&s, &e);
       -        if(e == line.pos) {
       -                while(e < line.len && line.line[e] && isspace(line.line[e]))
       -                        e++;
       -        }
       -        line_cursor_move(e);
       +        line_getwordposnext(line.bytepos, line.utfpos, &line.bytepos, &line.utfpos);
       +        line_cursor_move(line.utfpos);
        }
        
        static void
        line_cursor_begin(void)
        {
       -        line_cursor_move(0);
       +        line.utfpos = 0;
       +        line.bytepos = 0;
       +        line_cursor_move(line.utfpos);
        }
        
        static void
        line_cursor_prev(void)
        {
       -        if(line.pos > 0)
       -                line_cursor_move(line.pos - 1);
       +        if(line.utfpos <= 0)
       +                return;
       +
       +        line.bytepos -= utfprevn(line.line, line.bytepos, 1);
       +        line.utfpos--;
       +        line_cursor_move(line.utfpos);
        }
        
        static void
        line_cursor_next(void)
        {
       -        if(line.pos < line.len)
       -                line_cursor_move(line.pos + 1);
       +        if(line.utfpos >= line.utflen)
       +                return;
       +
       +        line.bytepos += utfnextn(line.line, line.bytepos, 1);
       +        line.utfpos++;
       +        line_cursor_move(line.utfpos);
        }
        
        static void
        line_cursor_end(void)
        {
       -        line_cursor_move(line.len);
       +        line.bytepos = line.bytesiz;
       +        line.utfpos = line.utflen;
       +        line_cursor_move(line.utfpos);
        }
        
        static void
        line_clear(void)
        {
                line_cursor_begin();
       -        line.line[0] = '\0';
       -        line.len = 0;
       +        line_set("");
                line_draw();
        }
        
        static void
        line_delcharnext(void)
        {
       -        if(line.pos == line.len || line.len <= 0)
       +        size_t siz;
       +
       +        if(line.utfpos == line.utflen || line.utflen <= 0)
                        return;
        
       -        memmove(&line.line[line.pos], &line.line[line.pos + 1],
       -                line.len - line.pos - 1);
       -        line.len--;
       -        line.line[line.len] = '\0';
       +        siz = utfnextn(line.line, line.bytepos, 1);
       +        memmove(&line.line[line.bytepos], &line.line[line.bytepos + siz],
       +                line.bytesiz - line.bytepos - siz);
       +
       +        line.bytesiz -= siz;
       +        line.line[line.bytesiz] = '\0';
                line_draw();
        }
        
        static void
       -line_delcharback(void)
       +line_delcharprev(void)
        {
       -        if(line.pos <= 0 || line.len <= 0)
       +        size_t siz;
       +
       +        if(line.utfpos <= 0 || line.utflen <= 0)
                        return;
        
       -        memmove(&line.line[line.pos - 1], &line.line[line.pos],
       -                line.len - line.pos);
       -        line.len--;
       -        line.line[line.len] = '\0';
       -        line_cursor_prev();
       +        siz = utfprevn(line.line, line.bytepos, 1);
       +
       +        memmove(&line.line[line.bytepos - siz], &line.line[line.bytepos],
       +                line.bytesiz - line.bytepos);
       +        line.utflen--;
       +        line.utfpos--;
       +        line.bytepos -= siz;
       +        line.bytesiz -= siz;
       +        line.line[line.bytesiz] = '\0';
                line_draw();
        }
        
        static void
        line_deltoend(void)
        {
       -        line.line[line.pos] = '\0';
       -        line.len = line.pos;
       -        line_cursor_end();
       +        line.line[line.bytepos] = '\0';
       +        line.bytesiz = line.bytepos;
       +        line.utflen = utf8len(line.line);
       +        line.utfpos = line.utflen;
                line_draw();
        }
        
        static void
        line_delwordcursor(void)
        {
       -        size_t len, s, e;
       +        size_t len, siz, bs, be, us, ue;
       +
       +        line_getwordpos(line.bytepos, line.utfpos, &bs, &be, &us, &ue);
       +
       +        siz = be - bs;
       +        len = ue - us;
        
       -        line_getwordpos(&s, &e);
       +        memmove(&line.line[bs], &line.line[be], line.bytesiz - be);
       +
       +        line.bytesiz -= siz;
       +        line.bytepos -= siz;
       +        line.utfpos -= len;
       +        line.utflen -= len;
       +        line.line[line.bytesiz] = '\0';
        
       -        memmove(&line.line[s], &line.line[e], line.len - e);
       -        len = e - s;
       -        line.len -= len;
       -        line.pos = s;
       -        line.line[line.len] = '\0';
                line_draw();
        }
        
        static void
       -line_delwordback(void)
       +line_delwordprev(void)
        {
       -        size_t i, len;
       +        size_t bs, us, siz, len;
        
       -        if(line.pos <= 0 || line.len <= 0)
       +        if(line.utfpos <= 0 || line.utflen <= 0)
                        return;
        
       -        i = line.pos;
       -        while(i > 0 && isspace(line.line[i - 1]))
       -                i--;
       -        while(i > 0 && !isspace(line.line[i - 1]))
       -                i--;
       -
       -        len = line.len - line.pos;
       -        if(len > 0)
       -                memmove(&line.line[i], &line.line[line.pos],
       -                        line.len - line.pos);
       -        len = line.pos - i;
       -        line.pos = i;
       -        line.len -= len;
       -        line.line[line.len] = '\0';
       +        line_getwordposprev(line.bytepos, line.utfpos, &bs, &us);
       +
       +        siz = line.bytepos - bs;
       +        memmove(&line.line[bs], &line.line[line.bytepos],
       +                line.bytesiz - line.bytepos);
       +
       +        len = line.utfpos - us;
       +
       +        line.bytesiz -= siz;
       +        line.bytepos -= siz;
       +        line.utfpos -= len;
       +        line.utflen -= len;
       +        line.line[line.bytesiz] = '\0';
       +
                line_draw();
        }
        
       @@ -319,38 +427,65 @@ line_newline(void)
        static void
        line_exit(void)
        {
       -        line_out();
       +        fprintf(outfp, "\n");
       +        fflush(outfp);
                isrunning = 0;
        }
        
        static void
       -line_getwordpos(size_t *start, size_t *end)
       +line_getwordpos(size_t b, size_t u, size_t *bs, size_t *be,
       +        size_t *us, size_t *ue)
        {
       -        size_t i;
       +        size_t tb = b, tu = u;
        
       -        i = line.pos;
       -        while(i > 0 && !isspace(line.line[i - 1]))
       -                i--;
       -        if(start)
       -                *start = i;
       -        i = line.pos;
       -        while(line.line[i] && i < line.len && !isspace(line.line[i]))
       -                i++;
       -        if(end)
       -                *end = i;
       +        utfuntilchar(&b, &u, isspace, -1);
       +        if(bs)
       +                *bs = b;
       +        if(us)
       +                *us = u;
       +
       +        /* seek from original specified position */
       +        utfuntilchar(&tb, &tu, isspace, +1);
       +        if(be)
       +                *be = tb;
       +        if(ue)
       +                *ue = tu;
       +}
       +
       +static void
       +line_getwordposprev(size_t sb, size_t su, size_t *b, size_t *u)
       +{
       +        utfuntilchar(&sb, &su, nonspace, -1);
       +        utfuntilchar(&sb, &su, isspace, -1);
       +        if(b)
       +                *b = sb;
       +        if(u)
       +                *u = su;
       +}
       +
       +static void
       +line_getwordposnext(size_t sb, size_t su, size_t *b, size_t *u)
       +{
       +        utfuntilchar(&sb, &su, nonspace, +1);
       +        utfuntilchar(&sb, &su, isspace, +1);
       +        if(b)
       +                *b = sb;
       +        if(u)
       +                *u = su;
        }
        
        static void
        line_copywordcursor(char *buf, size_t bufsiz)
        {
       -        size_t s, e, len;
       +        size_t bs, be, len;
       +
       +        line_getwordpos(line.bytepos, line.utfpos, &bs, &be, NULL, NULL);
       +        len = be - bs;
        
       -        line_getwordpos(&s, &e);
       -        len = e - s;
                /* truncate */
                if(len + 1 > bufsiz)
                        len = bufsiz - 1;
       -        memcpy(buf, &line.line[s], len);
       +        memcpy(buf, &line.line[bs], len);
                buf[len + 1] = '\0';
        }
        
       @@ -375,7 +510,7 @@ pipe_readline(int fd_in, int fd_out, char *writestr, char *outbuf,
                        }
                        memset(&tv, 0, sizeof(tv));
                        tv.tv_sec = 0;
       -                tv.tv_usec = 200;
       +                tv.tv_usec = 50000; /* 50 ms */
        
                        if((r = select(maxfd + 1, haswritten ? &fdr : NULL,
                                           haswritten ? NULL : &fdw, NULL, &tv)) == -1)
       @@ -468,21 +603,19 @@ static int
        line_pipeto(char **cmd)
        {
                char buf[BUFSIZ];
       -        size_t len;
        
                if(pipe_cmd(cmd, line.line, buf, sizeof(buf)) == -1)
                        return -1;
                if(buf[0] == '\0')
                        return -1;
       -        len = strlcpy(line.line, buf, sizeof(line.line));
       -        line.len = len;
       +        line_set(buf);
                line_cursor_end();
                line_draw();
                return 0;
        }
        
        /* pipe word under cursor and replace it */
       -static void
       +static int
        line_wordpipeto(char **cmd)
        {
                char wordbuf[BUFSIZ], outbuf[BUFSIZ];
       @@ -493,13 +626,15 @@ line_wordpipeto(char **cmd)
        
                if(pipe_cmd((char**)cmd, wordbuf, outbuf,
                           sizeof(outbuf)) == -1)
       -                return;
       +                return -1;
                if(outbuf[0] == '\0')
       -                return;
       +                return -1;
        
                line_delwordcursor();
                line_inserttext(outbuf);
                line_draw();
       +
       +        return 0;
        }
        
        static void
       @@ -619,7 +754,7 @@ run(void)
        
                        memset(&tv, 0, sizeof(tv));
                        tv.tv_sec = 0;
       -                tv.tv_usec = 50000; /* 50 ms */
       +                tv.tv_usec = 32000; /* 32 ms */
        
                        errno = 0;
                        if((r = select(STDIN_FILENO + 1, &fdr, NULL, NULL, &tv)) == -1) {
       @@ -667,6 +802,7 @@ main(int argc, char **argv)
        
                lineoutfp = stdout;
                outfp = stderr;
       +
                setlocale(LC_ALL, "");
                setup();
                run();