Switched to using wide characters. - sam - An updated version of the sam text editor.
 (HTM) git clone git://vernunftzentrum.de/sam.git
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) LICENSE
       ---
 (DIR) commit dc40bc617f9c43c86abd68e2ee7b50b0425c1433
 (DIR) parent dde296185e1a9e5da394deae1e61aa883c1b5d84
 (HTM) Author: Rob King <jking@deadpixi.com>
       Date:   Thu, 29 Sep 2016 23:58:04 -0500
       
       Switched to using wide characters.
       
       Diffstat:
         include/libc.h                      |      41 ++++++++-----------------------
         include/u.h                         |       3 +++
         libXg/font.c                        |       4 ++--
         libXg/rune.c                        |     209 ++-----------------------------
         libframe/frbox.c                    |       7 +------
         libframe/frinsert.c                 |       2 +-
         libframe/frptofchar.c               |      10 ++--------
         libframe/frutil.c                   |       5 +----
         libframe/misc.c                     |       7 -------
         sam/address.c                       |       1 -
         sam/cmd.c                           |      11 ++++++-----
         sam/error.c                         |       3 ++-
         sam/io.c                            |      19 ++++++-------------
         sam/mesg.c                          |       2 +-
         sam/mesg.h                          |       6 +++---
         sam/moveto.c                        |      21 ++-------------------
         sam/parse.h                         |       4 +++-
         sam/rasp.c                          |       8 ++++----
         sam/sam.c                           |      10 +++++++++-
         sam/sam.h                           |       1 -
         sam/string.c                        |      59 +++++++++++--------------------
         sam/sys.c                           |      20 +++++++++++++-------
         sam/unix.c                          |       6 +++---
         sam/xec.c                           |      15 +--------------
         samterm/io.c                        |       2 --
         samterm/main.c                      |      22 +++-------------------
         samterm/menu.c                      |       2 +-
         samterm/mesg.c                      |       2 +-
       
       28 files changed, 105 insertions(+), 397 deletions(-)
       ---
 (DIR) diff --git a/include/libc.h b/include/libc.h
       @@ -3,51 +3,30 @@
            /* Plan 9 C library interface */
        
        
       +#include <u.h>
       +
       +#define utflen(s)        (mbstowcs(NULL, (s), 0))
       +#define fullrune(s, n)   (mbtowc(NULL, (s), (n))) /* >0 */
       +#define runetochar(s, r) (wctomb((s), (r)))
       +#define runelen(r)       (wctomb(NULL, (r)))
       +
        #define sprint              sprintf
        #define dup(a,b)            dup2(a,b)
        #define seek(a,b,c)         lseek(a,b,c)
        #define create(name, mode, perm)    creat(name, perm)
        #define exec(a,b)           execv(a,b)
       -#define USED(a)
       -#define SET(a)
       -
       -#define _exits(v)           if (v!=0) _exit(1); else _exit(0)
       -
       -enum
       -{
       -    OREAD   =   0,      /* open for read */
       -    OWRITE  =   1,      /* open for write */
       -    ORDWR   =   2,      /* open for read/write */
       -    ERRLEN  =   64      /* length of error message */
       -};
       -
       -enum
       -{
       -    UTFmax      = 3,        /* maximum bytes per rune */
       -    Runesync    = 0x80,     /* cannot represent part of a utf sequence (<) */
       -    Runeself    = 0x80,     /* rune and utf sequences are the same (<) */
       -    Runeerror   = 0x80      /* decoding error in utf */
       -};
       +
       +#define _exits(v)           _exit((v) ? 1 : 0)
        
        /*
         * new rune routines
         */
       -extern  int runetochar(char*, wchar_t*);
        extern  int chartorune(wchar_t*, char*);
       -extern  int runelen(int64_t);
       -extern  int fullrune(char*, int);
        
        /*
       - * rune routines from converted str routines
       - */
       -extern  int utflen(char*);      /* was countrune */
       -extern  char*   utfrune(char*, int64_t);
       -extern  char*   utfrrune(char*, int64_t);
       -extern  char*   utfutf(char*, char*);
       -/*
         *  Miscellaneous functions
         */
        extern  int notify (void(*)(void *, char *));
       -extern  int errstr(char *);
        extern  char*   getuser(void);
        extern  void    exits(char*);
       +
 (DIR) diff --git a/include/u.h b/include/u.h
       @@ -1,4 +1,6 @@
        #include <fcntl.h>
       +#include <limits.h>
       +#include <locale.h>
        #include <stdbool.h>
        #include <setjmp.h>
        #include <stdint.h>
       @@ -8,3 +10,4 @@
        #include <sys/types.h>
        #include <unistd.h>
        #include <wchar.h>
       +#include <wctype.h>
 (DIR) diff --git a/libXg/font.c b/libXg/font.c
       @@ -10,9 +10,9 @@ long
        charwidth(XftFont *f, wchar_t r)
        {
            
       -    char chars[UTFmax + 1] = {0};
       +    char chars[MB_LEN_MAX + 1] = {0};
        
       -    runetochar(chars, &r);
       +    runetochar(chars, r);
            return strwidth(f, chars);
        }
        
 (DIR) diff --git a/libXg/rune.c b/libXg/rune.c
       @@ -1,213 +1,18 @@
        /* Copyright (c) 1998 Lucent Technologies - All rights reserved. */
       -#include    <u.h>
       -#include    <libc.h>
       +#include <u.h>
       +#include <libc.h>
        #include <string.h>
        
       -enum
       -{
       -    Bit1    = 7,
       -    Bitx    = 6,
       -    Bit2    = 5,
       -    Bit3    = 4,
       -    Bit4    = 3,
       -
       -    T1  = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
       -    Tx  = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
       -    T2  = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
       -    T3  = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
       -    T4  = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
       -
       -    Rune1   = (1<<(Bit1+0*Bitx))-1,     /* 0000 0000 0111 1111 */
       -    Rune2   = (1<<(Bit2+1*Bitx))-1,     /* 0000 0111 1111 1111 */
       -    Rune3   = (1<<(Bit3+2*Bitx))-1,     /* 1111 1111 1111 1111 */
       -
       -    Maskx   = (1<<Bitx)-1,          /* 0011 1111 */
       -    Testx   = Maskx ^ 0xFF,         /* 1100 0000 */
       -
       -    Bad = Runeerror
       -};
       +#define UNICODE_REPLACEMENT_CHAR 0xfffd
        
        int
        chartorune(wchar_t *rune, char *str)
        {
       -    int c, c1, c2;
       -    int64_t l;
       -
       -    /*
       -     * one character sequence
       -     *  00000-0007F => T1
       -     */
       -    c = *(uint8_t*)str;
       -    if(c < Tx) {
       -        *rune = c;
       +    int r = mbtowc(rune, str, strlen(str));
       +    if (r < 0){
       +        *rune = UNICODE_REPLACEMENT_CHAR;
                return 1;
            }
       -
       -    /*
       -     * two character sequence
       -     *  0080-07FF => T2 Tx
       -     */
       -    c1 = *(uint8_t*)(str+1) ^ Tx;
       -    if(c1 & Testx)
       -        goto bad;
       -    if(c < T3) {
       -        if(c < T2)
       -            goto bad;
       -        l = ((c << Bitx) | c1) & Rune2;
       -        if(l <= Rune1)
       -            goto bad;
       -        *rune = l;
       -        return 2;
       -    }
       -
       -    /*
       -     * three character sequence
       -     *  0800-FFFF => T3 Tx Tx
       -     */
       -    c2 = *(uint8_t*)(str+2) ^ Tx;
       -    if(c2 & Testx)
       -        goto bad;
       -    if(c < T4) {
       -        l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
       -        if(l <= Rune2)
       -            goto bad;
       -        *rune = l;
       -        return 3;
       -    }
       -
       -    /*
       -     * bad decoding
       -     */
       -bad:
       -    *rune = Bad;
       -    return 1;
       +    return r;
        }
        
       -int
       -runetochar(char *str, wchar_t *rune)
       -{
       -    int64_t c;
       -
       -    /*
       -     * one character sequence
       -     *  00000-0007F => 00-7F
       -     */
       -    c = *rune;
       -    if(c <= Rune1) {
       -        str[0] = c;
       -        return 1;
       -    }
       -
       -    /*
       -     * two character sequence
       -     *  0080-07FF => T2 Tx
       -     */
       -    if(c <= Rune2) {
       -        str[0] = T2 | (c >> 1*Bitx);
       -        str[1] = Tx | (c & Maskx);
       -        return 2;
       -    }
       -
       -    /*
       -     * three character sequence
       -     *  0800-FFFF => T3 Tx Tx
       -     */
       -    str[0] = T3 |  (c >> 2*Bitx);
       -    str[1] = Tx | ((c >> 1*Bitx) & Maskx);
       -    str[2] = Tx |  (c & Maskx);
       -    return 3;
       -}
       -
       -int
       -runelen(int64_t c)
       -{
       -    wchar_t rune;
       -    char str[10];
       -
       -    rune = c;
       -    return runetochar(str, &rune);
       -}
       -
       -int
       -runenlen(wchar_t *r, int nrune)
       -{
       -    int nb, c;
       -
       -    nb = 0;
       -    while(nrune--) {
       -        c = *r++;
       -        if(c <= Rune1)
       -            nb++;
       -        else
       -        if(c <= Rune2)
       -            nb += 2;
       -        else
       -            nb += 3;
       -    }
       -    return nb;
       -}
       -
       -int
       -fullrune(char *str, int n)
       -{
       -    int c;
       -
       -    if(n > 0) {
       -        c = *(uint8_t*)str;
       -        if(c < Tx)
       -            return 1;
       -        if(n > 1)
       -            if(c < T3 || n > 2)
       -                return 1;
       -    }
       -    return 0;
       -}
       -
       -char*
       -utfrune(char *s, int64_t c)
       -{
       -    int64_t c1;
       -    wchar_t r;
       -    int n;
       -
       -    if(c < Runesync)        /* not part of utf sequence */
       -        return strchr(s, c);
       -
       -    for(;;) {
       -        c1 = *(uint8_t*)s;
       -        if(c1 < Runeself) { /* one byte rune */
       -            if(c1 == 0)
       -                return 0;
       -            if(c1 == c)
       -                return s;
       -            s++;
       -            continue;
       -        }
       -        n = chartorune(&r, s);
       -        if(r == c)
       -            return s;
       -        s += n;
       -    }
       -    return 0;
       -}
       -
       -int
       -utflen(char *s)
       -{
       -    int c;
       -    int64_t n;
       -    wchar_t rune;
       -
       -    n = 0;
       -    for(;;) {
       -        c = *(uint8_t*)s;
       -        if(c < Runeself) {
       -            if(c == 0)
       -                return n;
       -            s++;
       -        } else
       -            s += chartorune(&rune, s);
       -        n++;
       -    }
       -    return 0;
       -}
 (DIR) diff --git a/libframe/frbox.c b/libframe/frbox.c
       @@ -91,12 +91,7 @@ runeindex(uint8_t *p, int n)
            wchar_t rune;
        
            for(i=0; i<n; i++,p+=w)
       -        if(*p < Runeself)
       -            w = 1;
       -        else{
       -            w = chartorune(&rune, (char*)p);
       -            USED(rune);
       -        }
       +        w = chartorune(&rune, (char*)p);
            return p;
        }
        
 (DIR) diff --git a/libframe/frinsert.c b/libframe/frinsert.c
       @@ -52,7 +52,7 @@ bxscan(Frame *f, wchar_t *sp, wchar_t *ep, Point *ppt)
                        c = *sp;
                        if(c=='\t' || c=='\n')
                            break;
       -                rw = runetochar(s, sp);
       +                rw = runetochar(s, *sp);
                        if(s+rw >= tmp+TMPSIZE)
                            break;
                        w += charwidth(frame.font, c);
 (DIR) diff --git a/libframe/frptofchar.c b/libframe/frptofchar.c
       @@ -17,10 +17,7 @@ _frptofcharptb(Frame *f, uint64_t p, Point pt, int bn)
                if(p < (l=NRUNE(b))){
                    if(b->nrune > 0)
                        for(s=b->a.ptr; p>0; s+=w, p--){
       -                    if((r = *s) < Runeself)
       -                        w = 1;
       -                    else
       -                        w = chartorune(&r, (char*)s);
       +                    w = chartorune(&r, (char*)s);
                            pt.x += charwidth(f->font, r);
                            if(r==0 || pt.x>f->r.max.x)
                                berror("frptofchar");
       @@ -94,10 +91,7 @@ frcharofpt(Frame *f, Point pt)
                    else{
                        s = b->a.ptr;
                        for(;;){
       -                    if((r = *s) < Runeself)
       -                        w = 1;
       -                    else
       -                        w = chartorune(&r, (char*)s);
       +                    w = chartorune(&r, (char*)s);
                            if(r == 0)
                                berror("end of string in frcharofpt");
                            s += w;
 (DIR) diff --git a/libframe/frutil.c b/libframe/frutil.c
       @@ -18,10 +18,7 @@ _frcanfit(Frame *f, Point pt, Frbox *b)
                return b->nrune;
            for(nr=0,p=b->a.ptr; *p; p+=w,nr++){
                r = *p;
       -        if(r < Runeself)
       -            w = 1;
       -        else
       -            w = chartorune(&r, (char*)p);
       +        w = chartorune(&r, (char*)p);
                left -= charwidth(f->font, r);
                if(left < 0)
                    return nr;
 (DIR) diff --git a/libframe/misc.c b/libframe/misc.c
       @@ -9,13 +9,6 @@
        #endif
        #include <errno.h>
        
       -int errstr(char *buf)
       -{
       -
       -    strncpy(buf, strerror(errno), ERRLEN);
       -    return 1;
       -}
       -
        char*
        getuser(void)
        {
 (DIR) diff --git a/sam/address.c b/sam/address.c
       @@ -185,7 +185,6 @@ lineaddr(Posn l, Address addr, int sign)
            File *f = addr.f;
            Address a;
        
       -    SET(c);
            a.f = f;
            if(sign >= 0){
                if(l == 0){
 (DIR) diff --git a/sam/cmd.c b/sam/cmd.c
       @@ -2,8 +2,9 @@
        #include "sam.h"
        #include "parse.h"
        
       -static char linex[]="\n";
       -static char wordx[]=" \t\n";
       +static wchar_t linex[] = L"\n";
       +static wchar_t wordx[] = L" \t\n";
       +
        struct cmdtab cmdtab[]={
        /*  cmdc    text    regexp  addr    defcmd  defaddr count   token    fn */
            {'\n',   0,  0,  0,  0,  aDot,   0,  0,  nl_cmd},
       @@ -339,14 +340,14 @@ getrhs(String *s, int delim, int cmd)
        }
        
        String *
       -collecttoken(char *end)
       +collecttoken(wchar_t *end)
        {
            String *s = newstring();
            int c;
        
       -    while((c=nextc())==' ' || c=='\t')
       +    while ((c = nextc()) == ' ' || c == '\t')
                Straddc(s, getch()); /* blanks significant for getname() */
       -    while((c=getch())>0 && utfrune(end, c)==0)
       +    while ((c =getch()) > 0 && wcschr(end, (wchar_t)c)==0)
                Straddc(s, c);
            Straddc(s, 0);
            if(c != '\n')
 (DIR) diff --git a/sam/error.c b/sam/error.c
       @@ -128,5 +128,6 @@ termwrite(char *s)
                    Strinsert(&cmdstr, p, cmdstr.n);
                cmdptadv += p->n;
            }else
       -        Write(2, s, strlen(s));
       +        Write(STDERR_FILENO, s, strlen(s));
        }
       +
 (DIR) diff --git a/sam/io.c b/sam/io.c
       @@ -89,24 +89,17 @@ readio(File *f, int *nulls, int setdate)
                r = genbuf;
                s = buf;
                while(n > 0){
       -            if((*r = *(uint8_t*)s) < Runeself){
       -                if(*r)
       -                    r++;
       -                else
       -                    *nulls = TRUE;
       -                --n;
       -                s++;
       -                continue;
       -            }
                    if(fullrune(s, n)){
                        w = chartorune(r, s);
       -                if(*r)
       -                    r++;
       -                else
       -                    *nulls = TRUE;
                        n -= w;
                        s += w;
                        continue;
       +            } else{
       +                if (*r)
       +                    *r++ = *s++;
       +                else
       +                    *nulls = true;
       +                --n;
                    }
                    b = n;
                    memmove(buf, s, b);
 (DIR) diff --git a/sam/mesg.c b/sam/mesg.c
       @@ -396,7 +396,7 @@ inmesg(Tmesg type)
                setgenstr(f, p0, p1);
                for(l = 0; l<genstr.n; l++){
                    i = genstr.s[l];
       -            if(utfrune(".*+?(|)\\[]^$", i))
       +            if (wcschr(L".*+?(|)\\[]^$", (wchar_t)i))
                        Strinsert(&genstr, tmpcstr("\\"), l++);
                }
                Straddc(&genstr, '\0');
 (DIR) diff --git a/sam/mesg.h b/sam/mesg.h
       @@ -1,9 +1,9 @@
        /* Copyright (c) 1998 Lucent Technologies - All rights reserved. */
        #define VERSION 16092
        
       -#define TBLOCKSIZE 512        /* largest piece of text sent to terminal */
       -#define DATASIZE  (UTFmax*TBLOCKSIZE+30) /* ... including protocol header stuff */
       -#define SNARFSIZE 16384     /* maximum length of exchanged snarf buffer */
       +#define TBLOCKSIZE 512                           /* largest piece of text sent to terminal */
       +#define DATASIZE  (MB_LEN_MAX * TBLOCKSIZE + 30) /* ... including protocol header stuff */
       +#define SNARFSIZE 16384                          /* maximum length of exchanged snarf buffer */
        /*
         * Messages originating at the terminal
         */
 (DIR) diff --git a/sam/moveto.c b/sam/moveto.c
       @@ -63,23 +63,6 @@ lookorigin(File *f, Posn p0, Posn ls, int64_t rl)
        }
        
        int
       -alnum(int c)
       -{
       -    /*
       -     * Hard to get absolutely right.  Use what we know about ASCII
       -     * and assume anything above the Latin control characters is
       -     * potentially an alphanumeric.
       -     */
       -    if(c<=' ')
       -        return 0;
       -    if(0x7F<=c && c<=0xA0)
       -        return 0;
       -    if(utfrune("!\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~", c))
       -        return 0;
       -    return 1;
       -}
       -
       -int
        clickmatch(File *f, int cl, int cr, int dir)
        {
            int c;
       @@ -159,11 +142,11 @@ doubleclick(File *f, Posn p1)
            }
            /* try filling out word to right */
            Fgetcset(f, p1);
       -    while((c=Fgetc(f))!=-1 && alnum(c))
       +    while((c=Fgetc(f))!=-1 && iswalnum(c))
                f->dot.r.p2++;
            /* try filling out word to left */
            Fbgetcset(f, p1);
       -    while((c=Fbgetc(f))!=-1 && alnum(c))
       +    while((c=Fbgetc(f))!=-1 && iswalnum(c))
                f->dot.r.p1--;
        }
        
 (DIR) diff --git a/sam/parse.h b/sam/parse.h
       @@ -1,3 +1,5 @@
       +#include <u.h>
       +
        /* Copyright (c) 1998 Lucent Technologies - All rights reserved. */
        typedef struct Addr Addr;
        typedef struct Cmd Cmd;
       @@ -42,7 +44,7 @@ extern struct cmdtab{
            uint8_t   defcmd;     /* default command; 0==>none */
            uint8_t   defaddr;    /* default address */
            uint8_t   count;      /* takes a count e.g. s2/// */
       -    char    *token;     /* takes text terminated by one of these */
       +    wchar_t *token;     /* takes text terminated by one of these */
            int (*fn)(File*, Cmd*); /* function to call with parse tree */
        }cmdtab[];
        
 (DIR) diff --git a/sam/rasp.c b/sam/rasp.c
       @@ -19,7 +19,7 @@ toterminal(File *f, int toterm)
            union{
                union   Hdr g;
                wchar_t    buf[8+GROWDATASIZE];
       -    }hdr;
       +    }hdr = {0};
            Posn growpos, grown;
        
            growpos = 0;
       @@ -31,12 +31,12 @@ toterminal(File *f, int toterm)
                p0 = 0;
            grown = 0;
            noflush = 1;
       -    SET(growpos);
            while(Bread(t, (wchar_t*)&hdr, sizeof(hdr)/RUNESIZE, p0) > 0){
                switch(hdr.g.cs.c){
                default:
                    fprintf(stderr, "char %c %.2x\n", hdr.g.cs.c, hdr.g.cs.c);
                    panic("unknown in toterminal");
       +            break;
        
                case 'd':
                    if(grown){
       @@ -94,12 +94,12 @@ toterminal(File *f, int toterm)
                                grown = n;
                            }
                        }else{
       -                    wchar_t *rp;
       +                    wchar_t *rp = hdr.buf + sizeof(hdr.g.csl) / RUNESIZE;
                            if(grown){
                                outTsll(Hgrow, f->tag, growpos, grown);
                                grown = 0;
                            }
       -                    rp = hdr.buf+sizeof(hdr.g.csl)/RUNESIZE;
       +
                            rgrow(f->rasp, p1, n);
                            r = rdata(f->rasp, p1, n);
                            if(r.p1!=p1 || r.p2!=p1+n)
 (DIR) diff --git a/sam/sam.c b/sam/sam.c
       @@ -49,6 +49,7 @@ main(int argc, char *argv[])
        
            ap = &arg[argc];
            arg[0] = "samterm";
       +    setlocale(LC_ALL, "");
        
            while ((o = getopt(argc, argv, "efdRr:t:s:")) != -1){
                switch (o){
       @@ -198,22 +199,29 @@ hiccough(char *s)
        {
            if(rescuing)
                exits("rescue");
       +
            if(s)
                dprint("%s\n", s);
       +
            resetcmd();
            resetxec();
            resetsys();
       +
            if(io > 0)
                close(io);
       +
            if(undobuf->nrunes)
                Bdelete(undobuf, (Posn)0, undobuf->nrunes);
       +
            update();
       +
            if (curfile) {
                if (curfile->state==Unread)
                    curfile->state = Clean;
                else if (downloaded)
                    outTs(Hcurrent, curfile->tag);
            }
       +
            longjmp(mainloop, 1);
        }
        
       @@ -345,7 +353,7 @@ edit(File *f, int cmd)
                addr.r.p2 = f->nrunes;
            }else if(f->nrunes!=0 || (f->name.s[0] && Strcmp(&genstr, &f->name)!=0))
                empty = FALSE;
       -    if((io = open(genc, OREAD))<0) {
       +    if((io = open(genc, O_RDONLY))<0) {
                if (curfile && curfile->state == Unread)
                    curfile->state = Clean;
                error_s(Eopen, genc);
 (DIR) diff --git a/sam/sam.h b/sam/sam.h
       @@ -212,7 +212,6 @@ union Hdr
        #define Fgetc(f)  ((--(f)->ngetc<0)? Fgetcload(f, (f)->getcp) : (f)->getcbuf[(f)->getcp++, (f)->getci++])
        #define Fbgetc(f) (((f)->getci<=0)? Fbgetcload(f, (f)->getcp) : (f)->getcbuf[--(f)->getcp, --(f)->getci])
        
       -int alnum(int);
        void    Bclean(Buffer*);
        void    Bterm(Buffer*);
        void    Bdelete(Buffer*, Posn, Posn);
 (DIR) diff --git a/sam/string.c b/sam/string.c
       @@ -102,38 +102,24 @@ Strdelete(String *p, Posn p1, Posn p2)
        int
        Strcmp(String *a, String *b)
        {
       -    int i, c;
       -
       -    for(i=0; i<a->n && i<b->n; i++)
       -        if((c = (a->s[i] - b->s[i]))) /* assign = */
       -            return c;
       -    /* damn NULs confuse everything */
       -    i = a->n - b->n;
       -    if(i == 1){
       -        if(a->s[a->n-1] == 0)
       -            return 0;
       -    }else if(i == -1){
       -        if(b->s[b->n-1] == 0)
       -            return 0;
       -    }
       -    return i;
       +    return wcscmp(a->s, b->s);
        }
        
        char*
        Strtoc(String *s)
        {
       -    int i;
       -    char *c, *d;
       -    wchar_t *r;
       -    c = emalloc(s->n*UTFmax + 1);  /* worst case UTFmax bytes per rune, plus NUL */
       -    d = c;
       -    r = s->s;
       -    for(i=0; i<s->n; i++)
       -        d += runetochar(d, r++);
       -    if(d==c || d[-1]!=0)
       -        *d = 0;
       -    return c;
       +    size_t l = s->n * MB_LEN_MAX;
       +    char *c = emalloc(l + 1);
       +    wchar_t ws[s->n + 1];
       +
       +    memset(c, 0, l + 1);
       +    ws[s->n] = 0;
        
       +    swprintf(ws, s->n, L"%ls", s->s);
       +    if (wcstombs(c, ws, l) == (size_t)-1)
       +        panic("encoding 1");
       +
       +    return c;
        }
        
        /*
       @@ -142,7 +128,7 @@ Strtoc(String *s)
        String*
        tmprstr(wchar_t *r, int n)
        {
       -    static String p;
       +    static String p = {0};
        
            p.s = r;
            p.n = n;
       @@ -156,18 +142,13 @@ tmprstr(wchar_t *r, int n)
        String*
        tmpcstr(char *s)
        {
       -    String *p;
       -    wchar_t *r;
       -    int i, n;
       -
       -    n = utflen(s);  /* don't include NUL */
       -    p = emalloc(sizeof(String));
       -    r = emalloc(n*RUNESIZE);
       -    p->s = r;
       -    for(i=0; i<n; i++,r++)
       -        s += chartorune(r, s);
       -    p->n = n;
       -    p->size = n;
       +    String *p = emalloc(sizeof(String));
       +    p->n = utflen(s);
       +    p->size = p->n + 1;
       +    p->s = calloc(p->size, sizeof(wchar_t));
       +    if (mbstowcs(p->s, s, p->n) == (size_t)-1)
       +        panic("encoding 2");
       +
            return p;
        }
        
 (DIR) diff --git a/sam/sys.c b/sam/sys.c
       @@ -1,7 +1,13 @@
        /* Copyright (c) 1998 Lucent Technologies - All rights reserved. */
       +
       +#include <errno.h>
       +#include <stdbool.h>
       +
        #include "sam.h"
        
       -static int inerror=FALSE;
       +static bool inerror = false;
       +
       +#define ERRLEN 63
        
        /*
         * A reasonable interface to the system calls
       @@ -10,17 +16,17 @@ static int inerror=FALSE;
        void
        resetsys(void)
        {
       -    inerror = FALSE;
       +    inerror = false;
        }
        
        void
        syserror(char *a)
        {
       -    char buf[ERRLEN];
       +    char buf[ERRLEN + 1] = {0};
        
            if(!inerror){
       -        inerror=TRUE;
       -        errstr(buf);
       +        inerror = true;
       +        strncpy(buf, strerror(errno), ERRLEN);
                dprint("%s: ", a);
                error_s(Eio, buf);
            }
       @@ -29,12 +35,12 @@ syserror(char *a)
        int
        Read(int f, void *a, int n)
        {
       -    char buf[ERRLEN];
       +    char buf[ERRLEN + 1] = {0};
        
            if(read(f, (char *)a, n)!=n) {
                if (lastfile)
                    lastfile->state = Readerr;
       -        errstr(buf);
       +        strncpy(buf, strerror(errno), ERRLEN);
                if (downloaded)
                    fprintf(stderr, "read error: %s\n", buf);
                rescue();
 (DIR) diff --git a/sam/unix.c b/sam/unix.c
       @@ -31,11 +31,11 @@ print_ss(char *s, String *a, String *b)
        
            ap = emalloc(a->n+1);
            for (cp = ap, rp = a->s; *rp; rp++)
       -        cp += runetochar(cp, rp);
       +        cp += runetochar(cp, *rp);
            *cp = 0;
            bp = emalloc(b->n+1);
            for (cp = bp, rp = b->s; *rp; rp++)
       -        cp += runetochar(cp, rp);
       +        cp += runetochar(cp, *rp);
            *cp = 0;
            dprint("?warning: %s `%.*s' and `%.*s'\n", s, a->n, ap, b->n, bp);
            free(ap);
       @@ -50,7 +50,7 @@ print_s(char *s, String *a)
        
            ap = emalloc(a->n+1);
            for (cp = ap, rp = a->s; *rp; rp++)
       -        cp += runetochar(cp, rp);
       +        cp += runetochar(cp, *rp);
            *cp = 0;
            dprint("?warning: %s `%.*s'\n", s, a->n, ap);
            free(ap);
 (DIR) diff --git a/sam/xec.c b/sam/xec.c
       @@ -27,7 +27,7 @@ cmdexec(File *f, Cmd *cp)
            if(f && f->state==Unread)
                load(f);
            if(f==0 && (cp->addr==0 || cp->addr->type!='"') &&
       -        !utfrune("bBnqUXY!", cp->cmdc) &&
       +        !wcschr(L"bBnqUXY!", (wchar_t)cp->cmdc) &&
                cp->cmdc!=('c'|0x100) && !(cp->cmdc=='D' && cp->ctext))
                error(Enofile);
            i = lookup(cp->cmdc);
       @@ -78,7 +78,6 @@ a_cmd(File *f, Cmd *cp)
        int
        b_cmd(File *f, Cmd *cp)
        {
       -    USED(f);
            f = cp->cmdc=='b'? tofile(cp->ctext) : getfile(cp->ctext);
            if(f->state == Unread)
                load(f);
       @@ -98,7 +97,6 @@ c_cmd(File *f, Cmd *cp)
        int
        d_cmd(File *f, Cmd *cp)
        {
       -    USED(cp);
            Fdelete(f, addr.r.p1, addr.r.p2);
            f->ndot.r.p1 = f->ndot.r.p2 = addr.r.p1;
            return TRUE;
       @@ -149,7 +147,6 @@ i_cmd(File *f, Cmd *cp)
        int
        k_cmd(File *f, Cmd *cp)
        {
       -    USED(cp);
            f->mark = addr.r;
            return TRUE;
        }
       @@ -171,8 +168,6 @@ int
        n_cmd(File *f, Cmd *cp)
        {
            int i;
       -    USED(f);
       -    USED(cp);
            for(i = 0; i<file.nused; i++){
                if(file.filepptr[i] == cmd)
                    continue;
       @@ -186,15 +181,12 @@ n_cmd(File *f, Cmd *cp)
        int
        p_cmd(File *f, Cmd *cp)
        {
       -    USED(cp);
            return display(f);
        }
        
        int
        q_cmd(File *f, Cmd *cp)
        {
       -    USED(cp);
       -    USED(f);
            trytoquit();
            if(downloaded){
                outT0(Hexit);
       @@ -268,8 +260,6 @@ int
        u_cmd(File *f, Cmd *cp)
        {
            int n;
       -    USED(f);
       -    USED(cp);
            n = cp->num;
            while(n-- && undo())
                ;
       @@ -298,7 +288,6 @@ x_cmd(File *f, Cmd *cp)
        int
        X_cmd(File *f, Cmd *cp)
        {
       -    USED(f);
            filelooper(cp, cp->cmdc=='X');
            return TRUE;
        }
       @@ -325,7 +314,6 @@ eq_cmd(File *f, Cmd *cp)
                    break;
                }
            default:
       -        SET(charsonly);
                error(Enewline);
            }
            printposn(f, charsonly);
       @@ -352,7 +340,6 @@ nl_cmd(File *f, Cmd *cp)
        int
        cd_cmd(File *f, Cmd *cp)
        {
       -    USED(f);
            cd(cp->ctext);
            return TRUE;
        }
 (DIR) diff --git a/samterm/io.c b/samterm/io.c
       @@ -183,8 +183,6 @@ kbdchar(void)
        void
        ereshaped(Rectangle r)
        {
       -    USED(r);
       -
            reshaped = 1;
        }
        
 (DIR) diff --git a/samterm/main.c b/samterm/main.c
       @@ -42,6 +42,7 @@ main(int argc, char *argv[])
            char rcpath[PATH_MAX + 1] = {0};
            FILE *rc = NULL;
        
       +    setlocale(LC_ALL, "");
            installdefaultbindings();
            installdefaultchords();
        
       @@ -348,23 +349,6 @@ scrorigin(Flayer *l, int but, int64_t p0)
        }
        
        int
       -alnum(int c)
       -{
       -    /*
       -     * Hard to get absolutely right.  Use what we know about ASCII
       -     * and assume anything above the Latin control characters is
       -     * potentially an alphanumeric.
       -     */
       -    if(c<=' ')
       -        return 0;
       -    if(0x7F<=c && c<=0xA0)
       -        return 0;
       -    if(utfrune("!\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~", c))
       -        return 0;
       -    return 1;
       -}
       -
       -int
        raspc(Rasp *r, int64_t p)
        {
            uint64_t n;
       @@ -383,10 +367,10 @@ ctlw(Rasp *r, int64_t o, int64_t p)
                return o;
            if(raspc(r, p)=='\n')
                return p;
       -    for(; p>=o && !alnum(c=raspc(r, p)); --p)
       +    for(; p>=o && !iswalnum(c=raspc(r, p)); --p)
                if(c=='\n')
                    return p+1;
       -    for(; p>o && alnum(raspc(r, p-1)); --p)
       +    for(; p>o && iswalnum(raspc(r, p-1)); --p)
                ;
            return p>=o? p : o;
        }
 (DIR) diff --git a/samterm/menu.c b/samterm/menu.c
       @@ -272,7 +272,7 @@ haspat(void)
        }
        
        #define NBUF    64
       -static uint8_t buf[NBUF*UTFmax]={' ', ' ', ' ', ' '};
       +static uint8_t buf[NBUF * MB_LEN_MAX] = {' ', ' ', ' ', ' '};
        
        char *
        paren(char *s)
 (DIR) diff --git a/samterm/mesg.c b/samterm/mesg.c
       @@ -436,7 +436,7 @@ outTslS(Tmesg type, int s1, int64_t l1, wchar_t *s)
            outlong(l1);
            c = buf;
            while(*s)
       -        c += runetochar(c, s++);
       +        c += runetochar(c, *s++);
            *c++ = 0;
            outcopy(c-buf, (uint8_t *)buf);
            outsend();