libc/wchar: Remove state from mbtowc() - scc - simple c99 compiler
 (HTM) git clone git://git.simple-cc.org/scc
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Submodules
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 29f51dd9ea6407ff5d6386d7dc82cd499185533e
 (DIR) parent 115f7694ce1502f57bb72b9e34d8816c5224e843
 (HTM) Author: Roberto E. Vargas Caballero <k0ga@shike2.net>
       Date:   Thu, 27 Mar 2025 22:11:35 +0100
       
       libc/wchar: Remove state from mbtowc()
       
       As we only support UTF-8, it does not make sense to keep state in
       mbtowc(): since we cannot differentiate between an incorrect sequence
       and an incomplete sequence, there is no actual use case for that
       state. We still keep the static variable because we don't want to use
       the internal state of mbrtowc(), and as this function is not thread
       safe anyway, having that variable is not a problem. Its value will
       always be correct, except when mbrtowc() returns -2, in which case
       we have to clear it.
       
       The C99 specification does not specify any errno value when mbtowc()
       fails, but it allows a library function to change errno even when
       that is not directly specified in the standard, so it is not a
       problem to implement mbtowc() using mbrtowc(), which sets errno when
       it cannot translate a full multibyte sequence.
       
       Diffstat:
         M src/libc/stdlib/mblen.c             |       9 +--------
         M src/libc/stdlib/mbtowc.c            |       6 ++++--
         M tests/libc/execute/0039-mbrtowc.c   |      18 ++++++------------
         M tests/libc/execute/0041-mbrlen.c    |      10 +++-------
         M tests/libc/execute/mbtest.h         |      63 ++++++++++++++++---------------
       
       5 files changed, 46 insertions(+), 60 deletions(-)
       ---
 (DIR) diff --git a/src/libc/stdlib/mblen.c b/src/libc/stdlib/mblen.c
       @@ -1,16 +1,9 @@
        #include <stdlib.h>
       -#include <wchar.h>
        
        #undef mblen
        
        int
        mblen(const char *s, size_t n)
        {
       -        int ret;
       -        static mbstate_t st;
       -
       -        ret = mbrtowc(NULL, s, n, &st);
       -        if (ret < 0)
       -                ret = -1;
       -        return ret;
       +        return mbtowc(NULL, s, n);
        }
 (DIR) diff --git a/src/libc/stdlib/mbtowc.c b/src/libc/stdlib/mbtowc.c
       @@ -7,12 +7,14 @@
        int
        mbtowc(wchar_t *restrict pwc, const char *restrict s, size_t n)
        {
       -        static mbstate_t st;
                int ret;
       +        static mbstate_t st;
        
                ret = mbrtowc(pwc, s, n, &st);
       -        if (ret < 0)
       +        if (ret == -2) {
       +                memset(&st, 0, sizeof(st));
                        ret = -1;
       +        }
        
                return ret;
        }
 (DIR) diff --git a/tests/libc/execute/0039-mbrtowc.c b/tests/libc/execute/0039-mbrtowc.c
       @@ -31,9 +31,9 @@ tests_mbrtowc(void)
                        wc = -1;
                        errno = 0;
                        r = mbrtowc(tp->pwc, tp->s, tp->l, NULL);
       -                assert(tp->r == r);
       +                assert(tp->rmbr == r);
                        assert(tp->syserr == errno);
       -                if (tp->r != -1)
       +                if (tp->rmbr != -1)
                                assert(tp->wc == wc);
                }
        
       @@ -43,9 +43,9 @@ tests_mbrtowc(void)
                        wc = -1;
                        errno = 0;
                        r = mbrtowc(tp->pwc, tp->s, tp->l, &s);
       -                assert(tp->r == r);
       +                assert(tp->rmbr == r);
                        assert(tp->syserr == errno);
       -                if (tp->r != -1)
       +                if (tp->rmbr != -1)
                                assert(tp->wc == wc);
                        assert(mbsinit(&s) != 0 == tp->mbstate);
                }
       @@ -60,16 +60,10 @@ tests_mbtowc(void)
                puts("testing mbtowc");
                for (tp = tests; tp < &tests[NELEM(tests)]; ++tp) {
                        wc = -1;
       -                errno = 0;
                        r = mbtowc(tp->pwc, tp->s, tp->l);
       -                assert(tp->syserr == errno);
       -                if (tp->r >= 0) {
       -                        rt = tp->r;
       +                assert(tp->rmb == r);
       +                if (tp->rmb >= 0)
                                assert(tp->wc == wc);
       -                } else {
       -                        rt = -1;
       -                }
       -                assert(rt == r);
                }
        }
        
 (DIR) diff --git a/tests/libc/execute/0041-mbrlen.c b/tests/libc/execute/0041-mbrlen.c
       @@ -31,7 +31,7 @@ tests_mbrlen(void)
                        wc = -1;
                        errno = 0;
                        r = mbrlen(tp->s, tp->l, NULL);
       -                assert(tp->r == r);
       +                assert(tp->rmbr == r);
                        assert(tp->syserr == errno);
                }
        
       @@ -41,7 +41,7 @@ tests_mbrlen(void)
                        wc = -1;
                        errno = 0;
                        r = mbrlen(tp->s, tp->l, &s);
       -                assert(tp->r == r);
       +                assert(tp->rmbr == r);
                        assert(tp->syserr == errno);
                        assert(mbsinit(&s) != 0 == tp->mbstate);
                }
       @@ -55,12 +55,8 @@ tests_mblen(void)
        
                puts("testing mblen");
                for (tp = tests; tp < &tests[NELEM(tests)]; ++tp) {
       -                wc = -1;
       -                errno = 0;
                        r = mblen(tp->s, tp->l);
       -                assert(tp->syserr == errno);
       -                rt = (tp->r >= 0) ? tp->r : -1;
       -                assert(rt == r);
       +                assert(tp->rmb == r);
                }
        }
        
 (DIR) diff --git a/tests/libc/execute/mbtest.h b/tests/libc/execute/mbtest.h
       @@ -2,43 +2,44 @@ static wchar_t wc;
        static struct mbtest {
                char *s;
                int l;
       -        int r;
       +        int rmbr;
       +        int rmb;
                int mbstate;
                int syserr;
                wchar_t *pwc;
                wchar_t wc;
        } tests[] = {
       -        {"\0",                             2,  0, 1, 0,      &wc,  0},
       -        {"\x21",                           2,  1, 1, 0,      &wc,  0x21},
       -        {"\xc2\xa1",                       3,  2, 1, 0,      &wc,  0x00A1},
       -        {"\xc2\xa1",                       2,  2, 1, 0,      &wc,  0x00A1},
       -        {"\xe2\x80\x94",                   4,  3, 1, 0,      &wc,  0x2014},
       -        {"\xf0\x9f\x92\xa9",               5,  4, 1, 0,      &wc,  0x01F4A9},
       -        {"\xf0\x9f\x92\xa9",               5,  4, 1, 0,      NULL, -1},
       -        {"\xf0\x9f\x92\xa9",              -1,  4, 1, 0,      &wc,  0x01F4A9},
       +        {"\0",                             2,  0,  0, 1, 0,      &wc,  0},
       +        {"\x21",                           2,  1,  1, 1, 0,      &wc,  0x21},
       +        {"\xc2\xa1",                       3,  2,  2, 1, 0,      &wc,  0x00A1},
       +        {"\xc2\xa1",                       2,  2,  2, 1, 0,      &wc,  0x00A1},
       +        {"\xe2\x80\x94",                   4,  3,  3, 1, 0,      &wc,  0x2014},
       +        {"\xf0\x9f\x92\xa9",               5,  4,  4, 1, 0,      &wc,  0x01F4A9},
       +        {"\xf0\x9f\x92\xa9",               5,  4,  4, 1, 0,      NULL, -1},
       +        {"\xf0\x9f\x92\xa9",              -1,  4,  4, 1, 0,      &wc,  0x01F4A9},
        
       -        {NULL,                             4,  0, 1, 0,      NULL, -1},
       -        {"\xed\x9f\xbf",                   4,  3, 1, 0,      &wc,  0xd7ff},
       -        {"\xed\xa0\x80",                   4, -1, 1, EILSEQ, &wc,  -1},
       -        {"\xed\xb3\xbf",                   4, -1, 1, EILSEQ, &wc,  -1},
       -        {"\xed\xb4\x80",                   4,  3, 1, 0,      &wc,  0xdd00},
       +        {NULL,                             4,  0,  0, 1, 0,      NULL, -1},
       +        {"\xed\x9f\xbf",                   4,  3,  3, 1, 0,      &wc,  0xd7ff},
       +        {"\xed\xa0\x80",                   4, -1, -1, 1, EILSEQ, &wc,  -1},
       +        {"\xed\xb3\xbf",                   4, -1, -1, 1, EILSEQ, &wc,  -1},
       +        {"\xed\xb4\x80",                   4,  3,  3, 1, 0,      &wc,  0xdd00},
        
       -        {"\xf0\x9f\x92\xa9",               3, -2, 0, 0,      &wc, -1},
       -        {"\xa9",                           2,  1, 1, 0,      &wc, 0x01F4A9},
       -        {"\xf0\x9f\x92\xa9",               3, -2, 0, 0,      &wc, -1},
       -        {NULL,                             4, -1, 1, EILSEQ, &wc, -1},
       -        {"\xa9",                           2, -1, 1, EILSEQ, &wc, -1},
       -        {"\xf0\x9f\x92\xa9",               3, -2, 0, 0,      &wc, -1},
       -        {NULL,                             4, -1, 1, EILSEQ, &wc, -1},
       -        {"\x21",                           2,  1, 1, 0,      &wc,  0x21},
       -        {"\xf0\x9f\x92\xa9",               2, -2, 0, 0,      &wc, -1},
       -        {"\xf0\x9f\x92\xa9",               0, -2, 0, 0,      &wc, -1},
       -        {"\x92\xa9",                       2,  2, 1, 0,      &wc, 0x01F4A9},
       +        {"\xf0\x9f\x92\xa9",               3, -2, -1, 0, 0,      &wc, -1},
       +        {"\xa9",                           2,  1, -1, 1, 0,      &wc, 0x01F4A9},
       +        {"\xf0\x9f\x92\xa9",               3, -2, -1, 0, 0,      &wc, -1},
       +        {NULL,                             4, -1,  0, 1, EILSEQ, &wc, -1},
       +        {"\xa9",                           2, -1, -1, 1, EILSEQ, &wc, -1},
       +        {"\xf0\x9f\x92\xa9",               3, -2, -1, 0, 0,      &wc, -1},
       +        {NULL,                             4, -1,  0, 1, EILSEQ, &wc, -1},
       +        {"\x21",                           2,  1,  1, 1, 0,      &wc,  0x21},
       +        {"\xf0\x9f\x92\xa9",               2, -2, -1, 0, 0,      &wc, -1},
       +        {"\xf0\x9f\x92\xa9",               0, -2, -1, 0, 0,      &wc, -1},
       +        {"\x92\xa9",                       2,  2, -1, 1, 0,      &wc, 0x01F4A9},
        
       -        {"\x80",                           2, -1, 1, EILSEQ, &wc, -1},
       -        {"\xc0\x80",                       2, -1, 1, EILSEQ, &wc, -1},
       -        {"\xc0\x00",                       2, -1, 1, EILSEQ, &wc, -1},
       -        {"\xc1\x81",                       2, -1, 1, EILSEQ, &wc, -1},
       -        {"\xf8\x81\x82\x83\x84\x85",      -1, -1, 1, EILSEQ, &wc, -1},
       -        {"\xfe\x81\x82\x83\x84\x85\x86",   8, -1, 1, EILSEQ, &wc, -1},
       +        {"\x80",                           2, -1, -1, 1, EILSEQ, &wc, -1},
       +        {"\xc0\x80",                       2, -1, -1, 1, EILSEQ, &wc, -1},
       +        {"\xc0\x00",                       2, -1, -1, 1, EILSEQ, &wc, -1},
       +        {"\xc1\x81",                       2, -1, -1, 1, EILSEQ, &wc, -1},
       +        {"\xf8\x81\x82\x83\x84\x85",      -1, -1, -1, 1, EILSEQ, &wc, -1},
       +        {"\xfe\x81\x82\x83\x84\x85\x86",   8, -1, -1, 1, EILSEQ, &wc, -1},
        };