libc/wchar: Simplify mbsrtowcs() and mbstowcs() - scc - simple c99 compiler
 (HTM) git clone git://git.simple-cc.org/scc
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Submodules
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 68ad37c9bee5f12c0804f165a5176e4bbf4b4898
 (DIR) parent 29f51dd9ea6407ff5d6386d7dc82cd499185533e
 (HTM) Author: Roberto E. Vargas Caballero <k0ga@shike2.net>
       Date:   Fri, 28 Mar 2025 09:42:13 +0100
       
       libc/wchar: Simplify mbsrtowcs() and mbstowcs()
       
       We only support UTF-8, which is stateless, and since mbsrtowcs() cannot
       return -2 we know that the internal state of mbsrtowcs() will always be
       left in the initial state, so we can reuse it from mbstowcs().
       
       Also, mbsrtowcs() had a bug: it incremented the number of wide characters
       stored in the output buffer when mbrtowc() returned -2. This was not a big
       issue because we try to read MB_LEN_MAX bytes, and longer sequences will
       eventually return -1, in which case the value of n is not used.
       
       Diffstat:
         M src/libc/stdlib/mbstowcs.c          |      10 +++++++---
         M src/libc/wchar/mbsrtowcs.c          |      18 ++++++++++--------
       
       2 files changed, 17 insertions(+), 11 deletions(-)
       ---
 (DIR) diff --git a/src/libc/stdlib/mbstowcs.c b/src/libc/stdlib/mbstowcs.c
       @@ -4,10 +4,14 @@
        
        #undef mbstowcs
        
       +/*
       + * We can reuse the internal state of mbsrtowcs() because
       + * it loops when it gets a -2 from mbrtowc(), which makes
       + * it impossible to leave the function in a non-initial
       + * state (or with an error, which clears the state).
       + */
        size_t
        mbstowcs(wchar_t *restrict dest, const char *restrict src, size_t n)
        {
       -        static mbstate_t st;
       -
       -        return mbsrtowcs(dest, (void *) &src, n, &st);
       +        return mbsrtowcs(dest, (void *) &src, n, NULL);
        }
 (DIR) diff --git a/src/libc/wchar/mbsrtowcs.c b/src/libc/wchar/mbsrtowcs.c
       @@ -16,7 +16,8 @@ mbsrtowcs(wchar_t *restrict dest, const char **restrict src, size_t len,
                if (!ps)
                        ps = &st;
        
       -        for (n = 0; ; n++) {
       +        n = 0;
       +        for (;;) {
                        cnt = mbrtowc(&wc, s, MB_LEN_MAX, ps);
                        if (cnt == (size_t) -2) {
                                s += MB_LEN_MAX;
       @@ -28,19 +29,20 @@ mbsrtowcs(wchar_t *restrict dest, const char **restrict src, size_t len,
                        if (dest) {
                                if (n == len) {
                                        *src = s;
       -                                return n;
       +                                break;
                                }
                                *dest++ = wc;
                        }
       -                s += cnt;
        
       -                if (wc == 0)
       +                if (wc == 0) {
       +                        if (dest)
       +                                *src = NULL;
                                break;
       -        }
       +                }
       +
       +                s += cnt;
       +                n++;
        
       -        if (dest) {
       -                memset(ps, 0, sizeof(mbstate_t));
       -                *src = NULL;
                }
        
                return n;