Simplify multibyte decoding. - sam - An updated version of the sam text editor.
(HTM) git clone git://vernunftzentrum.de/sam.git
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) LICENSE
---
(DIR) commit c07917509ac5d439b802f913dd407b8c5f08fe44
(DIR) parent 865ad2eb92b1e50b67648a99d7800a3d28ba8efc
(HTM) Author: Rob King <jking@deadpixi.com>
Date: Wed, 8 Feb 2017 10:40:07 -0600
Simplify multibyte decoding.
Diffstat:
sam/io.c | 100 +++++++++----------------------
1 file changed, 27 insertions(+), 73 deletions(-)
---
(DIR) diff --git a/sam/io.c b/sam/io.c
@@ -78,97 +78,51 @@ writef(File *f)
}
}
-static wchar_t
-finishpartialchar(File *f, const char *s, size_t n, size_t *p)
-{
- size_t lp = *p;
- wchar_t w = 0;
-
- while (!w && f->mblen && lp < n && f->mblen < BLOCKSIZE){
- mbstate_t ts = f->ps;
- size_t rc = 0;
- wchar_t c = 0;
-
- switch (rc = mbrtowc(&c, f->mbbuf, f->mblen, &ts)){
- case (size_t)-1:
- memset(&f->ps, 0, sizeof(f->ps));
- w = UNICODE_REPLACEMENT_CHAR;
- lp++;
- break;
-
- case (size_t)-2:
- f->mbbuf[f->mblen++] = s[lp++];
- break;
-
- default:
- f->ps = ts;
- w = c;
- break;
- }
- }
- *p = lp;
+static inline void
+writembchar(File *f, wchar_t *c)
+{
+ mbrtowc(c, f->mbbuf, f->mblen, &f->ps);
f->mblen = 0;
- memset(f->mbbuf, 0, sizeof(f->mbbuf));
+}
- return w? w : UNICODE_REPLACEMENT_CHAR;
+static inline size_t
+testmbchar(File *f)
+{
+ mbstate_t ts = f->ps;
+ return mbrtowc(NULL, f->mbbuf, f->mblen, &ts);
}
static size_t
insertbuf(File *f, const char *s, size_t n, bool *nulls)
{
- wchar_t wbuf[BLOCKSIZE + 1] = {0};
- size_t nw = 0;
- size_t nt = 0;
- size_t p = 0;
+ size_t nw = 0, p = 0, nb = 0, nt = 0;
+ wchar_t buf[BLOCKSIZE + 1] = {0};
Posn pos = addr.r.p2;
+ n = n? n : strlen(s);
- if (f->mblen)
- wbuf[nw++] = finishpartialchar(f, s, n, &p);
-
- while (p < n){
- mbstate_t ts = f->ps;
- wchar_t c = 0;
- size_t rc = mbrtowc(&c, s + p, n - p, &ts);
- switch (rc){
- case (size_t)0:
- if (p < n){
- memset(&f->ps, 0, sizeof(f->ps));
- wbuf[nw++] = UNICODE_REPLACEMENT_CHAR;
- *nulls = true;
- p++;
- }
- break;
-
- case (size_t)-1:
- memset(&f->ps, 0, sizeof(f->ps));
- wbuf[nw++] = UNICODE_REPLACEMENT_CHAR;
- p++;
- *nulls = true;
- break;
-
- case (size_t)-2:
- Finsert(f, tmprstr(wbuf, nw), pos);
- memcpy(f->mbbuf, s + p, MIN(n - p, BLOCKSIZE));
- f->mblen = MIN(n - p, BLOCKSIZE);
- return nt + nw;
-
- default:
- f->ps = ts;
- p += rc;
- wbuf[nw++] = c;
- break;
+ while (p < n && f->mblen < BLOCKSIZE){
+ switch (testmbchar(f)){
+ case (size_t)-1: buf[nw++] = UNICODE_REPLACEMENT_CHAR; f->mblen = 0; *nulls = true; break;
+ case (size_t)-2: f->mbbuf[f->mblen++] = s[p++]; break;
+ default: writembchar(f, buf + nw++); break;
}
if (nw >= BLOCKSIZE){
- Finsert(f, tmprstr(wbuf, nw), pos);
- memset(wbuf, 0, sizeof(wbuf));
+ Finsert(f, tmprstr(buf, nw), pos);
nt += nw;
nw = 0;
}
}
+ Finsert(f, tmprstr(buf, nw), pos);
+
+ nb = testmbchar(f); /* we might've finished a char on the last byte */
+ if (nb && nb != (size_t)-1 && nb != (size_t)-2){
+ writembchar(f, buf);
+ Finsert(f, tmprstr(buf, 1), pos);
+ nw++;
+ }
- Finsert(f, tmprstr(wbuf, nw), pos);
return nt + nw;
}