sync printutf8pad from sfeed - tscrape - twitter scraper
(HTM) git clone git://git.codemadness.org/tscrape
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit 573905aec2e99fbe31a1cabe5864853ef9015a41
(DIR) parent 426522824e719e081c9c5e47ba8771779b0fdc85
(HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
Date: Fri, 20 Mar 2020 12:00:16 +0100
sync printutf8pad from sfeed
changes:
- util: printutf8pad: count characters that occupy more than one column
properly, for example the string "\xef\xbc\xb5" (U+FF35, a fullwidth 'U').
- optimization: single-byte characters below 127 skip the mbtowc()/wcwidth() calls
Diffstat:
M util.c | 30 ++++++++++++++++++------------
1 file changed, 18 insertions(+), 12 deletions(-)
---
(DIR) diff --git a/util.c b/util.c
@@ -72,32 +72,38 @@ xmlencode(const char *s, FILE *fp)
}
}
-/* print `len' columns of characters. If string is shorter pad the rest
- * with characters `pad`. */
+/* print `len' columns of characters. If string is shorter pad the rest with
+ * characters `pad`. */
void
printutf8pad(FILE *fp, const char *s, size_t len, int pad)
{
- wchar_t w;
+ wchar_t wc;
size_t col = 0, i, slen;
- int rl, wc;
+ int rl, w;
if (!len)
return;
slen = strlen(s);
- for (i = 0; i < slen && col < len + 1; i += rl) {
- if ((rl = mbtowc(&w, &s[i], slen - i < 4 ? slen - i : 4)) <= 0)
- break;
- if ((wc = wcwidth(w)) == -1)
- wc = 1;
- col += (size_t)wc;
- if (col >= len && s[i + rl]) {
+ for (i = 0; i < slen; i += rl) {
+ rl = w = 1;
+ if ((unsigned char)s[i] < 32)
+ continue;
+ if ((unsigned char)s[i] >= 127) {
+ if ((rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4)) <= 0)
+ break;
+ if ((w = wcwidth(wc)) == -1)
+ continue;
+ }
+ if (col + w > len || (col + w == len && s[i + rl])) {
fputs("\xe2\x80\xa6", fp);
+ col++;
break;
}
fwrite(&s[i], 1, rl, fp);
+ col += w;
}
- for (; col < len; col++)
+ for (; col < len; ++col)
putc(pad, fp);
}