sync printutf8pad from sfeed - tscrape - twitter scraper
 (HTM) git clone git://git.codemadness.org/tscrape
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 573905aec2e99fbe31a1cabe5864853ef9015a41
 (DIR) parent 426522824e719e081c9c5e47ba8771779b0fdc85
 (HTM) Author: Hiltjo Posthuma <hiltjo@codemadness.org>
       Date:   Fri, 20 Mar 2020 12:00:16 +0100
       
       sync printutf8pad from sfeed
       
       changes:
       - util: printutf8pad: count the columns of multi-width (wide) characters
         properly, for example the string "\xef\xbc\xb5" (U+FF35, a fullwidth "U").
       - optimization: handle printable ASCII bytes directly, without calling
         mbtowc() and wcwidth()
       
       Diffstat:
         M util.c                              |      30 ++++++++++++++++++------------
       
       1 file changed, 18 insertions(+), 12 deletions(-)
       ---
 (DIR) diff --git a/util.c b/util.c
       @@ -72,32 +72,38 @@ xmlencode(const char *s, FILE *fp)
                }
        }
        
       -/* print `len' columns of characters. If string is shorter pad the rest
       - * with characters `pad`. */
       +/* print `len' columns of characters. If string is shorter pad the rest with
       + * characters `pad`. */
        void
        printutf8pad(FILE *fp, const char *s, size_t len, int pad)
        {
       -        wchar_t w;
       +        wchar_t wc;
                size_t col = 0, i, slen;
       -        int rl, wc;
       +        int rl, w;
        
                if (!len)
                        return;
        
                slen = strlen(s);
       -        for (i = 0; i < slen && col < len + 1; i += rl) {
       -                if ((rl = mbtowc(&w, &s[i], slen - i < 4 ? slen - i : 4)) <= 0)
       -                        break;
       -                if ((wc = wcwidth(w)) == -1)
       -                        wc = 1;
       -                col += (size_t)wc;
       -                if (col >= len && s[i + rl]) {
       +        for (i = 0; i < slen; i += rl) {
       +                rl = w = 1;
       +                if ((unsigned char)s[i] < 32)
       +                        continue;
       +                if ((unsigned char)s[i] >= 127) {
       +                        if ((rl = mbtowc(&wc, s + i, slen - i < 4 ? slen - i : 4)) <= 0)
       +                                break;
       +                        if ((w = wcwidth(wc)) == -1)
       +                                continue;
       +                }
       +                if (col + w > len || (col + w == len && s[i + rl])) {
                                fputs("\xe2\x80\xa6", fp);
       +                        col++;
                                break;
                        }
                        fwrite(&s[i], 1, rl, fp);
       +                col += w;
                }
       -        for (; col < len; col++)
       +        for (; col < len; ++col)
                        putc(pad, fp);
        }