tuc.c - neatvi - [fork] simple vi-type editor with UTF-8 support
 (HTM) git clone git://src.adamsgaard.dk/neatvi
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
       ---
       tuc.c (21648B)
       ---
            1 #include <ctype.h>
            2 #include <stdio.h>
            3 #include <stdlib.h>
            4 #include <string.h>
            5 #include "vi.h"
            6 
/*
 * Number of elements in the array a.  Only meaningful for true arrays:
 * applied to a pointer it divides the pointer size by the element size.
 */
#define LEN(a)                (sizeof(a) / sizeof((a)[0]))
            8 
            9 /* return the length of a utf-8 character */
           10 int uc_len(char *s)
           11 {
           12         int c = (unsigned char) s[0];
           13         if (~c & 0xc0)                /* ASCII or invalid */
           14                 return c > 0;
           15         if (~c & 0x20)
           16                 return 2;
           17         if (~c & 0x10)
           18                 return 3;
           19         if (~c & 0x08)
           20                 return 4;
           21         return 1;
           22 }
           23 
           24 /* the number of utf-8 characters in s */
           25 int uc_slen(char *s)
           26 {
           27         int n;
           28         for (n = 0; *s; n++)
           29                 s = uc_end(s) + 1;
           30         return n;
           31 }
           32 
           33 /* the unicode codepoint of the given utf-8 character */
           34 int uc_code(char *s)
           35 {
           36         int c = (unsigned char) s[0];
           37         if (~c & 0xc0)                /* ASCII or invalid */
           38                 return c;
           39         if (~c & 0x20)
           40                 return ((c & 0x1f) << 6) | (s[1] & 0x3f);
           41         if (~c & 0x10)
           42                 return ((c & 0x0f) << 12) | ((s[1] & 0x3f) << 6) | (s[2] & 0x3f);
           43         if (~c & 0x08)
           44                 return ((c & 0x07) << 18) | ((s[1] & 0x3f) << 12) | ((s[2] & 0x3f) << 6) | (s[3] & 0x3f);
           45         return c;
           46 }
           47 
           48 /* find the beginning of the character at s[i] */
           49 char *uc_beg(char *beg, char *s)
           50 {
           51         while (s > beg && (((unsigned char) *s) & 0xc0) == 0x80)
           52                 s--;
           53         return s;
           54 }
           55 
           56 /* find the end of the character at s[i] */
           57 char *uc_end(char *s)
           58 {
           59         if (!*s || !((unsigned char) *s & 0x80))
           60                 return s;
           61         if (((unsigned char) *s & 0xc0) == 0xc0)
           62                 s++;
           63         while (((unsigned char) *s & 0xc0) == 0x80)
           64                 s++;
           65         return s - 1;
           66 }
           67 
           68 /* return a pointer to the character following s */
           69 char *uc_next(char *s)
           70 {
           71         s = uc_end(s);
           72         return *s ? s + 1 : s;
           73 }
           74 
           75 /* return a pointer to the character preceding s */
           76 char *uc_prev(char *beg, char *s)
           77 {
           78         return s == beg ? beg : uc_beg(beg, s - 1);
           79 }
           80 
           81 char *uc_lastline(char *s)
           82 {
           83         char *r = strrchr(s, '\n');
           84         return r ? r + 1 : s;
           85 }
           86 
           87 /* allocate and return an array for the characters in s */
           88 char **uc_chop(char *s, int *n)
           89 {
           90         char **chrs;
           91         int i;
           92         *n = uc_slen(s);
           93         chrs = malloc((*n + 1) * sizeof(chrs[0]));
           94         for (i = 0; i < *n + 1; i++) {
           95                 chrs[i] = s;
           96                 s = uc_next(s);
           97         }
           98         return chrs;
           99 }
          100 
          101 char *uc_chr(char *s, int off)
          102 {
          103         int i = 0;
          104         while (s && *s) {
          105                 if (i++ == off)
          106                         return s;
          107                 s = uc_next(s);
          108         }
          109         return s && (off < 0 || i == off) ? s : "";
          110 }
          111 
          112 /* the number of characters between s and s + off */
          113 int uc_off(char *s, int off)
          114 {
          115         char *e = s + off;
          116         int i;
          117         for (i = 0; s < e && *s; i++)
          118                 s = uc_next(s);
          119         return i;
          120 }
          121 
          122 char *uc_sub(char *s, int beg, int end)
          123 {
          124         char *sbeg = uc_chr(s, beg);
          125         char *send = uc_chr(s, end);
          126         int len = sbeg && send && sbeg <= send ? send - sbeg : 0;
          127         char *r = malloc(len + 1);
          128         memcpy(r, sbeg, len);
          129         r[len] = '\0';
          130         return r;
          131 }
          132 
          133 char *uc_dup(char *s)
          134 {
          135         char *r = malloc(strlen(s) + 1);
          136         return r ? strcpy(r, s) : NULL;
          137 }
          138 
          139 int uc_isspace(char *s)
          140 {
          141         int c = s ? (unsigned char) *s : 0;
          142         return c <= 0x7f && isspace(c);
          143 }
          144 
          145 int uc_isprint(char *s)
          146 {
          147         int c = s ? (unsigned char) *s : 0;
          148         return c > 0x7f || isprint(c);
          149 }
          150 
          151 int uc_isalpha(char *s)
          152 {
          153         int c = s ? (unsigned char) *s : 0;
          154         return c > 0x7f || isalpha(c);
          155 }
          156 
          157 int uc_isdigit(char *s)
          158 {
          159         int c = s ? (unsigned char) *s : 0;
          160         return c <= 0x7f && isdigit(c);
          161 }
          162 
          163 int uc_kind(char *c)
          164 {
          165         if (uc_isspace(c))
          166                 return 0;
          167         if (uc_isalpha(c) || uc_isdigit(c) || c[0] == '_')
          168                 return 1;
          169         return 2;
          170 }
          171 
          172 #define UC_R2L(ch)        (((ch) & 0xff00) == 0x0600 || \
          173                         ((ch) & 0xfffc) == 0x200c || \
          174                         ((ch) & 0xff00) == 0xfb00 || \
          175                         ((ch) & 0xff00) == 0xfc00 || \
          176                         ((ch) & 0xff00) == 0xfe00)
          177 
/*
 * List of characters that can be shaped, sorted by codepoint so that
 * find_achar() can binary-search it.  Fields left out of an initializer
 * are zero; a zero form means the character has no such form.
 */
static struct achar {
        unsigned c;                /* unicode codepoint of the character */
        unsigned s;                /* single (isolated) form */
        unsigned i;                /* initial form */
        unsigned m;                /* medial form */
        unsigned f;                /* final form */
} achars[] = {
        {0x0621, 0xfe80},                                /* hamza */
        {0x0622, 0xfe81, 0, 0, 0xfe82},                        /* alef madda */
        {0x0623, 0xfe83, 0, 0, 0xfe84},                        /* alef hamza above */
        {0x0624, 0xfe85, 0, 0, 0xfe86},                        /* waw hamza */
        {0x0625, 0xfe87, 0, 0, 0xfe88},                        /* alef hamza below */
        {0x0626, 0xfe89, 0xfe8b, 0xfe8c, 0xfe8a},        /* yeh hamza */
        {0x0627, 0xfe8d, 0, 0, 0xfe8e},                        /* alef */
        {0x0628, 0xfe8f, 0xfe91, 0xfe92, 0xfe90},        /* beh */
        {0x0629, 0xfe93, 0, 0, 0xfe94},                        /* teh marbuta */
        {0x062a, 0xfe95, 0xfe97, 0xfe98, 0xfe96},        /* teh */
        {0x062b, 0xfe99, 0xfe9b, 0xfe9c, 0xfe9a},        /* theh */
        {0x062c, 0xfe9d, 0xfe9f, 0xfea0, 0xfe9e},        /* jeem */
        {0x062d, 0xfea1, 0xfea3, 0xfea4, 0xfea2},        /* hah */
        {0x062e, 0xfea5, 0xfea7, 0xfea8, 0xfea6},        /* khah */
        {0x062f, 0xfea9, 0, 0, 0xfeaa},                        /* dal */
        {0x0630, 0xfeab, 0, 0, 0xfeac},                        /* thal */
        {0x0631, 0xfead, 0, 0, 0xfeae},                        /* reh */
        {0x0632, 0xfeaf, 0, 0, 0xfeb0},                        /* zain */
        {0x0633, 0xfeb1, 0xfeb3, 0xfeb4, 0xfeb2},        /* seen */
        {0x0634, 0xfeb5, 0xfeb7, 0xfeb8, 0xfeb6},        /* sheen */
        {0x0635, 0xfeb9, 0xfebb, 0xfebc, 0xfeba},        /* sad */
        {0x0636, 0xfebd, 0xfebf, 0xfec0, 0xfebe},        /* dad */
        {0x0637, 0xfec1, 0xfec3, 0xfec4, 0xfec2},        /* tah */
        {0x0638, 0xfec5, 0xfec7, 0xfec8, 0xfec6},        /* zah */
        {0x0639, 0xfec9, 0xfecb, 0xfecc, 0xfeca},        /* ain */
        {0x063a, 0xfecd, 0xfecf, 0xfed0, 0xfece},        /* ghain */
        {0x0640, 0x640, 0x640, 0x640},                        /* tatweel */
        {0x0641, 0xfed1, 0xfed3, 0xfed4, 0xfed2},        /* feh */
        {0x0642, 0xfed5, 0xfed7, 0xfed8, 0xfed6},        /* qaf */
        {0x0643, 0xfed9, 0xfedb, 0xfedc, 0xfeda},        /* kaf */
        {0x0644, 0xfedd, 0xfedf, 0xfee0, 0xfede},        /* lam */
        {0x0645, 0xfee1, 0xfee3, 0xfee4, 0xfee2},        /* meem */
        {0x0646, 0xfee5, 0xfee7, 0xfee8, 0xfee6},        /* noon */
        {0x0647, 0xfee9, 0xfeeb, 0xfeec, 0xfeea},        /* heh */
        {0x0648, 0xfeed, 0, 0, 0xfeee},                        /* waw */
        {0x0649, 0xfeef, 0, 0, 0xfef0},                        /* alef maksura */
        {0x064a, 0xfef1, 0xfef3, 0xfef4, 0xfef2},        /* yeh */
        {0x067e, 0xfb56, 0xfb58, 0xfb59, 0xfb57},        /* peh */
        {0x0686, 0xfb7a, 0xfb7c, 0xfb7d, 0xfb7b},        /* tcheh */
        {0x0698, 0xfb8a, 0, 0, 0xfb8b},                        /* jeh */
        {0x06a9, 0xfb8e, 0xfb90, 0xfb91, 0xfb8f},        /* fkaf */
        {0x06af, 0xfb92, 0xfb94, 0xfb95, 0xfb93},        /* gaf */
        {0x06cc, 0xfbfc, 0xfbfe, 0xfbff, 0xfbfd},        /* fyeh */
        {0x200c},                                        /* ZWNJ */
        {0x200d, 0, 0x200d, 0x200d},                        /* ZWJ */
};
          232 
          233 static struct achar *find_achar(int c)
          234 {
          235         int h, m, l;
          236         h = LEN(achars);
          237         l = 0;
          238         /* using binary search to find c */
          239         while (l < h) {
          240                 m = (h + l) >> 1;
          241                 if (achars[m].c == c)
          242                         return &achars[m];
          243                 if (c < achars[m].c)
          244                         h = m;
          245                 else
          246                         l = m + 1;
          247         }
          248         return NULL;
          249 }
          250 
          251 static int can_join(int c1, int c2)
          252 {
          253         struct achar *a1 = find_achar(c1);
          254         struct achar *a2 = find_achar(c2);
          255         return a1 && a2 && (a1->i || a1->m) && (a2->f || a2->m);
          256 }
          257 
          258 static int uc_cshape(int cur, int prev, int next)
          259 {
          260         int c = cur;
          261         int join_prev, join_next;
          262         struct achar *ac = find_achar(c);
          263         if (!ac)                /* ignore non-Arabic characters */
          264                 return c;
          265         join_prev = can_join(prev, c);
          266         join_next = can_join(c, next);
          267         if (join_prev && join_next)
          268                 c = ac->m;
          269         if (join_prev && !join_next)
          270                 c = ac->f;
          271         if (!join_prev && join_next)
          272                 c = ac->i;
          273         if (!join_prev && !join_next)
          274                 c = ac->c;        /* some fonts do not have a glyph for ac->s */
          275         return c ? c : cur;
          276 }
          277 
          278 /*
          279  * return nonzero for Arabic combining characters
          280  *
          281  * The standard Arabic diacritics:
          282  * + 0x064b: fathatan
          283  * + 0x064c: dammatan
          284  * + 0x064d: kasratan
          285  * + 0x064e: fatha
          286  * + 0x064f: damma
          287  * + 0x0650: kasra
          288  * + 0x0651: shadda
          289  * + 0x0652: sukun
          290  * + 0x0653: madda above
          291  * + 0x0654: hamza above
          292  * + 0x0655: hamza below
          293  * + 0x0670: superscript alef
          294  */
          295 static int uc_acomb(int c)
          296 {
          297         return (c >= 0x064b && c <= 0x0655) ||                /* the standard diacritics */
          298                 (c >= 0xfc5e && c <= 0xfc63) ||                /* shadda ligatures */
          299                 c == 0x0670;                                /* superscript alef */
          300 }
          301 
          302 static void uc_cput(char *d, int c)
          303 {
          304         int l = 0;
          305         if (c > 0xffff) {
          306                 *d++ = 0xf0 | (c >> 18);
          307                 l = 3;
          308         } else if (c > 0x7ff) {
          309                 *d++ = 0xe0 | (c >> 12);
          310                 l = 2;
          311         } else if (c > 0x7f) {
          312                 *d++ = 0xc0 | (c >> 6);
          313                 l = 1;
          314         } else {
          315                 *d++ = c;
          316         }
          317         while (l--)
          318                 *d++ = 0x80 | ((c >> (l * 6)) & 0x3f);
          319         *d = '\0';
          320 }
          321 
          322 /* shape the given arabic character; returns a static buffer */
          323 char *uc_shape(char *beg, char *s)
          324 {
          325         static char out[16];
          326         char *r;
          327         int prev = 0;
          328         int next = 0;
          329         int curr = uc_code(s);
          330         if (!curr || !UC_R2L(curr))
          331                 return NULL;
          332         r = s;
          333         while (r > beg) {
          334                 r = uc_beg(beg, r - 1);
          335                 if (!uc_acomb(uc_code(r))) {
          336                         prev = uc_code(r);
          337                         break;
          338                 }
          339         }
          340         r = s;
          341         while (*r) {
          342                 r = uc_next(r);
          343                 if (!uc_acomb(uc_code(r))) {
          344                         next = uc_code(r);
          345                         break;
          346                 }
          347         }
          348         uc_cput(out, uc_cshape(curr, prev, next));
          349         return out;
          350 }
          351 
/*
 * Inclusive {first, last} codepoint ranges of double-width characters,
 * looked up by uc_isdw() through find().  The ranges are in ascending
 * order, which find()'s binary search depends on.
 */
static int dwchars[][2] = {
        {0x1100, 0x115f}, {0x11a3, 0x11a7}, {0x11fa, 0x11ff}, {0x2329, 0x232a},
        {0x2e80, 0x2e99}, {0x2e9b, 0x2ef3}, {0x2f00, 0x2fd5}, {0x2ff0, 0x2ffb},
        {0x3000, 0x3029}, {0x3030, 0x303e}, {0x3041, 0x3096}, {0x309b, 0x30ff},
        {0x3105, 0x312d}, {0x3131, 0x318e}, {0x3190, 0x31b7}, {0x31c0, 0x31e3},
        {0x31f0, 0x321e}, {0x3220, 0x3247}, {0x3250, 0x32fe}, {0x3300, 0x4dbf},
        {0x4e00, 0xa48c}, {0xa490, 0xa4c6}, {0xa960, 0xa97c}, {0xac00, 0xd7a3},
        {0xd7b0, 0xd7c6}, {0xd7cb, 0xd7fb}, {0xf900, 0xfaff}, {0xfe10, 0xfe19},
        {0xfe30, 0xfe52}, {0xfe54, 0xfe66}, {0xfe68, 0xfe6b}, {0xff01, 0xff60},
        {0xffe0, 0xffe6}, {0x1f200, 0x1f200}, {0x1f210, 0x1f231}, {0x1f240, 0x1f248},
        {0x20000,0x2ffff},
};
          364 
/*
 * Inclusive {first, last} codepoint ranges of zero-width and combining
 * characters, looked up by uc_iszw() through find().  The ranges are in
 * ascending order, which find()'s binary search depends on.
 */
static int zwchars[][2] = {
        {0x0300, 0x036f}, {0x0483, 0x0489}, {0x0591, 0x05bd}, {0x05bf, 0x05bf},
        {0x05c1, 0x05c2}, {0x05c4, 0x05c5}, {0x05c7, 0x05c7}, {0x0610, 0x061a},
        {0x064b, 0x065e}, {0x0670, 0x0670}, {0x06d6, 0x06dc}, {0x06de, 0x06e4},
        {0x06e7, 0x06e8}, {0x06ea, 0x06ed}, {0x0711, 0x0711}, {0x0730, 0x074a},
        {0x07a6, 0x07b0}, {0x07eb, 0x07f3}, {0x0816, 0x0819}, {0x081b, 0x0823},
        {0x0825, 0x0827}, {0x0829, 0x082d}, {0x0900, 0x0903}, {0x093c, 0x093c},
        {0x093e, 0x094e}, {0x0951, 0x0955}, {0x0962, 0x0963}, {0x0981, 0x0983},
        {0x09bc, 0x09bc}, {0x09be, 0x09c4}, {0x09c7, 0x09c8}, {0x09cb, 0x09cd},
        {0x09d7, 0x09d7}, {0x09e2, 0x09e3}, {0x0a01, 0x0a03}, {0x0a3c, 0x0a3c},
        {0x0a3e, 0x0a42}, {0x0a47, 0x0a48}, {0x0a4b, 0x0a4d}, {0x0a51, 0x0a51},
        {0x0a70, 0x0a71}, {0x0a75, 0x0a75}, {0x0a81, 0x0a83}, {0x0abc, 0x0abc},
        {0x0abe, 0x0ac5}, {0x0ac7, 0x0ac9}, {0x0acb, 0x0acd}, {0x0ae2, 0x0ae3},
        {0x0b01, 0x0b03}, {0x0b3c, 0x0b3c}, {0x0b3e, 0x0b44}, {0x0b47, 0x0b48},
        {0x0b4b, 0x0b4d}, {0x0b56, 0x0b57}, {0x0b62, 0x0b63}, {0x0b82, 0x0b82},
        {0x0bbe, 0x0bc2}, {0x0bc6, 0x0bc8}, {0x0bca, 0x0bcd}, {0x0bd7, 0x0bd7},
        {0x0c01, 0x0c03}, {0x0c3e, 0x0c44}, {0x0c46, 0x0c48}, {0x0c4a, 0x0c4d},
        {0x0c55, 0x0c56}, {0x0c62, 0x0c63}, {0x0c82, 0x0c83}, {0x0cbc, 0x0cbc},
        {0x0cbe, 0x0cc4}, {0x0cc6, 0x0cc8}, {0x0cca, 0x0ccd}, {0x0cd5, 0x0cd6},
        {0x0ce2, 0x0ce3}, {0x0d02, 0x0d03}, {0x0d3e, 0x0d44}, {0x0d46, 0x0d48},
        {0x0d4a, 0x0d4d}, {0x0d57, 0x0d57}, {0x0d62, 0x0d63}, {0x0d82, 0x0d83},
        {0x0dca, 0x0dca}, {0x0dcf, 0x0dd4}, {0x0dd6, 0x0dd6}, {0x0dd8, 0x0ddf},
        {0x0df2, 0x0df3}, {0x0e31, 0x0e31}, {0x0e34, 0x0e3a}, {0x0e47, 0x0e4e},
        {0x0eb1, 0x0eb1}, {0x0eb4, 0x0eb9}, {0x0ebb, 0x0ebc}, {0x0ec8, 0x0ecd},
        {0x0f18, 0x0f19}, {0x0f35, 0x0f35}, {0x0f37, 0x0f37}, {0x0f39, 0x0f39},
        {0x0f3e, 0x0f3f}, {0x0f71, 0x0f84}, {0x0f86, 0x0f87}, {0x0f90, 0x0f97},
        {0x0f99, 0x0fbc}, {0x0fc6, 0x0fc6}, {0x102b, 0x103e}, {0x1056, 0x1059},
        {0x105e, 0x1060}, {0x1062, 0x1064}, {0x1067, 0x106d}, {0x1071, 0x1074},
        {0x1082, 0x108d}, {0x108f, 0x108f}, {0x109a, 0x109d}, {0x135f, 0x135f},
        {0x1712, 0x1714}, {0x1732, 0x1734}, {0x1752, 0x1753}, {0x1772, 0x1773},
        {0x17b6, 0x17d3}, {0x17dd, 0x17dd}, {0x180b, 0x180d}, {0x18a9, 0x18a9},
        {0x1920, 0x192b}, {0x1930, 0x193b}, {0x19b0, 0x19c0}, {0x19c8, 0x19c9},
        {0x1a17, 0x1a1b}, {0x1a55, 0x1a5e}, {0x1a60, 0x1a7c}, {0x1a7f, 0x1a7f},
        {0x1b00, 0x1b04}, {0x1b34, 0x1b44}, {0x1b6b, 0x1b73}, {0x1b80, 0x1b82},
        {0x1ba1, 0x1baa}, {0x1c24, 0x1c37}, {0x1cd0, 0x1cd2}, {0x1cd4, 0x1ce8},
        {0x1ced, 0x1ced}, {0x1cf2, 0x1cf2}, {0x1dc0, 0x1de6}, {0x1dfd, 0x1dff},
        {0x200b, 0x200f},
        {0x20d0, 0x20f0}, {0x2cef, 0x2cf1}, {0x2de0, 0x2dff}, {0x302a, 0x302f},
        {0x3099, 0x309a}, {0xa66f, 0xa672}, {0xa67c, 0xa67d}, {0xa6f0, 0xa6f1},
        {0xa802, 0xa802}, {0xa806, 0xa806}, {0xa80b, 0xa80b}, {0xa823, 0xa827},
        {0xa880, 0xa881}, {0xa8b4, 0xa8c4}, {0xa8e0, 0xa8f1}, {0xa926, 0xa92d},
        {0xa947, 0xa953}, {0xa980, 0xa983}, {0xa9b3, 0xa9c0}, {0xaa29, 0xaa36},
        {0xaa43, 0xaa43}, {0xaa4c, 0xaa4d}, {0xaa7b, 0xaa7b}, {0xaab0, 0xaab0},
        {0xaab2, 0xaab4}, {0xaab7, 0xaab8}, {0xaabe, 0xaabf}, {0xaac1, 0xaac1},
        {0xabe3, 0xabea}, {0xabec, 0xabed}, {0xfb1e, 0xfb1e}, {0xfe00, 0xfe0f},
        {0xfe20, 0xfe26}, {0x101fd, 0x101fd}, {0x10a01, 0x10a03}, {0x10a05, 0x10a06},
        {0x10a0c, 0x10a0f}, {0x10a38, 0x10a3a}, {0x10a3f, 0x10a3f}, {0x11080, 0x11082},
        {0x110b0, 0x110ba}, {0x1d165, 0x1d169}, {0x1d16d, 0x1d172}, {0x1d17b, 0x1d182},
        {0x1d185, 0x1d18b}, {0x1d1aa, 0x1d1ad}, {0x1d242, 0x1d244}, {0xe0100, 0xe01ef}
};
          415 
/*
 * Inclusive {first, last} codepoint ranges that uc_isbell() reports as
 * nonprintable, looked up through find().  The ranges are in ascending
 * order, which find()'s binary search depends on.
 */
static int bchars[][2] = {
        {0x00000, 0x0001f}, {0x00080, 0x0009f}, {0x00300, 0x0036f},
        {0x00379, 0x00379}, {0x00380, 0x00383}, {0x0038d, 0x0038d},
        {0x00483, 0x00489}, {0x00527, 0x00530}, {0x00558, 0x00558},
        {0x00588, 0x00588}, {0x0058c, 0x005bd}, {0x005c1, 0x005c2},
        {0x005c5, 0x005c5}, {0x005c8, 0x005cf}, {0x005ec, 0x005ef},
        {0x005f6, 0x00605}, {0x00611, 0x0061a}, {0x0061d, 0x0061d},
        {0x0064b, 0x0065f}, {0x006d6, 0x006e4}, {0x006e8, 0x006e8},
        {0x006eb, 0x006ed}, {0x0070f, 0x0070f}, {0x00730, 0x0074c},
        {0x007a7, 0x007b0}, {0x007b3, 0x007bf}, {0x007ec, 0x007f3},
        {0x007fc, 0x007ff}, {0x00817, 0x00819}, {0x0081c, 0x00823},
        {0x00826, 0x00827}, {0x0082a, 0x0082f}, {0x00840, 0x00903},
        {0x0093b, 0x0093c}, {0x0093f, 0x0094f}, {0x00952, 0x00957},
        {0x00963, 0x00963}, {0x00974, 0x00978}, {0x00981, 0x00984},
        {0x0098e, 0x0098e}, {0x00992, 0x00992}, {0x009b1, 0x009b1},
        {0x009b4, 0x009b5}, {0x009bb, 0x009bc}, {0x009bf, 0x009cd},
        {0x009d0, 0x009db}, {0x009e2, 0x009e5}, {0x009fd, 0x00a04},
        {0x00a0c, 0x00a0e}, {0x00a12, 0x00a12}, {0x00a31, 0x00a31},
        {0x00a37, 0x00a37}, {0x00a3b, 0x00a58}, {0x00a5f, 0x00a65},
        {0x00a71, 0x00a71}, {0x00a76, 0x00a84}, {0x00a92, 0x00a92},
        {0x00ab1, 0x00ab1}, {0x00aba, 0x00abc}, {0x00abf, 0x00acf},
        {0x00ad2, 0x00adf}, {0x00ae3, 0x00ae5}, {0x00af2, 0x00b04},
        {0x00b0e, 0x00b0e}, {0x00b12, 0x00b12}, {0x00b31, 0x00b31},
        {0x00b3a, 0x00b3c}, {0x00b3f, 0x00b5b}, {0x00b62, 0x00b65},
        {0x00b73, 0x00b82}, {0x00b8b, 0x00b8d}, {0x00b96, 0x00b98},
        {0x00b9d, 0x00b9d}, {0x00ba1, 0x00ba2}, {0x00ba6, 0x00ba7},
        {0x00bac, 0x00bad}, {0x00bbb, 0x00bcf}, {0x00bd2, 0x00be5},
        {0x00bfc, 0x00c04}, {0x00c11, 0x00c11}, {0x00c34, 0x00c34},
        {0x00c3b, 0x00c3c}, {0x00c3f, 0x00c57}, {0x00c5b, 0x00c5f},
        {0x00c63, 0x00c65}, {0x00c71, 0x00c77}, {0x00c81, 0x00c84},
        {0x00c91, 0x00c91}, {0x00cb4, 0x00cb4}, {0x00cbb, 0x00cbc},
        {0x00cbf, 0x00cdd}, {0x00ce2, 0x00ce5}, {0x00cf3, 0x00d04},
        {0x00d11, 0x00d11}, {0x00d3a, 0x00d3c}, {0x00d3f, 0x00d5f},
        {0x00d63, 0x00d65}, {0x00d77, 0x00d78}, {0x00d81, 0x00d84},
        {0x00d98, 0x00d99}, {0x00dbc, 0x00dbc}, {0x00dbf, 0x00dbf},
        {0x00dc8, 0x00df3}, {0x00df6, 0x00e00}, {0x00e34, 0x00e3e},
        {0x00e48, 0x00e4e}, {0x00e5d, 0x00e80}, {0x00e85, 0x00e86},
        {0x00e8b, 0x00e8c}, {0x00e8f, 0x00e93}, {0x00ea0, 0x00ea0},
        {0x00ea6, 0x00ea6}, {0x00ea9, 0x00ea9}, {0x00eb1, 0x00eb1},
        {0x00eb5, 0x00ebc}, {0x00ebf, 0x00ebf}, {0x00ec7, 0x00ecf},
        {0x00edb, 0x00edb}, {0x00edf, 0x00eff}, {0x00f19, 0x00f19},
        {0x00f37, 0x00f37}, {0x00f3e, 0x00f3f}, {0x00f6d, 0x00f84},
        {0x00f87, 0x00f87}, {0x00f8d, 0x00fbd}, {0x00fcd, 0x00fcd},
        {0x00fda, 0x00fff}, {0x0102c, 0x0103e}, {0x01057, 0x01059},
        {0x0105f, 0x01060}, {0x01063, 0x01064}, {0x01068, 0x0106d},
        {0x01072, 0x01074}, {0x01083, 0x0108d}, {0x0109a, 0x0109d},
        {0x010c7, 0x010cf}, {0x010fe, 0x010ff}, {0x0124e, 0x0124f},
        {0x01259, 0x01259}, {0x0125f, 0x0125f}, {0x0128e, 0x0128f},
        {0x012b6, 0x012b7}, {0x012c1, 0x012c1}, {0x012c7, 0x012c7},
        {0x01311, 0x01311}, {0x01317, 0x01317}, {0x0135c, 0x0135f},
        {0x0137e, 0x0137f}, {0x0139b, 0x0139f}, {0x013f6, 0x013ff},
        {0x0169e, 0x0169f}, {0x016f2, 0x016ff}, {0x01712, 0x0171f},
        {0x01733, 0x01734}, {0x01738, 0x0173f}, {0x01753, 0x0175f},
        {0x01771, 0x0177f}, {0x017b5, 0x017d3}, {0x017de, 0x017df},
        {0x017eb, 0x017ef}, {0x017fb, 0x017ff}, {0x0180c, 0x0180d},
        {0x0181a, 0x0181f}, {0x01879, 0x0187f}, {0x018ab, 0x018af},
        {0x018f7, 0x018ff}, {0x0191e, 0x0193f}, {0x01942, 0x01943},
        {0x0196f, 0x0196f}, {0x01976, 0x0197f}, {0x019ad, 0x019c0},
        {0x019c9, 0x019cf}, {0x019dc, 0x019dd}, {0x01a18, 0x01a1d},
        {0x01a56, 0x01a7f}, {0x01a8b, 0x01a8f}, {0x01a9b, 0x01a9f},
        {0x01aaf, 0x01b04}, {0x01b35, 0x01b44}, {0x01b4d, 0x01b4f},
        {0x01b6c, 0x01b73}, {0x01b7e, 0x01b82}, {0x01ba2, 0x01bad},
        {0x01bbb, 0x01bff}, {0x01c25, 0x01c3a}, {0x01c4b, 0x01c4c},
        {0x01c81, 0x01cd2}, {0x01cd5, 0x01ce8}, {0x01cf2, 0x01cff},
        {0x01dc1, 0x01dff}, {0x01f17, 0x01f17}, {0x01f1f, 0x01f1f},
        {0x01f47, 0x01f47}, {0x01f4f, 0x01f4f}, {0x01f5a, 0x01f5a},
        {0x01f5e, 0x01f5e}, {0x01f7f, 0x01f7f}, {0x01fc5, 0x01fc5},
        {0x01fd5, 0x01fd5}, {0x01ff0, 0x01ff1}, {0x01fff, 0x01fff},
        {0x0200c, 0x0200f}, {0x02029, 0x0202e}, {0x02061, 0x0206f},
        {0x02073, 0x02073}, {0x02095, 0x0209f}, {0x020ba, 0x020ff},
        {0x0218b, 0x0218f}, {0x023ea, 0x023ff}, {0x02428, 0x0243f},
        {0x0244c, 0x0245f}, {0x026e2, 0x026e2}, {0x026e5, 0x026e7},
        {0x02705, 0x02705}, {0x0270b, 0x0270b}, {0x0274c, 0x0274c},
        {0x02753, 0x02755}, {0x02760, 0x02760}, {0x02796, 0x02797},
        {0x027bf, 0x027bf}, {0x027cd, 0x027cf}, {0x02b4e, 0x02b4f},
        {0x02b5b, 0x02bff}, {0x02c5f, 0x02c5f}, {0x02cf0, 0x02cf8},
        {0x02d27, 0x02d2f}, {0x02d67, 0x02d6e}, {0x02d71, 0x02d7f},
        {0x02d98, 0x02d9f}, {0x02daf, 0x02daf}, {0x02dbf, 0x02dbf},
        {0x02dcf, 0x02dcf}, {0x02ddf, 0x02dff}, {0x02e33, 0x02e7f},
        {0x02ef4, 0x02eff}, {0x02fd7, 0x02fef}, {0x02ffd, 0x02fff},
        {0x0302b, 0x0302f}, {0x03097, 0x0309a}, {0x03101, 0x03104},
        {0x0312f, 0x03130}, {0x031b8, 0x031bf}, {0x031e5, 0x031ef},
        {0x032ff, 0x032ff}, {0x04db7, 0x04dbf}, {0x09fcd, 0x09fff},
        {0x0a48e, 0x0a48f}, {0x0a4c8, 0x0a4cf}, {0x0a62d, 0x0a63f},
        {0x0a661, 0x0a661}, {0x0a670, 0x0a672}, {0x0a675, 0x0a67d},
        {0x0a699, 0x0a69f}, {0x0a6f1, 0x0a6f1}, {0x0a6f9, 0x0a6ff},
        {0x0a78e, 0x0a7fa}, {0x0a806, 0x0a806}, {0x0a823, 0x0a827},
        {0x0a82d, 0x0a82f}, {0x0a83b, 0x0a83f}, {0x0a879, 0x0a881},
        {0x0a8b5, 0x0a8cd}, {0x0a8db, 0x0a8f1}, {0x0a8fd, 0x0a8ff},
        {0x0a927, 0x0a92d}, {0x0a948, 0x0a95e}, {0x0a97e, 0x0a983},
        {0x0a9b4, 0x0a9c0}, {0x0a9da, 0x0a9dd}, {0x0a9e1, 0x0a9ff},
        {0x0aa2a, 0x0aa3f}, {0x0aa4c, 0x0aa4f}, {0x0aa5b, 0x0aa5b},
        {0x0aa7c, 0x0aa7f}, {0x0aab2, 0x0aab4}, {0x0aab8, 0x0aab8},
        {0x0aabf, 0x0aabf}, {0x0aac3, 0x0aada}, {0x0aae1, 0x0abbf},
        {0x0abe4, 0x0abea}, {0x0abed, 0x0abef}, {0x0abfb, 0x0abff},
        {0x0d7a5, 0x0d7af}, {0x0d7c8, 0x0d7ca}, {0x0d7fd, 0x0f8ff},
        {0x0fa2f, 0x0fa2f}, {0x0fa6f, 0x0fa6f}, {0x0fadb, 0x0faff},
        {0x0fb08, 0x0fb12}, {0x0fb19, 0x0fb1c}, {0x0fb37, 0x0fb37},
        {0x0fb3f, 0x0fb3f}, {0x0fb45, 0x0fb45}, {0x0fbb3, 0x0fbd2},
        {0x0fd41, 0x0fd4f}, {0x0fd91, 0x0fd91}, {0x0fdc9, 0x0fdef},
        {0x0fdff, 0x0fe0f}, {0x0fe1b, 0x0fe2f}, {0x0fe67, 0x0fe67},
        {0x0fe6d, 0x0fe6f}, {0x0fefd, 0x0ff00}, {0x0ffc0, 0x0ffc1},
        {0x0ffc9, 0x0ffc9}, {0x0ffd1, 0x0ffd1}, {0x0ffd9, 0x0ffd9},
        {0x0ffde, 0x0ffdf}, {0x0ffef, 0x0fffb}, {0x0ffff, 0x0ffff},
        {0x10027, 0x10027}, {0x1003e, 0x1003e}, {0x1004f, 0x1004f},
        {0x1005f, 0x1007f}, {0x100fc, 0x100ff}, {0x10104, 0x10106},
        {0x10135, 0x10136}, {0x1018c, 0x1018f}, {0x1019d, 0x101cf},
        {0x101fe, 0x1027f}, {0x1029e, 0x1029f}, {0x102d2, 0x102ff},
        {0x10324, 0x1032f}, {0x1034c, 0x1037f}, {0x103c4, 0x103c7},
        {0x103d7, 0x103ff}, {0x1049f, 0x1049f}, {0x104ab, 0x107ff},
        {0x10807, 0x10807}, {0x10836, 0x10836}, {0x1083a, 0x1083b},
        {0x1083e, 0x1083e}, {0x10860, 0x108ff}, {0x1091d, 0x1091e},
        {0x1093b, 0x1093e}, {0x10941, 0x109ff}, {0x10a02, 0x10a0f},
        {0x10a18, 0x10a18}, {0x10a35, 0x10a3f}, {0x10a49, 0x10a4f},
        {0x10a5a, 0x10a5f}, {0x10a81, 0x10aff}, {0x10b37, 0x10b38},
        {0x10b57, 0x10b57}, {0x10b74, 0x10b77}, {0x10b81, 0x10bff},
        {0x10c4a, 0x10e5f}, {0x10e80, 0x11082}, {0x110b1, 0x110ba},
        {0x110c2, 0x11fff}, {0x12370, 0x123ff}, {0x12464, 0x1246f},
        {0x12475, 0x12fff}, {0x13430, 0x1cfff}, {0x1d0f7, 0x1d0ff},
        {0x1d128, 0x1d128}, {0x1d166, 0x1d169}, {0x1d16e, 0x1d182},
        {0x1d186, 0x1d18b}, {0x1d1ab, 0x1d1ad}, {0x1d1df, 0x1d1ff},
        {0x1d243, 0x1d244}, {0x1d247, 0x1d2ff}, {0x1d358, 0x1d35f},
        {0x1d373, 0x1d3ff}, {0x1d49d, 0x1d49d}, {0x1d4a1, 0x1d4a1},
        {0x1d4a4, 0x1d4a4}, {0x1d4a8, 0x1d4a8}, {0x1d4ba, 0x1d4ba},
        {0x1d4c4, 0x1d4c4}, {0x1d50b, 0x1d50c}, {0x1d51d, 0x1d51d},
        {0x1d53f, 0x1d53f}, {0x1d547, 0x1d549}, {0x1d6a6, 0x1d6a7},
        {0x1d7cd, 0x1d7cd}, {0x1d801, 0x1efff}, {0x1f02d, 0x1f02f},
        {0x1f095, 0x1f0ff}, {0x1f10c, 0x1f10f}, {0x1f130, 0x1f130},
        {0x1f133, 0x1f13c}, {0x1f140, 0x1f141}, {0x1f144, 0x1f145},
        {0x1f148, 0x1f149}, {0x1f150, 0x1f156}, {0x1f159, 0x1f15e},
        {0x1f161, 0x1f178}, {0x1f17d, 0x1f17e}, {0x1f181, 0x1f189},
        {0x1f18f, 0x1f18f}, {0x1f192, 0x1f1ff}, {0x1f202, 0x1f20f},
        {0x1f233, 0x1f23f}, {0x1f24a, 0x1ffff}, {0x2a6d8, 0x2a6ff},
        {0x2b736, 0x2f7ff}, {0x2fa1f, 0x10ffff},
};
          551 
          552 static int find(int c, int tab[][2], int n)
          553 {
          554         int l = 0;
          555         int h = n - 1;
          556         int m;
          557         if (c < tab[0][0])
          558                 return 0;
          559         while (l <= h) {
          560                 m = (h + l) / 2;
          561                 if (c >= tab[m][0] && c <= tab[m][1])
          562                         return 1;
          563                 if (c < tab[m][0])
          564                         h = m - 1;
          565                 else
          566                         l = m + 1;
          567         }
          568         return 0;
          569 }
          570 
          571 /* double-width characters */
          572 static int uc_isdw(int c)
          573 {
          574         return c >= 0x1100 && find(c, dwchars, LEN(dwchars));
          575 }
          576 
          577 /* zero-width and combining characters */
          578 static int uc_iszw(int c)
          579 {
          580         return c >= 0x0300 && find(c, zwchars, LEN(zwchars));
          581 }
          582 
          583 int uc_wid(char *s)
          584 {
          585         int c = uc_code(s);
          586         if (uc_iszw(c))
          587                 return 0;
          588         return uc_isdw(c) ? 2 : 1;
          589 }
          590 
          591 /* nonprintable characters */
          592 int uc_isbell(char *s)
          593 {
          594         int c = (unsigned char) *s;
          595         if (c == ' ' || c == '\t' || c == '\n' || (c <= 0x7f && isprint(c)))
          596                 return 0;
          597         c = uc_code(s);
          598         return uc_iszw(c) || find(c, bchars, LEN(bchars));
          599 }
          600 
          601 /* combining characters */
          602 int uc_iscomb(char *s)
          603 {
          604         int c = (unsigned char) *s;
          605         if (c == ' ' || c == '\t' || c == '\n' || (c <= 0x7f && isprint(c)))
          606                 return 0;
          607         return uc_acomb(uc_code(s));
          608 }