util.c - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       util.c (21467B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <ctype.h>
            3 #include <errno.h>
            4 #include <inttypes.h>
            5 #include <stdbool.h>
            6 #include <stddef.h>
            7 #include <stdint.h>
            8 #include <stdio.h>
            9 #include <stdlib.h>
           10 #include <string.h>
           11 
           12 #include "util.h"
           13 
           14 struct range {
           15         uint_least32_t lower;
           16         uint_least32_t upper;
           17 };
           18 
           19 struct properties_payload {
           20         struct properties *prop;
           21         const struct property_spec *spec;
           22         uint_least8_t speclen;
           23         int (*set_value)(struct properties_payload *, uint_least32_t,
           24                          int_least64_t);
           25         uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
           26                                          uint_least8_t);
           27 };
           28 
           29 struct break_test_payload {
           30         struct break_test **test;
           31         size_t *testlen;
           32 };
           33 
           34 static void *
           35 reallocate_array(void *p, size_t len, size_t size)
           36 {
           37         if (len > 0 && size > SIZE_MAX / len) {
           38                 errno = ENOMEM;
           39                 return NULL;
           40         }
           41 
           42         return realloc(p, len * size);
           43 }
           44 
           45 int
           46 hextocp(const char *str, size_t len, uint_least32_t *cp)
           47 {
           48         size_t i;
           49         int off;
           50         char relative;
           51 
           52         /* the maximum valid codepoint is 0x10FFFF */
           53         if (len > 6) {
           54                 fprintf(stderr, "hextocp: '%.*s' is too long.\n", (int)len,
           55                         str);
           56                 return 1;
           57         }
           58 
           59         for (i = 0, *cp = 0; i < len; i++) {
           60                 if (str[i] >= '0' && str[i] <= '9') {
           61                         relative = '0';
           62                         off = 0;
           63                 } else if (str[i] >= 'a' && str[i] <= 'f') {
           64                         relative = 'a';
           65                         off = 10;
           66                 } else if (str[i] >= 'A' && str[i] <= 'F') {
           67                         relative = 'A';
           68                         off = 10;
           69                 } else {
           70                         fprintf(stderr, "hextocp: '%.*s' is not hexadecimal.\n",
           71                                 (int)len, str);
           72                         return 1;
           73                 }
           74 
           75                 *cp += ((uint_least32_t)1 << (4 * (len - i - 1))) *
           76                        (uint_least32_t)(str[i] - relative + off);
           77         }
           78 
           79         if (*cp > UINT32_C(0x10FFFF)) {
           80                 fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len,
           81                         str);
           82                 return 1;
           83         }
           84 
           85         return 0;
           86 }
           87 
           88 int
           89 parse_cp_list(const char *str, uint_least32_t **cp, size_t *cplen)
           90 {
           91         size_t count, i;
           92         const char *tmp1 = NULL, *tmp2 = NULL;
           93 
           94         if (strlen(str) == 0) {
           95                 *cp = NULL;
           96                 *cplen = 0;
           97                 return 0;
           98         }
           99 
          100         /* count the number of spaces in the string and infer list length */
          101         for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
          102              count++, tmp1 = tmp2 + 1) {
          103                 ;
          104         }
          105 
          106         /* allocate resources */
          107         if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) {
          108                 fprintf(stderr, "calloc: %s\n", strerror(errno));
          109                 exit(1);
          110         }
          111 
          112         /* go through the string again, parsing the numbers */
          113         for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
          114                 tmp2 = strchr(tmp1, ' ');
          115                 if (hextocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
          116                             &((*cp)[i]))) {
          117                         return 1;
          118                 }
          119                 if (tmp2 != NULL) {
          120                         tmp1 = tmp2 + 1;
          121                 }
          122         }
          123 
          124         return 0;
          125 }
          126 
          127 static int
          128 range_parse(const char *str, struct range *range)
          129 {
          130         char *p;
          131 
          132         if ((p = strstr(str, "..")) == NULL) {
          133                 /* input has the form "XXXXXX" */
          134                 if (hextocp(str, strlen(str), &range->lower)) {
          135                         return 1;
          136                 }
          137                 range->upper = range->lower;
          138         } else {
          139                 /* input has the form "XXXXXX..XXXXXX" */
          140                 if (hextocp(str, (size_t)(p - str), &range->lower) ||
          141                     hextocp(p + 2, strlen(p + 2), &range->upper)) {
          142                         return 1;
          143                 }
          144         }
          145 
          146         return 0;
          147 }
          148 
          149 static bool
          150 get_line(char **buf, size_t *bufsize, FILE *fp, size_t *len)
          151 {
          152         int ret = EOF;
          153 
          154         for (*len = 0;; (*len)++) {
          155                 if (*len > 0 && *buf != NULL && (*buf)[*len - 1] == '\n') {
          156                         /*
          157                          * if the previously read character was a newline,
          158                          * we fake an end-of-file so we NUL-terminate and
          159                          * are done.
          160                          */
          161                         ret = EOF;
          162                 } else {
          163                         ret = fgetc(fp);
          164                 }
          165 
          166                 if (*len >= *bufsize) {
          167                         /* the buffer needs to be expanded */
          168                         *bufsize += 512;
          169                         if ((*buf = realloc(*buf, *bufsize)) == NULL) {
          170                                 fprintf(stderr, "get_line: Out of memory.\n");
          171                                 exit(1);
          172                         }
          173                 }
          174 
          175                 if (ret != EOF) {
          176                         (*buf)[*len] = (char)ret;
          177                 } else {
          178                         (*buf)[*len] = '\0';
          179                         break;
          180                 }
          181         }
          182 
          183         return *len == 0 && (feof(fp) || ferror(fp));
          184 }
          185 
          186 void
          187 parse_file_with_callback(const char *fname,
          188                          int (*callback)(const char *, char **, size_t, char *,
          189                                          void *),
          190                          void *payload)
          191 {
          192         FILE *fp;
          193         char *line = NULL, **field = NULL, *comment;
          194         size_t linebufsize = 0, i, fieldbufsize = 0, j, nfields, len;
          195 
          196         /* open file */
          197         if (!(fp = fopen(fname, "r"))) {
          198                 fprintf(stderr, "parse_file_with_callback: fopen '%s': %s.\n",
          199                         fname, strerror(errno));
          200                 exit(1);
          201         }
          202 
          203         while (!get_line(&line, &linebufsize, fp, &len)) {
          204                 /* remove trailing newline */
          205                 if (len > 0 && line[len - 1] == '\n') {
          206                         line[len - 1] = '\0';
          207                         len--;
          208                 }
          209 
          210                 /* skip empty lines and comment lines */
          211                 if (len == 0 || line[0] == '#') {
          212                         continue;
          213                 }
          214 
          215                 /* tokenize line into fields */
          216                 for (i = 0, nfields = 0, comment = NULL; i < (size_t)len; i++) {
          217                         /* skip leading whitespace */
          218                         while (line[i] == ' ') {
          219                                 i++;
          220                         }
          221 
          222                         /* check if we crashed into the comment */
          223                         if (line[i] != '#') {
          224                                 /* extend field buffer, if necessary */
          225                                 if (++nfields > fieldbufsize) {
          226                                         if ((field = realloc(
          227                                                      field,
          228                                                      nfields *
          229                                                              sizeof(*field))) ==
          230                                             NULL) {
          231                                                 fprintf(stderr,
          232                                                         "parse_file_with_"
          233                                                         "callback: realloc: "
          234                                                         "%s.\n",
          235                                                         strerror(errno));
          236                                                 exit(1);
          237                                         }
          238                                         fieldbufsize = nfields;
          239                                 }
          240 
          241                                 /* set current position as field start */
          242                                 field[nfields - 1] = &line[i];
          243 
          244                                 /* continue until we reach ';' or '#' or end */
          245                                 while (line[i] != ';' && line[i] != '#' &&
          246                                        line[i] != '\0') {
          247                                         i++;
          248                                 }
          249                         }
          250 
          251                         if (line[i] == '#') {
          252                                 /* set comment-variable for later */
          253                                 comment = &line[i + 1];
          254                         }
          255 
          256                         /* go back whitespace and terminate field there */
          257                         if (i > 0) {
          258                                 for (j = i - 1; line[j] == ' '; j--) {
          259                                         ;
          260                                 }
          261                                 line[j + 1] = '\0';
          262                         } else {
          263                                 line[i] = '\0';
          264                         }
          265 
          266                         /* if comment is set, we are done */
          267                         if (comment != NULL) {
          268                                 break;
          269                         }
          270                 }
          271 
          272                 /* skip leading whitespace in comment */
          273                 while (comment != NULL && comment[0] == ' ') {
          274                         comment++;
          275                 }
          276 
          277                 /* call callback function */
          278                 if (callback(fname, field, nfields, comment, payload)) {
          279                         fprintf(stderr, "parse_file_with_callback: "
          280                                         "Malformed input.\n");
          281                         exit(1);
          282                 }
          283         }
          284 
          285         /* close file */
          286         if (fclose(fp)) {
          287                 fprintf(stderr, "parse_file_with_callback: fclose '%s': %s.\n",
          288                         fname, strerror(errno));
          289                 exit(1);
          290         }
          291 
          292         /* cleanup */
          293         free(line);
          294         free(field);
          295 }
          296 
          297 static int
          298 properties_callback(const char *file, char **field, size_t nfields,
          299                     char *comment, void *payload)
          300 {
          301         /* prop always has the length 0x110000 */
          302         struct properties_payload *p = (struct properties_payload *)payload;
          303         struct range r;
          304         uint_least8_t i;
          305         uint_least32_t cp;
          306 
          307         (void)comment;
          308 
          309         if (nfields < 2) {
          310                 return 1;
          311         }
          312 
          313         for (i = 0; i < p->speclen; i++) {
          314                 /* identify fitting file and identifier */
          315                 if (p->spec[i].file && !strcmp(p->spec[i].file, file) &&
          316                     (!strcmp(p->spec[i].ucdname, field[1]) ||
          317                      (comment != NULL &&
          318                       !strncmp(p->spec[i].ucdname, comment,
          319                                strlen(p->spec[i].ucdname)) &&
          320                       comment[strlen(p->spec[i].ucdname)] == ' ')) &&
          321                     (p->spec[i].ucdsubname == NULL ||
          322                      (nfields >= 3 &&
          323                       !strcmp(p->spec[i].ucdsubname, field[2])))) {
          324                         /* parse range in first field */
          325                         if (range_parse(field[0], &r)) {
          326                                 return 1;
          327                         }
          328 
          329                         /* apply to all codepoints in the range */
          330                         for (cp = r.lower; cp <= r.upper; cp++) {
          331                                 if (p->set_value(payload, cp, i)) {
          332                                         exit(1);
          333                                 }
          334                         }
          335                         break;
          336                 }
          337         }
          338 
          339         return 0;
          340 }
          341 
          342 void
          343 properties_compress(const struct properties *prop,
          344                     struct properties_compressed *comp)
          345 {
          346         uint_least32_t cp, i;
          347 
          348         /* initialization */
          349         if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) *
          350                                     sizeof(*(comp->offset))))) {
          351                 fprintf(stderr, "malloc: %s\n", strerror(errno));
          352                 exit(1);
          353         }
          354         comp->data = NULL;
          355         comp->datalen = 0;
          356 
          357         for (cp = 0; cp < UINT32_C(0x110000); cp++) {
          358                 for (i = 0; i < comp->datalen; i++) {
          359                         if (!memcmp(&(prop[cp]), &(comp->data[i]),
          360                                     sizeof(*prop))) {
          361                                 /* found a match! */
          362                                 comp->offset[cp] = i;
          363                                 break;
          364                         }
          365                 }
          366                 if (i == comp->datalen) {
          367                         /*
          368                          * found no matching properties-struct, so
          369                          * add current properties to data and add the
          370                          * offset in the offset-table
          371                          */
          372                         if (!(comp->data = reallocate_array(
          373                                       comp->data, ++(comp->datalen),
          374                                       sizeof(*(comp->data))))) {
          375                                 fprintf(stderr, "reallocate_array: %s\n",
          376                                         strerror(errno));
          377                                 exit(1);
          378                         }
          379                         memcpy(&(comp->data[comp->datalen - 1]), &(prop[cp]),
          380                                sizeof(*prop));
          381                         comp->offset[cp] = comp->datalen - 1;
          382                 }
          383         }
          384 }
          385 
          386 double
          387 properties_get_major_minor(const struct properties_compressed *comp,
          388                            struct properties_major_minor *mm)
          389 {
          390         size_t i, j, compression_count = 0;
          391 
          392         /*
          393          * we currently have an array comp->offset which maps the
          394          * codepoints 0..0x110000 to offsets into comp->data.
          395          * To improve cache-locality instead and allow a bit of
          396          * compressing, instead of directly mapping a codepoint
          397          * 0xAAAABB with comp->offset, we generate two arrays major
          398          * and minor such that
          399          *    comp->offset(0xAAAABB) == minor[major[0xAAAA] + 0xBB]
          400          * This yields a major-array of length 2^16 and a minor array
          401          * of variable length depending on how many common subsequences
          402          * can be filtered out.
          403          */
          404 
          405         /* initialize */
          406         if (!(mm->major = malloc((size_t)0x1100 * sizeof(*(mm->major))))) {
          407                 fprintf(stderr, "malloc: %s\n", strerror(errno));
          408                 exit(1);
          409         }
          410         mm->minor = NULL;
          411         mm->minorlen = 0;
          412 
          413         for (i = 0; i < (size_t)0x1100; i++) {
          414                 /*
          415                  * we now look at the cp-range (i << 8)..(i << 8 + 0xFF)
          416                  * and check if its corresponding offset-data already
          417                  * exists in minor (because then we just point there
          418                  * and need less storage)
          419                  */
          420                 for (j = 0; j + 0xFF < mm->minorlen; j++) {
          421                         if (!memcmp(&(comp->offset[i << 8]), &(mm->minor[j]),
          422                                     sizeof(*(comp->offset)) * 0x100)) {
          423                                 break;
          424                         }
          425                 }
          426                 if (j + 0xFF < mm->minorlen) {
          427                         /* found an index */
          428                         compression_count++;
          429                         mm->major[i] = j;
          430                 } else {
          431                         /*
          432                          * add "new" sequence to minor and point to it
          433                          * in major
          434                          */
          435                         mm->minorlen += 0x100;
          436                         if (!(mm->minor =
          437                                       reallocate_array(mm->minor, mm->minorlen,
          438                                                        sizeof(*(mm->minor))))) {
          439                                 fprintf(stderr, "reallocate_array: %s\n",
          440                                         strerror(errno));
          441                                 exit(1);
          442                         }
          443                         memcpy(&(mm->minor[mm->minorlen - 0x100]),
          444                                &(comp->offset[i << 8]),
          445                                sizeof(*(mm->minor)) * 0x100);
          446                         mm->major[i] = mm->minorlen - 0x100;
          447                 }
          448         }
          449 
          450         /* return compression ratio */
          451         return (double)compression_count / 0x1100 * 100;
          452 }
          453 
          454 void
          455 properties_print_lookup_table(const char *name, const size_t *data,
          456                               size_t datalen)
          457 {
          458         const char *type;
          459         size_t i, maxval;
          460 
          461         for (i = 0, maxval = 0; i < datalen; i++) {
          462                 if (data[i] > maxval) {
          463                         maxval = data[i];
          464                 }
          465         }
          466 
          467         type = (maxval <= UINT_LEAST8_MAX)  ? "uint_least8_t" :
          468                (maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" :
          469                (maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" :
          470                                               "uint_least64_t";
          471 
          472         printf("static const %s %s[] = {\n\t", type, name);
          473         for (i = 0; i < datalen; i++) {
          474                 printf("%zu", data[i]);
          475                 if (i + 1 == datalen) {
          476                         printf("\n");
          477                 } else if ((i + 1) % 8 != 0) {
          478                         printf(", ");
          479                 } else {
          480                         printf(",\n\t");
          481                 }
          482         }
          483         printf("};\n");
          484 }
          485 
          486 void
          487 properties_print_derived_lookup_table(
          488         char *name, size_t *offset, size_t offsetlen,
          489         int_least64_t (*get_value)(const struct properties *, size_t),
          490         const void *payload)
          491 {
          492         const char *type;
          493         size_t i;
          494         int_least64_t minval, maxval;
          495 
          496         for (i = 0, minval = INT_LEAST64_MAX, maxval = INT_LEAST64_MIN;
          497              i < offsetlen; i++) {
          498                 if (get_value(payload, offset[i]) > maxval) {
          499                         maxval = get_value(payload, offset[i]);
          500                 } else if (get_value(payload, offset[i]) < minval) {
          501                         minval = get_value(payload, offset[i]);
          502                 }
          503         }
          504 
          505         if (minval < 0) {
          506                 /* we need a signed type */
          507                 type = (minval >= INT_LEAST8_MIN && maxval <= INT_LEAST8_MAX) ?
          508                                "int_least8_t" :
          509                        (minval >= INT_LEAST16_MIN &&
          510                         maxval <= INT_LEAST16_MAX) ?
          511                                "int_least16_t" :
          512                        (minval >= INT_LEAST32_MIN &&
          513                         maxval <= INT_LEAST32_MAX) ?
          514                                "int_least32_t" :
          515                                "int_least64_t";
          516         } else {
          517                 /* we are fine with an unsigned type */
          518                 type = (maxval <= UINT_LEAST8_MAX)  ? "uint_least8_t" :
          519                        (maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" :
          520                        (maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" :
          521                                                       "uint_least64_t";
          522         }
          523 
          524         printf("static const %s %s[] = {\n\t", type, name);
          525         for (i = 0; i < offsetlen; i++) {
          526                 printf("%" PRIiLEAST64, get_value(payload, offset[i]));
          527                 if (i + 1 == offsetlen) {
          528                         printf("\n");
          529                 } else if ((i + 1) % 8 != 0) {
          530                         printf(", ");
          531                 } else {
          532                         printf(",\n\t");
          533                 }
          534         }
          535         printf("};\n");
          536 }
          537 
          538 static void
          539 properties_print_enum(const struct property_spec *spec, size_t speclen,
          540                       const char *enumname, const char *enumprefix)
          541 {
          542         size_t i;
          543 
          544         printf("enum %s {\n", enumname);
          545         for (i = 0; i < speclen; i++) {
          546                 printf("\t%s_%s,\n", enumprefix, spec[i].enumname);
          547         }
          548         printf("\tNUM_%sS,\n};\n\n", enumprefix);
          549 }
          550 
          551 static int
          552 set_value_bp(struct properties_payload *payload, uint_least32_t cp,
          553              int_least64_t value)
          554 {
          555         if (payload->prop[cp].property != payload->speclen) {
          556                 if (payload->handle_conflict == NULL) {
          557                         fprintf(stderr,
          558                                 "set_value_bp: "
          559                                 "Unhandled character break property "
          560                                 "overwrite for 0x%06X (%s <- %s).\n",
          561                                 cp,
          562                                 payload->spec[payload->prop[cp].property]
          563                                         .enumname,
          564                                 payload->spec[value].enumname);
          565                         return 1;
          566                 } else {
          567                         value = payload->handle_conflict(
          568                                 cp, (uint_least8_t)payload->prop[cp].property,
          569                                 (uint_least8_t)value);
          570                 }
          571         }
          572         payload->prop[cp].property = value;
          573 
          574         return 0;
          575 }
          576 
          577 static int_least64_t
          578 get_value_bp(const struct properties *prop, size_t offset)
          579 {
          580         return prop[offset].property;
          581 }
          582 
          583 void
          584 properties_generate_break_property(
          585         const struct property_spec *spec, uint_least8_t speclen,
          586         uint_least8_t (*fill_missing)(uint_least32_t),
          587         uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
          588                                          uint_least8_t),
          589         void (*post_process)(struct properties *), const char *prefix,
          590         const char *argv0)
          591 {
          592         struct properties_compressed comp;
          593         struct properties_major_minor mm;
          594         struct properties_payload payload;
          595         struct properties *prop;
          596         size_t i, j, prefixlen = strlen(prefix);
          597         char buf1[64], prefix_uc[64], buf2[64], buf3[64], buf4[64];
          598 
          599         /*
          600          * allocate property buffer for all 0x110000 codepoints and
          601          * initialize its entries to the known invalid value "speclen"
          602          */
          603         if (!(prop = calloc(UINT32_C(0x110000), sizeof(*prop)))) {
          604                 fprintf(stderr, "calloc: %s\n", strerror(errno));
          605                 exit(1);
          606         }
          607         for (i = 0; i < UINT32_C(0x110000); i++) {
          608                 prop[i].property = speclen;
          609         }
          610 
          611         /* generate data */
          612         payload.prop = prop;
          613         payload.spec = spec;
          614         payload.speclen = speclen;
          615         payload.set_value = set_value_bp;
          616         payload.handle_conflict = handle_conflict;
          617 
          618         /* parse each file exactly once and ignore NULL-fields */
          619         for (i = 0; i < speclen; i++) {
          620                 for (j = 0; j < i; j++) {
          621                         if (spec[i].file && spec[j].file &&
          622                             !strcmp(spec[i].file, spec[j].file)) {
          623                                 /* file has already been parsed */
          624                                 break;
          625                         }
          626                 }
          627                 if (i == j && spec[i].file) {
          628                         /* file has not been processed yet */
          629                         parse_file_with_callback(spec[i].file,
          630                                                  properties_callback, &payload);
          631                 }
          632         }
          633 
          634         /* fill in the missing properties that weren't explicitly given */
          635         for (i = 0; i < UINT32_C(0x110000); i++) {
          636                 if (payload.prop[i].property == speclen) {
          637                         if (fill_missing != NULL) {
          638                                 payload.prop[i].property =
          639                                         fill_missing((uint_least32_t)i);
          640                         } else {
          641                                 payload.prop[i].property = 0;
          642                         }
          643                 }
          644         }
          645 
          646         /* post-processing */
          647         if (post_process != NULL) {
          648                 post_process(payload.prop);
          649         }
          650 
          651         /* compress data */
          652         printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n",
          653                argv0);
          654         properties_compress(prop, &comp);
          655 
          656         fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0, prefix,
          657                 properties_get_major_minor(&comp, &mm));
          658 
          659         /* prepare names */
          660         if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >=
          661             LEN(buf1)) {
          662                 fprintf(stderr, "snprintf: String truncated.\n");
          663                 exit(1);
          664         }
          665         if (LEN(prefix_uc) + 1 < prefixlen) {
          666                 fprintf(stderr, "snprintf: Buffer too small.\n");
          667                 exit(1);
          668         }
          669         for (i = 0; i < prefixlen; i++) {
          670                 prefix_uc[i] = (char)toupper(prefix[i]);
          671         }
          672         prefix_uc[prefixlen] = '\0';
          673         if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >=
          674                     LEN(buf2) ||
          675             (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >=
          676                     LEN(buf3) ||
          677             (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >=
          678                     LEN(buf4)) {
          679                 fprintf(stderr, "snprintf: String truncated.\n");
          680                 exit(1);
          681         }
          682 
          683         /* print data */
          684         properties_print_enum(spec, speclen, buf1, buf2);
          685         properties_print_lookup_table(buf3, mm.major, 0x1100);
          686         printf("\n");
          687         properties_print_derived_lookup_table(buf4, mm.minor, mm.minorlen,
          688                                               get_value_bp, comp.data);
          689 
          690         /* free data */
          691         free(prop);
          692         free(comp.data);
          693         free(comp.offset);
          694         free(mm.major);
          695         free(mm.minor);
          696 }
          697 
          698 static int
          699 break_test_callback(const char *fname, char **field, size_t nfields,
          700                     char *comment, void *payload)
          701 {
          702         struct break_test *t,
          703                 **test = ((struct break_test_payload *)payload)->test;
          704         size_t i, *testlen = ((struct break_test_payload *)payload)->testlen,
          705                   commentlen;
          706         char *token;
          707 
          708         (void)fname;
          709 
          710         if (nfields < 1) {
          711                 return 1;
          712         }
          713 
          714         /* append new testcase and initialize with zeroes */
          715         if ((*test = realloc(*test, ++(*testlen) * sizeof(**test))) == NULL) {
          716                 fprintf(stderr, "break_test_callback: realloc: %s.\n",
          717                         strerror(errno));
          718                 return 1;
          719         }
          720         t = &(*test)[*testlen - 1];
          721         memset(t, 0, sizeof(*t));
          722 
          723         /* parse testcase "<÷|×> <cp> <÷|×> ... <cp> <÷|×>" */
          724         for (token = strtok(field[0], " "), i = 0; token != NULL;
          725              i++, token = strtok(NULL, " ")) {
          726                 if (i % 2 == 0) {
          727                         /* delimiter or start of sequence */
          728                         if (i == 0 ||
          729                             !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */
          730                                 /*
          731                                  * '÷' indicates a breakpoint,
          732                                  * the current length is done; allocate
          733                                  * a new length field and set it to 0
          734                                  */
          735                                 if ((t->len = realloc(
          736                                              t->len,
          737                                              ++t->lenlen * sizeof(*t->len))) ==
          738                                     NULL) {
          739                                         fprintf(stderr,
          740                                                 "break_test_"
          741                                                 "callback: realloc: %s.\n",
          742                                                 strerror(errno));
          743                                         return 1;
          744                                 }
          745                                 t->len[t->lenlen - 1] = 0;
          746                         } else if (!strncmp(token, "\xC3\x97", 2)) { /* UTF-8 */
          747                                 /* '×' indicates a non-breakpoint, do nothing */
          748                         } else {
          749                                 fprintf(stderr,
          750                                         "break_test_callback: "
          751                                         "Malformed delimiter '%s'.\n",
          752                                         token);
          753                                 return 1;
          754                         }
          755                 } else {
          756                         /* add codepoint to cp-array */
          757                         if ((t->cp = realloc(t->cp,
          758                                              ++t->cplen * sizeof(*t->cp))) ==
          759                             NULL) {
          760                                 fprintf(stderr,
          761                                         "break_test_callback: "
          762                                         "realloc: %s.\n",
          763                                         strerror(errno));
          764                                 return 1;
          765                         }
          766                         if (hextocp(token, strlen(token),
          767                                     &t->cp[t->cplen - 1])) {
          768                                 return 1;
          769                         }
          770                         if (t->lenlen > 0) {
          771                                 t->len[t->lenlen - 1]++;
          772                         }
          773                 }
          774         }
          775         if (t->lenlen > 0 && t->len[t->lenlen - 1] == 0) {
          776                 /*
          777                  * we allocated one more length than we needed because
          778                  * the breakpoint was at the end
          779                  */
          780                 t->lenlen--;
          781         }
          782 
          783         /* store comment */
          784         if (comment != NULL) {
          785                 commentlen = strlen(comment) + 1;
          786                 if (((*test)[*testlen - 1].descr = malloc(commentlen)) ==
          787                     NULL) {
          788                         fprintf(stderr, "break_test_callback: malloc: %s.\n",
          789                                 strerror(errno));
          790                         return 1;
          791                 }
          792                 memcpy((*test)[*testlen - 1].descr, comment, commentlen);
          793         }
          794 
          795         return 0;
          796 }
          797 
          798 void
          799 break_test_list_parse(char *fname, struct break_test **test, size_t *testlen)
          800 {
          801         struct break_test_payload pl = {
          802                 .test = test,
          803                 .testlen = testlen,
          804         };
          805         *test = NULL;
          806         *testlen = 0;
          807 
          808         parse_file_with_callback(fname, break_test_callback, &pl);
          809 }
          810 
          811 void
          812 break_test_list_print(const struct break_test *test, size_t testlen,
          813                       const char *identifier, const char *progname)
          814 {
          815         size_t i, j;
          816 
          817         printf("/* Automatically generated by %s */\n"
          818                "#include <stdint.h>\n#include <stddef.h>\n\n"
          819                "#include \"../gen/types.h\"\n\n",
          820                progname);
          821 
          822         printf("static const struct break_test %s[] = {\n", identifier);
          823         for (i = 0; i < testlen; i++) {
          824                 printf("\t{\n");
          825 
          826                 printf("\t\t.cp     = (uint_least32_t[]){");
          827                 for (j = 0; j < test[i].cplen; j++) {
          828                         printf(" UINT32_C(0x%06X)", test[i].cp[j]);
          829                         if (j + 1 < test[i].cplen) {
          830                                 putchar(',');
          831                         }
          832                 }
          833                 printf(" },\n");
          834                 printf("\t\t.cplen  = %zu,\n", test[i].cplen);
          835 
          836                 printf("\t\t.len    = (size_t[]){");
          837                 for (j = 0; j < test[i].lenlen; j++) {
          838                         printf(" %zu", test[i].len[j]);
          839                         if (j + 1 < test[i].lenlen) {
          840                                 putchar(',');
          841                         }
          842                 }
          843                 printf(" },\n");
          844                 printf("\t\t.lenlen = %zu,\n", test[i].lenlen);
          845 
          846                 printf("\t\t.descr  = \"%s\",\n", test[i].descr);
          847 
          848                 printf("\t},\n");
          849         }
          850         printf("};\n");
          851 }
          852 
          853 void
          854 break_test_list_free(struct break_test *test, size_t testlen)
          855 {
          856         size_t i;
          857 
          858         for (i = 0; i < testlen; i++) {
          859                 free(test[i].cp);
          860                 free(test[i].len);
          861                 free(test[i].descr);
          862         }
          863 
          864         free(test);
          865 }