util.c - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       util.c (19744B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <ctype.h>
            3 #include <errno.h>
            4 #include <inttypes.h>
            5 #include <stdbool.h>
            6 #include <stddef.h>
            7 #include <stdint.h>
            8 #include <stdio.h>
            9 #include <stdlib.h>
           10 #include <string.h>
           11 
           12 #include "util.h"
           13 
           14 struct range {
           15         uint_least32_t lower;
           16         uint_least32_t upper;
           17 };
           18 
           19 static int
           20 hextocp(const char *str, size_t len, uint_least32_t *cp)
           21 {
           22         size_t i;
           23         int off;
           24         char relative;
           25 
           26         /* the maximum valid codepoint is 0x10FFFF */
           27         if (len > 6) {
           28                 fprintf(stderr, "hextocp: '%.*s' is too long.\n", (int)len,
           29                         str);
           30                 return 1;
           31         }
           32 
           33         for (i = 0, *cp = 0; i < len; i++) {
           34                 if (str[i] >= '0' && str[i] <= '9') {
           35                         relative = '0';
           36                         off = 0;
           37                 } else if (str[i] >= 'a' && str[i] <= 'f') {
           38                         relative = 'a';
           39                         off = 10;
           40                 } else if (str[i] >= 'A' && str[i] <= 'F') {
           41                         relative = 'A';
           42                         off = 10;
           43                 } else {
           44                         fprintf(stderr, "hextocp: '%.*s' is not hexadecimal.\n",
           45                                 (int)len, str);
           46                         return 1;
           47                 }
           48 
           49                 *cp += ((uint_least32_t)1 << (4 * (len - i - 1))) *
           50                        (uint_least32_t)(str[i] - relative + off);
           51         }
           52 
           53         if (*cp > UINT32_C(0x10FFFF)) {
           54                 fprintf(stderr, "hextocp: '%.*s' is too large.\n", (int)len,
           55                         str);
           56                 return 1;
           57         }
           58 
           59         return 0;
           60 }
           61 
           62 int
           63 parse_range(const char *str, struct range *range)
           64 {
           65         char *p;
           66 
           67         if ((p = strstr(str, "..")) == NULL) {
           68                 /* input has the form "XXXXXX" */
           69                 if (hextocp(str, strlen(str), &range->lower)) {
           70                         return 1;
           71                 }
           72                 range->upper = range->lower;
           73         } else {
           74                 /* input has the form "XXXXXX..XXXXXX" */
           75                 if (hextocp(str, (size_t)(p - str), &range->lower) ||
           76                     hextocp(p + 2, strlen(p + 2), &range->upper)) {
           77                         return 1;
           78                 }
           79         }
           80 
           81         return 0;
           82 }
           83 
           84 static bool
           85 get_line(char **buf, size_t *bufsize, FILE *fp, size_t *len)
           86 {
           87         int ret = EOF;
           88 
           89         for (*len = 0;; (*len)++) {
           90                 if (*len > 0 && *buf != NULL && (*buf)[*len - 1] == '\n') {
           91                         /*
           92                          * if the previously read character was a newline,
           93                          * we fake an end-of-file so we NUL-terminate and
           94                          * are done.
           95                          */
           96                         ret = EOF;
           97                 } else {
           98                         ret = fgetc(fp);
           99                 }
          100 
          101                 if (*len >= *bufsize) {
          102                         /* the buffer needs to be expanded */
          103                         *bufsize += 512;
          104                         if ((*buf = realloc(*buf, *bufsize)) == NULL) {
          105                                 fprintf(stderr, "get_line: Out of memory.\n");
          106                                 exit(1);
          107                         }
          108                 }
          109 
          110                 if (ret != EOF) {
          111                         (*buf)[*len] = (char)ret;
          112                 } else {
          113                         (*buf)[*len] = '\0';
          114                         break;
          115                 }
          116         }
          117 
          118         return *len == 0 && (feof(fp) || ferror(fp));
          119 }
          120 
          121 static char *
          122 duplicate_string(const char *src)
          123 {
          124         size_t len = strlen(src);
          125         char *dest;
          126 
          127         if (!(dest = malloc(len + 1))) {
          128                 fprintf(stderr, "malloc: %s\n", strerror(errno));
          129                 exit(1);
          130         }
          131 
          132         memcpy(dest, src, len);
          133         dest[len] = '\0';
          134 
          135         return dest;
          136 }
          137 
          138 void
          139 duplicate_codepoint_property(const struct codepoint_property *src,
          140         struct codepoint_property *dest)
          141 {
          142         size_t i;
          143 
          144         /* copy the field count */
          145         dest->field_count = src->field_count;
          146 
          147         /* allocate the field array */
          148         if (!(dest->fields = calloc(dest->field_count, sizeof(*(dest->fields))))) {
          149                 fprintf(stderr, "malloc: %s\n", strerror(errno));
          150                 exit(1);
          151         }
          152 
          153         for (i = 0; i < dest->field_count; i++) {
          154                 dest->fields[i] = duplicate_string(src->fields[i]);
          155         }
          156 }
          157 
          158 static void
          159 free_codepoint_property(struct codepoint_property *p)
          160 {
          161         size_t i;
          162 
          163         for (i = 0; i < p->field_count; i++) {
          164                 free(p->fields[i]);
          165         }
          166         free(p->fields);
          167 }
          168 
          169 static void
          170 free_codepoint_property_set(struct codepoint_property_set *p)
          171 {
          172         size_t i;
          173 
          174         for (i = 0; i < p->property_count; i++) {
          175                 free_codepoint_property(&(p->properties[i]));
          176         }
          177         free(p->properties);
          178 }
          179 
          180 void
          181 free_codepoint_property_set_array(struct codepoint_property_set *p)
          182 {
          183         uint_least32_t cp;
          184 
          185         for (cp = 0; cp < UINT32_C(0x110000); cp++) {
          186                 free_codepoint_property_set(&(p[cp]));
          187         }
          188         free(p);
          189 }
          190 
          191 const struct codepoint_property *
          192 match_in_codepoint_property_set(const struct codepoint_property_set *p,
          193         const char *str, size_t field_offset)
          194 {
          195         size_t i;
          196 
          197         for (i = 0; i < p->property_count; i++) {
          198                 /* there must be enough fields to reach the offset */
          199                 if (field_offset >= p->properties[i].field_count) {
          200                         continue;
          201                 }
          202 
          203                 /* check for a match */
          204                 if (strcmp(p->properties[i].fields[field_offset], str) == 0) {
          205                         return &(p->properties[i]);
          206                 }
          207         }
          208         
          209         return NULL;
          210 }
          211 
          212 struct codepoint_property_set *
          213 parse_property_file(const char *fname)
          214 {
          215         FILE *fp;
          216         struct codepoint_property p;
          217         struct codepoint_property_set *cpp, *missing;
          218         struct range range;
          219         char *line = NULL, **field = NULL, *comment;
          220         uint_least32_t cp;
          221         size_t linebufsize = 0, i, fieldbufsize = 0, j, nfields, len;
          222         bool is_missing;
          223 
          224         /*
          225          * allocate cpp buffer of length 0x11000, initialised to zeros
          226          * (NULL 'properties' pointer, zero property_count)
          227          */
          228         if (!(cpp = calloc((size_t)UINT32_C(0x110000), sizeof(*cpp)))) {
          229                 fprintf(stderr, "calloc: %s\n", strerror(errno));
          230                 exit(1);
          231         }
          232 
          233         /*
          234          * allocate same-sized array for 'missing' fields. The data files
          235          * have this strange notion of defining some properties in bloody
          236          * comments, and in a way that says 'yeah, if we did not define
          237          * something for some, then replace it with this value'. However,
          238          * it complicates things, as multiple, overlapping @missing
          239          * can be defined in a single file. One can deduce that subsequent
          240          * @missing just overwrite each other, but there's no way to
          241          * track which properties have not been set without running
          242          * through the file multiple times, which we want to avoid, so
          243          * we accumulate the (self-overwriting) @missing set separately
          244          * and fill it in at the end.
          245          */
          246         if (!(missing = calloc((size_t)UINT32_C(0x110000), sizeof(*missing)))) {
          247                 fprintf(stderr, "calloc: %s\n", strerror(errno));
          248                 exit(1);
          249         }
          250 
          251         /* open the property file */
          252         if (!(fp = fopen(fname, "r"))) {
          253                 fprintf(stderr, "parse_property_file: fopen '%s': %s.\n",
          254                         fname, strerror(errno));
          255                 exit(1);
          256         }
          257 
          258         while (!get_line(&line, &linebufsize, fp, &len)) {
          259                 /* remove trailing newline */
          260                 if (len > 0 && line[len - 1] == '\n') {
          261                         line[len - 1] = '\0';
          262                         len--;
          263                 }
          264 
          265                 /* skip empty lines */
          266                 if (len == 0) {
          267                         continue;
          268                 }
          269 
          270                 /* comment, check if it's a @missing */
          271                 is_missing = false;
          272                 if (line[0] == '#') {
          273                         if (strncmp(line + 1, " @missing: ", LEN(" @missing: ") - 1) == 0) {
          274                                 /*
          275                                  * this comment specifies a 'missing' line.
          276                                  * We shift it to the left and treat it
          277                                  * like any other line, differentiating it
          278                                  * with the 'is_missing' flag.
          279                                  */
          280                                 size_t offset = 1 + (LEN(" @missing: ") - 1);
          281 
          282                                 memmove(line, line + offset, len - offset + 1);
          283                                 len -= offset;
          284                                 is_missing = true;
          285                         } else {
          286                                 /* skip unrelated comment line */
          287                                 continue;
          288                         }
          289                 }
          290 
          291                 /* tokenize line into fields */
          292                 for (i = 0, nfields = 0, comment = NULL; i < (size_t)len; i++) {
          293                         /* skip leading whitespace */
          294                         while (line[i] == ' ') {
          295                                 i++;
          296                         }
          297 
          298                         /* check if we crashed into the comment */
          299                         if (line[i] != '#') {
          300                                 /* extend field buffer, if necessary */
          301                                 if (++nfields > fieldbufsize) {
          302                                         if ((field = realloc(
          303                                                      field,
          304                                                      nfields *
          305                                                              sizeof(*field))) ==
          306                                             NULL) {
          307                                                 fprintf(stderr,
          308                                                         "parse_property_"
          309                                                         "file: realloc: "
          310                                                         "%s.\n",
          311                                                         strerror(errno));
          312                                                 exit(1);
          313                                         }
          314                                         fieldbufsize = nfields;
          315                                 }
          316 
          317                                 /* set current position as field start */
          318                                 field[nfields - 1] = &line[i];
          319 
          320                                 /* continue until we reach ';' or '#' or end */
          321                                 while (line[i] != ';' && line[i] != '#' &&
          322                                        line[i] != '\0') {
          323                                         i++;
          324                                 }
          325                         }
          326 
          327                         if (line[i] == '#') {
          328                                 /* set comment-variable for later */
          329                                 comment = &line[i + 1];
          330                         }
          331 
          332                         /* go back whitespace and terminate field there */
          333                         if (i > 0) {
          334                                 for (j = i - 1; line[j] == ' '; j--) {
          335                                         ;
          336                                 }
          337                                 line[j + 1] = '\0';
          338                         } else {
          339                                 line[i] = '\0';
          340                         }
          341 
          342                         /* if comment is set, we are done */
          343                         if (comment != NULL) {
          344                                 break;
          345                         }
          346                 }
          347 
          348                 /* skip leading whitespace in comment */
          349                 while (comment != NULL && comment[0] == ' ') {
          350                         comment++;
          351                 }
          352 
          353                 /*
          354                  * We now have an array 'field' with 'nfields'. We can
          355                  * follow from the file format that, if nfields > 0,
          356                  * field[0] specifies a codepoint or range of
          357                  * codepoints, which we parse as the first step.
          358                  */
          359                 if (nfields < 2) {
          360                         /*
          361                          * either there is only a range or no range at
          362                          * all. Report this
          363                          */
          364                         fprintf(stderr, "parse_property_file: malformed "
          365                                 "line with either no range or no entry\n");
          366                         exit(1);
          367                 }
          368 
          369                 if (parse_range(field[0], &range)) {
          370                         fprintf(stderr, "parse_property_file: parse_range of "
          371                                 "'%s' failed.\n", field[0]);
          372                         exit(1);
          373                 }
          374 
          375                 /*
          376                  * fill a codepoint_property from the remaining
          377                  * fields (no allocations on a stack-allocated struct
          378                  */
          379                 p.fields = field + 1;
          380                 p.field_count = nfields - 1;
          381 
          382                 /*
          383                  * add a duplicate of the filled codepoint_property
          384                  * to all codepoints in the range (i.e. the cpp or
          385                  * missing array, depending on is_missing)
          386                  */
          387                 for (cp = range.lower; cp <= range.upper; cp++) {
          388                         if (is_missing) {
          389                                 /*
          390                                  * we overwrite the set at the codepoint,
          391                                  * as the @missing properties overwrite
          392                                  * each other (bad design)
          393                                  */
          394                                 if (missing[cp].property_count == 0) {
          395                                         /*
          396                                          * set is still empty, add space
          397                                          * for one pointer to a
          398                                          * codepoint_property
          399                                          */
          400                                         if (!(missing[cp].properties =
          401                                               malloc(sizeof(*(missing[cp].properties))))) {
          402                                                 fprintf(stderr, "malloc: %s\n",
          403                                                         strerror(errno));
          404                                                 exit(1);
          405                                         }
          406                                         missing[cp].property_count = 1;
          407                                 } else if (missing[cp].property_count == 1) {
          408                                         /* free the old property */
          409                                         free_codepoint_property(
          410                                                 &(missing[cp].properties[0]));
          411                                 } else {
          412                                         /* this shouldn't happen */
          413                                         fprintf(stderr, "parse_property_file: "
          414                                                 "malformed missing set\n");
          415                                         exit(1);
          416                                 }
          417 
          418                                 /* copy in the new one */
          419                                 duplicate_codepoint_property(
          420                                         &p, &(missing[cp].properties[0]));
          421                         } else {
          422                                 /* expand the set by one element */
          423                                 if (!(cpp[cp].properties = realloc(
          424                                         cpp[cp].properties,
          425                                         sizeof(*(cpp[cp].properties)) *
          426                                         (++cpp[cp].property_count)))) {
          427                                         fprintf(stderr, "malloc: %s\n",
          428                                                 strerror(errno));
          429                                         exit(1);
          430                                 }
          431 
          432                                 duplicate_codepoint_property(&p,
          433                                         &(cpp[cp].properties[cpp[cp].property_count - 1]));
          434                         }
          435                 }
          436         }
          437 
          438         /*
          439          * now we add the missing elements. We purposefully do not
          440          * follow the interpretation in DerivedCoreProperties.txt for
          441          * the @missing 'InCB; None'. Missing, for us, means that
          442          * _no_ properties have been extracted and thus property_count
          443          * is zero, not that some first field is not set. Absolute
          444          * madness to even publish data like this...
          445          */
          446         for (cp = 0; cp < UINT32_C(0x110000); cp++) {
          447                 if (cpp[cp].property_count == 0) {
          448                         /* swap the whole lot */
          449                         cpp[cp].properties = missing[cp].properties;
          450                         cpp[cp].property_count = missing[cp].property_count;
          451                         missing[cp].properties = NULL;
          452                         missing[cp].property_count = 0;
          453                 }
          454         }
          455 
          456         /* close file */
          457         if (fclose(fp)) {
          458                 fprintf(stderr, "parse_property_file: fclose '%s': %s.\n",
          459                         fname, strerror(errno));
          460                 exit(1);
          461         }
          462 
          463         /* cleanup */
          464         free_codepoint_property_set_array(missing);
          465         free(line);
          466         free(field);
          467 
          468         /* return codepoint properties array */
          469         return cpp;
          470 }
          471 
          472 struct compression_output {
          473         uint_least64_t *major;
          474         uint_least64_t *minor;
          475         size_t block_size_shift;
          476         size_t block_size;
          477         size_t major_size;
          478         size_t minor_size;
          479         size_t major_entry_size;
          480         size_t minor_entry_size;
          481         size_t total_size;
          482 };
          483 
          484 static void
          485 compress_array(const uint_least64_t *array, size_t block_size_shift,
          486                struct compression_output *co)
          487 {
          488         size_t i, j, major_maximum, minor_maximum;
          489 
          490         /* set some preliminary data in the output struct */
          491         co->block_size_shift = block_size_shift;
          492         co->block_size = ((size_t)1) << block_size_shift;
          493         co->major_size = UINT32_C(0x110000) / co->block_size;
          494 
          495         /* allocate/initialise */
          496         if (!(co->major = malloc(co->major_size * sizeof(*(co->major))))) {
          497                 fprintf(stderr, "malloc: %s\n", strerror(errno));
          498                 exit(1);
          499         }
          500         co->minor = NULL;
          501         co->minor_size = 0;
          502 
          503         /* iterate over all blocks in the array */
          504         for (i = 0; i < co->major_size; i++) {
          505                 size_t block_offset = i * co->block_size;
          506 
          507                 /*
          508                  * iterate over all possible matching block starting
          509                  * positions in the minor array
          510                  */
          511                 for (j = 0; j + co->block_size < co->minor_size; j++) {
          512                         /*
          513                          * check if our current block matches the
          514                          * candidate block in the minor array
          515                          */
          516                         if (memcmp(array + block_offset,
          517                                    co->minor + j,
          518                                    co->block_size * sizeof(*array)) == 0) {
          519                                 /*
          520                                  * we have a match, so we don't have to
          521                                  * store this block explicitly and just
          522                                  * point to the offset in minor
          523                                  */
          524                                 co->major[i] = j;
          525                                 break;
          526                         }
          527                 }
          528                 if (j + co->block_size >= co->minor_size) {
          529                         /*
          530                          * we found no matching subblock in minor. Add it
          531                          * to the minor array. The index to the first
          532                          * element we add now is exactly the size
          533                          * of the minor array.
          534                          */
          535                         co->major[i] = co->minor_size;
          536                         co->minor_size += co->block_size;
          537                         if (!(co->minor = realloc(co->minor, co->minor_size *
          538                                                   sizeof(*(co->minor))))) {
          539                                 fprintf(stderr, "malloc: %s\n",
          540                                         strerror(errno));
          541                                 exit(1);
          542                         }
          543                         memcpy(co->minor + co->minor_size - co->block_size,
          544                                array + block_offset,
          545                                co->block_size * sizeof(*array));
          546                 }
          547         }
          548 
          549         /* the compression is done. Now we derive some metadata */
          550 
          551         /* determine maxima */
          552         for (i = 0, major_maximum = 0; i < co->major_size; i++) {
          553                 if (co->major[i] > major_maximum) {
          554                         major_maximum = co->major[i];
          555                 }
          556         }
          557         for (i = 0, minor_maximum = 0; i < co->minor_size; i++) {
          558                 if (co->minor[i] > minor_maximum) {
          559                         minor_maximum = co->minor[i];
          560                 }
          561         }
          562 
          563         /* determine entry sizes */        
          564         if (major_maximum < UINT64_C(1) << 8) {
          565                 co->major_entry_size = 8;
          566         } else if (major_maximum < UINT64_C(1) << 16) {
          567                 co->major_entry_size = 16;
          568         } else if (major_maximum < UINT64_C(1) << 32) {
          569                 co->major_entry_size = 32;
          570         } else {
          571                 co->major_entry_size = 64;
          572         }
          573 
          574         if (minor_maximum < UINT64_C(1) << 4) {
          575                 /* using 4 bit packed nibbles */
          576                 co->minor_entry_size = 4;
          577         } else if (minor_maximum < UINT64_C(1) << 8) {
          578                 co->minor_entry_size = 8;
          579         } else if (minor_maximum < UINT64_C(1) << 16) {
          580                 co->minor_entry_size = 16;
          581         } else if (minor_maximum < UINT64_C(1) << 32) {
          582                 co->minor_entry_size = 32;
          583         } else {
          584                 co->minor_entry_size = 64;
          585         }
          586 
          587         /* total data size in bytes */
          588         co->total_size = ((co->major_size * co->major_entry_size) +
          589                           (co->minor_size * co->minor_entry_size)) / 8;
          590 }
          591 
          592 void
          593 free_compressed_data(struct compression_output *co)
          594 {
          595         free(co->major);
          596         free(co->minor);
          597         memset(co, 0, sizeof(*co));
          598 }
          599 
          600 void
          601 print_array(const uint_least64_t *array, size_t array_size,
          602             size_t array_entry_size, const char *prefix,
          603             const char *name)
          604 {
          605         size_t i;
          606 
          607         if (array_entry_size == 4) {
          608                 /* we store two 4-bit nibbles in one byte */
          609                 if (array_size % 2 != 0) {
          610                         /* ensure array lenght is even */
          611                         fprintf(stderr, "print_array: 4-bit array "
          612                                 "is not implemented for odd length.");
          613                         exit(1);
          614                 }
          615 
          616                 printf("static const uint_least8_t %s_%s[] = {",
          617                        prefix, name);
          618                 for (i = 0; i < array_size; i += 2) {
          619                         if ((i / 2) % 8 == 0) {
          620                                 printf("\n\t");
          621                         } else {
          622                                 printf(" ");
          623                         }
          624 
          625                         /* write high and low nibbles */
          626                         printf("%zu", (array[i] << 4) | array[i + 1]);
          627 
          628                         if (i + 2 < array_size) {
          629                                 printf(",");
          630                         }
          631                 }
          632                 printf("\n};\n");
          633         } else {
          634                 printf("static const uint_least%zu_t %s_%s[] = {",
          635                        array_entry_size, prefix, name);
          636                 for (i = 0; i < array_size; i++) {
          637                         if (i % 8 == 0) {
          638                                 printf("\n\t");
          639                         } else {
          640                                 printf(" ");
          641                         }
          642                         printf("%zu", array[i]);
          643                         if (i + 1 < array_size) {
          644                                 printf(",");
          645                         }
          646                 }
          647                 printf("\n};\n");
          648         }
          649 }
          650 
          651 void
          652 compress_and_output(uint_least64_t *array, const char *prefix)
          653 {
          654         struct compression_output co, co_best;
          655         size_t i, j, dictionary_size, dictionary_entry_size;
          656         uint_least64_t maximum = 0, *dictionary, *keys;
          657 
          658         /* initialise dictionary */
          659         dictionary = NULL;
          660         dictionary_size = 0;
          661 
          662         /* initialise keys */
          663         if (!(keys = calloc(UINT32_C(0x110000), sizeof(*keys)))) {
          664                 fprintf(stderr, "calloc: %s\n", strerror(errno));
          665                 exit(1);        
          666         }
          667 
          668         for (i = 0; i < UINT32_C(0x110000); i++) {
          669                 /* maximum determination */
          670                 if (array[i] > maximum) {
          671                         maximum = array[i];
          672                 }
          673 
          674                 /* check if the array value is already in the dictionary */
          675                 for (j = 0; j < dictionary_size; j++) {
          676                         if (array[i] == dictionary[j]) {
          677                                 break;
          678                         }
          679                 }
          680                 if (j == dictionary_size) {
          681                         /* not in the dictionary, insert the array value */
          682                         if (!(dictionary = realloc(dictionary, (++dictionary_size) *
          683                                 sizeof(*dictionary)))) {
          684                                 fprintf(stderr, "realloc: %s\n", strerror(errno));
          685                                 exit(1);        
          686                         }
          687                         dictionary[dictionary_size - 1] = array[i];
          688                 }
          689 
          690                 /* set the index (key) of the matched dictionary entry */
          691                 keys[i] = j;
          692         }
          693 
          694         /* check if dictionary size is below a reasonable threshold */
          695         if (dictionary_size > 256) {
          696                 fprintf(stderr, "compress_and_output: dictionary too large\n");
          697                 exit(1);        
          698         }
          699 
          700         /*
          701          * compress keys array with different block size shifts
          702          * (block sizes from 16=1<<4 to 32768=1<<15)
          703          */
          704         memset(&co_best, 0, sizeof(co_best));
          705         co_best.total_size = SIZE_MAX;
          706         for (i = 15; i >= 4; i--) {
          707                 compress_array(keys, i, &co);
          708 
          709                 fprintf(stderr, "compress_and_output: "
          710                         "block size %zu -> data size %.1f kB (%zu,%zu)\n",
          711                         ((size_t)1) << i, (double)co.total_size / 1000, co.major_entry_size, co.minor_entry_size);
          712 
          713                 if (co.total_size < co_best.total_size) {
          714                         /* we have a new best compression */
          715                         free_compressed_data(&co_best);
          716                         co_best = co;
          717                 } else {
          718                         /* not optimal, discard it */
          719                         free_compressed_data(&co);
          720                 }
          721         }
          722         fprintf(stderr, "compress_and_output: choosing optimal block size %zu (%zu,%zu)\n",
          723                 co_best.block_size, co_best.major_entry_size, co_best.minor_entry_size);
          724 
          725         /* output header */
          726         printf("/* Automatically generated by gen2/%s */\n"
          727                "#include <stdint.h>\n\n"
          728                "#include \"character.h\"\n\n", prefix);
          729 
          730         /* output dictionary */
          731         if (maximum < UINT64_C(1) << 8) {
          732                 dictionary_entry_size = 8;
          733         } else if (maximum < UINT64_C(1) << 16) {
          734                 dictionary_entry_size = 16;
          735         } else if (maximum < UINT64_C(1) << 32) {
          736                 dictionary_entry_size = 32;
          737         } else {
          738                 dictionary_entry_size = 64;
          739         }
          740 
          741         print_array(dictionary, dictionary_size, dictionary_entry_size,
          742                     prefix, "dictionary");
          743         printf("\n");
          744 
          745         /* output major array */
          746         print_array(co_best.major, co_best.major_size,
          747                     co_best.major_entry_size, prefix, "major");
          748         printf("\n");
          749 
          750         /* output minor array */
          751         print_array(co_best.minor, co_best.minor_size,
          752                     co_best.minor_entry_size, prefix, "minor");
          753         printf("\n");
          754 
          755         /* output accessor function (major is never 4-bits per entry) */
          756         printf("static inline uint_least%zu_t\n"
          757                "get_%s_property(uint_least32_t cp)\n"
          758                "{\n"
          759                "\tuint_least%zu_t minor_offset = %s_major[cp >> %zu]\n"
          760                "\t\t+ (cp & ((UINT32_C(1) << %zu) - 1));\n",
          761                dictionary_entry_size, prefix, co_best.major_entry_size,
          762                prefix, co_best.block_size_shift, co_best.block_size_shift);
          763         if (co_best.minor_entry_size == 4) {
          764                 printf("\tuint_least8_t packed_value = %s_minor[minor_offset / 2];\n\n"
          765                        "\tif (minor_offset & UINT8_C(1) == 0) {\n"
          766                        "\t\t/* high nibble */\n"
          767                        "\t\treturn %s_dictionary[packed_value >> 4];\n"
          768                        "\t} else {\n"
          769                        "\t\t/* low nibble */\n"
          770                        "\t\treturn %s_dictionary[packed_value & UINT8_C(0x0f)];\n"
          771                        "\t}\n",
          772                        prefix, prefix, prefix);
          773         } else {
          774                 printf("\n\treturn %s_dictionary[%s_minor[minor_offset]];\n",
          775                        prefix, prefix);
          776         }
          777         printf("}\n");
          778 
          779         /* cleanup */
          780         free(dictionary);
          781         free(keys);
          782 }