bidirectional-test.c - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       bidirectional-test.c (15968B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <errno.h>
            3 #include <inttypes.h>
            4 #include <stddef.h>
            5 #include <stdio.h>
            6 #include <stdlib.h>
            7 #include <string.h>
            8 
            9 #include "../grapheme.h"
           10 #include "util.h"
           11 
           12 struct bidirectional_test {
           13         uint_least32_t *cp;
           14         size_t cplen;
           15         enum grapheme_bidirectional_direction mode[3];
           16         size_t modelen;
           17         enum grapheme_bidirectional_direction resolved;
           18         int_least8_t *level;
           19         int_least16_t *reorder;
           20         size_t reorderlen;
           21 };
           22 
           23 static const struct {
           24         const char *class;
           25         const uint_least32_t cp;
           26 } classcpmap[] = {
           27         { .class = "L", .cp = UINT32_C(0x0041) },
           28         { .class = "AL", .cp = UINT32_C(0x0608) },
           29         { .class = "AN", .cp = UINT32_C(0x0600) },
           30         { .class = "B", .cp = UINT32_C(0x000A) },
           31         { .class = "BN", .cp = UINT32_C(0x0000) },
           32         { .class = "CS", .cp = UINT32_C(0x002C) },
           33         { .class = "EN", .cp = UINT32_C(0x0030) },
           34         { .class = "ES", .cp = UINT32_C(0x002B) },
           35         { .class = "ET", .cp = UINT32_C(0x0023) },
           36         { .class = "FSI", .cp = UINT32_C(0x2068) },
           37         { .class = "LRE", .cp = UINT32_C(0x202A) },
           38         { .class = "LRI", .cp = UINT32_C(0x2066) },
           39         { .class = "LRO", .cp = UINT32_C(0x202D) },
           40         { .class = "NSM", .cp = UINT32_C(0x0300) },
           41         { .class = "ON", .cp = UINT32_C(0x0021) },
           42         { .class = "PDF", .cp = UINT32_C(0x202C) },
           43         { .class = "PDI", .cp = UINT32_C(0x2069) },
           44         { .class = "R", .cp = UINT32_C(0x05BE) },
           45         { .class = "RLE", .cp = UINT32_C(0x202B) },
           46         { .class = "RLI", .cp = UINT32_C(0x2067) },
           47         { .class = "RLO", .cp = UINT32_C(0x202E) },
           48         { .class = "S", .cp = UINT32_C(0x0009) },
           49         { .class = "WS", .cp = UINT32_C(0x000C) },
           50 };
           51 
           52 static int
           53 classtocp(const char *str, size_t len, uint_least32_t *cp)
           54 {
           55         size_t i;
           56 
           57         for (i = 0; i < LEN(classcpmap); i++) {
           58                 if (!strncmp(str, classcpmap[i].class, len)) {
           59                         *cp = classcpmap[i].cp;
           60                         return 0;
           61                 }
           62         }
           63         fprintf(stderr, "classtocp: unknown class string '%.*s'.\n", (int)len,
           64                 str);
           65 
           66         return 1;
           67 }
           68 
           69 static int
           70 parse_class_list(const char *str, uint_least32_t **cp, size_t *cplen)
           71 {
           72         size_t count, i;
           73         const char *tmp1 = NULL, *tmp2 = NULL;
           74 
           75         if (strlen(str) == 0) {
           76                 *cp = NULL;
           77                 *cplen = 0;
           78                 return 0;
           79         }
           80 
           81         /* count the number of spaces in the string and infer list length */
           82         for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
           83              count++, tmp1 = tmp2 + 1) {
           84                 ;
           85         }
           86 
           87         /* allocate resources */
           88         if (!(*cp = calloc((*cplen = count), sizeof(**cp)))) {
           89                 fprintf(stderr, "calloc: %s\n", strerror(errno));
           90                 exit(1);
           91         }
           92 
           93         /* go through the string again, parsing the classes */
           94         for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
           95                 tmp2 = strchr(tmp1, ' ');
           96                 if (classtocp(tmp1, tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
           97                               &((*cp)[i]))) {
           98                         return 1;
           99                 }
          100                 if (tmp2 != NULL) {
          101                         tmp1 = tmp2 + 1;
          102                 }
          103         }
          104 
          105         return 0;
          106 }
          107 
          108 static int
          109 strtolevel(const char *str, size_t len, int_least8_t *level)
          110 {
          111         size_t i;
          112 
          113         if (len == 1 && str[0] == 'x') {
          114                 /*
          115                  * 'x' indicates those characters that are ignored.
          116                  * We indicate this with a level of -1
          117                  */
          118                 *level = -1;
          119                 return 0;
          120         }
          121 
          122         if (len > 3) {
          123                 /*
          124                  * given we can only express (positive) numbers from
          125                  * 0..127, more than 3 digits means an excess
          126                  */
          127                 goto toolarge;
          128         }
          129 
          130         /* check if the string is completely numerical */
          131         for (i = 0; i < len; i++) {
          132                 if (str[i] < '0' && str[i] > '9') {
          133                         fprintf(stderr,
          134                                 "strtolevel: '%.*s' is not an integer.\n",
          135                                 (int)len, str);
          136                         return 1;
          137                 }
          138         }
          139 
          140         if (len == 3) {
          141                 if (str[0] != '1' || str[1] > '2' ||
          142                     (str[1] == '2' && str[2] > '7')) {
          143                         goto toolarge;
          144                 }
          145                 *level = (str[0] - '0') * 100 + (str[1] - '0') * 10 +
          146                          (str[2] - '0');
          147         } else if (len == 2) {
          148                 *level = (str[0] - '0') * 10 + (str[1] - '0');
          149         } else if (len == 1) {
          150                 *level = (str[0] - '0');
          151         } else { /* len == 0 */
          152                 *level = 0;
          153         }
          154 
          155         return 0;
          156 toolarge:
          157         fprintf(stderr, "strtolevel: '%.*s' is too large.\n", (int)len, str);
          158         return 1;
          159 }
          160 
          161 static int
          162 strtoreorder(const char *str, size_t len, int_least16_t *reorder)
          163 {
          164         size_t i;
          165 
          166         if (len == 1 && str[0] == 'x') {
          167                 /*
          168                  * 'x' indicates those characters that are ignored.
          169                  * We indicate this with a reorder of -1
          170                  */
          171                 *reorder = -1;
          172                 return 0;
          173         }
          174 
          175         if (len > 3) {
          176                 /*
          177                  * given we want to only express (positive) numbers from
          178                  * 0..999 (at most!), more than 3 digits means an excess
          179                  */
          180                 goto toolarge;
          181         }
          182 
          183         /* check if the string is completely numerical */
          184         for (i = 0; i < len; i++) {
          185                 if (str[i] < '0' && str[i] > '9') {
          186                         fprintf(stderr,
          187                                 "strtoreorder: '%.*s' is not an integer.\n",
          188                                 (int)len, str);
          189                         return 1;
          190                 }
          191         }
          192 
          193         if (len == 3) {
          194                 *reorder = (str[0] - '0') * 100 + (str[1] - '0') * 10 +
          195                            (str[2] - '0');
          196         } else if (len == 2) {
          197                 *reorder = (str[0] - '0') * 10 + (str[1] - '0');
          198         } else if (len == 1) {
          199                 *reorder = (str[0] - '0');
          200         } else { /* len == 0 */
          201                 *reorder = 0;
          202         }
          203 
          204         return 0;
          205 toolarge:
          206         fprintf(stderr, "strtoreorder: '%.*s' is too large.\n", (int)len, str);
          207         return 1;
          208 }
          209 
          210 static int
          211 parse_level_list(const char *str, int_least8_t **level, size_t *levellen)
          212 {
          213         size_t count, i;
          214         const char *tmp1 = NULL, *tmp2 = NULL;
          215 
          216         if (strlen(str) == 0) {
          217                 *level = NULL;
          218                 *levellen = 0;
          219                 return 0;
          220         }
          221 
          222         /* count the number of spaces in the string and infer list length */
          223         for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
          224              count++, tmp1 = tmp2 + 1) {
          225                 ;
          226         }
          227 
          228         /* allocate resources */
          229         if (!(*level = calloc((*levellen = count), sizeof(**level)))) {
          230                 fprintf(stderr, "calloc: %s\n", strerror(errno));
          231                 exit(1);
          232         }
          233 
          234         /* go through the string again, parsing the levels */
          235         for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
          236                 tmp2 = strchr(tmp1, ' ');
          237                 if (strtolevel(tmp1,
          238                                tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
          239                                &((*level)[i]))) {
          240                         return 1;
          241                 }
          242                 if (tmp2 != NULL) {
          243                         tmp1 = tmp2 + 1;
          244                 }
          245         }
          246 
          247         return 0;
          248 }
          249 
          250 static int
          251 parse_reorder_list(const char *str, int_least16_t **reorder, size_t *reorderlen)
          252 {
          253         size_t count, i;
          254         const char *tmp1 = NULL, *tmp2 = NULL;
          255 
          256         if (strlen(str) == 0) {
          257                 *reorder = NULL;
          258                 *reorderlen = 0;
          259                 return 0;
          260         }
          261 
          262         /* count the number of spaces in the string and infer list length */
          263         for (count = 1, tmp1 = str; (tmp2 = strchr(tmp1, ' ')) != NULL;
          264              count++, tmp1 = tmp2 + 1) {
          265                 ;
          266         }
          267 
          268         /* allocate resources */
          269         if (!(*reorder = calloc((*reorderlen = count), sizeof(**reorder)))) {
          270                 fprintf(stderr, "calloc: %s\n", strerror(errno));
          271                 exit(1);
          272         }
          273 
          274         /* go through the string again, parsing the reorders */
          275         for (i = 0, tmp1 = tmp2 = str; tmp2 != NULL; i++) {
          276                 tmp2 = strchr(tmp1, ' ');
          277                 if (strtoreorder(tmp1,
          278                                  tmp2 ? (size_t)(tmp2 - tmp1) : strlen(tmp1),
          279                                  &((*reorder)[i]))) {
          280                         return 1;
          281                 }
          282                 if (tmp2 != NULL) {
          283                         tmp1 = tmp2 + 1;
          284                 }
          285         }
          286 
          287         return 0;
          288 }
          289 
          290 static void
          291 bidirectional_test_list_print(const struct bidirectional_test *test,
          292                               size_t testlen, const char *identifier,
          293                               const char *progname)
          294 {
          295         size_t i, j;
          296 
          297         printf("/* Automatically generated by %s */\n"
          298                "#include <stdint.h>\n#include <stddef.h>\n\n"
          299                "#include \"../grapheme.h\"\n\n",
          300                progname);
          301 
          302         printf("static const struct {\n"
          303                "\tuint_least32_t *cp;\n"
          304                "\tsize_t cplen;\n"
          305                "\tenum grapheme_bidirectional_direction *mode;\n"
          306                "\tsize_t modelen;\n"
          307                "\tenum grapheme_bidirectional_direction resolved;\n"
          308                "\tint_least8_t *level;\n"
          309                "\tint_least16_t *reorder;\n"
          310                "\tsize_t reorderlen;\n} %s[] = {\n",
          311                identifier);
          312         for (i = 0; i < testlen; i++) {
          313                 printf("\t{\n");
          314 
          315                 printf("\t\t.cp         = (uint_least32_t[]){");
          316                 for (j = 0; j < test[i].cplen; j++) {
          317                         printf(" UINT32_C(0x%06X)", test[i].cp[j]);
          318                         if (j + 1 < test[i].cplen) {
          319                                 putchar(',');
          320                         }
          321                 }
          322                 printf(" },\n");
          323                 printf("\t\t.cplen      = %zu,\n", test[i].cplen);
          324 
          325                 printf("\t\t.mode       = (enum "
          326                        "grapheme_bidirectional_direction[]){");
          327                 for (j = 0; j < test[i].modelen; j++) {
          328                         if (test[i].mode[j] ==
          329                             GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL) {
          330                                 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_"
          331                                        "NEUTRAL");
          332                         } else if (test[i].mode[j] ==
          333                                    GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR) {
          334                                 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR");
          335                         } else if (test[i].mode[j] ==
          336                                    GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL) {
          337                                 printf(" GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL");
          338                         }
          339                         if (j + 1 < test[i].modelen) {
          340                                 putchar(',');
          341                         }
          342                 }
          343                 printf(" },\n");
          344                 printf("\t\t.modelen    = %zu,\n", test[i].modelen);
          345 
          346                 printf("\t\t.resolved   = ");
          347                 if (test[i].resolved ==
          348                     GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL) {
          349                         printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_"
          350                                "NEUTRAL");
          351                 } else if (test[i].resolved ==
          352                            GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR) {
          353                         printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR");
          354                 } else if (test[i].resolved ==
          355                            GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL) {
          356                         printf("GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL");
          357                 }
          358                 printf(",\n");
          359 
          360                 printf("\t\t.level      = (int_least8_t[]){");
          361                 for (j = 0; j < test[i].cplen; j++) {
          362                         printf(" %" PRIdLEAST8, test[i].level[j]);
          363                         if (j + 1 < test[i].cplen) {
          364                                 putchar(',');
          365                         }
          366                 }
          367                 printf(" },\n");
          368 
          369                 printf("\t\t.reorder    = ");
          370                 if (test[i].reorderlen > 0) {
          371                         printf("(int_least16_t[]){");
          372                         for (j = 0; j < test[i].reorderlen; j++) {
          373                                 printf(" %" PRIdLEAST16, test[i].reorder[j]);
          374                                 if (j + 1 < test[i].reorderlen) {
          375                                         putchar(',');
          376                                 }
          377                         }
          378                         printf(" },\n");
          379                 } else {
          380                         printf("NULL,\n");
          381                 }
          382                 printf("\t\t.reorderlen = %zu,\n", test[i].reorderlen);
          383 
          384                 printf("\t},\n");
          385         }
          386         printf("};\n");
          387 }
          388 
          389 static struct bidirectional_test *test;
          390 static size_t testlen;
          391 
          392 static int_least8_t *current_level;
          393 static size_t current_level_len;
          394 static int_least16_t *current_reorder;
          395 static size_t current_reorder_len;
          396 
          397 static int
          398 test_callback(const char *file, char **field, size_t nfields, char *comment,
          399               void *payload)
          400 {
          401         char *tmp;
          402 
          403         (void)file;
          404         (void)comment;
          405         (void)payload;
          406 
          407         /* we either get a line beginning with an '@', or an input line */
          408         if (nfields > 0 && field[0][0] == '@') {
          409                 if (!strncmp(field[0], "@Levels:", sizeof("@Levels:") - 1)) {
          410                         tmp = field[0] + sizeof("@Levels:") - 1;
          411                         for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
          412                              tmp++) {
          413                                 ;
          414                         }
          415                         free(current_level);
          416                         parse_level_list(tmp, &current_level,
          417                                          &current_level_len);
          418                 } else if (!strncmp(field[0],
          419                                     "@Reorder:", sizeof("@Reorder:") - 1)) {
          420                         tmp = field[0] + sizeof("@Reorder:") - 1;
          421                         for (; *tmp != '\0' && (*tmp == ' ' || *tmp == '\t');
          422                              tmp++) {
          423                                 ;
          424                         }
          425                         free(current_reorder);
          426                         parse_reorder_list(tmp, &current_reorder,
          427                                            &current_reorder_len);
          428                 } else {
          429                         fprintf(stderr, "Unknown @-input-line.\n");
          430                         exit(1);
          431                 }
          432         } else {
          433                 if (nfields < 2) {
          434                         /* discard any line that does not have at least 2 fields
          435                          */
          436                         return 0;
          437                 }
          438 
          439                 /* extend test array */
          440                 if (!(test = realloc(test, (++testlen) * sizeof(*test)))) {
          441                         fprintf(stderr, "realloc: %s\n", strerror(errno));
          442                         exit(1);
          443                 }
          444 
          445                 /* parse field data */
          446                 parse_class_list(field[0], &(test[testlen - 1].cp),
          447                                  &(test[testlen - 1].cplen));
          448 
          449                 /* copy current level- and reorder-arrays */
          450                 if (!(test[testlen - 1].level =
          451                               calloc(current_level_len,
          452                                      sizeof(*(test[testlen - 1].level))))) {
          453                         fprintf(stderr, "calloc: %s\n", strerror(errno));
          454                         exit(1);
          455                 }
          456                 memcpy(test[testlen - 1].level, current_level,
          457                        current_level_len * sizeof(*(test[testlen - 1].level)));
          458 
          459                 if (!(test[testlen - 1].reorder =
          460                               calloc(current_reorder_len,
          461                                      sizeof(*(test[testlen - 1].reorder))))) {
          462                         fprintf(stderr, "calloc: %s\n", strerror(errno));
          463                         exit(1);
          464                 }
          465                 if (current_reorder != NULL) {
          466                         memcpy(test[testlen - 1].reorder, current_reorder,
          467                                current_reorder_len *
          468                                        sizeof(*(test[testlen - 1].reorder)));
          469                 }
          470                 test[testlen - 1].reorderlen = current_reorder_len;
          471 
          472                 if (current_level_len != test[testlen - 1].cplen) {
          473                         fprintf(stderr,
          474                                 "mismatch between string and level lengths.\n");
          475                         exit(1);
          476                 }
          477 
          478                 /* parse paragraph-level-bitset */
          479                 if (strlen(field[1]) != 1) {
          480                         fprintf(stderr, "malformed paragraph-level-bitset.\n");
          481                         exit(1);
          482                 } else if (field[1][0] == '2') {
          483                         test[testlen - 1].mode[0] =
          484                                 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
          485                         test[testlen - 1].modelen = 1;
          486                 } else if (field[1][0] == '3') {
          487                         /* auto=0 and LTR=1 */
          488                         test[testlen - 1].mode[0] =
          489                                 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
          490                         test[testlen - 1].mode[1] =
          491                                 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
          492                         test[testlen - 1].modelen = 2;
          493                 } else if (field[1][0] == '4') {
          494                         test[testlen - 1].mode[0] =
          495                                 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
          496                         test[testlen - 1].modelen = 1;
          497                 } else if (field[1][0] == '5') {
          498                         test[testlen - 1].mode[0] =
          499                                 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
          500                         test[testlen - 1].mode[1] =
          501                                 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
          502                         test[testlen - 1].modelen = 2;
          503                 } else if (field[1][0] == '7') {
          504                         test[testlen - 1].mode[0] =
          505                                 GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
          506                         test[testlen - 1].mode[1] =
          507                                 GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
          508                         test[testlen - 1].mode[2] =
          509                                 GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
          510                         test[testlen - 1].modelen = 3;
          511                 } else {
          512                         fprintf(stderr,
          513                                 "unhandled paragraph-level-bitset %s.\n",
          514                                 field[1]);
          515                         exit(1);
          516                 }
          517 
          518                 /* the resolved paragraph level is always neutral as the test
          519                  * file does not specify it */
          520                 test[testlen - 1].resolved =
          521                         GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
          522         }
          523 
          524         return 0;
          525 }
          526 
          527 static int
          528 character_test_callback(const char *file, char **field, size_t nfields,
          529                         char *comment, void *payload)
          530 {
          531         size_t tmp;
          532 
          533         (void)file;
          534         (void)comment;
          535         (void)payload;
          536 
          537         if (nfields < 5) {
          538                 /* discard any line that does not have at least 5 fields */
          539                 return 0;
          540         }
          541 
          542         /* extend test array */
          543         if (!(test = realloc(test, (++testlen) * sizeof(*test)))) {
          544                 fprintf(stderr, "realloc: %s\n", strerror(errno));
          545                 exit(1);
          546         }
          547 
          548         /* parse field data */
          549         parse_cp_list(field[0], &(test[testlen - 1].cp),
          550                       &(test[testlen - 1].cplen));
          551         parse_level_list(field[3], &(test[testlen - 1].level), &tmp);
          552         parse_reorder_list(field[4], &(test[testlen - 1].reorder),
          553                            &(test[testlen - 1].reorderlen));
          554 
          555         /* parse paragraph-level-mode */
          556         if (strlen(field[1]) != 1) {
          557                 fprintf(stderr, "malformed paragraph-level-setting.\n");
          558                 exit(1);
          559         } else if (field[1][0] == '0') {
          560                 test[testlen - 1].mode[0] =
          561                         GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
          562         } else if (field[1][0] == '1') {
          563                 test[testlen - 1].mode[0] =
          564                         GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
          565         } else if (field[1][0] == '2') {
          566                 test[testlen - 1].mode[0] =
          567                         GRAPHEME_BIDIRECTIONAL_DIRECTION_NEUTRAL;
          568         } else {
          569                 fprintf(stderr, "unhandled paragraph-level-setting.\n");
          570                 exit(1);
          571         }
          572         test[testlen - 1].modelen = 1;
          573 
          574         /* parse resolved paragraph level */
          575         if (strlen(field[2]) != 1) {
          576                 fprintf(stderr, "malformed resolved paragraph level.\n");
          577                 exit(1);
          578         } else if (field[2][0] == '0') {
          579                 test[testlen - 1].resolved =
          580                         GRAPHEME_BIDIRECTIONAL_DIRECTION_LTR;
          581         } else if (field[2][0] == '1') {
          582                 test[testlen - 1].resolved =
          583                         GRAPHEME_BIDIRECTIONAL_DIRECTION_RTL;
          584         } else {
          585                 fprintf(stderr, "unhandled resolved paragraph level.\n");
          586                 exit(1);
          587         }
          588 
          589         if (tmp != test[testlen - 1].cplen) {
          590                 fprintf(stderr, "mismatch between string and level lengths.\n");
          591                 exit(1);
          592         }
          593 
          594         return 0;
          595 }
          596 
          597 int
          598 main(int argc, char *argv[])
          599 {
          600         (void)argc;
          601 
          602         parse_file_with_callback("data/BidiTest.txt", test_callback, NULL);
          603         parse_file_with_callback("data/BidiCharacterTest.txt",
          604                                  character_test_callback, NULL);
          605         bidirectional_test_list_print(test, testlen, "bidirectional_test",
          606                                       argv[0]);
          607 
          608         return 0;
          609 }