case.c - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       case.c (16477B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <stdbool.h>
            3 #include <stdint.h>
            4 #include <stdio.h>
            5 #include <string.h>
            6 
            7 #include "../grapheme.h"
            8 #include "util.h"
            9 
           /*
            * One test case for the grapheme_is_*case_utf8() checks.
            *
            * The member order matters: the test tables may initialise the
            * nested structs positionally.
            */
           struct unit_test_is_case_utf8 {
                   /* human-readable label printed when the test fails */
                   const char *description;

                   /* arguments handed to the function under test */
                   struct {
                           /* UTF-8 input string */
                           const char *src;
                           /*
                            * input length; the tables use SIZE_MAX for the
                            * cases labelled "NUL-terminated"
                            */
                           size_t srclen;
                   } input;

                   /* values the function under test is expected to yield */
                   struct {
                           /* expected boolean return value */
                           bool ret;
                           /* expected value written to the caselen argument */
                           size_t caselen;
                   } output;
           };
           23 
           /*
            * One test case for the grapheme_to_*case_utf8() conversions.
            *
            * The member order matters: the test tables may initialise the
            * nested structs positionally.
            */
           struct unit_test_to_case_utf8 {
                   /* human-readable label printed when the test fails */
                   const char *description;

                   /* arguments handed to the function under test */
                   struct {
                           /* UTF-8 input string */
                           const char *src;
                           /*
                            * input length; the tables use SIZE_MAX for the
                            * cases labelled "NUL-terminated"
                            */
                           size_t srclen;
                           /* capacity reported for the destination buffer */
                           size_t destlen;
                   } input;

                   /* values the function under test is expected to yield */
                   struct {
                           /* expected destination buffer contents */
                           const char *dest;
                           /* expected return value */
                           size_t ret;
                   } output;
           };
           38 
           39 static const struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
           40         {
           41                 .description = "empty input",
           42                 .input = { "", 0 },
           43                 .output = { true, 0 },
           44         },
           45         {
           46                 .description = "one character, violation",
           47                 .input = { "A", 1 },
           48                 .output = { false, 0 },
           49         },
           50         {
           51                 .description = "one character, confirmation",
           52                 .input = { "\xC3\x9F", 2 },
           53                 .output = { true, 2 },
           54         },
           55         {
           56                 .description = "one character, violation, NUL-terminated",
           57                 .input = { "A", SIZE_MAX },
           58                 .output = { false, 0 },
           59         },
           60         {
           61                 .description = "one character, confirmation, NUL-terminated",
           62                 .input = { "\xC3\x9F", SIZE_MAX },
           63                 .output = { true, 2 },
           64         },
           65         {
           66                 .description = "one word, violation",
           67                 .input = { "Hello", 5 },
           68                 .output = { false, 0 },
           69         },
           70         {
           71                 .description = "one word, partial confirmation",
           72                 .input = { "gru"
           73                            "\xC3\x9F"
           74                            "fOrmel",
           75                            11 },
           76                 .output = { false, 6 },
           77         },
           78         {
           79                 .description = "one word, full confirmation",
           80                 .input = { "gru"
           81                            "\xC3\x9F"
           82                            "formel",
           83                            11 },
           84                 .output = { true, 11 },
           85         },
           86         {
           87                 .description = "one word, violation, NUL-terminated",
           88                 .input = { "Hello", SIZE_MAX },
           89                 .output = { false, 0 },
           90         },
           91         {
           92                 .description = "one word, partial confirmation, NUL-terminated",
           93                 .input = { "gru"
           94                            "\xC3\x9F"
           95                            "fOrmel",
           96                            SIZE_MAX },
           97                 .output = { false, 6 },
           98         },
           99         {
          100                 .description = "one word, full confirmation, NUL-terminated",
          101                 .input = { "gru"
          102                            "\xC3\x9F"
          103                            "formel",
          104                            SIZE_MAX },
          105                 .output = { true, 11 },
          106         },
          107 };
          108 
          109 static const struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
          110         {
          111                 .description = "empty input",
          112                 .input = { "", 0 },
          113                 .output = { true, 0 },
          114         },
          115         {
          116                 .description = "one character, violation",
          117                 .input = { "\xC3\x9F", 2 },
          118                 .output = { false, 0 },
          119         },
          120         {
          121                 .description = "one character, confirmation",
          122                 .input = { "A", 1 },
          123                 .output = { true, 1 },
          124         },
          125         {
          126                 .description = "one character, violation, NUL-terminated",
          127                 .input = { "\xC3\x9F", SIZE_MAX },
          128                 .output = { false, 0 },
          129         },
          130         {
          131                 .description = "one character, confirmation, NUL-terminated",
          132                 .input = { "A", SIZE_MAX },
          133                 .output = { true, 1 },
          134         },
          135         {
          136                 .description = "one word, violation",
          137                 .input = { "hello", 5 },
          138                 .output = { false, 0 },
          139         },
          140         {
          141                 .description = "one word, partial confirmation",
          142                 .input = { "GRU"
          143                            "\xC3\x9F"
          144                            "formel",
          145                            11 },
          146                 .output = { false, 3 },
          147         },
          148         {
          149                 .description = "one word, full confirmation",
          150                 .input = { "HELLO", 5 },
          151                 .output = { true, 5 },
          152         },
          153         {
          154                 .description = "one word, violation, NUL-terminated",
          155                 .input = { "hello", SIZE_MAX },
          156                 .output = { false, 0 },
          157         },
          158         {
          159                 .description = "one word, partial confirmation, NUL-terminated",
          160                 .input = { "GRU"
          161                            "\xC3\x9F"
          162                            "formel",
          163                            SIZE_MAX },
          164                 .output = { false, 3 },
          165         },
          166         {
          167                 .description = "one word, full confirmation, NUL-terminated",
          168                 .input = { "HELLO", SIZE_MAX },
          169                 .output = { true, 5 },
          170         },
          171 };
          172 
          173 static const struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
          174         {
          175                 .description = "empty input",
          176                 .input = { "", 0 },
          177                 .output = { true, 0 },
          178         },
          179         {
          180                 .description = "one character, violation",
          181                 .input = { "\xC3\x9F", 2 },
          182                 .output = { false, 0 },
          183         },
          184         {
          185                 .description = "one character, confirmation",
          186                 .input = { "A", 1 },
          187                 .output = { true, 1 },
          188         },
          189         {
          190                 .description = "one character, violation, NUL-terminated",
          191                 .input = { "\xC3\x9F", SIZE_MAX },
          192                 .output = { false, 0 },
          193         },
          194         {
          195                 .description = "one character, confirmation, NUL-terminated",
          196                 .input = { "A", SIZE_MAX },
          197                 .output = { true, 1 },
          198         },
          199         {
          200                 .description = "one word, violation",
          201                 .input = { "hello", 5 },
          202                 .output = { false, 0 },
          203         },
          204         {
          205                 .description = "one word, partial confirmation",
          206                 .input = { "Gru"
          207                            "\xC3\x9F"
          208                            "fOrmel",
          209                            11 },
          210                 .output = { false, 6 },
          211         },
          212         {
          213                 .description = "one word, full confirmation",
          214                 .input = { "Gru"
          215                            "\xC3\x9F"
          216                            "formel",
          217                            11 },
          218                 .output = { true, 11 },
          219         },
          220         {
          221                 .description = "one word, violation, NUL-terminated",
          222                 .input = { "hello", SIZE_MAX },
          223                 .output = { false, 0 },
          224         },
          225         {
          226                 .description = "one word, partial confirmation, NUL-terminated",
          227                 .input = { "Gru"
          228                            "\xC3\x9F"
          229                            "fOrmel",
          230                            SIZE_MAX },
          231                 .output = { false, 6 },
          232         },
          233         {
          234                 .description = "one word, full confirmation, NUL-terminated",
          235                 .input = { "Gru"
          236                            "\xC3\x9F"
          237                            "formel",
          238                            SIZE_MAX },
          239                 .output = { true, 11 },
          240         },
          241         {
          242                 .description = "multiple words, partial confirmation",
          243                 .input = { "Hello Gru"
          244                            "\xC3\x9F"
          245                            "fOrmel!",
          246                            18 },
          247                 .output = { false, 12 },
          248         },
          249         {
          250                 .description = "multiple words, full confirmation",
          251                 .input = { "Hello Gru"
          252                            "\xC3\x9F"
          253                            "formel!",
          254                            18 },
          255                 .output = { true, 18 },
          256         },
          257         {
          258                 .description =
          259                         "multiple words, partial confirmation, NUL-terminated",
          260                 .input = { "Hello Gru"
          261                            "\xC3\x9F"
          262                            "fOrmel!",
          263                            SIZE_MAX },
          264                 .output = { false, 12 },
          265         },
          266         {
          267                 .description =
          268                         "multiple words, full confirmation, NUL-terminated",
          269                 .input = { "Hello Gru"
          270                            "\xC3\x9F"
          271                            "formel!",
          272                            SIZE_MAX },
          273                 .output = { true, 18 },
          274         },
          275 };
          276 
          277 static const struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
          278         {
          279                 .description = "empty input",
          280                 .input = { "", 0, 10 },
          281                 .output = { "", 0 },
          282         },
          283         {
          284                 .description = "empty output",
          285                 .input = { "hello", 5, 0 },
          286                 .output = { "", 5 },
          287         },
          288         {
          289                 .description = "one character, conversion",
          290                 .input = { "A", 1, 10 },
          291                 .output = { "a", 1 },
          292         },
          293         {
          294                 .description = "one character, no conversion",
          295                 .input = { "\xC3\x9F", 2, 10 },
          296                 .output = { "\xC3\x9F", 2 },
          297         },
          298         {
          299                 .description = "one character, conversion, truncation",
          300                 .input = { "A", 1, 0 },
          301                 .output = { "", 1 },
          302         },
          303         {
          304                 .description = "one character, conversion, NUL-terminated",
          305                 .input = { "A", SIZE_MAX, 10 },
          306                 .output = { "a", 1 },
          307         },
          308         {
          309                 .description = "one character, no conversion, NUL-terminated",
          310                 .input = { "\xC3\x9F", SIZE_MAX, 10 },
          311                 .output = { "\xC3\x9F", 2 },
          312         },
          313         {
          314                 .description =
          315                         "one character, conversion, NUL-terminated, truncation",
          316                 .input = { "A", SIZE_MAX, 0 },
          317                 .output = { "", 1 },
          318         },
          319         {
          320                 .description = "one word, conversion",
          321                 .input = { "wOrD", 4, 10 },
          322                 .output = { "word", 4 },
          323         },
          324         {
          325                 .description = "one word, no conversion",
          326                 .input = { "word", 4, 10 },
          327                 .output = { "word", 4 },
          328         },
          329         {
          330                 .description = "one word, conversion, truncation",
          331                 .input = { "wOrD", 4, 3 },
          332                 .output = { "wo", 4 },
          333         },
          334         {
          335                 .description = "one word, conversion, NUL-terminated",
          336                 .input = { "wOrD", SIZE_MAX, 10 },
          337                 .output = { "word", 4 },
          338         },
          339         {
          340                 .description = "one word, no conversion, NUL-terminated",
          341                 .input = { "word", SIZE_MAX, 10 },
          342                 .output = { "word", 4 },
          343         },
          344         {
          345                 .description =
          346                         "one word, conversion, NUL-terminated, truncation",
          347                 .input = { "wOrD", SIZE_MAX, 3 },
          348                 .output = { "wo", 4 },
          349         },
          350 };
          351 
          352 static const struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
          353         {
          354                 .description = "empty input",
          355                 .input = { "", 0, 10 },
          356                 .output = { "", 0 },
          357         },
          358         {
          359                 .description = "empty output",
          360                 .input = { "hello", 5, 0 },
          361                 .output = { "", 5 },
          362         },
          363         {
          364                 .description = "one character, conversion",
          365                 .input = { "\xC3\x9F", 2, 10 },
          366                 .output = { "SS", 2 },
          367         },
          368         {
          369                 .description = "one character, no conversion",
          370                 .input = { "A", 1, 10 },
          371                 .output = { "A", 1 },
          372         },
          373         {
          374                 .description = "one character, conversion, truncation",
          375                 .input = { "\xC3\x9F", 2, 0 },
          376                 .output = { "", 2 },
          377         },
          378         {
          379                 .description = "one character, conversion, NUL-terminated",
          380                 .input = { "\xC3\x9F", SIZE_MAX, 10 },
          381                 .output = { "SS", 2 },
          382         },
          383         {
          384                 .description = "one character, no conversion, NUL-terminated",
          385                 .input = { "A", SIZE_MAX, 10 },
          386                 .output = { "A", 1 },
          387         },
          388         {
          389                 .description =
          390                         "one character, conversion, NUL-terminated, truncation",
          391                 .input = { "\xC3\x9F", SIZE_MAX, 0 },
          392                 .output = { "", 2 },
          393         },
          394         {
          395                 .description = "one word, conversion",
          396                 .input = { "gRu"
          397                            "\xC3\x9F"
          398                            "fOrMel",
          399                            11, 15 },
          400                 .output = { "GRUSSFORMEL", 11 },
          401         },
          402         {
          403                 .description = "one word, no conversion",
          404                 .input = { "WORD", 4, 10 },
          405                 .output = { "WORD", 4 },
          406         },
          407         {
          408                 .description = "one word, conversion, truncation",
          409                 .input = { "gRu"
          410                            "\xC3\x9F"
          411                            "formel",
          412                            11, 5 },
          413                 .output = { "GRUS", 11 },
          414         },
          415         {
          416                 .description = "one word, conversion, NUL-terminated",
          417                 .input = { "gRu"
          418                            "\xC3\x9F"
          419                            "formel",
          420                            SIZE_MAX, 15 },
          421                 .output = { "GRUSSFORMEL", 11 },
          422         },
          423         {
          424                 .description = "one word, no conversion, NUL-terminated",
          425                 .input = { "WORD", SIZE_MAX, 10 },
          426                 .output = { "WORD", 4 },
          427         },
          428         {
          429                 .description =
          430                         "one word, conversion, NUL-terminated, truncation",
          431                 .input = { "gRu"
          432                            "\xC3\x9F"
          433                            "formel",
          434                            SIZE_MAX, 5 },
          435                 .output = { "GRUS", 11 },
          436         },
          437 };
          438 
          439 static const struct unit_test_to_case_utf8 to_titlecase_utf8[] = {
          440         {
          441                 .description = "empty input",
          442                 .input = { "", 0, 10 },
          443                 .output = { "", 0 },
          444         },
          445         {
          446                 .description = "empty output",
          447                 .input = { "hello", 5, 0 },
          448                 .output = { "", 5 },
          449         },
          450         {
          451                 .description = "one character, conversion",
          452                 .input = { "a", 1, 10 },
          453                 .output = { "A", 1 },
          454         },
          455         {
          456                 .description = "one character, no conversion",
          457                 .input = { "A", 1, 10 },
          458                 .output = { "A", 1 },
          459         },
          460         {
          461                 .description = "one character, conversion, truncation",
          462                 .input = { "a", 1, 0 },
          463                 .output = { "", 1 },
          464         },
          465         {
          466                 .description = "one character, conversion, NUL-terminated",
          467                 .input = { "a", SIZE_MAX, 10 },
          468                 .output = { "A", 1 },
          469         },
          470         {
          471                 .description = "one character, no conversion, NUL-terminated",
          472                 .input = { "A", SIZE_MAX, 10 },
          473                 .output = { "A", 1 },
          474         },
          475         {
          476                 .description =
          477                         "one character, conversion, NUL-terminated, truncation",
          478                 .input = { "a", SIZE_MAX, 0 },
          479                 .output = { "", 1 },
          480         },
          481         {
          482                 .description = "one word, conversion",
          483                 .input = { "heLlo", 5, 10 },
          484                 .output = { "Hello", 5 },
          485         },
          486         {
          487                 .description = "one word, no conversion",
          488                 .input = { "Hello", 5, 10 },
          489                 .output = { "Hello", 5 },
          490         },
          491         {
          492                 .description = "one word, conversion, truncation",
          493                 .input = { "heLlo", 5, 2 },
          494                 .output = { "H", 5 },
          495         },
          496         {
          497                 .description = "one word, conversion, NUL-terminated",
          498                 .input = { "heLlo", SIZE_MAX, 10 },
          499                 .output = { "Hello", 5 },
          500         },
          501         {
          502                 .description = "one word, no conversion, NUL-terminated",
          503                 .input = { "Hello", SIZE_MAX, 10 },
          504                 .output = { "Hello", 5 },
          505         },
          506         {
          507                 .description =
          508                         "one word, conversion, NUL-terminated, truncation",
          509                 .input = { "heLlo", SIZE_MAX, 3 },
          510                 .output = { "He", 5 },
          511         },
          512         {
          513                 .description = "two words, conversion",
          514                 .input = { "heLlo wORLd!", 12, 20 },
          515                 .output = { "Hello World!", 12 },
          516         },
          517         {
          518                 .description = "two words, no conversion",
          519                 .input = { "Hello World!", 12, 20 },
          520                 .output = { "Hello World!", 12 },
          521         },
          522         {
          523                 .description = "two words, conversion, truncation",
          524                 .input = { "heLlo wORLd!", 12, 8 },
          525                 .output = { "Hello W", 12 },
          526         },
          527         {
          528                 .description = "two words, conversion, NUL-terminated",
          529                 .input = { "heLlo wORLd!", SIZE_MAX, 20 },
          530                 .output = { "Hello World!", 12 },
          531         },
          532         {
          533                 .description = "two words, no conversion, NUL-terminated",
          534                 .input = { "Hello World!", SIZE_MAX, 20 },
          535                 .output = { "Hello World!", 12 },
          536         },
          537         {
          538                 .description =
          539                         "two words, conversion, NUL-terminated, truncation",
          540                 .input = { "heLlo wORLd!", SIZE_MAX, 4 },
          541                 .output = { "Hel", 12 },
          542         },
          543 };
          544 
          545 static int
          546 unit_test_callback_is_case_utf8(const void *t, size_t off, const char *name,
          547                                 const char *argv0)
          548 {
          549         const struct unit_test_is_case_utf8 *test =
          550                 (const struct unit_test_is_case_utf8 *)t + off;
          551         bool ret = false;
          552         size_t caselen = 0x7f;
          553 
          554         if (t == is_lowercase_utf8) {
          555                 ret = grapheme_is_lowercase_utf8(test->input.src,
          556                                                  test->input.srclen, &caselen);
          557         } else if (t == is_uppercase_utf8) {
          558                 ret = grapheme_is_uppercase_utf8(test->input.src,
          559                                                  test->input.srclen, &caselen);
          560         } else if (t == is_titlecase_utf8) {
          561                 ret = grapheme_is_titlecase_utf8(test->input.src,
          562                                                  test->input.srclen, &caselen);
          563 
          564         } else {
          565                 goto err;
          566         }
          567 
          568         /* check results */
          569         if (ret != test->output.ret || caselen != test->output.caselen) {
          570                 goto err;
          571         }
          572 
          573         return 0;
          574 err:
          575         fprintf(stderr,
          576                 "%s: %s: Failed unit test %zu \"%s\" "
          577                 "(returned (%s, %zu) instead of (%s, %zu)).\n",
          578                 argv0, name, off, test->description, ret ? "true" : "false",
          579                 caselen, test->output.ret ? "true" : "false",
          580                 test->output.caselen);
          581         return 1;
          582 }
          583 
          584 static int
          585 unit_test_callback_to_case_utf8(const void *t, size_t off, const char *name,
          586                                 const char *argv0)
          587 {
          588         const struct unit_test_to_case_utf8 *test =
          589                 (const struct unit_test_to_case_utf8 *)t + off;
          590         size_t ret = 0, i;
          591         char buf[512];
          592 
          593         /* fill the array with canary values */
          594         memset(buf, 0x7f, LEN(buf));
          595 
          596         if (t == to_lowercase_utf8) {
          597                 ret = grapheme_to_lowercase_utf8(test->input.src,
          598                                                  test->input.srclen, buf,
          599                                                  test->input.destlen);
          600         } else if (t == to_uppercase_utf8) {
          601                 ret = grapheme_to_uppercase_utf8(test->input.src,
          602                                                  test->input.srclen, buf,
          603                                                  test->input.destlen);
          604         } else if (t == to_titlecase_utf8) {
          605                 ret = grapheme_to_titlecase_utf8(test->input.src,
          606                                                  test->input.srclen, buf,
          607                                                  test->input.destlen);
          608         } else {
          609                 goto err;
          610         }
          611 
          612         /* check results */
          613         if (ret != test->output.ret ||
          614             memcmp(buf, test->output.dest,
          615                    MIN(test->input.destlen, test->output.ret))) {
          616                 goto err;
          617         }
          618 
          619         /* check that none of the canary values have been overwritten */
          620         for (i = test->input.destlen; i < LEN(buf); i++) {
          621                 if (buf[i] != 0x7f) {
          622                         goto err;
          623                 }
          624         }
          625 
          626         return 0;
          627 err:
          628         fprintf(stderr,
          629                 "%s: %s: Failed unit test %zu \"%s\" "
          630                 "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n",
          631                 argv0, name, off, test->description, (int)ret, buf, ret,
          632                 (int)test->output.ret, test->output.dest, test->output.ret);
          633         return 1;
          634 }
          635 
          636 int
          637 main(int argc, char *argv[])
          638 {
          639         (void)argc;
          640 
          641         return run_unit_tests(unit_test_callback_is_case_utf8,
          642                               is_lowercase_utf8, LEN(is_lowercase_utf8),
          643                               "grapheme_is_lowercase_utf8", argv[0]) +
          644                run_unit_tests(unit_test_callback_is_case_utf8,
          645                               is_uppercase_utf8, LEN(is_uppercase_utf8),
          646                               "grapheme_is_uppercase_utf8", argv[0]) +
          647                run_unit_tests(unit_test_callback_is_case_utf8,
          648                               is_titlecase_utf8, LEN(is_titlecase_utf8),
          649                               "grapheme_is_titlecase_utf8", argv[0]) +
          650                run_unit_tests(unit_test_callback_to_case_utf8,
          651                               to_lowercase_utf8, LEN(to_lowercase_utf8),
          652                               "grapheme_to_lowercase_utf8", argv[0]) +
          653                run_unit_tests(unit_test_callback_to_case_utf8,
          654                               to_uppercase_utf8, LEN(to_uppercase_utf8),
          655                               "grapheme_to_uppercase_utf8", argv[0]) +
          656                run_unit_tests(unit_test_callback_to_case_utf8,
          657                               to_titlecase_utf8, LEN(to_titlecase_utf8),
          658                               "grapheme_to_titlecase_utf8", argv[0]);
          659 }