Add case-conversion-unit-tests - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit e63bcc42010176b300feea6a7412f814a6cc4191
 (DIR) parent 5332f7ee034081618617c2b0785733ccc9ec8753
 (HTM) Author: Laslo Hunhold <dev@frign.de>
       Date:   Wed, 21 Sep 2022 20:18:12 +0200
       
       Add case-conversion-unit-tests
       
       To give even more assurance and catch any possible future regressions,
       exhaustive unit tests are added for the case-conversion functions.
       
       Signed-off-by: Laslo Hunhold <dev@frign.de>
       
       Diffstat:
         M Makefile                            |       3 +++
         A test/case.c                         |     329 +++++++++++++++++++++++++++++++
         M test/util.c                         |      21 +++++++++++++++++++--
         M test/util.h                         |       5 +++++
       
       4 files changed, 356 insertions(+), 2 deletions(-)
       ---
 (DIR) diff --git a/Makefile b/Makefile
       @@ -53,6 +53,7 @@ SRC =\
                src/word\
        
        TEST =\
       +        test/case\
                test/character\
                test/line\
                test/sentence\
       @@ -160,6 +161,7 @@ src/sentence.o: src/sentence.c config.mk gen/sentence.h grapheme.h src/util.h
        src/utf8.o: src/utf8.c config.mk grapheme.h
        src/util.o: src/util.c config.mk gen/types.h grapheme.h src/util.h
        src/word.o: src/word.c config.mk gen/word.h grapheme.h src/util.h
       +test/case.o: test/case.c config.mk grapheme.h test/util.h
        test/character.o: test/character.c config.mk gen/character-test.h grapheme.h test/util.h
        test/line.o: test/line.c config.mk gen/line-test.h grapheme.h test/util.h
        test/sentence.o: test/sentence.c config.mk gen/sentence-test.h grapheme.h test/util.h
       @@ -183,6 +185,7 @@ gen/sentence: gen/sentence.o gen/util.o
        gen/sentence-test: gen/sentence-test.o gen/util.o
        gen/word: gen/word.o gen/util.o
        gen/word-test: gen/word-test.o gen/util.o
       +test/case: test/case.o test/util.o libgrapheme.a
        test/character: test/character.o test/util.o libgrapheme.a
        test/line: test/line.o test/util.o libgrapheme.a
        test/sentence: test/sentence.o test/util.o libgrapheme.a
 (DIR) diff --git a/test/case.c b/test/case.c
       @@ -0,0 +1,329 @@
       +/* See LICENSE file for copyright and license details. */
       +#include <stdbool.h>
       +#include <stdint.h>
       +#include <stdio.h>
       +#include <string.h>
       +
       +#include "../grapheme.h"
       +#include "util.h"
       +
       +struct unit_test_to_case_utf8 { /* one table-driven case for the grapheme_to_*case_utf8() tests */
       +        const char *description; /* label printed when the case fails */
       +        struct {
       +                const char *src; /* source bytes handed to the function under test */
       +                size_t srclen;   /* length argument; the "NUL-terminated" cases pass SIZE_MAX */
       +                size_t destlen;  /* destination size argument; bytes from here on must stay canary */
       +        } input;
       +        struct {
       +                const char *dest; /* expected bytes; the first MIN(destlen, ret) are compared */
       +                size_t ret;       /* expected return value */
       +        } output;
       +};
       +
       +struct unit_test_to_case_utf8 lowercase_utf8[] = { /* cases run against grapheme_to_lowercase_utf8() */
       +        { /* 0 */
       +                .description = "empty input",
       +                .input = { .src = "", .srclen = 0, .destlen = 10 },
       +                .output = { .dest = "", .ret = 0 },
       +        },
       +        { /* 1 */
       +                .description = "empty output",
       +                .input = { .src = "hello", .srclen = 5, .destlen = 0 },
       +                .output = { .dest = "", .ret = 5 },
       +        },
       +        { /* 2 */
       +                .description = "one character, conversion",
       +                .input = { .src = "A", .srclen = 1, .destlen = 10 },
       +                .output = { .dest = "a", .ret = 1 },
       +        },
       +        { /* 3 */
       +                .description = "one character, no conversion",
       +                .input = { .src = "a", .srclen = 1, .destlen = 10 },
       +                .output = { .dest = "a", .ret = 1 },
       +        },
       +        { /* 4 */
       +                .description = "one character, conversion, truncation",
       +                .input = { .src = "A", .srclen = 1, .destlen = 0 },
       +                .output = { .dest = "", .ret = 1 },
       +        },
       +        { /* 5 */
       +                .description = "one character, conversion, NUL-terminated",
       +                .input = { .src = "A", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "a", .ret = 1 },
       +        },
       +        { /* 6 */
       +                .description = "one character, no conversion, NUL-terminated",
       +                .input = { .src = "a", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "a", .ret = 1 },
       +        },
       +        { /* 7 */
       +                .description = "one character, conversion, NUL-terminated, truncation",
       +                .input = { .src = "A", .srclen = SIZE_MAX, .destlen = 0 },
       +                .output = { .dest = "", .ret = 1 },
       +        },
       +        { /* 8 */
       +                .description = "one word, conversion",
       +                .input = { .src = "wOrD", .srclen = 4, .destlen = 10 },
       +                .output = { .dest = "word", .ret = 4 },
       +        },
       +        { /* 9 */
       +                .description = "one word, no conversion",
       +                .input = { .src = "word", .srclen = 4, .destlen = 10 },
       +                .output = { .dest = "word", .ret = 4 },
       +        },
       +        { /* 10 */
       +                .description = "one word, conversion, truncation",
       +                .input = { .src = "wOrD", .srclen = 4, .destlen = 3 },
       +                .output = { .dest = "wo", .ret = 4 },
       +        },
       +        { /* 11 */
       +                .description = "one word, conversion, NUL-terminated",
       +                .input = { .src = "wOrD", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "word", .ret = 4 },
       +        },
       +        { /* 12 */
       +                .description = "one word, no conversion, NUL-terminated",
       +                .input = { .src = "word", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "word", .ret = 4 },
       +        },
       +        { /* 13 */
       +                .description = "one word, conversion, NUL-terminated, truncation",
       +                .input = { .src = "wOrD", .srclen = SIZE_MAX, .destlen = 3 },
       +                .output = { .dest = "wo", .ret = 4 },
       +        },
       +};
       +
       +struct unit_test_to_case_utf8 uppercase_utf8[] = { /* cases run against grapheme_to_uppercase_utf8() */
       +        { /* 0 */
       +                .description = "empty input",
       +                .input = { .src = "", .srclen = 0, .destlen = 10 },
       +                .output = { .dest = "", .ret = 0 },
       +        },
       +        { /* 1 */
       +                .description = "empty output",
       +                .input = { .src = "hello", .srclen = 5, .destlen = 0 },
       +                .output = { .dest = "", .ret = 5 },
       +        },
       +        { /* 2 */
       +                .description = "one character, conversion",
       +                .input = { .src = "a", .srclen = 1, .destlen = 10 },
       +                .output = { .dest = "A", .ret = 1 },
       +        },
       +        { /* 3 */
       +                .description = "one character, no conversion",
       +                .input = { .src = "A", .srclen = 1, .destlen = 10 },
       +                .output = { .dest = "A", .ret = 1 },
       +        },
       +        { /* 4 */
       +                .description = "one character, conversion, truncation",
       +                .input = { .src = "a", .srclen = 1, .destlen = 0 },
       +                .output = { .dest = "", .ret = 1 },
       +        },
       +        { /* 5 */
       +                .description = "one character, conversion, NUL-terminated",
       +                .input = { .src = "a", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "A", .ret = 1 },
       +        },
       +        { /* 6 */
       +                .description = "one character, no conversion, NUL-terminated",
       +                .input = { .src = "A", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "A", .ret = 1 },
       +        },
       +        { /* 7 */
       +                .description = "one character, conversion, NUL-terminated, truncation",
       +                .input = { .src = "a", .srclen = SIZE_MAX, .destlen = 0 },
       +                .output = { .dest = "", .ret = 1 },
       +        },
       +        { /* 8 */
       +                .description = "one word, conversion",
       +                .input = { .src = "wOrD", .srclen = 4, .destlen = 10 },
       +                .output = { .dest = "WORD", .ret = 4 },
       +        },
       +        { /* 9 */
       +                .description = "one word, no conversion",
       +                .input = { .src = "WORD", .srclen = 4, .destlen = 10 },
       +                .output = { .dest = "WORD", .ret = 4 },
       +        },
       +        { /* 10 */
       +                .description = "one word, conversion, truncation",
       +                .input = { .src = "wOrD", .srclen = 4, .destlen = 3 },
       +                .output = { .dest = "WO", .ret = 4 },
       +        },
       +        { /* 11 */
       +                .description = "one word, conversion, NUL-terminated",
       +                .input = { .src = "wOrD", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "WORD", .ret = 4 },
       +        },
       +        { /* 12 */
       +                .description = "one word, no conversion, NUL-terminated",
       +                .input = { .src = "WORD", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "WORD", .ret = 4 },
       +        },
       +        { /* 13 */
       +                .description = "one word, conversion, NUL-terminated, truncation",
       +                .input = { .src = "wOrD", .srclen = SIZE_MAX, .destlen = 3 },
       +                .output = { .dest = "WO", .ret = 4 },
       +        },
       +};
       +
       +struct unit_test_to_case_utf8 titlecase_utf8[] = { /* cases run against grapheme_to_titlecase_utf8() */
       +        { /* 0 */
       +                .description = "empty input",
       +                .input = { .src = "", .srclen = 0, .destlen = 10 },
       +                .output = { .dest = "", .ret = 0 },
       +        },
       +        { /* 1 */
       +                .description = "empty output",
       +                .input = { .src = "hello", .srclen = 5, .destlen = 0 },
       +                .output = { .dest = "", .ret = 5 },
       +        },
       +        { /* 2 */
       +                .description = "one character, conversion",
       +                .input = { .src = "a", .srclen = 1, .destlen = 10 },
       +                .output = { .dest = "A", .ret = 1 },
       +        },
       +        { /* 3 */
       +                .description = "one character, no conversion",
       +                .input = { .src = "A", .srclen = 1, .destlen = 10 },
       +                .output = { .dest = "A", .ret = 1 },
       +        },
       +        { /* 4 */
       +                .description = "one character, conversion, truncation",
       +                .input = { .src = "a", .srclen = 1, .destlen = 0 },
       +                .output = { .dest = "", .ret = 1 },
       +        },
       +        { /* 5 */
       +                .description = "one character, conversion, NUL-terminated",
       +                .input = { .src = "a", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "A", .ret = 1 },
       +        },
       +        { /* 6 */
       +                .description = "one character, no conversion, NUL-terminated",
       +                .input = { .src = "A", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "A", .ret = 1 },
       +        },
       +        { /* 7 */
       +                .description = "one character, conversion, NUL-terminated, truncation",
       +                .input = { .src = "a", .srclen = SIZE_MAX, .destlen = 0 },
       +                .output = { .dest = "", .ret = 1 },
       +        },
       +        { /* 8 */
       +                .description = "one word, conversion",
       +                .input = { .src = "heLlo", .srclen = 5, .destlen = 10 },
       +                .output = { .dest = "Hello", .ret = 5 },
       +        },
       +        { /* 9 */
       +                .description = "one word, no conversion",
       +                .input = { .src = "Hello", .srclen = 5, .destlen = 10 },
       +                .output = { .dest = "Hello", .ret = 5 },
       +        },
       +        { /* 10 */
       +                .description = "one word, conversion, truncation",
       +                .input = { .src = "heLlo", .srclen = 5, .destlen = 2 },
       +                .output = { .dest = "H", .ret = 5 },
       +        },
       +        { /* 11 */
       +                .description = "one word, conversion, NUL-terminated",
       +                .input = { .src = "heLlo", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "Hello", .ret = 5 },
       +        },
       +        { /* 12 */
       +                .description = "one word, no conversion, NUL-terminated",
       +                .input = { .src = "Hello", .srclen = SIZE_MAX, .destlen = 10 },
       +                .output = { .dest = "Hello", .ret = 5 },
       +        },
       +        { /* 13 */
       +                .description = "one word, conversion, NUL-terminated, truncation",
       +                .input = { .src = "heLlo", .srclen = SIZE_MAX, .destlen = 3 },
       +                .output = { .dest = "He", .ret = 5 },
       +        },
       +        { /* 14 */
       +                .description = "two words, conversion",
       +                .input = { .src = "heLlo wORLd!", .srclen = 12, .destlen = 20 },
       +                .output = { .dest = "Hello World!", .ret = 12 },
       +        },
       +        { /* 15 */
       +                .description = "two words, no conversion",
       +                .input = { .src = "Hello World!", .srclen = 12, .destlen = 20 },
       +                .output = { .dest = "Hello World!", .ret = 12 },
       +        },
       +        { /* 16 */
       +                .description = "two words, conversion, truncation",
       +                .input = { .src = "heLlo wORLd!", .srclen = 12, .destlen = 8 },
       +                .output = { .dest = "Hello W", .ret = 12 },
       +        },
       +        { /* 17 */
       +                .description = "two words, conversion, NUL-terminated",
       +                .input = { .src = "heLlo wORLd!", .srclen = SIZE_MAX, .destlen = 20 },
       +                .output = { .dest = "Hello World!", .ret = 12 },
       +        },
       +        { /* 18 */
       +                .description = "two words, no conversion, NUL-terminated",
       +                .input = { .src = "Hello World!", .srclen = SIZE_MAX, .destlen = 20 },
       +                .output = { .dest = "Hello World!", .ret = 12 },
       +        },
       +        { /* 19 */
       +                .description = "two words, conversion, NUL-terminated, truncation",
       +                .input = { .src = "heLlo wORLd!", .srclen = SIZE_MAX, .destlen = 4 },
       +                .output = { .dest = "Hel", .ret = 12 },
       +        },
       +};
       +
       +/* run test off of table t (its identity picks the function); 0 = pass, 1 = fail */
       +static int
       +unit_test_callback_to_case_utf8(void *t, size_t off, const char *name, const char *argv0)
       +{
       +        struct unit_test_to_case_utf8 *test = (struct unit_test_to_case_utf8 *)t + off;
       +        size_t ret = 0, i;
       +        char buf[512];
       +
       +        /* fill the array with canary values */
       +        memset(buf, 0x7f, LEN(buf));
       +
       +        if (t == lowercase_utf8) {
       +                ret = grapheme_to_lowercase_utf8(test->input.src, test->input.srclen,
       +                                                 buf, test->input.destlen);
       +        } else if (t == uppercase_utf8) {
       +                ret = grapheme_to_uppercase_utf8(test->input.src, test->input.srclen,
       +                                                 buf, test->input.destlen);
       +        } else if (t == titlecase_utf8) {
       +                ret = grapheme_to_titlecase_utf8(test->input.src, test->input.srclen,
       +                                                 buf, test->input.destlen);
       +        } else {
       +                goto err;
       +        }
       +
       +        /* check the return value and every byte that fits into the destination */
       +        if (ret != test->output.ret ||
       +            memcmp(buf, test->output.dest, MIN(test->input.destlen, test->output.ret))) {
       +                goto err;
       +        }
       +
       +        /* check that none of the canary values have been overwritten */
       +        for (i = test->input.destlen; i < LEN(buf); i++) {
       +                if (buf[i] != 0x7f) {
       +                        goto err;
       +                }
       +        }
       +
       +        return 0;
       +err: /* print at most destlen bytes of buf so the canary filler never reaches stderr */
       +        fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
       +                "(returned (\"%.*s\", %zu) instead of (\"%.*s\", %zu)).\n", argv0,
       +                name, off, test->description, (int)MIN(ret, test->input.destlen), buf,
       +                ret, (int)test->output.ret, test->output.dest, test->output.ret);
       +        return 1;
       +}
       +
       +int
       +main(int argc, char *argv[])
       +{
       +        (void)argc; /* unused; the suites run as separate statements to fix their order */
       +        int failed = run_unit_tests(unit_test_callback_to_case_utf8, lowercase_utf8,
       +                                    LEN(lowercase_utf8), "grapheme_to_lowercase_utf8", argv[0]);
       +        failed += run_unit_tests(unit_test_callback_to_case_utf8, uppercase_utf8,
       +                                 LEN(uppercase_utf8), "grapheme_to_uppercase_utf8", argv[0]);
       +        failed += run_unit_tests(unit_test_callback_to_case_utf8, titlecase_utf8,
       +                                 LEN(titlecase_utf8), "grapheme_to_titlecase_utf8", argv[0]);
       +        return failed; /* 0 if every suite passed, otherwise the number of failed suites */
       +}
 (DIR) diff --git a/test/util.c b/test/util.c
       @@ -23,7 +23,7 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
                                /* check if our resulting offset matches */
                                if (j == test[i].lenlen ||
                                    res != test[i].len[j++]) {
       -                                fprintf(stderr, "%s: Failed test %zu \"%s\".\n",
       +                                fprintf(stderr, "%s: Failed conformance test %zu \"%s\".\n",
                                                argv0, i, test[i].descr);
                                        fprintf(stderr, "J=%zu: EXPECTED len %zu, got %zu\n", j-1, test[i].len[j-1], res);
                                        failed++;
       @@ -31,7 +31,24 @@ run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
                                }
                        }
                }
       -        printf("%s: %zu/%zu tests passed.\n", argv0,
       +        printf("%s: %zu/%zu conformance tests passed.\n", argv0,
       +               testlen - failed, testlen);
       +
       +        return (failed > 0) ? 1 : 0;
       +}
       +
       +int
       +run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *,
       +               const char *), void *test, size_t testlen, const char *name,
       +               const char *argv0)
       +{
       +        size_t i, failed;
       +
       +        for (i = 0, failed = 0; i < testlen; i++) {
       +                failed += (unit_test_callback(test, i, name, argv0) == 0) ? 0 : 1;
       +        }
       +
       +        printf("%s: %s: %zu/%zu unit tests passed.\n", argv0, name,
                       testlen - failed, testlen);
        
                return (failed > 0) ? 1 : 0;
 (DIR) diff --git a/test/util.h b/test/util.h
       @@ -5,10 +5,15 @@
        #include "../gen/types.h"
        #include "../grapheme.h"
        
       +#undef MIN
       +#define MIN(x,y)  ((x) < (y) ? (x) : (y))
       +#undef LEN
        #define LEN(x) (sizeof(x) / sizeof(*(x)))
        
        int run_break_tests(size_t (*next_break)(const uint_least32_t *, size_t),
                            const struct break_test *test, size_t testlen,
                            const char *);
       +int run_unit_tests(int (*unit_test_callback)(void *, size_t, const char *,
       +                   const char *), void *, size_t, const char *, const char *);
        
        #endif /* UTIL_H */