Refactor case-checking-functions with Herodotus and add unit tests - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 5dec22a7143e1105f25c7a7626fa166d882367d0
 (DIR) parent 8a7e2ee85f0a2824e48e85e57534c5b18113cf07
 (HTM) Author: Laslo Hunhold <dev@frign.de>
       Date:   Sat, 24 Sep 2022 10:36:15 +0200
       
       Refactor case-checking-functions with Herodotus and add unit tests
       
       Additionally, expand the unit tests with special-casing-cases.
       
       Signed-off-by: Laslo Hunhold <dev@frign.de>
       
       Diffstat:
         M src/case.c                          |     213 +++++++++++++++----------------
         M src/util.h                          |       3 ++-
         M test/case.c                         |     312 +++++++++++++++++++++++++++----
       
       3 files changed, 382 insertions(+), 146 deletions(-)
       ---
 (DIR) diff --git a/src/case.c b/src/case.c
       @@ -1,4 +1,5 @@
        /* See LICENSE file for copyright and license details. */
       +#include <stddef.h>
        #include <stdint.h>
        
        #include "../grapheme.h"
       @@ -208,6 +209,7 @@ to_titlecase(HERODOTUS_READER *r, HERODOTUS_WRITER *w)
                        /* cast the rest of the codepoints in the word to lowercase */
                        to_case(r, w, 1, lower_major, lower_minor, lower_special);
        
       +                /* remove the limit on the word before the next iteration */
                        herodotus_reader_pop_limit(r);
                }
        
       @@ -289,20 +291,16 @@ grapheme_to_titlecase_utf8(const char *src, size_t srclen, char *dest, size_t de
        }
        
        static inline bool
       -is_case(const void *src, size_t srclen,
       -        size_t srcnumprocess,
       -        size_t (*get_codepoint)(const void *, size_t, size_t, uint_least32_t *),
       -        const uint_least16_t *major, const int_least32_t *minor,
       -        const struct special_case *sc, size_t *output)
       +is_case(HERODOTUS_READER *r, const uint_least16_t *major,
       +        const int_least32_t *minor, const struct special_case *sc,
       +        size_t *output)
        {
       -        size_t srcoff, new_srcoff, tmp, res, off, i;
       -        uint_least32_t cp, tmp_cp;
       +        size_t off, i;
       +        bool ret = true;
       +        uint_least32_t cp;
                int_least32_t map;
        
       -        for (srcoff = 0; srcoff < srcnumprocess; srcoff = new_srcoff) {
       -                /* read in next source codepoint */
       -                new_srcoff = srcoff + get_codepoint(src, srclen, srcoff, &cp);
       -
       +        for (; herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUCCESS;) {
                        /* get and handle case mapping */
                        if (unlikely((map = get_case_offset(cp, major, minor)) >=
                                     INT32_C(0x110000))) {
       @@ -310,173 +308,162 @@ is_case(const void *src, size_t srclen,
                                 * is the difference to 0x110000*/
                                off = (uint_least32_t)map - UINT32_C(0x110000);
        
       -                        for (i = 0, tmp = srcoff; i < sc[off].cplen; i++, tmp += res) {
       -                                res = get_codepoint(src, srclen, srcoff, &tmp_cp);
       -                                if (tmp_cp != sc[off].cp[i]) {
       -                                        /* we have a difference */
       -                                        if (output) {
       -                                                *output = tmp;
       +                        for (i = 0; i < sc[off].cplen; i++) {
       +                                if (herodotus_read_codepoint(r, false, &cp) ==
       +                                    HERODOTUS_STATUS_SUCCESS) {
       +                                        if (cp != sc[off].cp[i]) {
       +                                                ret = false;
       +                                                goto done;
       +                                        } else {
       +                                                /* move forward */
       +                                                herodotus_read_codepoint(r, true, &cp);
                                                }
       -                                        return false;
       +                                } else {
       +                                        /*
       +                                         * input ended and we didn't see
       +                                         * any difference so far, so this
       +                                         * string is in fact okay
       +                                         */
       +                                        ret = true;
       +                                        goto done;
                                        }
                                }
       -                        new_srcoff = tmp;
                        } else {
                                /* we have a simple mapping */
                                if (cp != (uint_least32_t)((int_least32_t)cp + map)) {
                                        /* we have a difference */
       -                                if (output) {
       -                                        *output = srcoff;
       -                                }
       -                                return false;
       +                                ret = false;
       +                                goto done;
       +                        } else {
       +                                /* move forward */
       +                                herodotus_read_codepoint(r, true, &cp);
                                }
                        }
                }
       -
       +done:
                if (output) {
       -                *output = srcoff;
       +                *output = herodotus_reader_number_read(r);
                }
       -        return true;
       +        return ret;
        }
        
        static inline bool
       -is_titlecase(const void *src, size_t srclen,
       -             size_t (*get_codepoint)(const void *, size_t, size_t, uint_least32_t *),
       -             size_t *output)
       +is_titlecase(HERODOTUS_READER *r, size_t *output)
        {
                enum case_property prop;
       -        size_t next_wb, srcoff, res, tmp_output;
       +        enum herodotus_status s;
       +        bool ret = true;
                uint_least32_t cp;
        
       -        for (srcoff = 0; ; ) {
       -                if (get_codepoint == get_codepoint_utf8) {
       -                        if ((next_wb = grapheme_next_word_break_utf8((const char *)src + srcoff,
       -                                                                     srclen - srcoff)) == 0) {
       -                                /* we consumed all of the string */
       -                                break;
       -                        }
       -                } else {
       -                        if ((next_wb = grapheme_next_word_break((const uint_least32_t *)src + srcoff,
       -                                                                srclen - srcoff)) == 0) {
       -                                /* we consumed all of the string */
       -                                break;
       -                        }
       -                }
       -
       -                for (; next_wb > 0 && srcoff < srclen; next_wb -= res, srcoff += res) {
       +        for (;;) {
       +                herodotus_reader_push_advance_limit(r, herodotus_next_word_break(r));
       +                for (; (s = herodotus_read_codepoint(r, false, &cp)) == HERODOTUS_STATUS_SUCCESS;) {
                                /* check if we have a cased character */
       -                        res = get_codepoint(src, srclen, srcoff, &cp);
                                prop = get_case_property(cp);
                                if (prop == CASE_PROP_CASED ||
                                    prop == CASE_PROP_BOTH_CASED_CASE_IGNORABLE) {
                                        break;
       -                        }
       -                }
       -
       -                if (next_wb > 0) {
       -                        /* get character length */
       -                        res = get_codepoint(src, srclen, srcoff, &cp);
       -
       -                        /* we have a cased character at srcoff, check if it's titlecase */
       -                        if (get_codepoint == get_codepoint_utf8) {
       -                                if (!is_case((const char *)src + srcoff,
       -                                              srclen - srcoff, res,
       -                                              get_codepoint_utf8, title_major,
       -                                              title_minor, title_special, &tmp_output)) {
       -                                        if (output) {
       -                                                *output = srcoff + tmp_output;
       -                                        }
       -                                        return false;
       -                                }
                                } else {
       -                                if (!is_case((const uint_least32_t *)src + srcoff,
       -                                              srclen - srcoff, res,
       -                                              get_codepoint, title_major,
       -                                              title_minor, title_special, &tmp_output)) {
       -                                        if (output) {
       -                                                *output = srcoff + tmp_output;
       -                                        }
       -                                        return false;
       -                                }
       +                                /* increment reader */
       +                                herodotus_read_codepoint(r, true, &cp);
                                }
       +                }
        
       +                if (s == HERODOTUS_STATUS_END_OF_BUFFER) {
       +                        /* we are done */
       +                        break;
       +                } else if (s == HERODOTUS_STATUS_SOFT_LIMIT_REACHED) {
       +                        /*
       +                         * we did not encounter any cased character
       +                         * up to the word break
       +                         */
       +                        continue;
       +                } else {
                                /*
       -                         * we consumed a character (make sure to never
       -                         * underflow next_wb; this should not happen,
       -                         * but it's better to be sure)
       +                         * we encountered a cased character before the word
       +                         * break, check if it's titlecase
                                 */
       -                        srcoff += res;
       -                        next_wb -= (res <= next_wb) ? res : next_wb;
       +                        herodotus_reader_push_advance_limit(r,
       +                                herodotus_reader_next_codepoint_break(r));
       +                        if (!is_case(r, title_major, title_minor, title_special, NULL)) {
       +                                ret = false;
       +                                goto done;
       +                        }
       +                        herodotus_reader_pop_limit(r);
                        }
        
                        /* check if the rest of the codepoints in the word are lowercase */
       -                if (get_codepoint == get_codepoint_utf8) {
       -                        if (!is_case((const char *)src + srcoff,
       -                                      srclen - srcoff, next_wb,
       -                                      get_codepoint_utf8, lower_major,
       -                                      lower_minor, lower_special, &tmp_output)) {
       -                                if (output) {
       -                                        *output = srcoff + tmp_output;
       -                                }
       -                                return false;
       -                        }
       -                } else {
       -                        if (!is_case((const uint_least32_t *)src + srcoff,
       -                                      srclen - srcoff, next_wb,
       -                                      get_codepoint, lower_major,
       -                                      lower_minor, lower_special, &tmp_output)) {
       -                                if (output) {
       -                                        *output = srcoff + tmp_output;
       -                                }
       -                                return false;
       -                        }
       +                if (!is_case(r, lower_major, lower_minor, lower_special, NULL)) {
       +                        ret = false;
       +                        goto done;
                        }
       -                srcoff += next_wb;
       -        }
        
       +                /* remove the limit on the word before the next iteration */
       +                herodotus_reader_pop_limit(r);
       +        }
       +done:
                if (output) {
       -                *output = srcoff;
       +                *output = herodotus_reader_number_read(r);
                }
       -        return true;
       +        return ret;
        }
        
        bool
        grapheme_is_uppercase(const uint_least32_t *src, size_t srclen, size_t *caselen)
        {
       -        return is_case(src, srclen, srclen, get_codepoint,
       -                       upper_major, upper_minor, upper_special, caselen);
       +        HERODOTUS_READER r;
       +
       +        herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen);
       +
       +        return is_case(&r, upper_major, upper_minor, upper_special, caselen);
        }
        
        bool
        grapheme_is_lowercase(const uint_least32_t *src, size_t srclen, size_t *caselen)
        {
       -        return is_case(src, srclen, srclen, get_codepoint,
       -                       lower_major, lower_minor, lower_special, caselen);
       +        HERODOTUS_READER r;
       +
       +        herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen);
       +
       +        return is_case(&r, lower_major, lower_minor, lower_special, caselen);
        }
        
        bool
        grapheme_is_titlecase(const uint_least32_t *src, size_t srclen, size_t *caselen)
        {
       -        return is_titlecase(src, srclen, get_codepoint, caselen);
       +        HERODOTUS_READER r;
       +
       +        herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, src, srclen);
       +
       +        return is_titlecase(&r, caselen);
        }
        
        bool
        grapheme_is_uppercase_utf8(const char *src, size_t srclen, size_t *caselen)
        {
       -        return is_case(src, srclen, srclen, get_codepoint_utf8,
       -                       upper_major, upper_minor, upper_special, caselen);
       +        HERODOTUS_READER r;
       +
       +        herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen);
       +
       +        return is_case(&r, upper_major, upper_minor, upper_special, caselen);
        }
        
        bool
        grapheme_is_lowercase_utf8(const char *src, size_t srclen, size_t *caselen)
        {
       -        return is_case(src, srclen, srclen, get_codepoint_utf8,
       -                       lower_major, lower_minor, lower_special, caselen);
       +        HERODOTUS_READER r;
       +
       +        herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen);
        
       +        return is_case(&r, lower_major, lower_minor, lower_special, caselen);
        }
        
        bool
        grapheme_is_titlecase_utf8(const char *src, size_t srclen, size_t *caselen)
        {
       -        return is_titlecase(src, srclen, get_codepoint_utf8, caselen);
       +        HERODOTUS_READER r;
       +
       +        herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, src, srclen);
       +
       +        return is_titlecase(&r, caselen);
        }
 (DIR) diff --git a/src/util.h b/src/util.h
       @@ -79,6 +79,7 @@ void herodotus_reader_init(HERODOTUS_READER *, enum herodotus_type,
        void herodotus_reader_copy(const HERODOTUS_READER *, HERODOTUS_READER *);
        void herodotus_reader_push_advance_limit(HERODOTUS_READER *, size_t);
        void herodotus_reader_pop_limit(HERODOTUS_READER *);
       +size_t herodotus_reader_number_read(const HERODOTUS_READER *);
        size_t herodotus_reader_next_word_break(const HERODOTUS_READER *);
        size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *);
        enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_least32_t *);
       @@ -86,7 +87,7 @@ enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool, uint_le
        void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *,
                                   size_t);
        void herodotus_writer_nul_terminate(HERODOTUS_WRITER *);
       -size_t herodotus_writer_number_written(HERODOTUS_WRITER *);
       +size_t herodotus_writer_number_written(const HERODOTUS_WRITER *);
        void herodotus_write_codepoint(HERODOTUS_WRITER *, uint_least32_t);
        
        size_t get_codepoint(const void *, size_t, size_t, uint_least32_t *);
 (DIR) diff --git a/test/case.c b/test/case.c
       @@ -7,6 +7,18 @@
        #include "../grapheme.h"
        #include "util.h"
        
       +struct unit_test_is_case_utf8 {
       +        const char *description;
       +        struct {
       +                const char *src;
       +                size_t srclen;
       +        } input;
       +        struct {
       +                bool ret;
       +                size_t caselen;
       +        } output;
       +};
       +
        struct unit_test_to_case_utf8 {
                const char *description;
                struct {
       @@ -20,7 +32,201 @@ struct unit_test_to_case_utf8 {
                } output;
        };
        
       -static struct unit_test_to_case_utf8 lowercase_utf8[] = {
       +static struct unit_test_is_case_utf8 is_lowercase_utf8[] = {
       +        {
       +                .description = "empty input",
       +                .input =  { "", 0 },
       +                .output = { true, 0 },
       +        },
       +        {
       +                .description = "one character, violation",
       +                .input =  { "A", 1 },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one character, confirmation",
       +                .input =  { "\xc3\x9f", 2 },
       +                .output = { true, 2 },
       +        },
       +        {
       +                .description = "one character, violation, NUL-terminated",
       +                .input =  { "A", SIZE_MAX },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one character, confirmation, NUL-terminated",
       +                .input =  { "\xc3\x9f", SIZE_MAX },
       +                .output = { true, 2 },
       +        },
       +        {
       +                .description = "one word, violation",
       +                .input =  { "Hello", 5 },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one word, partial confirmation",
       +                .input =  { "gru" "\xc3\x9f" "fOrmel", 11 },
       +                .output = { false, 6 },
       +        },
       +        {
       +                .description = "one word, full confirmation",
       +                .input =  { "gru" "\xc3\x9f" "formel", 11 },
       +                .output = { true, 11 },
       +        },
       +        {
       +                .description = "one word, violation, NUL-terminated",
       +                .input =  { "Hello", SIZE_MAX },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one word, partial confirmation, NUL-terminated",
       +                .input =  { "gru" "\xc3\x9f" "fOrmel", SIZE_MAX },
       +                .output = { false, 6 },
       +        },
       +        {
       +                .description = "one word, full confirmation, NUL-terminated",
       +                .input =  { "gru" "\xc3\x9f" "formel", SIZE_MAX },
       +                .output = { true, 11 },
       +        },
       +};
       +
       +static struct unit_test_is_case_utf8 is_uppercase_utf8[] = {
       +        {
       +                .description = "empty input",
       +                .input =  { "", 0 },
       +                .output = { true, 0 },
       +        },
       +        {
       +                .description = "one character, violation",
       +                .input =  { "\xc3\x9f", 2 },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one character, confirmation",
       +                .input =  { "A", 1 },
       +                .output = { true, 1 },
       +        },
       +        {
       +                .description = "one character, violation, NUL-terminated",
       +                .input =  { "\xc3\x9f", SIZE_MAX },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one character, confirmation, NUL-terminated",
       +                .input =  { "A", SIZE_MAX },
       +                .output = { true, 1 },
       +        },
       +        {
       +                .description = "one word, violation",
       +                .input =  { "hello", 5 },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one word, partial confirmation",
       +                .input =  { "GRU" "\xc3\x9f" "formel", 11 },
       +                .output = { false, 3 },
       +        },
       +        {
       +                .description = "one word, full confirmation",
       +                .input =  { "HELLO", 5 },
       +                .output = { true, 5 },
       +        },
       +        {
       +                .description = "one word, violation, NUL-terminated",
       +                .input =  { "hello", SIZE_MAX },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one word, partial confirmation, NUL-terminated",
       +                .input =  { "GRU" "\xc3\x9f" "formel", SIZE_MAX },
       +                .output = { false, 3 },
       +        },
       +        {
       +                .description = "one word, full confirmation, NUL-terminated",
       +                .input =  { "HELLO", SIZE_MAX },
       +                .output = { true, 5 },
       +        },
       +};
       +
       +static struct unit_test_is_case_utf8 is_titlecase_utf8[] = {
       +        {
       +                .description = "empty input",
       +                .input =  { "", 0 },
       +                .output = { true, 0 },
       +        },
       +        {
       +                .description = "one character, violation",
       +                .input =  { "\xc3\x9f", 2 },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one character, confirmation",
       +                .input =  { "A", 1 },
       +                .output = { true, 1 },
       +        },
       +        {
       +                .description = "one character, violation, NUL-terminated",
       +                .input =  { "\xc3\x9f", SIZE_MAX },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one character, confirmation, NUL-terminated",
       +                .input =  { "A", SIZE_MAX },
       +                .output = { true, 1 },
       +        },
       +        {
       +                .description = "one word, violation",
       +                .input =  { "hello", 5 },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one word, partial confirmation",
       +                .input =  { "Gru" "\xc3\x9f" "fOrmel", 11 },
       +                .output = { false, 6 },
       +        },
       +        {
       +                .description = "one word, full confirmation",
       +                .input =  { "Gru" "\xc3\x9f" "formel", 11 },
       +                .output = { true, 11 },
       +        },
       +        {
       +                .description = "one word, violation, NUL-terminated",
       +                .input =  { "hello", SIZE_MAX },
       +                .output = { false, 0 },
       +        },
       +        {
       +                .description = "one word, partial confirmation, NUL-terminated",
       +                .input =  { "Gru" "\xc3\x9f" "fOrmel", SIZE_MAX },
       +                .output = { false, 6 },
       +        },
       +        {
       +                .description = "one word, full confirmation, NUL-terminated",
       +                .input =  { "Gru" "\xc3\x9f" "formel", SIZE_MAX },
       +                .output = { true, 11 },
       +        },
       +        {
       +                .description = "multiple words, partial confirmation",
       +                .input =  { "Hello Gru" "\xc3\x9f" "fOrmel!", 18 },
       +                .output = { false, 12 },
       +        },
       +        {
       +                .description = "multiple words, full confirmation",
       +                .input =  { "Hello Gru" "\xc3\x9f" "formel!", 18 },
       +                .output = { true, 18 },
       +        },
       +        {
       +                .description = "multiple words, partial confirmation, NUL-terminated",
       +                .input =  { "Hello Gru" "\xc3\x9f" "fOrmel!", SIZE_MAX },
       +                .output = { false, 12 },
       +        },
       +        {
       +                .description = "multiple words, full confirmation, NUL-terminated",
       +                .input =  { "Hello Gru" "\xc3\x9f" "formel!", SIZE_MAX },
       +                .output = { true, 18 },
       +        },
       +};
       +
       +static struct unit_test_to_case_utf8 to_lowercase_utf8[] = {
                {
                        .description = "empty input",
                        .input =  { "", 0, 10 },
       @@ -38,8 +244,8 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = {
                },
                {
                        .description = "one character, no conversion",
       -                .input =  { "a", 1, 10 },
       -                .output = { "a", 1 },
       +                .input =  { "\xc3\x9f", 2, 10 },
       +                .output = { "\xc3\x9f", 2 },
                },
                {
                        .description = "one character, conversion, truncation",
       @@ -53,8 +259,8 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = {
                },
                {
                        .description = "one character, no conversion, NUL-terminated",
       -                .input =  { "a", SIZE_MAX, 10 },
       -                .output = { "a", 1 },
       +                .input =  { "\xc3\x9f", SIZE_MAX, 10 },
       +                .output = { "\xc3\x9f", 2 },
                },
                {
                        .description = "one character, conversion, NUL-terminated, truncation",
       @@ -93,7 +299,7 @@ static struct unit_test_to_case_utf8 lowercase_utf8[] = {
                },
        };
        
       -static struct unit_test_to_case_utf8 uppercase_utf8[] = {
       +static struct unit_test_to_case_utf8 to_uppercase_utf8[] = {
                {
                        .description = "empty input",
                        .input =  { "", 0, 10 },
       @@ -106,8 +312,8 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
                },
                {
                        .description = "one character, conversion",
       -                .input =  { "a", 1, 10 },
       -                .output = { "A", 1 },
       +                .input =  { "\xc3\x9f", 2, 10 },
       +                .output = { "SS", 2 },
                },
                {
                        .description = "one character, no conversion",
       @@ -116,13 +322,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
                },
                {
                        .description = "one character, conversion, truncation",
       -                .input =  { "a", 1, 0 },
       -                .output = { "", 1 },
       +                .input =  { "\xc3\x9f", 2, 0 },
       +                .output = { "", 2 },
                },
                {
                        .description = "one character, conversion, NUL-terminated",
       -                .input =  { "a", SIZE_MAX, 10 },
       -                .output = { "A", 1 },
       +                .input =  { "\xc3\x9f", SIZE_MAX, 10 },
       +                .output = { "SS", 2 },
                },
                {
                        .description = "one character, no conversion, NUL-terminated",
       @@ -131,13 +337,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
                },
                {
                        .description = "one character, conversion, NUL-terminated, truncation",
       -                .input =  { "a", SIZE_MAX, 0 },
       -                .output = { "", 1 },
       +                .input =  { "\xc3\x9f", SIZE_MAX, 0 },
       +                .output = { "", 2 },
                },
                {
                        .description = "one word, conversion",
       -                .input =  { "wOrD", 4, 10 },
       -                .output = { "WORD", 4 },
       +                .input =  { "gRu" "\xc3\x9f" "fOrMel", 11, 15 },
       +                .output = { "GRUSSFORMEL", 11 },
                },
                {
                        .description = "one word, no conversion",
       @@ -146,13 +352,13 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
                },
                {
                        .description = "one word, conversion, truncation",
       -                .input =  { "wOrD", 4, 3 },
       -                .output = { "WO", 4 },
       +                .input =  { "gRu" "\xc3\x9f" "formel", 11, 5 },
       +                .output = { "GRUS", 11 },
                },
                {
                        .description = "one word, conversion, NUL-terminated",
       -                .input =  { "wOrD", SIZE_MAX, 10 },
       -                .output = { "WORD", 4 },
       +                .input =  { "gRu" "\xc3\x9f" "formel", SIZE_MAX, 15 },
       +                .output = { "GRUSSFORMEL", 11 },
                },
                {
                        .description = "one word, no conversion, NUL-terminated",
       @@ -161,12 +367,12 @@ static struct unit_test_to_case_utf8 uppercase_utf8[] = {
                },
                {
                        .description = "one word, conversion, NUL-terminated, truncation",
       -                .input =  { "wOrD", SIZE_MAX, 3 },
       -                .output = { "WO", 4 },
       +                .input =  { "gRu" "\xc3\x9f" "formel", SIZE_MAX, 5 },
       +                .output = { "GRUS", 11 },
                },
        };
        
       -static struct unit_test_to_case_utf8 titlecase_utf8[] = {
       +static struct unit_test_to_case_utf8 to_titlecase_utf8[] = {
                {
                        .description = "empty input",
                        .input =  { "", 0, 10 },
       @@ -270,6 +476,42 @@ static struct unit_test_to_case_utf8 titlecase_utf8[] = {
        };
        
        static int
       +unit_test_callback_is_case_utf8(void *t, size_t off, const char *name, const char *argv0)
       +{
       +        struct unit_test_is_case_utf8 *test = (struct unit_test_is_case_utf8 *)t + off;
       +        bool ret = false;
       +        size_t caselen = 0x7f;
       +
       +        if (t == is_lowercase_utf8) {
       +                ret = grapheme_is_lowercase_utf8(test->input.src, test->input.srclen,
       +                                                 &caselen);
       +        } else if (t == is_uppercase_utf8) {
       +                ret = grapheme_is_uppercase_utf8(test->input.src, test->input.srclen,
       +                                                 &caselen);
       +        } else if (t == is_titlecase_utf8) {
       +                ret = grapheme_is_titlecase_utf8(test->input.src, test->input.srclen,
       +                                                 &caselen);
       +
       +        } else {
       +                goto err;
       +        }
       +
       +        /* check results */
       +        if (ret != test->output.ret || caselen != test->output.caselen) {
       +                goto err;
       +        }
       +
       +        return 0;
       +err:
       +        fprintf(stderr, "%s: %s: Failed unit test %zu \"%s\" "
       +                "(returned (%s, %zu) instead of (%s, %zu)).\n", argv0,
       +                name, off, test->description, ret ? "true" : "false",
       +                caselen, test->output.ret ? "true" : "false",
       +                test->output.caselen);
       +        return 1;
       +}
       +
       +static int
        unit_test_callback_to_case_utf8(void *t, size_t off, const char *name, const char *argv0)
        {
                struct unit_test_to_case_utf8 *test = (struct unit_test_to_case_utf8 *)t + off;
       @@ -279,13 +521,13 @@ unit_test_callback_to_case_utf8(void *t, size_t off, const char *name, const cha
                /* fill the array with canary values */
                memset(buf, 0x7f, LEN(buf));
        
       -        if (t == lowercase_utf8) {
       +        if (t == to_lowercase_utf8) {
                        ret = grapheme_to_lowercase_utf8(test->input.src, test->input.srclen,
                                                         buf, test->input.destlen);
       -        } else if (t == uppercase_utf8) {
       +        } else if (t == to_uppercase_utf8) {
                        ret = grapheme_to_uppercase_utf8(test->input.src, test->input.srclen,
                                                         buf, test->input.destlen);
       -        } else if (t == titlecase_utf8) {
       +        } else if (t == to_titlecase_utf8) {
                        ret = grapheme_to_titlecase_utf8(test->input.src, test->input.srclen,
                                                         buf, test->input.destlen);
                } else {
       @@ -319,10 +561,16 @@ main(int argc, char *argv[])
        {
                (void)argc;
        
       -        return run_unit_tests(unit_test_callback_to_case_utf8, lowercase_utf8,
       -                              LEN(lowercase_utf8), "grapheme_to_lowercase_utf8", argv[0]) +
       -               run_unit_tests(unit_test_callback_to_case_utf8, uppercase_utf8,
       -                              LEN(uppercase_utf8), "grapheme_to_uppercase_utf8", argv[0]) +
       -               run_unit_tests(unit_test_callback_to_case_utf8, titlecase_utf8,
       -                              LEN(titlecase_utf8), "grapheme_to_titlecase_utf8", argv[0]);
       +        return run_unit_tests(unit_test_callback_is_case_utf8, is_lowercase_utf8,
       +                              LEN(is_lowercase_utf8), "grapheme_is_lowercase_utf8", argv[0]) +
       +               run_unit_tests(unit_test_callback_is_case_utf8, is_uppercase_utf8,
       +                              LEN(is_uppercase_utf8), "grapheme_is_uppercase_utf8", argv[0]) +
       +               run_unit_tests(unit_test_callback_is_case_utf8, is_titlecase_utf8,
       +                              LEN(is_titlecase_utf8), "grapheme_is_titlecase_utf8", argv[0]) +
       +               run_unit_tests(unit_test_callback_to_case_utf8, to_lowercase_utf8,
       +                              LEN(to_lowercase_utf8), "grapheme_to_lowercase_utf8", argv[0]) +
       +               run_unit_tests(unit_test_callback_to_case_utf8, to_uppercase_utf8,
       +                              LEN(to_uppercase_utf8), "grapheme_to_uppercase_utf8", argv[0]) +
       +               run_unit_tests(unit_test_callback_to_case_utf8, to_titlecase_utf8,
       +                              LEN(to_titlecase_utf8), "grapheme_to_titlecase_utf8", argv[0]);
        }