Move get_codepoint_*()-util-functions to src/util.c - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit aa5dda2687c4907d6a47e57b1d7973b8f9d158ae
 (DIR) parent 25d89e6e460e68329e7a3f388fe3e150a8f5474a
 (HTM) Author: Laslo Hunhold <dev@frign.de>
       Date:   Tue, 16 Aug 2022 16:25:31 +0200
       
       Move get_codepoint_*()-util-functions to src/util.c
       
       Signed-off-by: Laslo Hunhold <dev@frign.de>
       
       Diffstat:
         M src/case.c                          |      62 -------------------------------
         M src/line.c                          |      28 +++-------------------------
         M src/sentence.c                      |      30 ++++--------------------------
         M src/util.c                          |      62 +++++++++++++++++++++++++++++++
         M src/util.h                          |       6 ++++++
         M src/word.c                          |      24 ------------------------
       
       6 files changed, 75 insertions(+), 137 deletions(-)
       ---
 (DIR) diff --git a/src/case.c b/src/case.c
       @@ -33,68 +33,6 @@ get_case_offset(uint_least32_t cp, const uint_least16_t *major,
        }
        
        static inline size_t
       -get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
       -{
       -        if (offset < len) {
       -                *cp = ((const uint_least32_t *)str)[offset];
       -                return 1;
       -        } else {
       -                *cp = GRAPHEME_INVALID_CODEPOINT;
       -                return 0;
       -        }
       -}
       -
       -static inline size_t
       -get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
       -{
       -        size_t ret;
       -
       -        if (offset < len) {
       -                ret = grapheme_decode_utf8((const char *)str + offset,
       -                                           len - offset, cp);
       -
       -                if (unlikely(len == SIZE_MAX && cp == 0)) {
       -                        return 0;
       -                } else {
       -                        return ret;
       -                }
       -        } else {
       -                *cp = GRAPHEME_INVALID_CODEPOINT;
       -                return 0;
       -        }
       -}
       -
       -static inline size_t
       -set_codepoint(uint_least32_t cp, void *str, size_t len, size_t offset)
       -{
       -        if (str == NULL || len == 0) {
       -                return 1;
       -        }
       -
       -        if (offset < len) {
       -                ((uint_least32_t *)str)[offset] = cp;
       -                return 1;
       -        } else {
       -                return 0;
       -        }
       -}
       -
       -static inline size_t
       -set_codepoint_utf8(uint_least32_t cp, void *str, size_t len, size_t offset)
       -{
       -        if (str == NULL || len == 0) {
       -                return grapheme_encode_utf8(cp, NULL, 0);
       -        }
       -
       -        if (offset < len) {
       -                return grapheme_encode_utf8(cp, (char *)str + offset,
       -                                            len - offset);
       -        } else {
       -                return grapheme_encode_utf8(cp, NULL, 0);
       -        }
       -}
       -
       -static inline size_t
        to_case(const void *src, size_t srclen, void *dest, size_t destlen,
                size_t srcnumprocess, uint_least8_t final_sigma_level,
                size_t (*get_codepoint)(const void *, size_t, size_t, uint_least32_t *),
 (DIR) diff --git a/src/line.c b/src/line.c
       @@ -19,30 +19,6 @@ get_break_prop(uint_least32_t cp)
                }
        }
        
       -static inline size_t
       -get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
       -{
       -        if (offset < len) {
       -                *cp = ((const uint_least32_t *)str)[offset];
       -                return 1;
       -        } else {
       -                *cp = GRAPHEME_INVALID_CODEPOINT;
       -                return 0;
       -        }
       -}
       -
       -static inline size_t
       -get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
       -{
       -        if (offset < len) {
       -                return grapheme_decode_utf8((const char *)str + offset,
       -                                            len - offset, cp);
       -        } else {
       -                *cp = GRAPHEME_INVALID_CODEPOINT;
       -                return 0;
       -        }
       -}
       -
        static size_t
        next_line_break(const void *str, size_t len, size_t (*get_codepoint)
                        (const void *, size_t, size_t, uint_least32_t *))
       @@ -152,7 +128,9 @@ next_line_break(const void *str, size_t len, size_t (*get_codepoint)
                                 *     and one (CL | CP) to the left of the middle
                                 *     spot
                                 */
       -                        if (lb25_level == 0 && cp0_prop == LINE_BREAK_PROP_NU) {
       +                        if ((lb25_level == 0 ||
       +                             lb25_level == 1) &&
       +                            cp0_prop == LINE_BREAK_PROP_NU) {
                                        /* sequence has begun */
                                        lb25_level = 1;
                                } else if ((lb25_level == 1 || lb25_level == 2) &&
 (DIR) diff --git a/src/sentence.c b/src/sentence.c
       @@ -20,30 +20,6 @@ get_break_prop(uint_least32_t cp)
                }
        }
        
       -static inline size_t
       -get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
       -{
       -        if (offset < len) {
       -                *cp = ((const uint_least32_t *)str)[offset];
       -                return 1;
       -        } else {
       -                *cp = GRAPHEME_INVALID_CODEPOINT;
       -                return 0;
       -        }
       -}
       -
       -static inline size_t
       -get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
       -{
       -        if (offset < len) {
       -                return grapheme_decode_utf8((const char *)str + offset,
       -                                            len - offset, cp);
       -        } else {
       -                *cp = GRAPHEME_INVALID_CODEPOINT;
       -                return 0;
       -        }
       -}
       -
        static size_t
        next_sentence_break(const void *str, size_t len, size_t (*get_codepoint)
                            (const void *, size_t, size_t, uint_least32_t *))
       @@ -142,7 +118,8 @@ next_sentence_break(const void *str, size_t len, size_t (*get_codepoint)
                                 *     left of the middle spot.
                                 *
                                 */
       -                        if (aterm_close_sp_level == 0 &&
       +                        if ((aterm_close_sp_level == 0 ||
       +                             aterm_close_sp_level == 1) &&
                                    skip.b == SENTENCE_BREAK_PROP_ATERM) {
                                            /* sequence has begun */
                                        aterm_close_sp_level = 1;
       @@ -162,7 +139,8 @@ next_sentence_break(const void *str, size_t len, size_t (*get_codepoint)
                                        aterm_close_sp_level = 0;
                                }
        
       -                        if (saterm_close_sp_parasep_level == 0 &&
       +                        if ((saterm_close_sp_parasep_level == 0 ||
       +                             saterm_close_sp_parasep_level == 1) &&
                                    (skip.b == SENTENCE_BREAK_PROP_STERM ||
                                     skip.b == SENTENCE_BREAK_PROP_ATERM)) {
                                            /* sequence has begun */
 (DIR) diff --git a/src/util.c b/src/util.c
       @@ -6,3 +6,65 @@
        #include "../gen/types.h"
        #include "../grapheme.h"
        #include "util.h"
       +
       +inline size_t
       +get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
       +{
       +        if (offset < len) {
       +                *cp = ((const uint_least32_t *)str)[offset];
       +                return 1;
       +        } else {
       +                *cp = GRAPHEME_INVALID_CODEPOINT;
       +                return 0;
       +        }
       +}
       +
       +inline size_t
       +get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
       +{
       +        size_t ret;
       +
       +        if (offset < len) {
       +                ret = grapheme_decode_utf8((const char *)str + offset,
       +                                           len - offset, cp);
       +
       +                if (unlikely(len == SIZE_MAX && cp == 0)) {
       +                        return 0;
       +                } else {
       +                        return ret;
       +                }
       +        } else {
       +                *cp = GRAPHEME_INVALID_CODEPOINT;
       +                return 0;
       +        }
       +}
       +
       +inline size_t
       +set_codepoint(uint_least32_t cp, void *str, size_t len, size_t offset)
       +{
       +        if (str == NULL || len == 0) {
       +                return 1;
       +        }
       +
       +        if (offset < len) {
       +                ((uint_least32_t *)str)[offset] = cp;
       +                return 1;
       +        } else {
       +                return 0;
       +        }
       +}
       +
       +inline size_t
       +set_codepoint_utf8(uint_least32_t cp, void *str, size_t len, size_t offset)
       +{
       +        if (str == NULL || len == 0) {
       +                return grapheme_encode_utf8(cp, NULL, 0);
       +        }
       +
       +        if (offset < len) {
       +                return grapheme_encode_utf8(cp, (char *)str + offset,
       +                                            len - offset);
       +        } else {
       +                return grapheme_encode_utf8(cp, NULL, 0);
       +        }
       +}
 (DIR) diff --git a/src/util.h b/src/util.h
       @@ -25,4 +25,10 @@
                #define unlikely(expr) (expr)
        #endif
        
       +size_t get_codepoint(const void *, size_t, size_t, uint_least32_t *);
       +size_t get_codepoint_utf8(const void *, size_t, size_t, uint_least32_t *);
       +
       +size_t set_codepoint(uint_least32_t, void *, size_t, size_t);
       +size_t set_codepoint_utf8(uint_least32_t, void *, size_t, size_t);
       +
        #endif /* UTIL_H */
 (DIR) diff --git a/src/word.c b/src/word.c
       @@ -19,30 +19,6 @@ get_break_prop(uint_least32_t cp)
                }
        }
        
       -static inline size_t
       -get_codepoint(const void *str, size_t len, size_t offset, uint_least32_t *cp)
       -{
       -        if (offset < len) {
       -                *cp = ((const uint_least32_t *)str)[offset];
       -                return 1;
       -        } else {
       -                *cp = GRAPHEME_INVALID_CODEPOINT;
       -                return 0;
       -        }
       -}
       -
       -static inline size_t
       -get_codepoint_utf8(const void *str, size_t len, size_t offset, uint_least32_t *cp)
       -{
       -        if (offset < len) {
       -                return grapheme_decode_utf8((const char *)str + offset,
       -                                            len - offset, cp);
       -        } else {
       -                *cp = GRAPHEME_INVALID_CODEPOINT;
       -                return 0;
       -        }
       -}
       -
        static size_t
        next_word_break(const void *str, size_t len, size_t (*get_codepoint)
                        (const void *, size_t, size_t, uint_least32_t *))