Compile the library in freestanding mode - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
 (DIR) commit 8a7e2ee85f0a2824e48e85e57534c5b18113cf07
 (DIR) parent 9f15d7eb0c9cf216f069d6972c58520013b80acb
 (HTM) Author: Laslo Hunhold <dev@frign.de>
       Date:   Sat, 24 Sep 2022 01:54:52 +0200
       
       Compile the library in freestanding mode
       
       Looking closely, we never explicitly depend on the standard library
       within the actual library code. This can be explicitly expressed by
       setting -ffreestanding during object-compilation and -nostdlib during
       linking. The result is a clean library with zero libc-symbols, allowing
       it to be used even without an operating system (kernel code, ELF,
       etc.), by making use of the freestanding implementation form defined
       in the standard[0].
       
       To be freestanding, the code may only include <float.h>, <iso646.h>,
       <limits.h>, <stdalign.h>, <stdarg.h>, <stdbool.h>, <stddef.h>,
       <stdint.h> and <stdnoreturn.h>. We satisfy this condition implicitly,
       but there are some erroneous supplementary includes that are removed
       in this commit. Additionally, the strict compiler-implementation simply
       adds the U-suffix to the argument of UINT16_C (et al.), which is why
       calls to it have to be changed to really include only constants.
       
       [0]:https://www.iso-9899.info/n1570.html#4.p6
       
       Signed-off-by: Laslo Hunhold <dev@frign.de>
       
       Diffstat:
         M config.mk                           |       4 ++--
         M src/character.c                     |     139 +++++++++++++++----------------
         M src/line.c                          |       2 --
         M src/sentence.c                      |       2 --
         M src/utf8.c                          |       3 ++-
         M src/util.c                          |      11 +++++++++--
         M src/word.c                          |       2 --
       
       7 files changed, 82 insertions(+), 81 deletions(-)
       ---
 (DIR) diff --git a/config.mk b/config.mk
       @@ -15,8 +15,8 @@ BUILD_CPPFLAGS = $(CPPFLAGS)
        BUILD_CFLAGS   = $(CFLAGS)
        BUILD_LDFLAGS  = $(LDFLAGS)
        
       -SHFLAGS  = -fPIC
       -SOFLAGS  = -shared -Wl,--soname=libgrapheme.so
       +SHFLAGS  = -fPIC -ffreestanding
       +SOFLAGS  = -shared -nostdlib -Wl,--soname=libgrapheme.so
        
        # tools
        CC       = cc
 (DIR) diff --git a/src/character.c b/src/character.c
       @@ -1,8 +1,7 @@
        /* See LICENSE file for copyright and license details. */
       +#include <limits.h>
        #include <stdbool.h>
        #include <stddef.h>
       -#include <stdlib.h>
       -#include <string.h>
        
        #include "../gen/character.h"
        #include "../grapheme.h"
       @@ -10,96 +9,96 @@
        
        static const uint_least16_t dont_break[NUM_CHAR_BREAK_PROPS] = {
                [CHAR_BREAK_PROP_OTHER] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
                [CHAR_BREAK_PROP_CR] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_LF),            /* GB3  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_LF,            /* GB3  */
                [CHAR_BREAK_PROP_EXTEND] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
                [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
                [CHAR_BREAK_PROP_HANGUL_L] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_L)     | /* GB6  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V)     | /* GB6  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_LV)    | /* GB6  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_LVT)   | /* GB6  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_L     | /* GB6  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V     | /* GB6  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LV    | /* GB6  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_LVT   | /* GB6  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
                [CHAR_BREAK_PROP_HANGUL_V] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V)     | /* GB7  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB7  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V     | /* GB7  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB7  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
                [CHAR_BREAK_PROP_HANGUL_T] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB8  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB8  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
                [CHAR_BREAK_PROP_HANGUL_LV] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_V)     | /* GB7  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB7  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_V     | /* GB7  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB7  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
                [CHAR_BREAK_PROP_HANGUL_LVT] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_HANGUL_T)     | /* GB8  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_HANGUL_T     | /* GB8  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
                [CHAR_BREAK_PROP_PREPEND] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK)  | /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK  | /* GB9a */
                        (UINT16_C(0xFFFF) &
       -                 ~(UINT16_C(1 << CHAR_BREAK_PROP_CR)      |
       -                   UINT16_C(1 << CHAR_BREAK_PROP_LF)      |
       -                   UINT16_C(1 << CHAR_BREAK_PROP_CONTROL)
       +                 ~(UINT16_C(1) << CHAR_BREAK_PROP_CR      |
       +                   UINT16_C(1) << CHAR_BREAK_PROP_LF      |
       +                   UINT16_C(1) << CHAR_BREAK_PROP_CONTROL
                          )
                        ),                                           /* GB9b */
                [CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
                [CHAR_BREAK_PROP_SPACINGMARK] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
                [CHAR_BREAK_PROP_ZWJ] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)       | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)          | /* GB9  */
       -                UINT16_C(1 << CHAR_BREAK_PROP_SPACINGMARK),   /* GB9a */
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND       | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ          | /* GB9  */
       +                UINT16_C(1) << CHAR_BREAK_PROP_SPACINGMARK,   /* GB9a */
        };
        static const uint_least16_t flag_update_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
                [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)                   |
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND),
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ                   |
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
                [CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC),
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
                [CHAR_BREAK_PROP_EXTEND + NUM_CHAR_BREAK_PROPS] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND)                |
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ),
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND                |
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ,
                [CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC + NUM_CHAR_BREAK_PROPS] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_ZWJ)                   |
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTEND),
       +                UINT16_C(1) << CHAR_BREAK_PROP_ZWJ                   |
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTEND,
        };
        static const uint_least16_t dont_break_gb11[2 * NUM_CHAR_BREAK_PROPS] = {
                [CHAR_BREAK_PROP_ZWJ + NUM_CHAR_BREAK_PROPS] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC),
       +                UINT16_C(1) << CHAR_BREAK_PROP_EXTENDED_PICTOGRAPHIC,
        };
        static const uint_least16_t flag_update_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
                [CHAR_BREAK_PROP_REGIONAL_INDICATOR] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_REGIONAL_INDICATOR),
       +                UINT16_C(1) << CHAR_BREAK_PROP_REGIONAL_INDICATOR,
        };
        static const uint_least16_t dont_break_gb12_13[2 * NUM_CHAR_BREAK_PROPS] = {
                [CHAR_BREAK_PROP_REGIONAL_INDICATOR + NUM_CHAR_BREAK_PROPS] =
       -                UINT16_C(1 << CHAR_BREAK_PROP_REGIONAL_INDICATOR),
       +                UINT16_C(1) << CHAR_BREAK_PROP_REGIONAL_INDICATOR,
        };
        
        static inline enum char_break_property
       @@ -135,23 +134,23 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, GRAPHEME_STA
                        state->gb11_flag =
                                flag_update_gb11[cp0_prop + NUM_CHAR_BREAK_PROPS *
                                                 state->gb11_flag] &
       -                        UINT16_C(1 << cp1_prop);
       +                        UINT16_C(1) << cp1_prop;
                        state->gb12_13_flag =
                                flag_update_gb12_13[cp0_prop + NUM_CHAR_BREAK_PROPS *
                                                    state->gb12_13_flag] &
       -                        UINT16_C(1 << cp1_prop);
       +                        UINT16_C(1) << cp1_prop;
        
                        /*
                         * Apply grapheme cluster breaking algorithm (UAX #29), see
                         * http://unicode.org/reports/tr29/#Grapheme_Cluster_Boundary_Rules
                         */
       -                notbreak = (dont_break[cp0_prop] & UINT16_C(1 << cp1_prop)) ||
       +                notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
                                   (dont_break_gb11[cp0_prop + state->gb11_flag *
                                                    NUM_CHAR_BREAK_PROPS] &
       -                            UINT16_C(1 << cp1_prop)) ||
       +                            (UINT16_C(1) << cp1_prop)) ||
                                   (dont_break_gb12_13[cp0_prop + state->gb12_13_flag *
                                                       NUM_CHAR_BREAK_PROPS] &
       -                            UINT16_C(1 << cp1_prop));
       +                            (UINT16_C(1) << cp1_prop));
        
                        /* update or reset flags (when we have a break) */
                        if (likely(!notbreak)) {
       @@ -168,9 +167,9 @@ grapheme_is_character_break(uint_least32_t cp0, uint_least32_t cp1, GRAPHEME_STA
                         * Given we have no state, this behaves as if the state-booleans
                         * were all set to false
                         */
       -                notbreak = (dont_break[cp0_prop] & UINT16_C(1 << cp1_prop)) ||
       -                           (dont_break_gb11[cp0_prop] & UINT16_C(1 << cp1_prop)) ||
       -                           (dont_break_gb12_13[cp0_prop] & UINT16_C(1 << cp1_prop));
       +                notbreak = (dont_break[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
       +                           (dont_break_gb11[cp0_prop] & (UINT16_C(1) << cp1_prop)) ||
       +                           (dont_break_gb12_13[cp0_prop] & (UINT16_C(1) << cp1_prop));
                }
        
                return !notbreak;
 (DIR) diff --git a/src/line.c b/src/line.c
       @@ -1,8 +1,6 @@
        /* See LICENSE file for copyright and license details. */
        #include <stdbool.h>
        #include <stddef.h>
       -#include <stdlib.h>
       -#include <string.h>
        
        #include "../gen/line.h"
        #include "../grapheme.h"
 (DIR) diff --git a/src/sentence.c b/src/sentence.c
       @@ -1,8 +1,6 @@
        /* See LICENSE file for copyright and license details. */
        #include <stdbool.h>
        #include <stddef.h>
       -#include <stdlib.h>
       -#include <string.h>
        
        #include "../gen/sentence.h"
        #include "../grapheme.h"
 (DIR) diff --git a/src/utf8.c b/src/utf8.c
       @@ -1,5 +1,6 @@
        /* See LICENSE file for copyright and license details. */
       -#include <stdio.h>
       +#include <stddef.h>
       +#include <stdint.h>
        
        #include "../grapheme.h"
        #include "util.h"
 (DIR) diff --git a/src/util.c b/src/util.c
       @@ -1,7 +1,8 @@
        /* See LICENSE file for copyright and license details. */
       +#include <limits.h>
        #include <stdbool.h>
       +#include <stddef.h>
        #include <stdint.h>
       -#include <stdlib.h>
        
        #include "../gen/types.h"
        #include "../grapheme.h"
       @@ -88,6 +89,12 @@ herodotus_reader_next_codepoint_break(const HERODOTUS_READER *r)
                }
        }
        
       +size_t
       +herodotus_reader_number_read(const HERODOTUS_READER *r)
       +{
       +        return r->off;
       +}
       +
        enum herodotus_status
        herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp)
        {
       @@ -202,7 +209,7 @@ herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
        }
        
        size_t
       -herodotus_writer_number_written(HERODOTUS_WRITER *w)
       +herodotus_writer_number_written(const HERODOTUS_WRITER *w)
        {
                return w->off;
        }
 (DIR) diff --git a/src/word.c b/src/word.c
       @@ -1,8 +1,6 @@
        /* See LICENSE file for copyright and license details. */
        #include <stdbool.h>
        #include <stddef.h>
       -#include <stdlib.h>
       -#include <string.h>
        
        #include "../gen/word.h"
        #include "../grapheme.h"