util.h - libgrapheme - unicode string library
(HTM) git clone git://git.suckless.org/libgrapheme
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
util.h (3691B)
---
1 /* See LICENSE file for copyright and license details. */
2 #ifndef UTIL_H
3 #define UTIL_H
4
5 #include <stdbool.h>
6 #include <stddef.h>
7 #include <stdint.h>
8
9 #include "../gen/types.h"
10 #include "../grapheme.h"
11
/*
 * Small generic helpers. Any previous definitions (e.g. from system
 * headers) are dropped first so that ours always take effect.
 *
 * MIN/MAX evaluate their arguments more than once; do not pass
 * expressions with side effects.
 * LEN yields the element count of a true array (not of a pointer).
 */
#undef MIN
#undef MAX
#undef LEN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define LEN(arr) (sizeof(arr) / sizeof((arr)[0]))
18
/*
 * Branch-prediction hints.
 *
 * When the compiler advertises __builtin_expect via __has_builtin, the
 * hint is forwarded to it. Otherwise the macros degrade to a plain
 * truth test. In both cases the operand is normalized with !! so that
 * likely(x)/unlikely(x) always evaluate to 0 or 1, regardless of which
 * branch of this preprocessor block was taken.
 *
 * The __has_builtin test is nested rather than combined with && because
 * a preprocessor that does not know __has_builtin would fail to parse
 * "__has_builtin(...)" in the same #if expression.
 */
#undef likely
#undef unlikely
#ifdef __has_builtin
#if __has_builtin(__builtin_expect)
#define likely(expr) __builtin_expect(!!(expr), 1)
#define unlikely(expr) __builtin_expect(!!(expr), 0)
#endif
#endif
/* single fallback for every compiler that did not get the builtin above */
#ifndef likely
#define likely(expr) (!!(expr))
#define unlikely(expr) (!!(expr))
#endif
33
/*
 * Herodotus, the ancient Greek historian and geographer,
 * was criticized for including legends and other fantastic
 * accounts in his works, among others by his contemporary
 * Thucydides.
 *
 * The Herodotus readers and writers are tailored towards the needs
 * of the library interface, doing all the dirty work behind the
 * scenes. While the reader is relatively faithful in his accounts,
 * the Herodotus writer will never fail and always claim to write the
 * data. Internally, it only writes as much as it can, and will simply
 * keep account of the rest. This way, we can properly signal truncation.
 *
 * This explains the naming: the writer is always a bit inaccurate
 * in his accounts.
 */
/*
 * Status codes of the Herodotus layer; this is the return type of
 * herodotus_read_codepoint(). The values are spelled out explicitly
 * and are the same ones the compiler would assign implicitly.
 *
 * NOTE(review): the per-constant remarks are inferred from the
 * constant names only -- confirm against the implementation.
 */
enum herodotus_status {
	HERODOTUS_STATUS_SUCCESS            = 0,
	HERODOTUS_STATUS_END_OF_BUFFER      = 1, /* presumably: input exhausted */
	HERODOTUS_STATUS_SOFT_LIMIT_REACHED = 2, /* presumably: a pushed limit was hit */
};
56
/*
 * Kind of buffer a reader or writer operates on; stored in the `type`
 * field of both and passed to their init functions.
 *
 * NOTE(review): presumably CODEPOINT means an array of codepoints and
 * UTF8 means UTF-8 encoded bytes -- confirm against the implementation.
 */
enum herodotus_type {
	HERODOTUS_TYPE_CODEPOINT = 0,
	HERODOTUS_TYPE_UTF8      = 1,
};
61
62 typedef struct herodotus_reader {
63 enum herodotus_type type;
64 const void *src;
65 size_t srclen;
66 size_t off;
67 bool terminated_by_null;
68 size_t soft_limit[10];
69 } HERODOTUS_READER;
70
71 typedef struct herodotus_writer {
72 enum herodotus_type type;
73 void *dest;
74 size_t destlen;
75 size_t off;
76 size_t first_unwritable_offset;
77 } HERODOTUS_WRITER;
78
79 struct proper {
80 /*
81 * prev_prop[1] prev_prop[0] | next_prop[0] next_prop[1]
82 */
83 struct {
84 uint_least8_t prev_prop[2];
85 uint_least8_t next_prop[2];
86 } raw, skip;
87
88 HERODOTUS_READER mid_reader, raw_reader, skip_reader;
89 void *state;
90 uint_least8_t no_prop;
91 uint_least8_t (*get_break_prop)(uint_least32_t);
92 bool (*is_skippable_prop)(uint_least8_t);
93 void (*skip_shift_callback)(uint_least8_t, void *);
94 };
95
96 void herodotus_reader_init(HERODOTUS_READER *, enum herodotus_type,
97 const void *, size_t);
98 void herodotus_reader_copy(const HERODOTUS_READER *, HERODOTUS_READER *);
99 void herodotus_reader_push_advance_limit(HERODOTUS_READER *, size_t);
100 void herodotus_reader_pop_limit(HERODOTUS_READER *);
101 size_t herodotus_reader_number_read(const HERODOTUS_READER *);
102 size_t herodotus_reader_next_word_break(const HERODOTUS_READER *);
103 size_t herodotus_reader_next_codepoint_break(const HERODOTUS_READER *);
104 enum herodotus_status herodotus_read_codepoint(HERODOTUS_READER *, bool,
105 uint_least32_t *);
106
107 void herodotus_writer_init(HERODOTUS_WRITER *, enum herodotus_type, void *,
108 size_t);
109 void herodotus_writer_nul_terminate(HERODOTUS_WRITER *);
110 size_t herodotus_writer_number_written(const HERODOTUS_WRITER *);
111 void herodotus_write_codepoint(HERODOTUS_WRITER *, uint_least32_t);
112
113 void proper_init(const HERODOTUS_READER *, void *, uint_least8_t,
114 uint_least8_t (*get_break_prop)(uint_least32_t),
115 bool (*is_skippable_prop)(uint_least8_t),
116 void (*skip_shift_callback)(uint_least8_t, void *),
117 struct proper *);
118 int proper_advance(struct proper *);
119
120 #endif /* UTIL_H */