grapheme_next_character_break_utf8.3 - libgrapheme - unicode string library
(HTM) git clone git://git.suckless.org/libgrapheme
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
grapheme_next_character_break_utf8.3 (2492B)
---
1 .Dd 2022-08-26
2 .Dt GRAPHEME_NEXT_CHARACTER_BREAK_UTF8 3
3 .Os suckless.org
4 .Sh NAME
5 .Nm grapheme_next_character_break_utf8
6 .Nd determine byte-offset to next grapheme cluster break
7 .Sh SYNOPSIS
8 .In grapheme.h
9 .Ft size_t
10 .Fn grapheme_next_character_break_utf8 "const char *str" "size_t len"
11 .Sh DESCRIPTION
12 The
13 .Fn grapheme_next_character_break_utf8
14 function computes the offset (in bytes) to the next grapheme
15 cluster break (see
16 .Xr libgrapheme 7 )
17 in the UTF-8-encoded string
18 .Va str
19 of length
20 .Va len .
21 If a grapheme cluster begins at
22 .Va str ,
23 this offset is equal to the length of said grapheme cluster.
24 .Pp
25 If
26 .Va len
27 is set to
28 .Dv SIZE_MAX
29 (stdint.h is already included by grapheme.h) the string
30 .Va str
31 is interpreted to be NUL-terminated and processing stops when a
32 NUL-byte is encountered.
33 .Pp
34 For non-UTF-8 input data
35 .Xr grapheme_is_character_break 3
36 and
37 .Xr grapheme_next_character_break 3
38 can be used instead.
39 .Sh RETURN VALUES
40 The
41 .Fn grapheme_next_character_break_utf8
42 function returns the offset (in bytes) to the next grapheme cluster
43 break in
44 .Va str
45 or 0 if
46 .Va str
47 is
48 .Dv NULL .
49 .Sh EXAMPLES
50 .Bd -literal
51 /* cc (-static) -o example example.c -lgrapheme */
52 #include <grapheme.h>
53 #include <stdint.h>
54 #include <stdio.h>
55
56 int
57 main(void)
58 {
59 /* UTF-8 encoded input */
60 char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
61 "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
62 "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
63 "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
64 size_t ret, len, off;
65
66 printf("Input: \\"%s\\"\\n", s);
67
68 /* print each grapheme cluster with byte-length */
69 printf("Grapheme clusters in NUL-delimited input:\\n");
70 for (off = 0; s[off] != '\\0'; off += ret) {
71 ret = grapheme_next_character_break_utf8(s + off, SIZE_MAX);
72 printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
73 }
74 printf("\\n");
75
76 /* do the same, but this time string is length-delimited */
77 len = 17;
78 printf("Grapheme clusters in input delimited to %zu bytes:\\n", len);
79 for (off = 0; off < len; off += ret) {
80 ret = grapheme_next_character_break_utf8(s + off, len - off);
81 printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
82 }
83
84 return 0;
85 }
86 .Ed
87 .Sh SEE ALSO
88 .Xr grapheme_is_character_break 3 ,
89 .Xr grapheme_next_character_break 3 ,
90 .Xr libgrapheme 7
91 .Sh STANDARDS
92 .Fn grapheme_next_character_break_utf8
93 is compliant with the Unicode 14.0.0 specification.
94 .Sh AUTHORS
95 .An Laslo Hunhold Aq Mt dev@frign.de