index.md - sites - public wiki contents of suckless.org
(HTM) git clone git://git.suckless.org/sites
(DIR) Log
(DIR) Files
(DIR) Refs
---
index.md (2347B)
---
1 GRAPHEME\_DECODE\_UTF8(3) - Library Functions Manual
2
3 # NAME
4
5 **grapheme\_decode\_utf8** - decode first codepoint in UTF-8-encoded string
6
7 # SYNOPSIS
8
9 **#include <grapheme.h>**
10
11 *size\_t*
12 **grapheme\_decode\_utf8**(*const char \*str*, *size\_t len*, *uint\_least32\_t \*cp*);
13
14 # DESCRIPTION
15
16 The
17 **grapheme\_decode\_utf8**()
18 function decodes the first codepoint in the UTF-8-encoded string
19 *str*
20 of length
21 *len*.
22 If the UTF-8 sequence is invalid (overlong encoding, unexpected byte,
23 string ends unexpectedly, empty string, etc.), decoding stops
24 at the last processed byte and the decoded codepoint is set to
25 `GRAPHEME_INVALID_CODEPOINT`.
26
27 If
28 *cp*
29 is not
30 `NULL`
31 the decoded codepoint is stored in the memory pointed to by
32 *cp*.
33
34 Given that NUL has a unique 1-byte representation, it is safe to operate on
35 NUL-terminated strings by setting
36 *len*
37 to
38 `SIZE_MAX`
39 (stdint.h is already included by grapheme.h) and terminating when the codepoint stored in
40 *cp*
41 is 0 (see
42 *EXAMPLES*
43 for an example).
44
45 # RETURN VALUES
46
47 The
48 **grapheme\_decode\_utf8**()
49 function returns the number of processed bytes, or 0 if
50 *str*
51 is
52 `NULL`
53 or
54 *len*
55 is 0.
56 If the string ends unexpectedly in the middle of a multibyte sequence, the
57 length the full sequence would require (which is larger than
58 *len*)
59 is returned.
60
61 # EXAMPLES
62
63 /* cc (-static) -o example example.c -lgrapheme */
64 #include <grapheme.h>
65 #include <inttypes.h>
66 #include <stdio.h>
67
68 void
69 print_cps(const char *str, size_t len)
70 {
71 size_t ret, off;
72 uint_least32_t cp;
73
74 for (off = 0; off < len; off += ret) {
75 if ((ret = grapheme_decode_utf8(str + off,
76 len - off, &cp)) > (len - off)) {
77 /*
78 * string ended unexpectedly in the middle of a
79 * multibyte sequence and we have the choice
80 * here to possibly expand str by ret - len + off
81 * bytes to get a full sequence, but we just
82 * bail out in this case.
83 */
84 break;
85 }
86 printf("%"PRIxLEAST32"\n", cp);
87 }
88 }
89
90 void
91 print_cps_nul_terminated(const char *str)
92 {
93 size_t ret, off;
94 uint_least32_t cp;
95
96 for (off = 0; (ret = grapheme_decode_utf8(str + off,
97 SIZE_MAX, &cp)) > 0 &&
98 cp != 0; off += ret) {
99 printf("%"PRIxLEAST32"\n", cp);
100 }
101 }
102
103 # SEE ALSO
104
105 grapheme\_encode\_utf8(3),
106 libgrapheme(7)
107
108 # AUTHORS
109
110 Laslo Hunhold ([dev@frign.de](mailto:dev@frign.de))
111
112 suckless.org - 2022-10-06