grapheme_decode_utf8.3 - libgrapheme - unicode string library
(HTM) git clone git://git.suckless.org/libgrapheme
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
grapheme_decode_utf8.3 (2302B)
---
1 .Dd 2022-08-26
2 .Dt GRAPHEME_DECODE_UTF8 3
3 .Os suckless.org
4 .Sh NAME
5 .Nm grapheme_decode_utf8
6 .Nd decode first codepoint in UTF-8-encoded string
7 .Sh SYNOPSIS
8 .In grapheme.h
9 .Ft size_t
10 .Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp"
11 .Sh DESCRIPTION
12 The
13 .Fn grapheme_decode_utf8
14 function decodes the first codepoint in the UTF-8-encoded string
15 .Va str
16 of length
17 .Va len .
18 If the UTF-8 sequence is invalid (overlong encoding, unexpected byte,
19 string ends unexpectedly, empty string, etc.), decoding stops
20 at the last processed byte and the decoded codepoint is set to
21 .Dv GRAPHEME_INVALID_CODEPOINT .
22 .Pp
23 If
24 .Va cp
25 is not
26 .Dv NULL ,
27 the decoded codepoint is stored in the memory pointed to by
28 .Va cp .
29 .Pp
30 Given that NUL has a unique 1-byte representation, it is safe to operate on
31 NUL-terminated strings by setting
32 .Va len
33 to
34 .Dv SIZE_MAX
35 (stdint.h is already included by grapheme.h) and terminating when the codepoint stored in
36 .Va cp
37 is 0 (see
38 .Sx EXAMPLES
39 for an example).
40 .Sh RETURN VALUES
41 The
42 .Fn grapheme_decode_utf8
43 function returns the number of processed bytes, or 0 if
44 .Va str
45 is
46 .Dv NULL
47 or
48 .Va len
49 is 0.
50 If the string ends unexpectedly in a multibyte sequence, the desired
51 length (which is larger than
52 .Va len )
53 is returned.
54 .Sh EXAMPLES
55 .Bd -literal
56 /* cc (-static) -o example example.c -lgrapheme */
57 #include <grapheme.h>
58 #include <inttypes.h>
59 #include <stdio.h>
60
61 void
62 print_cps(const char *str, size_t len)
63 {
64 size_t ret, off;
65 uint_least32_t cp;
66
67 for (off = 0; off < len; off += ret) {
68 if ((ret = grapheme_decode_utf8(str + off,
69 len - off, &cp)) > (len - off)) {
70 /*
71 * string ended unexpectedly in the middle of a
72 * multibyte sequence and we have the choice
73 * here to possibly expand str by ret - len + off
74 * bytes to get a full sequence, but we just
75 * bail out in this case.
76 */
77 break;
78 }
79 printf("%"PRIxLEAST32"\\n", cp);
80 }
81 }
82
83 void
84 print_cps_nul_terminated(const char *str)
85 {
86 size_t ret, off;
87 uint_least32_t cp;
88
89 for (off = 0; (ret = grapheme_decode_utf8(str + off,
90 SIZE_MAX, &cp)) > 0 &&
91 cp != 0; off += ret) {
92 printf("%"PRIxLEAST32"\\n", cp);
93 }
94 }
95 .Ed
96 .Sh SEE ALSO
97 .Xr grapheme_encode_utf8 3 ,
98 .Xr libgrapheme 7
99 .Sh AUTHORS
100 .An Laslo Hunhold Aq Mt dev@frign.de