grapheme_next_character_break_utf8.3 - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       grapheme_next_character_break_utf8.3 (2492B)
       ---
            1 .Dd 2022-08-26
            2 .Dt GRAPHEME_NEXT_CHARACTER_BREAK_UTF8 3
            3 .Os suckless.org
            4 .Sh NAME
            5 .Nm grapheme_next_character_break_utf8
            6 .Nd determine byte-offset to next grapheme cluster break
            7 .Sh SYNOPSIS
            8 .In grapheme.h
            9 .Ft size_t
           10 .Fn grapheme_next_character_break_utf8 "const char *str" "size_t len"
           11 .Sh DESCRIPTION
           12 The
           13 .Fn grapheme_next_character_break_utf8
           14 function computes the offset (in bytes) to the next grapheme
           15 cluster break (see
           16 .Xr libgrapheme 7 )
           17 in the UTF-8-encoded string
           18 .Va str
           19 of length
           20 .Va len .
           21 If a grapheme cluster begins at
           22 .Va str ,
           23 this offset is equal to the length of said grapheme cluster.
           24 .Pp
           25 If
           26 .Va len
           27 is set to
           28 .Dv SIZE_MAX
           29 (stdint.h is already included by grapheme.h) the string
           30 .Va str
           31 is interpreted to be NUL-terminated and processing stops when a
           32 NUL-byte is encountered.
           33 .Pp
           34 For non-UTF-8 input data
           35 .Xr grapheme_is_character_break 3
           36 and
           37 .Xr grapheme_next_character_break 3
           38 can be used instead.
           39 .Sh RETURN VALUES
           40 The
           41 .Fn grapheme_next_character_break_utf8
           42 function returns the offset (in bytes) to the next grapheme cluster
           43 break in
           44 .Va str
           45 or 0 if
           46 .Va str
           47 is
           48 .Dv NULL .
           49 .Sh EXAMPLES
           50 .Bd -literal
           51 /* cc (-static) -o example example.c -lgrapheme */
           52 #include <grapheme.h>
           53 #include <stdint.h>
           54 #include <stdio.h>
           55 
           56 int
           57 main(void)
           58 {
           59         /* UTF-8 encoded input */
           60         char *s = "T\\xC3\\xABst \\xF0\\x9F\\x91\\xA8\\xE2\\x80\\x8D\\xF0"
           61                   "\\x9F\\x91\\xA9\\xE2\\x80\\x8D\\xF0\\x9F\\x91\\xA6 \\xF0"
           62                   "\\x9F\\x87\\xBA\\xF0\\x9F\\x87\\xB8 \\xE0\\xA4\\xA8\\xE0"
           63                   "\\xA5\\x80 \\xE0\\xAE\\xA8\\xE0\\xAE\\xBF!";
           64         size_t ret, len, off;
           65 
           66         printf("Input: \\"%s\\"\\n", s);
           67 
           68         /* print each grapheme cluster with byte-length */
           69         printf("Grapheme clusters in NUL-delimited input:\\n");
           70         for (off = 0; s[off] != '\\0'; off += ret) {
           71                 ret = grapheme_next_character_break_utf8(s + off, SIZE_MAX);
           72                 printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
           73         }
           74         printf("\\n");
           75 
           76         /* do the same, but this time string is length-delimited */
           77         len = 17;
           78         printf("Grapheme clusters in input delimited to %zu bytes:\\n", len);
           79         for (off = 0; off < len; off += ret) {
           80                 ret = grapheme_next_character_break_utf8(s + off, len - off);
           81                 printf("%2zu bytes | %.*s\\n", ret, (int)ret, s + off);
           82         }
           83 
           84         return 0;
           85 }
           86 .Ed
           87 .Sh SEE ALSO
           88 .Xr grapheme_is_character_break 3 ,
           89 .Xr grapheme_next_character_break 3 ,
           90 .Xr libgrapheme 7
           91 .Sh STANDARDS
           92 .Fn grapheme_next_character_break_utf8
           93 is compliant with the Unicode 14.0.0 specification.
           94 .Sh AUTHORS
           95 .An Laslo Hunhold Aq Mt dev@frign.de