grapheme_decode_utf8.3 - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       grapheme_decode_utf8.3 (2302B)
       ---
            1 .Dd 2022-08-26
            2 .Dt GRAPHEME_DECODE_UTF8 3
            3 .Os suckless.org
            4 .Sh NAME
            5 .Nm grapheme_decode_utf8
            6 .Nd decode first codepoint in UTF-8-encoded string
            7 .Sh SYNOPSIS
            8 .In grapheme.h
            9 .Ft size_t
           10 .Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp"
           11 .Sh DESCRIPTION
           12 The
           13 .Fn grapheme_decode_utf8
           14 function decodes the first codepoint in the UTF-8-encoded string
           15 .Va str
           16 of length
           17 .Va len .
           18 If the UTF-8 sequence is invalid (overlong encoding, unexpected byte,
           19 string ends unexpectedly, empty string, etc.), decoding stops
           20 at the last processed byte and the decoded codepoint is set to
           21 .Dv GRAPHEME_INVALID_CODEPOINT .
           22 .Pp
           23 If
           24 .Va cp
           25 is not
           26 .Dv NULL
           27 the decoded codepoint is stored in the memory pointed to by
           28 .Va cp .
           29 .Pp
           30 Given that NUL has a unique 1-byte representation, it is safe to operate on
           31 NUL-terminated strings by setting
           32 .Va len
           33 to
           34 .Dv SIZE_MAX
           35 (stdint.h is already included by grapheme.h) and terminating when
           36 .Va cp
           37 is 0 (see
           38 .Sx EXAMPLES
           39 for an example).
           40 .Sh RETURN VALUES
           41 The
           42 .Fn grapheme_decode_utf8
           43 function returns the number of processed bytes, or 0 if
           44 .Va str
           45 is
           46 .Dv NULL
           47 or
           48 .Va len
           49 is 0.
           50 If the string ends unexpectedly in a multibyte sequence, the required
           51 length (which is larger than
           52 .Va len )
           53 is returned.
           54 .Sh EXAMPLES
           55 .Bd -literal
           56 /* cc (-static) -o example example.c -lgrapheme */
           57 #include <grapheme.h>
           58 #include <inttypes.h>
           59 #include <stdio.h>
           60 
           61 void
           62 print_cps(const char *str, size_t len)
           63 {
           64         size_t ret, off;
           65         uint_least32_t cp;
           66 
           67         for (off = 0; off < len; off += ret) {
           68                 if ((ret = grapheme_decode_utf8(str + off,
           69                                                 len - off, &cp)) > (len - off)) {
           70                         /*
           71                          * string ended unexpectedly in the middle of a
           72                          * multibyte sequence and we have the choice
           73                          * here to possibly expand str by ret - len + off
           74                          * bytes to get a full sequence, but we just
           75                          * bail out in this case.
           76                          */
           77                         break;
           78                 }
           79                 printf("%"PRIxLEAST32"\\n", cp);
           80         }
           81 }
           82 
           83 void
           84 print_cps_nul_terminated(const char *str)
           85 {
           86         size_t ret, off;
           87         uint_least32_t cp;
           88 
           89         for (off = 0; (ret = grapheme_decode_utf8(str + off,
           90                                                   SIZE_MAX, &cp)) > 0 &&
           91              cp != 0; off += ret) {
           92                 printf("%"PRIxLEAST32"\\n", cp);
           93         }
           94 }
           95 .Ed
           96 .Sh SEE ALSO
           97 .Xr grapheme_encode_utf8 3 ,
           98 .Xr libgrapheme 7
           99 .Sh AUTHORS
          100 .An Laslo Hunhold Aq Mt dev@frign.de