index.md - sites - public wiki contents of suckless.org
 (HTM) git clone git://git.suckless.org/sites
 (DIR) Log
 (DIR) Files
 (DIR) Refs
       ---
       index.md (2347B)
       ---
            1 GRAPHEME\_DECODE\_UTF8(3) - Library Functions Manual
            2 
            3 # NAME
            4 
            5 **grapheme\_decode\_utf8** - decode first codepoint in UTF-8-encoded string
            6 
            7 # SYNOPSIS
            8 
            9 **#include <grapheme.h>**
           10 
           11 *size\_t*  
           12 **grapheme\_decode\_utf8**(*const char \*str*, *size\_t len*, *uint\_least32\_t \*cp*);
           13 
           14 # DESCRIPTION
           15 
           16 The
           17 **grapheme\_decode\_utf8**()
           18 function decodes the first codepoint in the UTF-8-encoded string
           19 *str*
           20 of length
           21 *len*.
           22 If the UTF-8 sequence is invalid (overlong encoding, unexpected byte,
           23 string ends unexpectedly, empty string, etc.), decoding stops
           24 at the last processed byte and the decoded codepoint is set to
           25 `GRAPHEME_INVALID_CODEPOINT`.
           26 
           27 If
           28 *cp*
           29 is not
           30 `NULL`
           31 the decoded codepoint is stored in the memory pointed to by
           32 *cp*.
           33 
           34 Given that NUL has a unique 1-byte representation, it is safe to operate on
           35 NUL-terminated strings by setting
           36 *len*
           37 to
           38 `SIZE_MAX`
           39 (stdint.h is already included by grapheme.h) and terminating when
           40 the codepoint stored in *cp*
           41 is 0 (see
           42 *EXAMPLES*
           43 for an example).
           44 
           45 # RETURN VALUES
           46 
           47 The
           48 **grapheme\_decode\_utf8**()
           49 function returns the number of processed bytes, or 0 if
           50 *str*
           51 is
           52 `NULL`
           53 or
           54 *len*
           55 is 0.
           56 If the string ends unexpectedly in a multibyte sequence, the required
           57 length (which is larger than
           58 *len*)
           59 is returned.
           60 
           61 # EXAMPLES
           62 
           63         /* cc (-static) -o example example.c -lgrapheme */
           64         #include <grapheme.h>
           65         #include <inttypes.h>
           66         #include <stdio.h>
           67         
           68         void
           69         print_cps(const char *str, size_t len)
           70         {
           71                 size_t ret, off;
           72                 uint_least32_t cp;
           73         
           74                 for (off = 0; off < len; off += ret) {
           75                         if ((ret = grapheme_decode_utf8(str + off,
           76                                                         len - off, &cp)) > (len - off)) {
           77                                 /*
           78                                  * string ended unexpectedly in the middle of a
           79                                  * multibyte sequence and we have the choice
           80                                  * here to possibly expand str by ret - len + off
           81                                  * bytes to get a full sequence, but we just
           82                                  * bail out in this case.
           83                                  */
           84                                 break;
           85                         }
           86                         printf("%"PRIxLEAST32"\n", cp);
           87                 }
           88         }
           89         
           90         void
           91         print_cps_nul_terminated(const char *str)
           92         {
           93                 size_t ret, off;
           94                 uint_least32_t cp;
           95         
           96                 for (off = 0; (ret = grapheme_decode_utf8(str + off,
           97                                                           SIZE_MAX, &cp)) > 0 &&
           98                      cp != 0; off += ret) {
           99                         printf("%"PRIxLEAST32"\n", cp);
          100                 }
          101         }
          102 
          103 # SEE ALSO
          104 
          105 grapheme\_encode\_utf8(3),
          106 libgrapheme(7)
          107 
          108 # AUTHORS
          109 
          110 Laslo Hunhold ([dev@frign.de](mailto:dev@frign.de))
          111 
          112 suckless.org - 2022-10-06