next_break.sh - libgrapheme - unicode string library
 (HTM) git clone git://git.suckless.org/libgrapheme
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       next_break.sh (3397B)
       ---
            1 if [ "$ENCODING" = "utf8" ]; then
            2         UNIT="byte"
            3         SUFFIX="_utf8"
            4         ANTISUFFIX=""
            5 else
            6         UNIT="codepoint"
            7         SUFFIX=""
            8         ANTISUFFIX="_utf8"
            9 fi
           10 
           11 cat << EOF
           12 .Dd ${MAN_DATE}
           13 .Dt GRAPHEME_NEXT_$(printf "%s_break%s" "$TYPE" "$SUFFIX" | tr [:lower:] [:upper:]) 3
           14 .Os suckless.org
           15 .Sh NAME
           16 .Nm grapheme_next_${TYPE}_break${SUFFIX}
           17 .Nd determine ${UNIT}-offset to next ${REALTYPE} break
           18 .Sh SYNOPSIS
           19 .In grapheme.h
           20 .Ft size_t
           21 .Fn grapheme_next_${TYPE}_break${SUFFIX} "const $(if [ "$ENCODING" = "utf8" ]; then printf "char"; else printf "uint_least32_t"; fi) *str" "size_t len"
           22 .Sh DESCRIPTION
           23 The
           24 .Fn grapheme_next_${TYPE}_break${SUFFIX}
           25 function computes the offset (in ${UNIT}s) to the next ${REALTYPE}
           26 break (see
           27 .Xr libgrapheme 7 )
           28 in the $(if [ "$ENCODING" = "utf8" ]; then printf "UTF-8-encoded string"; else printf "codepoint array"; fi)
           29 .Va str
           30 of length
           31 .Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a ${REALTYPE} begins at
           32 .Va str
           33 this offset is equal to the length of said ${REALTYPE}."; fi)
           34 .Pp
           35 If
           36 .Va len
           37 is set to
           38 .Dv SIZE_MAX
           39 (stdint.h is already included by grapheme.h) the string
           40 .Va str
           41 is interpreted to be NUL-terminated and processing stops when
           42 a $(if [ "$ENCODING" = "utf8" ]; then printf "NUL-byte"; else printf "codepoint with the value 0"; fi) is encountered.
           43 .Pp
           44 For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input
           45 data$(if [ "$TYPE" = "character" ] && [ "$ENCODING" = "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 and"; fi)
           46 .Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3
           47 can be used instead.
           48 .Sh RETURN VALUES
           49 The
           50 .Fn grapheme_next_${TYPE}_break${SUFFIX}
           51 function returns the offset (in ${UNIT}s) to the next ${REALTYPE}
           52 break in
           53 .Va str
           54 or 0 if
           55 .Va str
           56 is
           57 .Dv NULL .
           58 EOF
           59 
           60 if [ "$ENCODING" = "utf8" ]; then
           61 cat << EOF
           62 .Sh EXAMPLES
           63 .Bd -literal
           64 /* cc (-static) -o example example.c -lgrapheme */
           65 #include <grapheme.h>
           66 #include <stdint.h>
           67 #include <stdio.h>
           68 
           69 int
           70 main(void)
           71 {
           72         /* UTF-8 encoded input */
           73         char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
           74                   "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
           75                   "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
           76                   "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
           77         size_t ret, len, off;
           78 
           79         printf("Input: \\\\"%s\\\\"\\\\n", s);
           80 
           81         /* print each ${REALTYPE} with byte-length */
           82         printf("${REALTYPE}s in NUL-delimited input:\\\\n");
           83         for (off = 0; s[off] != '\\\\0'; off += ret) {
           84                 ret = grapheme_next_${TYPE}_break_utf8(s + off, SIZE_MAX);
           85                 printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
           86         }
           87         printf("\\\\n");
           88 
           89         /* do the same, but this time string is length-delimited */
           90         len = 17;
           91         printf("${REALTYPE}s in input delimited to %zu bytes:\\\\n", len);
           92         for (off = 0; off < len; off += ret) {
           93                 ret = grapheme_next_${TYPE}_break_utf8(s + off, len - off);
           94                 printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
           95         }
           96 
           97         return 0;
           98 }
           99 .Ed
          100 EOF
          101 fi
          102 
          103 cat << EOF
          104 .Sh SEE ALSO$(if [ "$TYPE" = "character" ] && [ "$ENCODING" != "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 ,"; fi)
          105 .Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3 ,
          106 .Xr libgrapheme 7
          107 .Sh STANDARDS
          108 .Fn grapheme_next_${TYPE}_break${SUFFIX}
          109 is compliant with the Unicode ${UNICODE_VERSION} specification.
          110 .Sh AUTHORS
          111 .An Laslo Hunhold Aq Mt dev@frign.de
          112 EOF