trune.3 - plan9port - [fork] Plan 9 from user space
 (HTM) git clone git://src.adamsgaard.dk/plan9port
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       trune.3 (3083B)
       ---
            1 .TH RUNE 3
            2 .SH NAME
            3 runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion
            4 .SH SYNOPSIS
            5 .ta \w'\fLchar*xx'u
            6 .B #include <u.h>
            7 .br
            8 .B #include <libc.h>
            9 .PP
           10 .B
           11 int        runetochar(char *s, Rune *r)
           12 .PP
           13 .B
           14 int        chartorune(Rune *r, char *s)
           15 .PP
           16 .B
           17 int        runelen(long r)
           18 .PP
           19 .B
           20 int        runenlen(Rune *r, int n)
           21 .PP
           22 .B
           23 int        fullrune(char *s, int n)
           24 .PP
           25 .B
           26 char*        utfecpy(char *s1, char *es1, char *s2)
           27 .PP
           28 .B
           29 int        utflen(char *s)
           30 .PP
           31 .B
           32 int        utfnlen(char *s, long n)
           33 .PP
           34 .B
           35 char*        utfrune(char *s, long c)
           36 .PP
           37 .B
           38 char*        utfrrune(char *s, long c)
           39 .PP
           40 .B
           41 char*        utfutf(char *s1, char *s2)
           42 .SH DESCRIPTION
           43 These routines convert between a
           44 .SM UTF
           45 byte stream and runes.
           46 .PP
           47 .I Runetochar
           48 copies one rune at
           49 .I r
           50 to at most
           51 .B UTFmax
           52 bytes starting at
           53 .I s
           54 and returns the number of bytes copied.
           55 .BR UTFmax ,
           56 defined as
           57 .B 3
           58 in
           59 .BR <libc.h> ,
           60 is the maximum number of bytes required to represent a rune.
           61 .PP
           62 .I Chartorune
           63 copies at most
           64 .B UTFmax
           65 bytes starting at
           66 .I s
           67 to one rune at
           68 .I r
           69 and returns the number of bytes copied.
           70 If the input is not exactly in
           71 .SM UTF
           72 format,
           73 .I chartorune
           74 will convert to
           75 .B Runeerror
           76 (0xFFFD)
           77 and return 1.
           78 .PP
           79 .I Runelen
           80 returns the number of bytes
           81 required to convert
           82 .I r
           83 into
           84 .SM UTF.
           85 .PP
           86 .I Runenlen
           87 returns the number of bytes
           88 required to convert the
           89 .I n
           90 runes pointed to by
           91 .I r
           92 into
           93 .SM UTF.
           94 .PP
           95 .I Fullrune
           96 returns 1 if the string
           97 .I s
           98 of length
           99 .I n
          100 is long enough to be decoded by
          101 .I chartorune
          102 and 0 otherwise.
          103 This does not guarantee that the string
          104 contains a legal
          105 .SM UTF
          106 encoding.
          107 This routine is used by programs that
          108 obtain input a byte at
          109 a time and need to know when a full rune
          110 has arrived.
          111 .PP
          112 The following routines are analogous to the
          113 corresponding string routines with
          114 .B utf
          115 substituted for
          116 .B str
          117 and
          118 .B rune
          119 substituted for
          120 .BR chr .
          121 .PP
          122 .I Utfecpy
          123 copies UTF sequences until a null sequence has been copied, but writes no
          124 sequences beyond
          125 .IR es1 .
          126 If any sequences are copied,
          127 .I s1
          128 is terminated by a null sequence, and a pointer to that sequence is returned.
          129 Otherwise, the original
          130 .I s1
          131 is returned.
          132 .PP
          133 .I Utflen
          134 returns the number of runes that
          135 are represented by the
          136 .SM UTF
          137 string
          138 .IR s .
          139 .PP
          140 .I Utfnlen
          141 returns the number of complete runes that
          142 are represented by the first
          143 .I n
          144 bytes of
          145 .SM UTF
          146 string
          147 .IR s .
          148 If the last few bytes of the string contain an incompletely coded rune,
          149 .I utfnlen
          150 will not count them; in this way, it differs from
          151 .IR utflen ,
          152 which includes every byte of the string.
          153 .PP
          154 .I Utfrune
          155 .RI ( utfrrune )
          156 returns a pointer to the first (last)
          157 occurrence of rune
          158 .I c
          159 in the
          160 .SM UTF
          161 string
          162 .IR s ,
          163 or 0 if
          164 .I c
          165 does not occur in the string.
          166 The NUL byte terminating a string is considered to
          167 be part of the string
          168 .IR s .
          169 .PP
          170 .I Utfutf
          171 returns a pointer to the first occurrence of
          172 the
          173 .SM UTF
          174 string
          175 .I s2
          176 as a
          177 .SM UTF
          178 substring of
          179 .IR s1 ,
          180 or 0 if there is none.
          181 If
          182 .I s2
          183 is the null string,
          184 .I utfutf
          185 returns
          186 .IR s1 .
          187 .SH SOURCE
          188 .B \*9/src/lib9/utf/rune.c
          189 .br
          190 .B \*9/src/lib9/utf/utfrune.c
          191 .SH SEE ALSO
          192 .MR utf (7) ,
          193 .MR tcs (1)