add more useful functions to utf.h - iomenu - interactive terminal-based selection menu
(HTM) git clone git://bitreich.org/iomenu git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/iomenu
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Tags
(DIR) README
(DIR) LICENSE
---
(DIR) commit 39fe39ddae414f337914be1aaa03915679635415
(DIR) parent 9adba189e74c6b6752d04f8e6954177b35a99bbd
(HTM) Author: Josuah Demangeon <mail@josuah.net>
Date: Tue, 16 Jan 2018 22:34:54 +0100
add more useful functions to utf.h
Diffstat:
M iomenu.c | 2 +-
A main.c | 18 ++++++++++++++++++
M utf8.c | 78 ++++++++++++++++++++++---------
M utf8.h | 7 ++++---
4 files changed, 80 insertions(+), 25 deletions(-)
---
(DIR) diff --git a/iomenu.c b/iomenu.c
@@ -350,7 +350,7 @@ format(char *str, int cols)
col++;
}
str++;
- } else if (utf8_to_rune(&rune, str) && utf8_is_print(rune)) {
+ } else if (utf8_torune(&rune, str) && utf8_isprint(rune)) {
int i = utf8_len(str);
while (i--)
*fmt++ = *str++;
(DIR) diff --git a/main.c b/main.c
@@ -0,0 +1,18 @@
+#include <stdio.h>
+#include "utf8.h"
+
+int
+main(void)
+{
+ int n = 100;
+ char s[] = "浪漫的夢想", *cut;
+
+ if ((cut = utf8_col(s, n)) == NULL) {
+ printf("the whole string fit\n");
+ return 0;
+ }
+ printf("%zd\n", cut - s);
+ *cut = '\0';
+ printf("%s\n", s);
+ return 0;
+}
(DIR) diff --git a/utf8.c b/utf8.c
@@ -26,32 +26,36 @@
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
+#include <wchar.h> /* only used for wcwidth() */
#include "utf8.h"
/*
* Return the number of bytes in rune for the `n` next char in `s`,
- * or 0 if ti is misencoded.
+ * or 0 if is misencoded or if it is '\0'.
*/
size_t
utf8_len(char *s)
{
unsigned char *sp = (unsigned char *) s;
- int i, len = (*sp < 0x80) ? 1 : /* 0xxxxxxx < 10000000 */
- (*sp < 0xc0) ? 0 : /* 10xxxxxx < 11000000 */
- (*sp < 0xe0) ? 2 : /* 110xxxxx < 11100000 */
- (*sp < 0xf0) ? 3 : /* 1110xxxx < 11110000 */
- (*sp < 0xf8) ? 4 : /* 11110xxx < 11111000 */
- (*sp < 0xfc) ? 5 : /* 111110xx < 11111100 */
- (*sp < 0xfe) ? 6 : /* 1111110x < 11111110 */
- (*sp < 0xff) ? 7 : /* 11111110 < 11111111 */
- 0;
- if ((size_t) len > strlen(s)) return 0;
-
- /* check continuation bytes */
- for (sp++, i = 1; i < len; i++, sp++)
+ int i, len;
+
+ len = (*sp == 0x0) ? 0 : /* 00000000 */
+ (*sp < 0x80) ? 1 : /* 0xxxxxxx < 10000000 */
+ (*sp < 0xc0) ? 0 : /* 10xxxxxx < 11000000 */
+ (*sp < 0xe0) ? 2 : /* 110xxxxx < 11100000 */
+ (*sp < 0xf0) ? 3 : /* 1110xxxx < 11110000 */
+ (*sp < 0xf8) ? 4 : /* 11110xxx < 11111000 */
+ (*sp < 0xfc) ? 5 : /* 111110xx < 11111100 */
+ (*sp < 0xfe) ? 6 : /* 1111110x < 11111110 */
+ (*sp < 0xff) ? 7 : /* 11111110 < 11111111 */
+ 0;
+
+ /* check continuation bytes and '\0' */
+ for (sp++, i = 1; i < len; i++, sp++) {
if ((*sp & 0xc0) != 0x80) /* 10xxxxxx & 11000000 */
return 0;
+ }
return len;
}
@@ -61,11 +65,11 @@ utf8_len(char *s)
* 0 if rune is too long.
*/
size_t
-utf8_rune_len(long r)
+utf8_runelen(long r)
{
- return (r <= 0x0000007f) ? 1 : (r <= 0x000007ff) ? 2 :
- (r <= 0x0000ffff) ? 3 : (r <= 0x001fffff) ? 4 :
- (r <= 0x03ffffff) ? 5 : (r <= 0x7fffffff) ? 6 : 0;
+ return (r <= 0x0000007f) ? 1 : (r <= 0x000007ff) ? 2 :
+ (r <= 0x0000ffff) ? 3 : (r <= 0x001fffff) ? 4 :
+ (r <= 0x03ffffff) ? 5 : (r <= 0x7fffffff) ? 6 : 0;
}
/*
@@ -74,7 +78,7 @@ utf8_rune_len(long r)
* Return the number of bytes read or 0 if the string is misencoded.
*/
size_t
-utf8_to_rune(long *r, char *s)
+utf8_torune(long *r, char *s)
{
char mask[] = { 0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01 };
size_t i, len = utf8_len(s);
@@ -90,17 +94,49 @@ utf8_to_rune(long *r, char *s)
*r = (*r << 6) | (*s++ & 0x3f); /* 10xxxxxx */
/* overlong sequences */
- if (utf8_rune_len(*r) != len)
+ if (utf8_runelen(*r) != len)
return 0;
return len;
}
/*
+ *
+ */
+utf8_tostr(char *s, long r)
+{
+
+}
+
+/*
* Return 1 if the rune is a printable character, and 0 otherwise.
*/
int
-utf8_is_print(long r)
+utf8_isprint(long r)
{
return (0x1f < r && r != 0x7f && r < 0x80) || 0x9f < r;
}
+
+/*
+ * Return a pointer to the first byte of a character of `s' that would be
+ * rendered at the `col'-th column in a monospaced terminal, or NULL if the
+ * whole string fit.
+ */
+char *
+utf8_col(char *s, size_t col)
+{
+ size_t n;
+ long r;
+ char *pos;
+
+ for (n = 0; n < col; n += wcwidth(r)) {
+ pos = s;
+ if (*s == '\0')
+ return NULL;
+ s += utf8_torune(&r, s);
+ utf8_toutf();
+ printf("%zd:'%s' ", n, s);
+ }
+
+ return pos;
+}
(DIR) diff --git a/utf8.h b/utf8.h
@@ -1,6 +1,7 @@
size_t utf8_len(char *);
size_t rune_len(long);
-size_t utf8_to_rune(long *, char *);
-int utf8_is_unicode(long);
+size_t utf8_torune(long *, char *);
+int utf8_isunicode(long);
int utf8_check(char *);
-int utf8_is_print(long);
+int utf8_isprint(long);
+char *utf8_col();