Added embedded UTF-8 handling library - iomenu - interactive terminal-based selection menu
(HTM) git clone git://bitreich.org/iomenu git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/iomenu
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Tags
(DIR) README
(DIR) LICENSE
---
(DIR) commit e01fa60f7992641ba6f3f5109dec6783e397a248
(DIR) parent bc428475d5d8e678f99f9006aab8bc1d656c61b6
(HTM) Author: Josuah Demangeonā ā µ <mail@josuah.net>
Date: Mon, 3 Apr 2017 23:50:45 +0200
Added embedded UTF-8 handling library
Diffstat:
M iomenu.c | 2 +-
M utf.c | 188 +++++++++++++++++++++----------
M utf.h | 22 ++++++++++++----------
3 files changed, 141 insertions(+), 71 deletions(-)
---
(DIR) diff --git a/iomenu.c b/iomenu.c
@@ -11,7 +11,7 @@
#include <sys/ioctl.h>
-#define OFFSET 30 /* in horizontal mode, amount of space kept for writing */
+#define OFFSET 40 /* in horizontal mode, amount of space kept for writing */
#define CONTINUE 2 /* as opposed to EXIT_SUCCESS and EXIT_FAILURE */
#define CONTROL(char) (char ^ 0x40)
(DIR) diff --git a/utf.c b/utf.c
@@ -1,9 +1,12 @@
/*
- * Functions handling UTF-8 srings:
+ * Functions handling UTF-8 strings:
*
* stdin -> buffer -> stdout
* UTF-8 -> rune -> UTF-8
* char[] -> long[] -> char[]
+ *
+ * Thanks to Connor Lane Smith for the idea of combining switches and
+ * binary masks.
*/
@@ -14,11 +17,12 @@
#include "utf.h"
+/* --- lengths -------------------------------------------------------------- */
+
+
/*
* Return the number of bytes in rune for the `n` next char in `s`,
* or 0 if ti is misencoded.
- *
- * Thanks to Connor Lane Smith for the idea of using 0x??.
*/
int
utflen(char *s, int n)
@@ -53,7 +57,8 @@ utflen(char *s, int n)
/*
- * Return the number of bytes required to display `rune`
+ * Return the number of bytes required to encode `rune` into UTF-8, or
+ * 0 if rune is too long.
*/
int
runelen(long r)
@@ -68,15 +73,19 @@ runelen(long r)
}
+/* --- conversions ---------------------------------------------------------- */
+
+
/*
- * Sets `r` to a rune corresponding to the firsts `n` bytes of `s`
- * and return the number of bytes read.
- * if `s` is misencoded, the rune is stored as a negative value.
+ * Sets `r` to a rune corresponding to the firsts `n` bytes of `s`.
+ * If `s` is misencoded, the rune is stored as a negative value.
+ *
+ * Return the number of bytes read.
*/
int
utftorune(long *r, char *s, int n)
{
- int len = utflen(s, n);
+ int len = utflen(s, n), i;
/* first byte */
switch (len) {
@@ -90,7 +99,7 @@ utftorune(long *r, char *s, int n)
}
/* continuation bytes */
- for (int i = 1; i < len; i++)
+ for (i = 1; i < len; i++)
*r = (*r << 6) | (s[i] & 0x3f); /* 10xxxxxx */
/* overlong sequences */
@@ -104,8 +113,28 @@ utftorune(long *r, char *s, int n)
/*
- * Encode the rune `r` in utf-8 in `s`, null-terminated, and return
- * the number of bytes written, 0 if `r` is invalid.
+ * Convert the utf char sring `src` of size `n` to a long string
+ * `dest`.
+ *
+ * Return the length of `i`.
+ */
+size_t
+utftorunes(long *runev, char *utf, size_t n)
+{
+ size_t i, j;
+
+ for (i = 0, j = 0; n > 0; i++)
+ j += utftorune(runev + i, utf[j], n - j);
+
+ runev[i] = '\0';
+ return i;
+}
+
+
+/*
+ * Encode the rune `r` in utf-8 in `s`, null-terminated.
+ *
+ * Return the number of bytes written, 0 if `r` is invalid.
*/
int
runetoutf(char *s, long r)
@@ -150,12 +179,42 @@ runetoutf(char *s, long r)
s[5] = 0x80 | (0x3f & (r)); /* 10xxxxxx */
s[6] = '\0';
return 6;
+ default:
+ s[0] = '\0';
+ return 0;
+ }
+}
+
+
+/*
+ * Fill `s` with a printable representation of `r`.
+ *
+ * Return the width of the character.
+ */
+int
+runetoprint(char *s, long r)
+{
+ if (r < 0) {
+ return sprintf(s, "[%02x]", (unsigned char) -r);
+
+ } else if (r == 0x7f || r < ' ') {
+ return sprintf(s, "[%02lx]", r);
+
+ } else if (!isprintrune(r)) {
+ return sprintf(s, "[%04lx]", r);
+
+ } else {
+ runetoutf(s, r);
+ return 1;
}
return 0;
}
+/* --- standard library ----------------------------------------------------- */
+
+
/*
* Returns 1 if the rune is a printable character and 0 if not.
*/
@@ -163,14 +222,13 @@ int
isprintrune(long r)
{
return !(
- (r == 0x7f || r < ' ') || /* ascii control */
+ (r < ' ' || r == 0x7f) || /* ascii control */
(0x80 <= r && r < 0xa0) || /* unicode control */
(r > 0x10ffff) || /* outside range */
- (r % 0x010000 == 0x00fffe) || /* noncharacters */
- (r % 0x010000 == 0x00ffff) ||
+ ((r & 0x00fffe) == 0x00fffe) || /* noncharacters */
(0x00fdd0 <= r && r <= 0x00fdef) ||
(0x00e000 <= r && r <= 0x00f8ff) || /* private use */
@@ -183,63 +241,72 @@ isprintrune(long r)
/*
- * Fill `s` with a printable representation of `r` and return the
- * width of the character. The tab characters are converted to
- * spaces as if it was at the column `col`.
+ * Read an utf string from `f` up to the first '\n' character or the
+ * end of the file. It is stored as a rune array into the newly
+ * allocated `r`.
+ *
+ * Return the length of `r`, or -1 if malloc fails or if the end of
+ * `f` is reached.
*/
-int
-runetoprint(char *s, long r, int col)
+size_t
+getrunes(long **r, FILE *f)
{
- if (r < 0) {
- return sprintf(s, "[%02x]", (unsigned char) -r);
+ size_t slen, rlen = 0, size = BUFSIZ, i;
+ int c;
+ char *s;
- } else if (r == 0x7f || r < ' ') {
- return sprintf(s, "[%02lx]", r);
+ if (!(s = malloc(size))) return -1;
+ for (slen = 0; (c = fgetc(f)) != EOF && (c != '\n'); slen++) {
+ if (slen > size && !(s = realloc(s, ++size))) return -1;
+ s[slen] = c;
+ }
- } else if (!isprintrune(r)) {
- return sprintf(s, "[%04lx]", r);
+ if (!(*r = malloc(size * sizeof (long)))) return -1;
+ for (i = 0; i < slen; rlen++)
+ i += utftorune(*r + rlen, s + i, slen - i);
+ (*r)[rlen] = '\0';
- } else if (r == '\t') {
- int i;
- for (i = 1; (col + i) % 8 != 0; i++)
- s[i] = ' ';
- s[0] = ' '; s[i] = '\0';
- return i;
+ free(s);
+ return feof(f) ? -1 : rlen;
+}
- } else {
- runetoutf(s, r);
- return 1;
- }
- return 0;
+long *
+runescpy(long *dest, long *src)
+{
+ size_t i;
+
+ for (i = 0; src[i] != '\0'; i++)
+ dest[i] = src[i];
+ dest[i] = '\0';
+
+ return dest;
}
-/*
- * Read a newly allocated string from `f` up to the first '\n'
- * character or the end of the file. It is stored as a rune array, and
- * `r` is set to point to it. The length of the string is returned, or
- * -1 if malloc fails.
- */
-int
-getutf(long **r, FILE *f)
+long *
+runeschr(long *s, long r)
{
- int slen, rlen = 0, c, size = BUFSIZ;
- char *s;
+ size_t i;
- if (!(s = malloc(size))) return -1;
- for (slen = 0; (c = fgetc(f)) != EOF && (c != '\n'); slen++) {
- if (slen > size)
- if (!(s = realloc(s, ++size))) return -1;
- s[slen] = c;
- }
+ for (i = 0; s[i] != '\0'; i++)
+ if (s[i] == r) return s + i;
- if (!(*r = malloc(size * sizeof (long)))) return -1;
- for (int i = 0; i < slen; rlen++)
- i += utftorune(*r + rlen, s + i, slen - i);
+ return NULL;
+}
- free(s);
- return rlen;
+
+long *
+runescat(long *s1, long *s2)
+{
+ size_t i, j;
+
+ for (i = 0; s1[i] != '\0'; i++);
+ for (j = 0; s2[j] != '\0'; j++)
+ s1[i + j] = s2[j];
+ s1[i + j] = '\0';
+
+ return s1;
}
@@ -248,10 +315,11 @@ main()
{
char s[BUFSIZ];
long *r;
+ int len, i;
- for (int len; (len = getutf(&r, stdin)) >= 0 && !feof(stdin); free(r)) {
- for (int i = 0; i < len; i++) {
- runetoprint(s, r[i], 0);
+ for (len = 0; (len = getutf(&r, stdin)) >= 0 && !feof(stdin); free(r)) {
+ for (i = 0; i < len; i++) {
+ runetoprint(s, r[i]);
fputs(s, stdout);
}
(DIR) diff --git a/utf.h b/utf.h
@@ -1,14 +1,16 @@
/* rune / utf length */
-int utflen(char *, int);
-int runelen(long);
+int utflen(char *, int);
+int runelen(long);
-/* decode / encode */
-int utftorune(long *, char *, int);
-int runetoutf(char *, long);
+/* conversion */
+int utftorune(long *, char *, int);
+int runetoutf(char *, long);
+int runetoprint(char *, long);
-/* rune class */
-int isprintrune(long);
+/* input/output */
-/* stdin / stdout */
-int runetoprint(char *, long, int);
-int getutf(long **, FILE *);
+size_t getutf(long **, FILE *);
+
+/* standard library */
+int runeisprint(long);
+long *runestrcpy();