utf8expr.c - utf8expr - expr(1) for UTF-8
(HTM) git clone git://bitreich.org/utf8expr/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/utf8expr/
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Tags
(DIR) LICENSE
---
utf8expr.c (2190B)
---
1 /*
2 * Copy me if you can.
3 * by 20h
4 */
5
6 #include <unistd.h>
7 #include <string.h>
8 #include <stdlib.h>
9 #include <stdio.h>
10 #include <libgen.h>
11
12 #include "arg.h"
13
14 char *argv0;
15
/*
 * Idea taken from:
 * http://canonical.org/~kragen/strlen-utf8.html
 *
 * Return the number of characters in the NUL-terminated string s.
 * Every byte that is not a continuation byte (bit pattern 10xxxxxx)
 * is counted as the start of one character.
 */
size_t
utf8strlen(char *s)
{
	size_t nchars = 0;
	char *p = s;

	while (*p != '\0') {
		/* The comparison yields 1 for a character start, 0 otherwise. */
		nchars += ((*p & 0xc0) != 0x80);
		p++;
	}

	return nchars;
}
33
/*
 * Find the first occurrence of the single character c in the
 * NUL-terminated string s.
 *
 * c is the byte sequence of exactly one character; a match requires
 * a whole character of s to have the same length and the same bytes.
 *
 * Returns a pointer to the start of the matching character in s, or
 * NULL if c is empty, if no character matches, or if s contains a
 * run of more than 6 continuation bytes (malformed input).
 */
char *
utf8strchr(char *s, char *c)
{
	char *start;
	size_t cl, cont;

	cl = strlen(c);
	if (cl == 0)
		return NULL;

	/*
	 * start marks the lead byte of the character currently being
	 * scanned. It stays NULL until the first lead byte is seen, so
	 * nothing in front of s is ever read and stray continuation
	 * bytes at the very beginning belong to no character.
	 */
	start = NULL;
	cont = 0;
	for (;; s++) {
		if ((s[0] & 0xc0) == 0x80) {
			/* Continuation byte: still inside a character. */
			if (++cont > 6)
				return NULL;
			continue;
		}
		cont = 0;

		/* Lead byte or NUL: the character [start, s) is complete. */
		if (start != NULL && (size_t)(s - start) == cl &&
		    !memcmp(start, c, cl))
			return start;

		if (s[0] == '\0')
			break;
		start = s;
	}

	return NULL;
}
62
/*
 * Locate a substring of the NUL-terminated string s, measured in
 * characters rather than bytes.
 *
 * pos     1-based index of the first character wanted.
 * length  in:  maximum number of characters wanted.
 *         out: length in bytes of the substring found (0 if the scan
 *              completed without reaching pos).
 *
 * Returns a pointer into s at the first byte of the substring; the
 * result is not NUL-terminated at its end, so callers must honour
 * *length. The substring is cut short at the end of s.
 *
 * Returns NULL if pos or *length is 0, if s has fewer than pos
 * characters, or if a run of more than 6 continuation bytes
 * (malformed input) is met while scanning.
 */
char *
utf8substr(char *s, size_t pos, size_t *length)
{
	char *ret;
	size_t i, left, cont;

	/* Position 0 addresses no character; never step in front of s. */
	if (pos < 1 || *length < 1)
		return NULL;

	ret = NULL;
	left = *length;	/* characters still to be collected */
	cont = 0;	/* continuation bytes since the last boundary */
	for (i = 0; ; s++) {
		if ((s[0] & 0xc0) == 0x80) {
			if (++cont > 6)
				return NULL;
			continue;
		}
		cont = 0;

		/*
		 * Lead byte or NUL: the previous character is complete.
		 * Once the start has been found, each completed character
		 * uses up one unit of the requested length.
		 */
		if (ret != NULL && --left == 0)
			break;
		if (s[0] == '\0')
			break;

		/* A new character, number i, begins here. */
		if (++i == pos)
			ret = s;
	}

	/* s now points one past the last byte of the substring. */
	*length = (ret != NULL) ? (size_t)(s - ret) : 0;
	return ret;
}
101
/*
 * Return the 1-based character index of the first character of the
 * NUL-terminated string s that also occurs in chars (as decided by
 * utf8strchr(chars, character)).
 *
 * Returns 0 if no character of s occurs in chars, or if s contains a
 * run of more than 6 continuation bytes (malformed input).
 */
size_t
utf8index(char *s, char *chars)
{
	char *start;
	size_t i, cl, cont;
	/* One lead byte, at most 6 continuation bytes, and the NUL. */
	char c[8];

	/*
	 * start marks the lead byte of the character being scanned and
	 * i is its 1-based index. start stays NULL until the first lead
	 * byte, so nothing in front of s is ever read.
	 */
	start = NULL;
	cont = 0;
	for (i = 0; ; s++) {
		if ((s[0] & 0xc0) == 0x80) {
			if (++cont > 6)
				return 0;
			continue;
		}
		cont = 0;

		/* Lead byte or NUL: the character [start, s) is complete. */
		if (start != NULL) {
			cl = (size_t)(s - start);
			memcpy(c, start, cl);
			c[cl] = '\0';
			if (utf8strchr(chars, c))
				return i;
		}

		if (s[0] == '\0')
			break;
		start = s;
		i++;
	}

	return 0;
}
129
130 void
131 usage(void)
132 {
133 fprintf(stderr, "usage: %s [substr|index|length] str [args ...]\n",
134 basename(argv0));
135 exit(1);
136 }
137
138 int
139 main(int argc, char *argv[])
140 {
141 char *s;
142 size_t len;
143
144 argv0 = argv[0];
145
146 if (argc < 3)
147 usage();
148
149 switch(argv[1][0]) {
150 case 'i':
151 if (argc < 4)
152 usage();
153 printf("%ld\n", utf8index(argv[2], argv[3]));
154 break;
155 case 'l':
156 printf("%ld\n", utf8strlen(argv[2]));
157 break;
158 case 's':
159 if (argc < 5)
160 usage();
161 len = atoi(argv[4]);
162 s = utf8substr(argv[2], atoi(argv[3]), &len);
163 if (s == NULL)
164 return -1;
165 printf("%.*s\n", (int)len, s);
166 break;
167 default:
168 usage();
169 };
170
171 return 0;
172 }
173