tr.c - sbase - suckless unix tools
(HTM) git clone git://git.suckless.org/sbase
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
tr.c (6191B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <stdlib.h>
3
4 #include "utf.h"
5 #include "util.h"
6
/* Command-line option flags; static storage starts them out at zero. */
static int cflag;	/* -c / -C given */
static int dflag;	/* -d given */
static int sflag;	/* -s given */
10
11 struct range {
12 Rune start;
13 Rune end;
14 size_t quant;
15 };
16
17 static struct {
18 char *name;
19 int (*check)(Rune);
20 } classes[] = {
21 { "alnum", isalnumrune },
22 { "alpha", isalpharune },
23 { "blank", isblankrune },
24 { "cntrl", iscntrlrune },
25 { "digit", isdigitrune },
26 { "graph", isgraphrune },
27 { "lower", islowerrune },
28 { "print", isprintrune },
29 { "punct", ispunctrune },
30 { "space", isspacerune },
31 { "upper", isupperrune },
32 { "xdigit", isxdigitrune },
33 };
34
/*
 * Parsed operands. Each set is described either by an array of ranges
 * (setN, setNranges) or by a class predicate (setNcheck); all of them
 * start out empty/NULL thanks to static zero-initialization.
 */
static struct range *set1;
static size_t set1ranges;
static int (*set1check)(Rune);

static struct range *set2;
static size_t set2ranges;
static int (*set2check)(Rune);
41
42 static size_t
43 rangelen(struct range r)
44 {
45 return (r.end - r.start + 1) * r.quant;
46 }
47
48 static size_t
49 setlen(struct range *set, size_t setranges)
50 {
51 size_t len = 0, i;
52
53 for (i = 0; i < setranges; i++)
54 len += rangelen(set[i]);
55
56 return len;
57 }
58
59 static int
60 rstrmatch(Rune *r, char *s, size_t n)
61 {
62 size_t i;
63
64 for (i = 0; i < n; i++)
65 if (r[i] != s[i])
66 return 0;
67 return 1;
68 }
69
70 static size_t
71 makeset(char *str, struct range **set, int (**check)(Rune))
72 {
73 Rune *rstr;
74 size_t len, i, j, m, n;
75 size_t q, setranges = 0;
76 int factor, base;
77
78 /* rstr defines at most len ranges */
79 unescape(str);
80 rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr));
81 len = utftorunestr(str, rstr);
82 *set = ereallocarray(NULL, len, sizeof(**set));
83
84 for (i = 0; i < len; i++) {
85 if (rstr[i] == '[') {
86 j = i;
87 nextbrack:
88 if (j >= len)
89 goto literal;
90 for (m = j; m < len; m++)
91 if (rstr[m] == ']') {
92 j = m;
93 break;
94 }
95 if (j == i)
96 goto literal;
97
98 /* CLASSES [=EQUIV=] (skip) */
99 if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
100 if (j - i != 4)
101 goto literal;
102 (*set)[setranges].start = rstr[i + 2];
103 (*set)[setranges].end = rstr[i + 2];
104 (*set)[setranges].quant = 1;
105 setranges++;
106 i = j;
107 continue;
108 }
109
110 /* CLASSES [:CLASS:] */
111 if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
112 for (n = 0; n < LEN(classes); n++) {
113 if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
114 *check = classes[n].check;
115 return 0;
116 }
117 }
118 eprintf("Invalid character class.\n");
119 }
120
121 /* REPEAT [_*n] (only allowed in set2) */
122 if (j - i > 2 && rstr[i + 2] == '*') {
123 /* check if right side of '*' is a number */
124 q = 0;
125 factor = 1;
126 base = (rstr[i + 3] == '0') ? 8 : 10;
127 for (n = j - 1; n > i + 2; n--) {
128 if (rstr[n] < '0' || rstr[n] > '9') {
129 n = 0;
130 break;
131 }
132 q += (rstr[n] - '0') * factor;
133 factor *= base;
134 }
135 if (n == 0) {
136 j = m + 1;
137 goto nextbrack;
138 }
139 (*set)[setranges].start = rstr[i + 1];
140 (*set)[setranges].end = rstr[i + 1];
141 (*set)[setranges].quant = q ? q : setlen(set1, MAX(set1ranges, 1));
142 setranges++;
143 i = j;
144 continue;
145 }
146
147 j = m + 1;
148 goto nextbrack;
149 }
150 literal:
151 /* RANGES [_-__-_], _-__-_ */
152 /* LITERALS _______ */
153 (*set)[setranges].start = rstr[i];
154
155 if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
156 i += 2;
157 (*set)[setranges].end = rstr[i];
158 (*set)[setranges].quant = 1;
159 setranges++;
160 }
161
162 free(rstr);
163 return setranges;
164 }
165
166 static void
167 usage(void)
168 {
169 eprintf("usage: %s [-cCds] set1 [set2]\n", argv0);
170 }
171
172 int
173 main(int argc, char *argv[])
174 {
175 Rune r, lastrune = 0;
176 size_t off1, off2, i, m;
177 int ret = 0;
178
179 ARGBEGIN {
180 case 'c':
181 case 'C':
182 cflag = 1;
183 break;
184 case 'd':
185 dflag = 1;
186 break;
187 case 's':
188 sflag = 1;
189 break;
190 default:
191 usage();
192 } ARGEND
193
194 if (!argc || argc > 2 || (dflag == sflag && argc != 2) ||
195 (dflag && argc != 1))
196 usage();
197
198 set1ranges = makeset(argv[0], &set1, &set1check);
199 if (argc == 2) {
200 set2ranges = makeset(argv[1], &set2, &set2check);
201 /* sanity checks as we are translating */
202 if (!set2ranges && !set2check)
203 eprintf("cannot map to an empty set.\n");
204 if (set2check && set2check != islowerrune &&
205 set2check != isupperrune) {
206 eprintf("can only map to 'lower' and 'upper' class.\n");
207 }
208 }
209 read:
210 if (!efgetrune(&r, stdin, "<stdin>")) {
211 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
212 return ret;
213 }
214 if (argc == 1 && sflag)
215 goto write;
216 for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]), i++) {
217 if (set1[i].start <= r && r <= set1[i].end) {
218 if (dflag) {
219 if (cflag)
220 goto write;
221 else
222 goto read;
223 }
224 if (cflag)
225 goto write;
226
227 /* map r to set2 */
228 if (set2check) {
229 if (set2check == islowerrune)
230 r = tolowerrune(r);
231 else
232 r = toupperrune(r);
233 } else {
234 off1 += r - set1[i].start;
235 if (off1 > setlen(set2, set2ranges) - 1) {
236 r = set2[set2ranges - 1].end;
237 goto write;
238 }
239 for (m = 0, off2 = 0; m < set2ranges; m++) {
240 if (off2 + rangelen(set2[m]) > off1) {
241 m++;
242 break;
243 }
244 off2 += rangelen(set2[m]);
245 }
246 m--;
247 r = set2[m].start + (off1 - off2) / set2[m].quant;
248 }
249 goto write;
250 }
251 }
252 if (set1check && set1check(r)) {
253 if (cflag)
254 goto write;
255 if (dflag)
256 goto read;
257 if (set2check) {
258 if (set2check == islowerrune)
259 r = tolowerrune(r);
260 else
261 r = toupperrune(r);
262 } else {
263 r = set2[set2ranges - 1].end;
264 }
265 goto write;
266 }
267 if (!dflag && cflag) {
268 if (set2check) {
269 if (set2check == islowerrune)
270 r = tolowerrune(r);
271 else
272 r = toupperrune(r);
273 } else {
274 r = set2[set2ranges - 1].end;
275 }
276 goto write;
277 }
278 if (dflag && cflag)
279 goto read;
280 write:
281 if (argc == 1 && sflag && r == lastrune) {
282 if (set1check && set1check(r))
283 goto read;
284 for (i = 0; i < set1ranges; i++) {
285 if (set1[i].start <= r && r <= set1[i].end)
286 goto read;
287 }
288 }
289 if (argc == 2 && sflag && r == lastrune) {
290 if (set2check && set2check(r))
291 goto read;
292 for (i = 0; i < set2ranges; i++) {
293 if (set2[i].start <= r && r <= set2[i].end)
294 goto read;
295 }
296 }
297 efputrune(&r, stdout, "<stdout>");
298 lastrune = r;
299 goto read;
300 }