sort.c - sbase - suckless unix tools
(HTM) git clone git://git.suckless.org/sbase
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
sort.c (9468B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <ctype.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6
7 #include "queue.h"
8 #include "text.h"
9 #include "utf.h"
10 #include "util.h"
11
12 struct keydef {
13 int start_column;
14 int end_column;
15 int start_char;
16 int end_char;
17 int flags;
18 TAILQ_ENTRY(keydef) entry;
19 };
20
21 struct column {
22 struct line line;
23 size_t cap;
24 };
25
26 enum {
27 MOD_N = 1 << 0,
28 MOD_STARTB = 1 << 1,
29 MOD_ENDB = 1 << 2,
30 MOD_R = 1 << 3,
31 MOD_D = 1 << 4,
32 MOD_F = 1 << 5,
33 MOD_I = 1 << 6,
34 };
35
36 static TAILQ_HEAD(kdhead, keydef) kdhead = TAILQ_HEAD_INITIALIZER(kdhead);
37
38 static int Cflag = 0, cflag = 0, uflag = 0;
39 static char *fieldsep = NULL;
40 static size_t fieldseplen = 0;
41 static struct column col1, col2;
42
43 static void
44 skipblank(struct line *a)
45 {
46 while (a->len && (*(a->data) == ' ' || *(a->data) == '\t')) {
47 a->data++;
48 a->len--;
49 }
50 }
51
52 static void
53 skipnonblank(struct line *a)
54 {
55 while (a->len && (*(a->data) != '\n' && *(a->data) != ' ' &&
56 *(a->data) != '\t')) {
57 a->data++;
58 a->len--;
59 }
60 }
61
62 static void
63 skipcolumn(struct line *a, int skip_to_next_col)
64 {
65 char *s;
66
67 if (fieldsep) {
68 if ((s = memmem(a->data, a->len, fieldsep, fieldseplen))) {
69 if (skip_to_next_col)
70 s += fieldseplen;
71 a->len -= s - a->data;
72 a->data = s;
73 } else {
74 a->data += a->len - 1;
75 a->len = 1;
76 }
77 } else {
78 skipblank(a);
79 skipnonblank(a);
80 }
81 }
82
83 static void
84 columns(struct line *line, const struct keydef *kd, struct column *col)
85 {
86 Rune r;
87 struct line start, end;
88 size_t utflen, rlen;
89 int i;
90
91 start.data = line->data;
92 start.len = line->len;
93 for (i = 1; i < kd->start_column; i++)
94 skipcolumn(&start, 1);
95 if (kd->flags & MOD_STARTB)
96 skipblank(&start);
97 for (utflen = 0; start.len > 1 && utflen < kd->start_char - 1;) {
98 rlen = chartorune(&r, start.data);
99 start.data += rlen;
100 start.len -= rlen;
101 utflen++;
102 }
103
104 end.data = line->data;
105 end.len = line->len;
106 if (kd->end_column) {
107 for (i = 1; i < kd->end_column; i++)
108 skipcolumn(&end, 1);
109 if (kd->flags & MOD_ENDB)
110 skipblank(&end);
111 if (kd->end_char) {
112 for (utflen = 0; end.len > 1 && utflen < kd->end_char;) {
113 rlen = chartorune(&r, end.data);
114 end.data += rlen;
115 end.len -= rlen;
116 utflen++;
117 }
118 } else {
119 skipcolumn(&end, 0);
120 }
121 } else {
122 end.data += end.len - 1;
123 end.len = 1;
124 }
125 col->line.len = MAX(0, end.data - start.data);
126 if (!(col->line.data) || col->cap < col->line.len + 1) {
127 free(col->line.data);
128 col->line.data = emalloc(col->line.len + 1);
129 col->cap = col->line.len + 1;
130 }
131 memcpy(col->line.data, start.data, col->line.len);
132 col->line.data[col->line.len] = '\0';
133 }
134
135 static int
136 skipmodcmp(struct line *a, struct line *b, int flags)
137 {
138 Rune r1, r2;
139 size_t offa = 0, offb = 0;
140
141 do {
142 offa += chartorune(&r1, a->data + offa);
143 offb += chartorune(&r2, b->data + offb);
144
145 if (flags & MOD_D && flags & MOD_I) {
146 while (offa < a->len && ((!isblankrune(r1) &&
147 !isalnumrune(r1)) || (!isprintrune(r1))))
148 offa += chartorune(&r1, a->data + offa);
149 while (offb < b->len && ((!isblankrune(r2) &&
150 !isalnumrune(r2)) || (!isprintrune(r2))))
151 offb += chartorune(&r2, b->data + offb);
152 }
153 else if (flags & MOD_D) {
154 while (offa < a->len && !isblankrune(r1) &&
155 !isalnumrune(r1))
156 offa += chartorune(&r1, a->data + offa);
157 while (offb < b->len && !isblankrune(r2) &&
158 !isalnumrune(r2))
159 offb += chartorune(&r2, b->data + offb);
160 }
161 else if (flags & MOD_I) {
162 while (offa < a->len && !isprintrune(r1))
163 offa += chartorune(&r1, a->data + offa);
164 while (offb < b->len && !isprintrune(r2))
165 offb += chartorune(&r2, b->data + offb);
166 }
167 if (flags & MOD_F) {
168 r1 = toupperrune(r1);
169 r2 = toupperrune(r2);
170 }
171 } while (r1 && r1 == r2);
172
173 return r1 - r2;
174 }
175
176 static int
177 slinecmp(struct line *a, struct line *b)
178 {
179 int res = 0;
180 double x, y;
181 struct keydef *kd;
182
183 TAILQ_FOREACH(kd, &kdhead, entry) {
184 columns(a, kd, &col1);
185 columns(b, kd, &col2);
186
187 /* if -u is given, don't use default key definition
188 * unless it is the only one */
189 if (uflag && kd == TAILQ_LAST(&kdhead, kdhead) &&
190 TAILQ_LAST(&kdhead, kdhead) != TAILQ_FIRST(&kdhead)) {
191 res = 0;
192 } else if (kd->flags & MOD_N) {
193 x = strtod(col1.line.data, NULL);
194 y = strtod(col2.line.data, NULL);
195 res = (x < y) ? -1 : (x > y);
196 } else if (kd->flags & (MOD_D | MOD_F | MOD_I)) {
197 res = skipmodcmp(&col1.line, &col2.line, kd->flags);
198 } else {
199 res = linecmp(&col1.line, &col2.line);
200 }
201
202 if (kd->flags & MOD_R)
203 res = -res;
204 if (res)
205 break;
206 }
207
208 return res;
209 }
210
211 static int
212 check(FILE *fp, const char *fname)
213 {
214 static struct line prev, cur, tmp;
215 static size_t prevsize, cursize, tmpsize;
216 ssize_t len;
217
218 if (!prev.data) {
219 if ((len = getline(&prev.data, &prevsize, fp)) < 0)
220 eprintf("getline:");
221 prev.len = len;
222 }
223 while ((len = getline(&cur.data, &cursize, fp)) > 0) {
224 cur.len = len;
225 if (uflag > slinecmp(&cur, &prev)) {
226 if (!Cflag) {
227 weprintf("disorder %s: ", fname);
228 fwrite(cur.data, 1, cur.len, stderr);
229 }
230 return 1;
231 }
232 tmp = cur;
233 tmpsize = cursize;
234 cur = prev;
235 cursize = prevsize;
236 prev = tmp;
237 prevsize = tmpsize;
238 }
239
240 return 0;
241 }
242
243 static int
244 parse_flags(char **s, int *flags, int bflag)
245 {
246 while (isalpha((int)**s)) {
247 switch (*((*s)++)) {
248 case 'b':
249 *flags |= bflag;
250 break;
251 case 'd':
252 *flags |= MOD_D;
253 break;
254 case 'f':
255 *flags |= MOD_F;
256 break;
257 case 'i':
258 *flags |= MOD_I;
259 break;
260 case 'n':
261 *flags |= MOD_N;
262 break;
263 case 'r':
264 *flags |= MOD_R;
265 break;
266 default:
267 return -1;
268 }
269 }
270
271 return 0;
272 }
273
274 static void
275 addkeydef(char *kdstr, int flags)
276 {
277 struct keydef *kd;
278
279 kd = enmalloc(2, sizeof(*kd));
280
281 /* parse key definition kdstr with format
282 * start_column[.start_char][flags][,end_column[.end_char][flags]]
283 */
284 kd->start_column = 1;
285 kd->start_char = 1;
286 kd->end_column = 0; /* 0 means end of line */
287 kd->end_char = 0; /* 0 means end of column */
288 kd->flags = flags;
289
290 if ((kd->start_column = strtol(kdstr, &kdstr, 10)) < 1)
291 enprintf(2, "invalid start column in key definition\n");
292
293 if (*kdstr == '.') {
294 if ((kd->start_char = strtol(kdstr + 1, &kdstr, 10)) < 1)
295 enprintf(2, "invalid start character in key "
296 "definition\n");
297 }
298 if (parse_flags(&kdstr, &kd->flags, MOD_STARTB) < 0)
299 enprintf(2, "invalid start flags in key definition\n");
300
301 if (*kdstr == ',') {
302 if ((kd->end_column = strtol(kdstr + 1, &kdstr, 10)) < 0)
303 enprintf(2, "invalid end column in key definition\n");
304 if (*kdstr == '.') {
305 if ((kd->end_char = strtol(kdstr + 1, &kdstr, 10)) < 0)
306 enprintf(2, "invalid end character in key "
307 "definition\n");
308 }
309 if (parse_flags(&kdstr, &kd->flags, MOD_ENDB) < 0)
310 enprintf(2, "invalid end flags in key definition\n");
311 }
312
313 if (*kdstr != '\0')
314 enprintf(2, "invalid key definition\n");
315
316 TAILQ_INSERT_TAIL(&kdhead, kd, entry);
317 }
318
319 static void
320 usage(void)
321 {
322 enprintf(2, "usage: %s [-Cbcdfimnru] [-o outfile] [-t delim] "
323 "[-k def]... [file ...]\n", argv0);
324 }
325
326 int
327 main(int argc, char *argv[])
328 {
329 FILE *fp, *ofp = stdout;
330 struct linebuf linebuf = EMPTY_LINEBUF;
331 size_t i;
332 int global_flags = 0, ret = 0;
333 char *outfile = NULL;
334
335 ARGBEGIN {
336 case 'C':
337 Cflag = 1;
338 break;
339 case 'b':
340 global_flags |= MOD_STARTB | MOD_ENDB;
341 break;
342 case 'c':
343 cflag = 1;
344 break;
345 case 'd':
346 global_flags |= MOD_D;
347 break;
348 case 'f':
349 global_flags |= MOD_F;
350 break;
351 case 'i':
352 global_flags |= MOD_I;
353 break;
354 case 'k':
355 addkeydef(EARGF(usage()), global_flags);
356 break;
357 case 'm':
358 /* more or less for free, but for performance-reasons,
359 * we should keep this flag in mind and maybe some later
360 * day implement it properly so we don't run out of memory
361 * while merging large sorted files.
362 */
363 break;
364 case 'n':
365 global_flags |= MOD_N;
366 break;
367 case 'o':
368 outfile = EARGF(usage());
369 break;
370 case 'r':
371 global_flags |= MOD_R;
372 break;
373 case 't':
374 fieldsep = EARGF(usage());
375 if (!*fieldsep)
376 eprintf("empty delimiter\n");
377 fieldseplen = unescape(fieldsep);
378 break;
379 case 'u':
380 uflag = 1;
381 break;
382 default:
383 usage();
384 } ARGEND
385
386 /* -b shall only apply to custom key definitions */
387 if (TAILQ_EMPTY(&kdhead) && global_flags)
388 addkeydef("1", global_flags & ~(MOD_STARTB | MOD_ENDB));
389 if (TAILQ_EMPTY(&kdhead) || (!Cflag && !cflag))
390 addkeydef("1", global_flags & MOD_R);
391
392 if (!argc) {
393 if (Cflag || cflag) {
394 if (check(stdin, "<stdin>") && !ret)
395 ret = 1;
396 } else {
397 getlines(stdin, &linebuf);
398 }
399 } else for (; *argv; argc--, argv++) {
400 if (!strcmp(*argv, "-")) {
401 *argv = "<stdin>";
402 fp = stdin;
403 } else if (!(fp = fopen(*argv, "r"))) {
404 enprintf(2, "fopen %s:", *argv);
405 continue;
406 }
407 if (Cflag || cflag) {
408 if (check(fp, *argv) && !ret)
409 ret = 1;
410 } else {
411 getlines(fp, &linebuf);
412 }
413 if (fp != stdin && fshut(fp, *argv))
414 ret = 2;
415 }
416
417 if (!Cflag && !cflag) {
418 if (outfile && !(ofp = fopen(outfile, "w")))
419 eprintf("fopen %s:", outfile);
420
421 qsort(linebuf.lines, linebuf.nlines, sizeof(*linebuf.lines),
422 (int (*)(const void *, const void *))slinecmp);
423
424 for (i = 0; i < linebuf.nlines; i++) {
425 if (!uflag || i == 0 ||
426 slinecmp(&linebuf.lines[i], &linebuf.lines[i - 1])) {
427 fwrite(linebuf.lines[i].data, 1,
428 linebuf.lines[i].len, ofp);
429 }
430 }
431 }
432
433 if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") |
434 fshut(stderr, "<stderr>"))
435 ret = 2;
436
437 return ret;
438 }