join.c - sbase - suckless unix tools
(HTM) git clone git://git.suckless.org/sbase
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
join.c (9795B)
---
1 /* See LICENSE file for copyright and license details. */
2 #include <ctype.h>
3 #include <stdint.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <string.h>
7
8 #include "text.h"
9 #include "utf.h"
10 #include "util.h"
11
/* Sizing policy shared by the growable arrays in this file. */
enum {
	INIT = 1, /* element count of a freshly allocated array */
	GROW = 2, /* factor applied to the capacity when an array is full */
};

/* Values accepted by addtospan() for its reset argument. */
enum {
	EXPAND = 0, /* append the new line to the span */
	RESET = 1,  /* empty the span before storing the new line */
};

/* Returned by jlinecmp() when neither line has its join field. */
enum { FIELD_ERROR = -2, };
23
/*
 * One field of an input line.  The bytes live inside the line's own
 * buffer, so the extent is carried by len rather than by a terminator.
 */
struct field {
	char *s;    /* first byte of the field */
	size_t len; /* number of bytes in the field */
};
28
29 struct jline {
30 struct line text;
31 size_t nf;
32 size_t maxf;
33 struct field *fields;
34 };
35
/* One element of the -o output list. */
struct spec {
	size_t fileno; /* 1 or 2 for an input file, 0 for the join field */
	size_t fldno;  /* zero-based index of the field within that file */
};
40
/* Growable list of the output specifications given with -o. */
struct outlist {
	size_t ns;           /* number of entries used in specs */
	size_t maxs;         /* number of entries allocated in specs */
	struct spec **specs; /* array of individually allocated specs */
};
46
/* Growable run of consecutive lines read from one input file. */
struct span {
	size_t nl;            /* number of entries used in lines */
	size_t maxl;          /* number of entries allocated in lines */
	struct jline **lines; /* array of individually allocated lines */
};
52
53 static char *sep = NULL;
54 static char *replace = NULL;
55 static const char defaultofs = ' ';
56 static const int jfield = 1; /* POSIX default join field */
57 static int unpairsa = 0, unpairsb = 0;
58 static int oflag = 0;
59 static int pairs = 1;
60 static size_t seplen;
61 static struct outlist output;
62
63 static void
64 usage(void)
65 {
66 eprintf("usage: %s [-1 field] [-2 field] [-o list] [-e string] "
67 "[-a | -v fileno] [-t delim] file1 file2\n", argv0);
68 }
69
70 static void
71 prfield(struct field *fp)
72 {
73 if (fwrite(fp->s, 1, fp->len, stdout) != fp->len)
74 eprintf("fwrite:");
75 }
76
77 static void
78 prsep(void)
79 {
80 if (sep)
81 fwrite(sep, 1, seplen, stdout);
82 else
83 putchar(defaultofs);
84 }
85
86 static void
87 swaplines(struct jline *la, struct jline *lb)
88 {
89 struct jline tmp;
90
91 tmp = *la;
92 *la = *lb;
93 *lb = tmp;
94 }
95
96 static void
97 prjoin(struct jline *la, struct jline *lb, size_t jfa, size_t jfb)
98 {
99 struct spec *sp;
100 struct field *joinfield;
101 size_t i;
102
103 if (jfa >= la->nf || jfb >= lb->nf)
104 return;
105
106 joinfield = &la->fields[jfa];
107
108 if (oflag) {
109 for (i = 0; i < output.ns; i++) {
110 sp = output.specs[i];
111
112 if (sp->fileno == 1) {
113 if (sp->fldno < la->nf)
114 prfield(&la->fields[sp->fldno]);
115 else if (replace)
116 fputs(replace, stdout);
117 } else if (sp->fileno == 2) {
118 if (sp->fldno < lb->nf)
119 prfield(&lb->fields[sp->fldno]);
120 else if (replace)
121 fputs(replace, stdout);
122 } else if (sp->fileno == 0) {
123 prfield(joinfield);
124 }
125
126 if (i < output.ns - 1)
127 prsep();
128 }
129 } else {
130 prfield(joinfield);
131 prsep();
132
133 for (i = 0; i < la->nf; i++) {
134 if (i != jfa) {
135 prfield(&la->fields[i]);
136 prsep();
137 }
138 }
139 for (i = 0; i < lb->nf; i++) {
140 if (i != jfb) {
141 prfield(&lb->fields[i]);
142 if (i < lb->nf - 1)
143 prsep();
144 }
145 }
146 }
147 putchar('\n');
148 }
149
150 static void
151 prline(struct jline *lp)
152 {
153 if (fwrite(lp->text.data, 1, lp->text.len, stdout) != lp->text.len)
154 eprintf("fwrite:");
155 putchar('\n');
156 }
157
158 static int
159 jlinecmp(struct jline *la, struct jline *lb, size_t jfa, size_t jfb)
160 {
161 int status;
162
163 /* return FIELD_ERROR if both lines are short */
164 if (jfa >= la->nf) {
165 status = (jfb >= lb->nf) ? FIELD_ERROR : -1;
166 } else if (jfb >= lb->nf) {
167 status = 1;
168 } else {
169 status = memcmp(la->fields[jfa].s, lb->fields[jfb].s,
170 MAX(la->fields[jfa].len, lb->fields[jfb].len));
171 LIMIT(status, -1, 1);
172 }
173
174 return status;
175 }
176
177 static void
178 addfield(struct jline *lp, char *sp, size_t len)
179 {
180 if (lp->nf >= lp->maxf) {
181 lp->fields = ereallocarray(lp->fields, (GROW * lp->maxf),
182 sizeof(struct field));
183 lp->maxf *= GROW;
184 }
185 lp->fields[lp->nf].s = sp;
186 lp->fields[lp->nf].len = len;
187 lp->nf++;
188 }
189
190 static void
191 prspanjoin(struct span *spa, struct span *spb, size_t jfa, size_t jfb)
192 {
193 size_t i, j;
194
195 for (i = 0; i < (spa->nl - 1); i++)
196 for (j = 0; j < (spb->nl - 1); j++)
197 prjoin(spa->lines[i], spb->lines[j], jfa, jfb);
198 }
199
200 static struct jline *
201 makeline(char *s, size_t len)
202 {
203 struct jline *lp;
204 char *tmp;
205 size_t i, end;
206
207 if (s[len - 1] == '\n')
208 s[--len] = '\0';
209
210 lp = ereallocarray(NULL, INIT, sizeof(struct jline));
211 lp->text.data = s;
212 lp->text.len = len;
213 lp->fields = ereallocarray(NULL, INIT, sizeof(struct field));
214 lp->nf = 0;
215 lp->maxf = INIT;
216
217 for (i = 0; i < lp->text.len && isblank(lp->text.data[i]); i++)
218 ;
219 while (i < lp->text.len) {
220 if (sep) {
221 if ((lp->text.len - i) < seplen ||
222 !(tmp = memmem(lp->text.data + i,
223 lp->text.len - i, sep, seplen))) {
224 goto eol;
225 }
226 end = tmp - lp->text.data;
227 addfield(lp, lp->text.data + i, end - i);
228 i = end + seplen;
229 } else {
230 for (end = i; !(isblank(lp->text.data[end])); end++) {
231 if (end + 1 == lp->text.len)
232 goto eol;
233 }
234 addfield(lp, lp->text.data + i, end - i);
235 for (i = end; isblank(lp->text.data[i]); i++)
236 ;
237 }
238 }
239 eol:
240 addfield(lp, lp->text.data + i, lp->text.len - i);
241
242 return lp;
243 }
244
245 static int
246 addtospan(struct span *sp, FILE *fp, int reset)
247 {
248 char *newl = NULL;
249 ssize_t len;
250 size_t size = 0;
251
252 if ((len = getline(&newl, &size, fp)) < 0) {
253 if (ferror(fp))
254 eprintf("getline:");
255 else
256 return 0;
257 }
258
259 if (reset)
260 sp->nl = 0;
261
262 if (sp->nl >= sp->maxl) {
263 sp->lines = ereallocarray(sp->lines, (GROW * sp->maxl),
264 sizeof(struct jline *));
265 sp->maxl *= GROW;
266 }
267
268 sp->lines[sp->nl] = makeline(newl, len);
269 sp->nl++;
270 return 1;
271 }
272
273 static void
274 initspan(struct span *sp)
275 {
276 sp->nl = 0;
277 sp->maxl = INIT;
278 sp->lines = ereallocarray(NULL, INIT, sizeof(struct jline *));
279 }
280
281 static void
282 freespan(struct span *sp)
283 {
284 size_t i;
285
286 for (i = 0; i < sp->nl; i++) {
287 free(sp->lines[i]->fields);
288 free(sp->lines[i]->text.data);
289 }
290 free(sp->lines);
291 }
292
293 static void
294 initolist(struct outlist *olp)
295 {
296 olp->ns = 0;
297 olp->maxs = 1;
298 olp->specs = ereallocarray(NULL, INIT, sizeof(struct spec *));
299 }
300
301 static void
302 addspec(struct outlist *olp, struct spec *sp)
303 {
304 if (olp->ns >= olp->maxs) {
305 olp->specs = ereallocarray(olp->specs, (GROW * olp->maxs),
306 sizeof(struct spec *));
307 olp->maxs *= GROW;
308 }
309 olp->specs[olp->ns] = sp;
310 olp->ns++;
311 }
312
313 static struct spec *
314 makespec(char *s)
315 {
316 struct spec *sp;
317 int fileno;
318 size_t fldno;
319
320 if (!strcmp(s, "0")) { /* join field must be 0 and nothing else */
321 fileno = 0;
322 fldno = 0;
323 } else if ((s[0] == '1' || s[0] == '2') && s[1] == '.') {
324 fileno = s[0] - '0';
325 fldno = estrtonum(&s[2], 1, MIN(LLONG_MAX, SIZE_MAX)) - 1;
326 } else {
327 eprintf("%s: invalid format\n", s);
328 }
329
330 sp = ereallocarray(NULL, INIT, sizeof(struct spec));
331 sp->fileno = fileno;
332 sp->fldno = fldno;
333 return sp;
334 }
335
/*
 * Split the -o argument at commas, spaces and tabs and append one spec
 * per element to the output list.  The string is modified in place:
 * each separator found is overwritten with a NUL byte.
 */
static void
makeolist(struct outlist *olp, char *s)
{
	char *item, *next;

	for (item = s; item; item = next) {
		next = strpbrk(item, ", \t");
		if (next)
			*next++ = '\0';
		addspec(olp, makespec(item));
	}
}
350
351 static void
352 freespecs(struct outlist *olp)
353 {
354 size_t i;
355
356 for (i = 0; i < olp->ns; i++)
357 free(olp->specs[i]);
358 }
359
360 static void
361 join(FILE *fa, FILE *fb, size_t jfa, size_t jfb)
362 {
363 struct span spa, spb;
364 int cmp, eofa, eofb;
365
366 initspan(&spa);
367 initspan(&spb);
368 cmp = eofa = eofb = 0;
369
370 addtospan(&spa, fa, RESET);
371 addtospan(&spb, fb, RESET);
372
373 while (spa.nl && spb.nl) {
374 if ((cmp = jlinecmp(spa.lines[0], spb.lines[0], jfa, jfb)) < 0) {
375 if (unpairsa)
376 prline(spa.lines[0]);
377 if (!addtospan(&spa, fa, RESET)) {
378 if (unpairsb) { /* a is EOF'd; print the rest of b */
379 do
380 prline(spb.lines[0]);
381 while (addtospan(&spb, fb, RESET));
382 }
383 eofa = eofb = 1;
384 } else {
385 continue;
386 }
387 } else if (cmp > 0) {
388 if (unpairsb)
389 prline(spb.lines[0]);
390 if (!addtospan(&spb, fb, RESET)) {
391 if (unpairsa) { /* b is EOF'd; print the rest of a */
392 do
393 prline(spa.lines[0]);
394 while (addtospan(&spa, fa, RESET));
395 }
396 eofa = eofb = 1;
397 } else {
398 continue;
399 }
400 } else if (cmp == 0) {
401 /* read all consecutive matching lines from a */
402 do {
403 if (!addtospan(&spa, fa, EXPAND)) {
404 eofa = 1;
405 spa.nl++;
406 break;
407 }
408 } while (jlinecmp(spa.lines[spa.nl-1], spb.lines[0], jfa, jfb) == 0);
409
410 /* read all consecutive matching lines from b */
411 do {
412 if (!addtospan(&spb, fb, EXPAND)) {
413 eofb = 1;
414 spb.nl++;
415 break;
416 }
417 } while (jlinecmp(spa.lines[0], spb.lines[spb.nl-1], jfa, jfb) == 0);
418
419 if (pairs)
420 prspanjoin(&spa, &spb, jfa, jfb);
421
422 } else { /* FIELD_ERROR: both lines lacked join fields */
423 if (unpairsa)
424 prline(spa.lines[0]);
425 if (unpairsb)
426 prline(spb.lines[0]);
427 eofa = addtospan(&spa, fa, RESET) ? 0 : 1;
428 eofb = addtospan(&spb, fb, RESET) ? 0 : 1;
429 if (!eofa && !eofb)
430 continue;
431 }
432
433 if (eofa) {
434 spa.nl = 0;
435 } else {
436 swaplines(spa.lines[0], spa.lines[spa.nl - 1]); /* ugly */
437 spa.nl = 1;
438 }
439
440 if (eofb) {
441 spb.nl = 0;
442 } else {
443 swaplines(spb.lines[0], spb.lines[spb.nl - 1]); /* ugly */
444 spb.nl = 1;
445 }
446 }
447 freespan(&spa);
448 freespan(&spb);
449 }
450
451
452 int
453 main(int argc, char *argv[])
454 {
455 size_t jf[2] = { jfield, jfield, };
456 FILE *fp[2];
457 int ret = 0, n;
458 char *fno;
459
460 ARGBEGIN {
461 case '1':
462 jf[0] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE_MAX));
463 break;
464 case '2':
465 jf[1] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE_MAX));
466 break;
467 case 'a':
468 fno = EARGF(usage());
469 if (strcmp(fno, "1") == 0)
470 unpairsa = 1;
471 else if (strcmp(fno, "2") == 0)
472 unpairsb = 1;
473 else
474 usage();
475 break;
476 case 'e':
477 replace = EARGF(usage());
478 break;
479 case 'o':
480 oflag = 1;
481 initolist(&output);
482 makeolist(&output, EARGF(usage()));
483 break;
484 case 't':
485 sep = EARGF(usage());
486 break;
487 case 'v':
488 pairs = 0;
489 fno = EARGF(usage());
490 if (strcmp(fno, "1") == 0)
491 unpairsa = 1;
492 else if (strcmp(fno, "2") == 0)
493 unpairsb = 1;
494 else
495 usage();
496 break;
497 default:
498 usage();
499 } ARGEND
500
501 if (sep)
502 seplen = unescape(sep);
503
504 if (argc != 2)
505 usage();
506
507 for (n = 0; n < 2; n++) {
508 if (!strcmp(argv[n], "-")) {
509 argv[n] = "<stdin>";
510 fp[n] = stdin;
511 } else if (!(fp[n] = fopen(argv[n], "r"))) {
512 eprintf("fopen %s:", argv[n]);
513 }
514 }
515
516 jf[0]--;
517 jf[1]--;
518
519 join(fp[0], fp[1], jf[0], jf[1]);
520
521 if (oflag)
522 freespecs(&output);
523
524 if (fshut(fp[0], argv[0]) | (fp[0] != fp[1] && fshut(fp[1], argv[1])) |
525 fshut(stdout, "<stdout>"))
526 ret = 2;
527
528 return ret;
529 }