join.c - sbase - suckless unix tools
 (HTM) git clone git://git.suckless.org/sbase
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       join.c (9795B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <ctype.h>
            3 #include <stdint.h>
            4 #include <stdio.h>
            5 #include <stdlib.h>
            6 #include <string.h>
            7 
            8 #include "text.h"
            9 #include "utf.h"
           10 #include "util.h"
           11 
           12 enum {
           13         INIT = 1,    /* element count used for every initial ereallocarray() allocation */
           14         GROW = 2,    /* factor by which a full array's capacity is multiplied */
           15 };
           16 
           17 enum {                /* values for the reset argument of addtospan() */
           18         EXPAND = 0,  /* append the new line after those already in the span */
           19         RESET  = 1,  /* set the span's line count to 0 before storing the new line */
           20 };
           21 
           22 enum { FIELD_ERROR = -2, };  /* jlinecmp() result when both lines lack their join field */
           23 
           24 struct field {              /* one field of an input line */
           25         char *s;            /* start of the field inside the line's text buffer */
           26         size_t len;         /* length in bytes; fields are written with fwrite(), not as C strings */
           27 };
           28 
           29 struct jline {              /* an input line together with its split fields */
           30         struct line text;   /* line buffer (data, len), newline stripped by makeline() */
           31         size_t nf;          /* number of fields in use */
           32         size_t maxf;        /* allocated capacity of fields */
           33         struct field *fields;
           34 };
           35 
           36 struct spec {               /* one element of the -o output list */
           37         size_t fileno;      /* 0 = the join field, 1 or 2 = field taken from that file */
           38         size_t fldno;       /* zero-based field index (makespec() subtracts 1) */
           39 };
           40 
           41 struct outlist {            /* growable array of output specs */
           42         size_t ns;          /* number of specs in use */
           43         size_t maxs;        /* allocated capacity of specs */
           44         struct spec **specs;
           45 };
           46 
           47 struct span {               /* growable array of lines read from one file */
           48         size_t nl;          /* number of lines currently counted in the span */
           49         size_t maxl;        /* allocated capacity of lines */
           50         struct jline **lines;
           51 };
           52 
           53 static char *sep = NULL;                /* -t separator; NULL means fields are split on blanks */
           54 static char *replace = NULL;            /* -e string printed for fields missing from a -o list */
           55 static const char defaultofs = ' ';     /* output separator used when sep is NULL */
           56 static const int jfield = 1;            /* POSIX default join field */
           57 static int unpairsa = 0, unpairsb = 0;  /* set by -a/-v 1 and -a/-v 2 respectively */
           58 static int oflag = 0;                   /* set when -o was given */
           59 static int pairs = 1;                   /* cleared by -v: do not print joined lines */
           60 static size_t seplen;                   /* value returned by unescape(sep) in main() */
           61 static struct outlist output;           /* output list built from the -o argument */
           62 
           63 static void
           64 usage(void)
           65 {
           66         eprintf("usage: %s [-1 field] [-2 field] [-o list] [-e string] "
           67                 "[-a | -v fileno] [-t delim] file1 file2\n", argv0);
           68 }
           69 
           70 static void
           71 prfield(struct field *fp)
           72 {
           73         if (fwrite(fp->s, 1, fp->len, stdout) != fp->len)
           74                 eprintf("fwrite:");
           75 }
           76 
           77 static void
           78 prsep(void)
           79 {
           80         if (sep)
           81                 fwrite(sep, 1, seplen, stdout);
           82         else
           83                 putchar(defaultofs);
           84 }
           85 
           86 static void
           87 swaplines(struct jline *la, struct jline *lb)
           88 {
           89         struct jline tmp;
           90 
           91         tmp = *la;
           92         *la = *lb;
           93         *lb = tmp;
           94 }
           95 
           96 static void
           97 prjoin(struct jline *la, struct jline *lb, size_t jfa, size_t jfb)
           98 {
           99         struct spec *sp;
          100         struct field *joinfield;
          101         size_t i;
          102 
          103         if (jfa >= la->nf || jfb >= lb->nf)
          104                 return;
          105 
          106         joinfield = &la->fields[jfa];
          107 
          108         if (oflag) {
          109                 for (i = 0; i < output.ns; i++) {
          110                         sp = output.specs[i];
          111 
          112                         if (sp->fileno == 1) {
          113                                 if (sp->fldno < la->nf)
          114                                         prfield(&la->fields[sp->fldno]);
          115                                 else if (replace)
          116                                         fputs(replace, stdout);
          117                         } else if (sp->fileno == 2) {
          118                                 if (sp->fldno < lb->nf)
          119                                         prfield(&lb->fields[sp->fldno]);
          120                                 else if (replace)
          121                                         fputs(replace, stdout);
          122                         } else if (sp->fileno == 0) {
          123                                 prfield(joinfield);
          124                         }
          125 
          126                         if (i < output.ns - 1)
          127                                 prsep();
          128                 }
          129         } else {
          130                 prfield(joinfield);
          131                 prsep();
          132 
          133                 for (i = 0; i < la->nf; i++) {
          134                         if (i != jfa) {
          135                                 prfield(&la->fields[i]);
          136                                 prsep();
          137                         }
          138                 }
          139                 for (i = 0; i < lb->nf; i++) {
          140                         if (i != jfb) {
          141                                 prfield(&lb->fields[i]);
          142                                 if (i < lb->nf - 1)
          143                                         prsep();
          144                         }
          145                 }
          146         }
          147         putchar('\n');
          148 }
          149 
          150 static void
          151 prline(struct jline *lp)
          152 {
          153         if (fwrite(lp->text.data, 1, lp->text.len, stdout) != lp->text.len)
          154                 eprintf("fwrite:");
          155         putchar('\n');
          156 }
          157 
          158 static int
          159 jlinecmp(struct jline *la, struct jline *lb, size_t jfa, size_t jfb)
          160 {
          161         int status;
          162 
          163         /* return FIELD_ERROR if both lines are short */
          164         if (jfa >= la->nf) {
          165                 status = (jfb >= lb->nf) ? FIELD_ERROR : -1;
          166         } else if (jfb >= lb->nf) {
          167                 status = 1;
          168         } else {
          169                 status = memcmp(la->fields[jfa].s, lb->fields[jfb].s,
          170                                 MAX(la->fields[jfa].len, lb->fields[jfb].len));
          171                 LIMIT(status, -1, 1);
          172         }
          173 
          174         return status;
          175 }
          176 
          177 static void
          178 addfield(struct jline *lp, char *sp, size_t len)
          179 {
          180         if (lp->nf >= lp->maxf) {
          181                 lp->fields = ereallocarray(lp->fields, (GROW * lp->maxf),
          182                         sizeof(struct field));
          183                 lp->maxf *= GROW;
          184         }
          185         lp->fields[lp->nf].s = sp;
          186         lp->fields[lp->nf].len = len;
          187         lp->nf++;
          188 }
          189 
          190 static void
          191 prspanjoin(struct span *spa, struct span *spb, size_t jfa, size_t jfb)
          192 {
          193         size_t i, j;
          194 
          195         for (i = 0; i < (spa->nl - 1); i++)
          196                 for (j = 0; j < (spb->nl - 1); j++)
          197                         prjoin(spa->lines[i], spb->lines[j], jfa, jfb);
          198 }
          199 
          200 static struct jline *
          201 makeline(char *s, size_t len)
          202 {
          203         struct jline *lp;
          204         char *tmp;
          205         size_t i, end;
          206 
          207         if (s[len - 1] == '\n')
          208                 s[--len] = '\0';
          209 
          210         lp = ereallocarray(NULL, INIT, sizeof(struct jline));
          211         lp->text.data = s;
          212         lp->text.len = len;
          213         lp->fields = ereallocarray(NULL, INIT, sizeof(struct field));
          214         lp->nf = 0;
          215         lp->maxf = INIT;
          216 
          217         for (i = 0; i < lp->text.len && isblank(lp->text.data[i]); i++)
          218                 ;
          219         while (i < lp->text.len) {
          220                 if (sep) {
          221                         if ((lp->text.len - i) < seplen ||
          222                             !(tmp = memmem(lp->text.data + i,
          223                                            lp->text.len - i, sep, seplen))) {
          224                                 goto eol;
          225                         }
          226                         end = tmp - lp->text.data;
          227                         addfield(lp, lp->text.data + i, end - i);
          228                         i = end + seplen;
          229                 } else {
          230                         for (end = i; !(isblank(lp->text.data[end])); end++) {
          231                                 if (end + 1 == lp->text.len)
          232                                         goto eol;
          233                         }
          234                         addfield(lp, lp->text.data + i, end - i);
          235                         for (i = end; isblank(lp->text.data[i]); i++)
          236                                 ;
          237                 }
          238         }
          239 eol:
          240         addfield(lp, lp->text.data + i, lp->text.len - i);
          241 
          242         return lp;
          243 }
          244 
          245 static int
          246 addtospan(struct span *sp, FILE *fp, int reset)
          247 {
          248         char *newl = NULL;
          249         ssize_t len;
          250         size_t size = 0;
          251 
          252         if ((len = getline(&newl, &size, fp)) < 0) {
          253                 if (ferror(fp))
          254                         eprintf("getline:");
          255                 else
          256                         return 0;
          257         }
          258 
          259         if (reset)
          260                 sp->nl = 0;
          261 
          262         if (sp->nl >= sp->maxl) {
          263                 sp->lines = ereallocarray(sp->lines, (GROW * sp->maxl),
          264                         sizeof(struct jline *));
          265                 sp->maxl *= GROW;
          266         }
          267 
          268         sp->lines[sp->nl] = makeline(newl, len);
          269         sp->nl++;
          270         return 1;
          271 }
          272 
          273 static void
          274 initspan(struct span *sp)
          275 {
          276         sp->nl = 0;
          277         sp->maxl = INIT;
          278         sp->lines = ereallocarray(NULL, INIT, sizeof(struct jline *));
          279 }
          280 
          281 static void
          282 freespan(struct span *sp)
          283 {
          284         size_t i;
          285 
          286         for (i = 0; i < sp->nl; i++) {
          287                 free(sp->lines[i]->fields);
          288                 free(sp->lines[i]->text.data);
          289         }
          290         free(sp->lines);
          291 }
          292 
          293 static void
          294 initolist(struct outlist *olp)
          295 {
          296         olp->ns = 0;
          297         olp->maxs = 1;
          298         olp->specs = ereallocarray(NULL, INIT, sizeof(struct spec *));
          299 }
          300 
          301 static void
          302 addspec(struct outlist *olp, struct spec *sp)
          303 {
          304         if (olp->ns >= olp->maxs) {
          305                 olp->specs = ereallocarray(olp->specs, (GROW * olp->maxs),
          306                         sizeof(struct spec *));
          307                 olp->maxs *= GROW;
          308         }
          309         olp->specs[olp->ns] = sp;
          310         olp->ns++;
          311 }
          312 
          313 static struct spec *
          314 makespec(char *s)
          315 {
          316         struct spec *sp;
          317         int fileno;
          318         size_t fldno;
          319 
          320         if (!strcmp(s, "0")) {   /* join field must be 0 and nothing else */
          321                 fileno = 0;
          322                 fldno = 0;
          323         } else if ((s[0] == '1' || s[0] == '2') && s[1] == '.') {
          324                 fileno = s[0] - '0';
          325                 fldno = estrtonum(&s[2], 1, MIN(LLONG_MAX, SIZE_MAX)) - 1;
          326         } else {
          327                 eprintf("%s: invalid format\n", s);
          328         }
          329 
          330         sp = ereallocarray(NULL, INIT, sizeof(struct spec));
          331         sp->fileno = fileno;
          332         sp->fldno = fldno;
          333         return sp;
          334 }
          335 
          /*
           * Split s in place at commas, spaces and tabs and append one spec per
           * element to olp.
           */
          static void
          makeolist(struct outlist *olp, char *s)
          {
                  char *item, *next;

                  for (item = s; item; item = next) {
                          next = strpbrk(item, ", \t");
                          if (next)
                                  *next++ = '\0';
                          addspec(olp, makespec(item));
                  }
          }
          350 
          351 static void
          352 freespecs(struct outlist *olp)
          353 {
          354         size_t i;
          355 
          356         for (i = 0; i < olp->ns; i++)
          357                 free(olp->specs[i]);
          358 }
          359 
          360 static void
          361 join(FILE *fa, FILE *fb, size_t jfa, size_t jfb)
          362 {
          363         struct span spa, spb;
          364         int cmp, eofa, eofb;
          365 
          366         initspan(&spa);
          367         initspan(&spb);
          368         cmp = eofa = eofb = 0;
          369 
          370         addtospan(&spa, fa, RESET);
          371         addtospan(&spb, fb, RESET);
          372 
          373         while (spa.nl && spb.nl) {
          374                 if ((cmp = jlinecmp(spa.lines[0], spb.lines[0], jfa, jfb)) < 0) {
          375                         if (unpairsa)
          376                                 prline(spa.lines[0]);
          377                         if (!addtospan(&spa, fa, RESET)) {
          378                                 if (unpairsb) {    /* a is EOF'd; print the rest of b */
          379                                         do
          380                                                 prline(spb.lines[0]);
          381                                         while (addtospan(&spb, fb, RESET));
          382                                 }
          383                                 eofa = eofb = 1;
          384                         } else {
          385                                 continue;
          386                         }
          387                 } else if (cmp > 0) {
          388                         if (unpairsb)
          389                                 prline(spb.lines[0]);
          390                         if (!addtospan(&spb, fb, RESET)) {
          391                                 if (unpairsa) {    /* b is EOF'd; print the rest of a */
          392                                         do
          393                                                 prline(spa.lines[0]);
          394                                         while (addtospan(&spa, fa, RESET));
          395                                 }
          396                                 eofa = eofb = 1;
          397                         } else {
          398                                 continue;
          399                         }
          400                 } else if (cmp == 0) {
          401                         /* read all consecutive matching lines from a */
          402                         do {
          403                                 if (!addtospan(&spa, fa, EXPAND)) {
          404                                         eofa = 1;
          405                                         spa.nl++;
          406                                         break;
          407                                 }
          408                         } while (jlinecmp(spa.lines[spa.nl-1], spb.lines[0], jfa, jfb) == 0);
          409 
          410                         /* read all consecutive matching lines from b */
          411                         do {
          412                                 if (!addtospan(&spb, fb, EXPAND)) {
          413                                         eofb = 1;
          414                                         spb.nl++;
          415                                         break;
          416                                 }
          417                         } while (jlinecmp(spa.lines[0], spb.lines[spb.nl-1], jfa, jfb) == 0);
          418 
          419                         if (pairs)
          420                                 prspanjoin(&spa, &spb, jfa, jfb);
          421 
          422                 } else {      /* FIELD_ERROR: both lines lacked join fields */
          423                         if (unpairsa)
          424                                 prline(spa.lines[0]);
          425                         if (unpairsb)
          426                                 prline(spb.lines[0]);
          427                         eofa = addtospan(&spa, fa, RESET) ? 0 : 1;
          428                         eofb = addtospan(&spb, fb, RESET) ? 0 : 1;
          429                         if (!eofa && !eofb)
          430                                 continue;
          431                 }
          432 
          433                 if (eofa) {
          434                         spa.nl = 0;
          435                 } else {
          436                         swaplines(spa.lines[0], spa.lines[spa.nl - 1]);   /* ugly */
          437                         spa.nl = 1;
          438                 }
          439 
          440                 if (eofb) {
          441                         spb.nl = 0;
          442                 } else {
          443                         swaplines(spb.lines[0], spb.lines[spb.nl - 1]);   /* ugly */
          444                         spb.nl = 1;
          445                 }
          446         }
          447         freespan(&spa);
          448         freespan(&spb);
          449 }
          450 
          451 
          452 int
          453 main(int argc, char *argv[])
          454 {
          455         size_t jf[2] = { jfield, jfield, };
          456         FILE *fp[2];
          457         int ret = 0, n;
          458         char *fno;
          459 
          460         ARGBEGIN {
          461         case '1':
          462                 jf[0] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE_MAX));
          463                 break;
          464         case '2':
          465                 jf[1] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE_MAX));
          466                 break;
          467         case 'a':
          468                 fno = EARGF(usage());
          469                 if (strcmp(fno, "1") == 0)
          470                         unpairsa = 1;
          471                 else if (strcmp(fno, "2") == 0)
          472                         unpairsb = 1;
          473                 else
          474                         usage();
          475                 break;
          476         case 'e':
          477                 replace = EARGF(usage());
          478                 break;
          479         case 'o':
          480                 oflag = 1;
          481                 initolist(&output);
          482                 makeolist(&output, EARGF(usage()));
          483                 break;
          484         case 't':
          485                 sep = EARGF(usage());
          486                 break;
          487         case 'v':
          488                 pairs = 0;
          489                 fno = EARGF(usage());
          490                 if (strcmp(fno, "1") == 0)
          491                         unpairsa = 1;
          492                 else if (strcmp(fno, "2") == 0)
          493                         unpairsb = 1;
          494                 else
          495                         usage();
          496                 break;
          497         default:
          498                 usage();
          499         } ARGEND
          500 
          501         if (sep)
          502                 seplen = unescape(sep);
          503 
          504         if (argc != 2)
          505                 usage();
          506 
          507         for (n = 0; n < 2; n++) {
          508                 if (!strcmp(argv[n], "-")) {
          509                         argv[n] = "<stdin>";
          510                         fp[n] = stdin;
          511                 } else if (!(fp[n] = fopen(argv[n], "r"))) {
          512                         eprintf("fopen %s:", argv[n]);
          513                 }
          514         }
          515 
          516         jf[0]--;
          517         jf[1]--;
          518 
          519         join(fp[0], fp[1], jf[0], jf[1]);
          520 
          521         if (oflag)
          522                 freespecs(&output);
          523 
          524         if (fshut(fp[0], argv[0]) | (fp[0] != fp[1] && fshut(fp[1], argv[1])) |
          525             fshut(stdout, "<stdout>"))
          526                 ret = 2;
          527 
          528         return ret;
          529 }