tr.c - sbase - suckless unix tools
 (HTM) git clone git://git.suckless.org/sbase
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       tr.c (6191B)
       ---
            1 /* See LICENSE file for copyright and license details. */
            2 #include <stdlib.h>
            3 
            4 #include "utf.h"
            5 #include "util.h"
            6 
/* Command-line option flags, set while parsing arguments in main(). */
static int cflag = 0; /* -c / -C: operate on the complement of set1 */
static int dflag = 0; /* -d: drop matching runes instead of translating */
static int sflag = 0; /* -s: squeeze runs of a repeated rune */
           10 
/*
 * One element of a parsed set: the inclusive rune interval [start, end].
 * quant multiplies the number of positions the interval occupies in the
 * set (see rangelen()); it is 1 for literals and plain ranges and the
 * repeat count for the [c*n] construct.
 */
struct range {
        Rune   start;
        Rune   end;
        size_t quant;
};
           16 
/*
 * Table of the character class names accepted inside [:name:], each
 * paired with the rune predicate that decides membership. makeset()
 * searches this table when it encounters a class expression.
 */
static struct {
        char    *name;
        int    (*check)(Rune);
} classes[] = {
        { "alnum",  isalnumrune  },
        { "alpha",  isalpharune  },
        { "blank",  isblankrune  },
        { "cntrl",  iscntrlrune  },
        { "digit",  isdigitrune  },
        { "graph",  isgraphrune  },
        { "lower",  islowerrune  },
        { "print",  isprintrune  },
        { "punct",  ispunctrune  },
        { "space",  isspacerune  },
        { "upper",  isupperrune  },
        { "xdigit", isxdigitrune },
};
           34 
/*
 * Parsed form of the two operands, filled in by makeset() from main().
 * A set is described either by an array of ranges (setN / setNranges)
 * or, when the operand was a class expression, by a predicate
 * (setNcheck) with setNranges left at 0.
 */
static struct range *set1        = NULL;
static size_t set1ranges         = 0;
static int    (*set1check)(Rune) = NULL;
static struct range *set2        = NULL;
static size_t set2ranges         = 0;
static int    (*set2check)(Rune) = NULL;
           41 
           42 static size_t
           43 rangelen(struct range r)
           44 {
           45         return (r.end - r.start + 1) * r.quant;
           46 }
           47 
           48 static size_t
           49 setlen(struct range *set, size_t setranges)
           50 {
           51         size_t len = 0, i;
           52 
           53         for (i = 0; i < setranges; i++)
           54                 len += rangelen(set[i]);
           55 
           56         return len;
           57 }
           58 
           59 static int
           60 rstrmatch(Rune *r, char *s, size_t n)
           61 {
           62         size_t i;
           63 
           64         for (i = 0; i < n; i++)
           65                 if (r[i] != s[i])
           66                         return 0;
           67         return 1;
           68 }
           69 
           70 static size_t
           71 makeset(char *str, struct range **set, int (**check)(Rune))
           72 {
           73         Rune  *rstr;
           74         size_t len, i, j, m, n;
           75         size_t q, setranges = 0;
           76         int    factor, base;
           77 
           78         /* rstr defines at most len ranges */
           79         unescape(str);
           80         rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr));
           81         len = utftorunestr(str, rstr);
           82         *set = ereallocarray(NULL, len, sizeof(**set));
           83 
           84         for (i = 0; i < len; i++) {
           85                 if (rstr[i] == '[') {
           86                         j = i;
           87 nextbrack:
           88                         if (j >= len)
           89                                 goto literal;
           90                         for (m = j; m < len; m++)
           91                                 if (rstr[m] == ']') {
           92                                         j = m;
           93                                         break;
           94                                 }
           95                         if (j == i)
           96                                 goto literal;
           97 
           98                         /* CLASSES [=EQUIV=] (skip) */
           99                         if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
          100                                 if (j - i != 4)
          101                                         goto literal;
          102                                 (*set)[setranges].start = rstr[i + 2];
          103                                 (*set)[setranges].end   = rstr[i + 2];
          104                                 (*set)[setranges].quant = 1;
          105                                 setranges++;
          106                                 i = j;
          107                                 continue;
          108                         }
          109 
          110                         /* CLASSES [:CLASS:] */
          111                         if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
          112                                 for (n = 0; n < LEN(classes); n++) {
          113                                         if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
          114                                                 *check = classes[n].check;
          115                                                 return 0;
          116                                         }
          117                                 }
          118                                 eprintf("Invalid character class.\n");
          119                         }
          120 
          121                         /* REPEAT  [_*n] (only allowed in set2) */
          122                         if (j - i > 2 && rstr[i + 2] == '*') {
          123                                 /* check if right side of '*' is a number */
          124                                 q = 0;
          125                                 factor = 1;
          126                                 base = (rstr[i + 3] == '0') ? 8 : 10;
          127                                 for (n = j - 1; n > i + 2; n--) {
          128                                         if (rstr[n] < '0' || rstr[n] > '9') {
          129                                                 n = 0;
          130                                                 break;
          131                                         }
          132                                         q += (rstr[n] - '0') * factor;
          133                                         factor *= base;
          134                                 }
          135                                 if (n == 0) {
          136                                         j = m + 1;
          137                                         goto nextbrack;
          138                                 }
          139                                 (*set)[setranges].start = rstr[i + 1];
          140                                 (*set)[setranges].end   = rstr[i + 1];
          141                                 (*set)[setranges].quant = q ? q : setlen(set1, MAX(set1ranges, 1));
          142                                 setranges++;
          143                                 i = j;
          144                                 continue;
          145                         }
          146 
          147                         j = m + 1;
          148                         goto nextbrack;
          149                 }
          150 literal:
          151                 /* RANGES [_-__-_], _-__-_ */
          152                 /* LITERALS _______ */
          153                 (*set)[setranges].start = rstr[i];
          154 
          155                 if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
          156                         i += 2;
          157                 (*set)[setranges].end = rstr[i];
          158                 (*set)[setranges].quant = 1;
          159                 setranges++;
          160         }
          161 
          162         free(rstr);
          163         return setranges;
          164 }
          165 
/*
 * Report the command synopsis through eprintf(), using the program name
 * stored in argv0. Callers in main() do not expect to continue after this
 * call (presumably eprintf() terminates the program -- confirm in util.h).
 */
static void
usage(void)
{
        eprintf("usage: %s [-cCds] set1 [set2]\n", argv0);
}
          171 
          172 int
          173 main(int argc, char *argv[])
          174 {
          175         Rune r, lastrune = 0;
          176         size_t off1, off2, i, m;
          177         int ret = 0;
          178 
          179         ARGBEGIN {
          180         case 'c':
          181         case 'C':
          182                 cflag = 1;
          183                 break;
          184         case 'd':
          185                 dflag = 1;
          186                 break;
          187         case 's':
          188                 sflag = 1;
          189                 break;
          190         default:
          191                 usage();
          192         } ARGEND
          193 
          194         if (!argc || argc > 2 || (dflag == sflag && argc != 2) ||
          195             (dflag && argc != 1))
          196                 usage();
          197 
          198         set1ranges = makeset(argv[0], &set1, &set1check);
          199         if (argc == 2) {
          200                 set2ranges = makeset(argv[1], &set2, &set2check);
          201                 /* sanity checks as we are translating */
          202                 if (!set2ranges && !set2check)
          203                         eprintf("cannot map to an empty set.\n");
          204                 if (set2check && set2check != islowerrune &&
          205                     set2check != isupperrune) {
          206                         eprintf("can only map to 'lower' and 'upper' class.\n");
          207                 }
          208         }
          209 read:
          210         if (!efgetrune(&r, stdin, "<stdin>")) {
          211                 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
          212                 return ret;
          213         }
          214         if (argc == 1 && sflag)
          215                 goto write;
          216         for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]), i++) {
          217                 if (set1[i].start <= r && r <= set1[i].end) {
          218                         if (dflag) {
          219                                 if (cflag)
          220                                         goto write;
          221                                 else
          222                                         goto read;
          223                         }
          224                         if (cflag)
          225                                 goto write;
          226 
          227                         /* map r to set2 */
          228                         if (set2check) {
          229                                 if (set2check == islowerrune)
          230                                         r = tolowerrune(r);
          231                                 else
          232                                         r = toupperrune(r);
          233                         } else {
          234                                 off1 += r - set1[i].start;
          235                                 if (off1 > setlen(set2, set2ranges) - 1) {
          236                                         r = set2[set2ranges - 1].end;
          237                                         goto write;
          238                                 }
          239                                 for (m = 0, off2 = 0; m < set2ranges; m++) {
          240                                         if (off2 + rangelen(set2[m]) > off1) {
          241                                                 m++;
          242                                                 break;
          243                                         }
          244                                         off2 += rangelen(set2[m]);
          245                                 }
          246                                 m--;
          247                                 r = set2[m].start + (off1 - off2) / set2[m].quant;
          248                         }
          249                         goto write;
          250                 }
          251         }
          252         if (set1check && set1check(r)) {
          253                 if (cflag)
          254                         goto write;
          255                 if (dflag)
          256                         goto read;
          257                 if (set2check) {
          258                         if (set2check == islowerrune)
          259                                 r = tolowerrune(r);
          260                         else
          261                                 r = toupperrune(r);
          262                 } else {
          263                         r = set2[set2ranges - 1].end;
          264                 }
          265                 goto write;
          266         }
          267         if (!dflag && cflag) {
          268                 if (set2check) {
          269                         if (set2check == islowerrune)
          270                                 r = tolowerrune(r);
          271                         else
          272                                 r = toupperrune(r);
          273                 } else {
          274                         r = set2[set2ranges - 1].end;
          275                 }
          276                 goto write;
          277         }
          278         if (dflag && cflag)
          279                 goto read;
          280 write:
          281         if (argc == 1 && sflag && r == lastrune) {
          282                 if (set1check && set1check(r))
          283                         goto read;
          284                 for (i = 0; i < set1ranges; i++) {
          285                         if (set1[i].start <= r && r <= set1[i].end)
          286                                 goto read;
          287                 }
          288         }
          289         if (argc == 2 && sflag && r == lastrune) {
          290                 if (set2check && set2check(r))
          291                         goto read;
          292                 for (i = 0; i < set2ranges; i++) {
          293                         if (set2[i].start <= r && r <= set2[i].end)
          294                                 goto read;
          295                 }
          296         }
          297         efputrune(&r, stdout, "<stdout>");
          298         lastrune = r;
          299         goto read;
          300 }