sort.c - 9base - revived minimalist port of Plan 9 userland to Unix
 (HTM) git clone git://git.suckless.org/9base
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       sort.c (29541B)
       ---
            1 #include        <u.h>
            2 #include        <libc.h>
            3 #include        <bio.h>
            4 
            5 /*
            6 bugs:
            7         00/ff for end of file can conflict with 00/ff characters
            8 */
            9 
           10 enum
           11 {
           12         Nline        = 500000,                /* default max number of lines saved in memory */
           13         Nmerge        = 10,                        /* max number of temporary files merged */
           14         Nfield        = 20,                        /* max number of argument fields */
           15 
           16         Bflag        = 1<<0,                        /* flags per field */
           17         B1flag        = 1<<1,
           18 
           19         Dflag        = 1<<2,
           20         Fflag        = 1<<3,
           21         Gflag        = 1<<4,
           22         Iflag        = 1<<5,
           23         Mflag        = 1<<6,
           24         Nflag        = 1<<7,
           25         Rflag        = 1<<8,
           26         Wflag        = 1<<9,
           27 
           28         NSstart        = 0,                        /* states for number to key decoding */
           29         NSsign,
           30         NSzero,
           31         NSdigit,
           32         NSpoint,
           33         NSfract,
           34         NSzerofract,
           35         NSexp,
           36         NSexpsign,
           37         NSexpdigit
           38 };
           39 
           40 typedef        struct        Line        Line;
           41 typedef        struct        Key        Key;
           42 typedef        struct        Merge        Merge;
           43 typedef        struct        Field        Field;
           44 
           45 struct        Line
           46 {
           47         Key*        key;
           48         int        llen;                /* always >= 1 */
           49         uchar        line[1];        /* always ends in '\n' */
           50 };
           51 
           52 struct        Merge
           53 {
           54         Key*        key;                /* copy of line->key so (Line*) looks like (Merge*) */
           55         Line*        line;                /* line at the head of a merged temp file */
           56         int        fd;                /* file descriptor */
           57         Biobuf        b;                /* iobuf for reading a temp file */
           58 };
           59 
           60 struct        Key
           61 {
           62         int        klen;
           63         uchar        key[1];
           64 };
           65 
           66 struct        Field
           67 {
           68         int        beg1;
           69         int        beg2;
           70         int        end1;
           71         int        end2;
           72 
           73         long        flags;
           74         uchar        mapto[256];
           75 
           76         void        (*dokey)(Key*, uchar*, uchar*, Field*);
           77 };
           78 
           79 struct args
           80 {
           81         char*        ofile;
           82         char*        tname;
           83         Rune        tabchar;
           84         char        cflag;
           85         char        uflag;
           86         char        vflag;
           87         int        nfield;
           88         int        nfile;
           89         Field        field[Nfield];
           90 
           91         Line**        linep;
           92         long        nline;                        /* number of lines in this temp file */
           93         long        lineno;                        /* overall ordinal for -s option */
           94         int        ntemp;
           95         long        mline;                        /* max lines per file */
           96 } args;
           97 
           98 extern        int        latinmap[];
           99 extern        Rune*        month[12];
          100 
          101 void        buildkey(Line*);
          102 void        doargs(int, char*[]);
          103 void        dofield(char*, int*, int*, int, int);
          104 void        dofile(Biobuf*);
          105 void        dokey_(Key*, uchar*, uchar*, Field*);
          106 void        dokey_dfi(Key*, uchar*, uchar*, Field*);
          107 void        dokey_gn(Key*, uchar*, uchar*, Field*);
          108 void        dokey_m(Key*, uchar*, uchar*, Field*);
          109 void        dokey_r(Key*, uchar*, uchar*, Field*);
          110 void        done(char*);
          111 int        kcmp(Key*, Key*);
          112 void        makemapd(Field*);
          113 void        makemapm(Field*);
          114 void        mergefiles(int, int, Biobuf*);
          115 void        mergeout(Biobuf*);
          116 void        newfield(void);
          117 Line*        newline(Biobuf*);
          118 void        nomem(void);
          119 void        notifyf(void*, char*);
          120 void        printargs(void);
          121 void        printout(Biobuf*);
          122 void        setfield(int, int);
          123 uchar*        skip(uchar*, int, int, int, int);
          124 void        sort4(void*, ulong);
          125 char*        tempfile(int);
          126 void        tempout(void);
          127 void        lineout(Biobuf*, Line*);
          128 
          129 void
          130 main(int argc, char *argv[])
          131 {
          132         int i, f;
          133         char *s;
          134         Biobuf bbuf;
          135 
          136         notify(notifyf);        /**/
          137         doargs(argc, argv);
          138         if(args.vflag)
          139                 printargs();
          140 
          141         for(i=1; i<argc; i++) {
          142                 s = argv[i];
          143                 if(s == 0)
          144                         continue;
          145                 if(strcmp(s, "-") == 0) {
          146                         Binit(&bbuf, 0, OREAD);
          147                         dofile(&bbuf);
          148                         Bterm(&bbuf);
          149                         continue;
          150                 }
          151                 f = open(s, OREAD);
          152                 if(f < 0) {
          153                         fprint(2, "sort: open %s: %r\n", s);
          154                         done("open");
          155                 }
          156                 Binit(&bbuf, f, OREAD);
          157                 dofile(&bbuf);
          158                 Bterm(&bbuf);
          159                 close(f);
          160         }
          161         if(args.nfile == 0) {
          162                 Binit(&bbuf, 0, OREAD);
          163                 dofile(&bbuf);
          164                 Bterm(&bbuf);
          165         }
          166         if(args.cflag)
          167                 done(0);
          168         if(args.vflag)
          169                 fprint(2, "=========\n");
          170 
          171         f = 1;
          172         if(args.ofile) {
          173                 f = create(args.ofile, OWRITE, 0666);
          174                 if(f < 0) {
          175                         fprint(2, "sort: create %s: %r\n", args.ofile);
          176                         done("create");
          177                 }
          178         }
          179 
          180         Binit(&bbuf, f, OWRITE);
          181         if(args.ntemp) {
          182                 tempout();
          183                 mergeout(&bbuf);
          184         } else {
          185                 printout(&bbuf);
          186         }
          187         Bterm(&bbuf);
          188         done(0);
          189 }
          190 
          191 void
          192 dofile(Biobuf *b)
          193 {
          194         Line *l, *ol;
          195         int n;
          196 
          197         if(args.cflag) {
          198                 ol = newline(b);
          199                 if(ol == 0)
          200                         return;
          201                 for(;;) {
          202                         l = newline(b);
          203                         if(l == 0)
          204                                 break;
          205                         n = kcmp(ol->key, l->key);
          206                         if(n > 0 || (n == 0 && args.uflag)) {
          207                                 fprint(2, "sort: -c file not in sort\n"); /**/
          208                                 done("order");
          209                         }
          210                         free(ol->key);
          211                         free(ol);
          212                         ol = l;
          213                 }
          214                 return;
          215         }
          216 
          217         if(args.linep == 0) {
          218                 args.linep = malloc(args.mline * sizeof(args.linep));
          219                 if(args.linep == 0)
          220                         nomem();
          221         }
          222         for(;;) {
          223                 l = newline(b);
          224                 if(l == 0)
          225                         break;
          226                 if(args.nline >= args.mline)
          227                         tempout();
          228                 args.linep[args.nline] = l;
          229                 args.nline++;
          230                 args.lineno++;
          231         }
          232 }
          233 
          234 void
          235 notifyf(void *a, char *s)
          236 {
          237         USED(a);
          238         if(strcmp(s, "interrupt") == 0)
          239                 done(0);
          240         if(strcmp(s, "hangup") == 0)
          241                 done(0);
          242         if(strcmp(s, "kill") == 0)
          243                 done(0);
          244         if(strncmp(s, "sys: write on closed pipe", 25) == 0)
          245                 done(0);
          246         noted(NDFLT);
          247 }
          248 
          249 Line*
          250 newline(Biobuf *b)
          251 {
          252         Line *l;
          253         char *p;
          254         int n, c;
          255 
          256         p = Brdline(b, '\n');
          257         n = Blinelen(b);
          258         if(p == 0) {
          259                 if(n == 0)
          260                         return 0;
          261                 l = 0;
          262                 for(n=0;;) {
          263                         if((n & 31) == 0) {
          264                                 l = realloc(l, sizeof(Line) +
          265                                         (n+31)*sizeof(l->line[0]));
          266                                 if(l == 0)
          267                                         nomem();
          268                         }
          269                         c = Bgetc(b);
          270                         if(c < 0) {
          271                                 fprint(2, "sort: newline added\n");
          272                                 c = '\n';
          273                         }
          274                         l->line[n++] = c;
          275                         if(c == '\n')
          276                                 break;
          277                 }
          278                 l->llen = n;
          279                 buildkey(l);
          280                 return l;
          281         }
          282         l = malloc(sizeof(Line) +
          283                 (n-1)*sizeof(l->line[0]));
          284         if(l == 0)
          285                 nomem();
          286         l->llen = n;
          287         memmove(l->line, p, n);
          288         buildkey(l);
          289         return l;
          290 }
          291 
          292 void
          293 lineout(Biobuf *b, Line *l)
          294 {
          295         int n, m;
          296 
          297         n = l->llen;
          298         m = Bwrite(b, l->line, n);
          299         if(n != m)
          300                 exits("write");
          301 }
          302 
          303 void
          304 tempout(void)
          305 {
          306         long n;
          307         Line **lp, *l;
          308         char *tf;
          309         int f;
          310         Biobuf tb;
          311 
          312         sort4(args.linep, args.nline);
          313         tf = tempfile(args.ntemp);
          314         args.ntemp++;
          315         f = create(tf, OWRITE, 0666);
          316         if(f < 0) {
          317                 fprint(2, "sort: create %s: %r\n", tf);
          318                 done("create");
          319         }
          320 
          321         Binit(&tb, f, OWRITE);
          322         lp = args.linep;
          323         for(n=args.nline; n>0; n--) {
          324                 l = *lp++;
          325                 lineout(&tb, l);
          326                 free(l->key);
          327                 free(l);
          328         }
          329         args.nline = 0;
          330         Bterm(&tb);
          331         close(f);
          332 }
          333 
          334 void
          335 done(char *xs)
          336 {
          337         int i;
          338 
          339         for(i=0; i<args.ntemp; i++)
          340                 remove(tempfile(i));
          341         exits(xs);
          342 }
          343 
          344 void
          345 nomem(void)
          346 {
          347         fprint(2, "sort: out of memory\n");
          348         done("mem");
          349 }
          350 
          351 char*
          352 tempfile(int n)
          353 {
          354         static char file[100];
          355         static uint pid;
          356         char *dir;
          357 
          358         dir = "/var/tmp";
          359         if(args.tname)
          360                 dir = args.tname;
          361         if(strlen(dir) >= nelem(file)-20) {
          362                 fprint(2, "temp file directory name is too long: %s\n", dir);
          363                 done("tdir");
          364         }
          365 
          366         if(pid == 0) {
          367                 pid = getpid();
          368                 if(pid == 0) {
          369                         pid = time(0);
          370                         if(pid == 0)
          371                                 pid = 1;
          372                 }
          373         }
          374 
          375         sprint(file, "%s/sort.%.4d.%.4d", dir, pid%10000, n);
          376         return file;
          377 }
          378 
          379 void
          380 mergeout(Biobuf *b)
          381 {
          382         int n, i, f;
          383         char *tf;
          384         Biobuf tb;
          385 
          386         for(i=0; i<args.ntemp; i+=n) {
          387                 n = args.ntemp - i;
          388                 if(n > Nmerge) {
          389                         tf = tempfile(args.ntemp);
          390                         args.ntemp++;
          391                         f = create(tf, OWRITE, 0666);
          392                         if(f < 0) {
          393                                 fprint(2, "sort: create %s: %r\n", tf);
          394                                 done("create");
          395                         }
          396                         Binit(&tb, f, OWRITE);
          397 
          398                         n = Nmerge;
          399                         mergefiles(i, n, &tb);
          400 
          401                         Bterm(&tb);
          402                         close(f);
          403                 } else
          404                         mergefiles(i, n, b);
          405         }
          406 }
          407 
          408 void
          409 mergefiles(int t, int n, Biobuf *b)
          410 {
          411         Merge *m, *mp, **mmp;
          412         Key *ok;
          413         Line *l;
          414         char *tf;
          415         int i, f, nn;
          416 
          417         mmp = malloc(n*sizeof(*mmp));
          418         mp = malloc(n*sizeof(*mp));
          419         if(mmp == 0 || mp == 0)
          420                 nomem();
          421 
          422         nn = 0;
          423         m = mp;
          424         for(i=0; i<n; i++,m++) {
          425                 tf = tempfile(t+i);
          426                 f = open(tf, OREAD);
          427                 if(f < 0) {
          428                         fprint(2, "sort: reopen %s: %r\n", tf);
          429                         done("open");
          430                 }
          431                 m->fd = f;
          432                 Binit(&m->b, f, OREAD);
          433                 mmp[nn] = m;
          434 
          435                 l = newline(&m->b);
          436                 if(l == 0)
          437                         continue;
          438                 nn++;
          439                 m->line = l;
          440                 m->key = l->key;
          441         }
          442 
          443         ok = 0;
          444         for(;;) {
          445                 sort4(mmp, nn);
          446                 m = *mmp;
          447                 if(nn == 0)
          448                         break;
          449                 for(;;) {
          450                         l = m->line;
          451                         if(args.uflag && ok && kcmp(ok, l->key) == 0) {
          452                                 free(l->key);
          453                                 free(l);
          454                         } else {
          455                                 lineout(b, l);
          456                                 if(ok)
          457                                         free(ok);
          458                                 ok = l->key;
          459                                 free(l);
          460                         }
          461 
          462                         l = newline(&m->b);
          463                         if(l == 0) {
          464                                 nn--;
          465                                 mmp[0] = mmp[nn];
          466                                 break;
          467                         }
          468                         m->line = l;
          469                         m->key = l->key;
          470                         if(nn > 1 && kcmp(mmp[0]->key, mmp[1]->key) > 0)
          471                                 break;
          472                 }
          473         }
          474         if(ok)
          475                 free(ok);
          476 
          477         m = mp;
          478         for(i=0; i<n; i++,m++) {
          479                 Bterm(&m->b);
          480                 close(m->fd);
          481         }
          482 
          483         free(mp);
          484         free(mmp);
          485 }
          486 
          487 int
          488 kcmp(Key *ka, Key *kb)
          489 {
          490         int n, m;
          491 
          492         /*
          493          * set n to length of smaller key
          494          */
          495         n = ka->klen;
          496         m = kb->klen;
          497         if(n > m)
          498                 n = m;
          499         return memcmp(ka->key, kb->key, n);
          500 }
          501 
          502 void
          503 printout(Biobuf *b)
          504 {
          505         long n;
          506         Line **lp, *l;
          507         Key *ok;
          508 
          509         sort4(args.linep, args.nline);
          510         lp = args.linep;
          511         ok = 0;
          512         for(n=args.nline; n>0; n--) {
          513                 l = *lp++;
          514                 if(args.uflag && ok && kcmp(ok, l->key) == 0)
          515                         continue;
          516                 lineout(b, l);
          517                 ok = l->key;
          518         }
          519 }
          520 
          521 void
          522 setfield(int n, int c)
          523 {
          524         Field *f;
          525 
          526         f = &args.field[n];
          527         switch(c) {
          528         default:
          529                 fprint(2, "sort: unknown option: field.%C\n", c);
          530                 done("option");
          531         case 'b':        /* skip blanks */
          532                 f->flags |= Bflag;
          533                 break;
          534         case 'd':        /* directory order */
          535                 f->flags |= Dflag;
          536                 break;
          537         case 'f':        /* fold case */
          538                 f->flags |= Fflag;
          539                 break;
          540         case 'g':        /* floating point -n case */
          541                 f->flags |= Gflag;
          542                 break;
          543         case 'i':        /* ignore non-ascii */
          544                 f->flags |= Iflag;
          545                 break;
          546         case 'M':        /* month */
          547                 f->flags |= Mflag;
          548                 break;
          549         case 'n':        /* numbers */
          550                 f->flags |= Nflag;
          551                 break;
          552         case 'r':        /* reverse */
          553                 f->flags |= Rflag;
          554                 break;
          555         case 'w':        /* ignore white */
          556                 f->flags |= Wflag;
          557                 break;
          558         }
          559 }
          560 
          561 void
          562 dofield(char *s, int *n1, int *n2, int off1, int off2)
          563 {
          564         int c, n;
          565 
          566         c = *s++;
          567         if(c >= '0' && c <= '9') {
          568                 n = 0;
          569                 while(c >= '0' && c <= '9') {
          570                         n = n*10 + (c-'0');
          571                         c = *s++;
          572                 }
          573                 n -= off1;        /* posix committee: rot in hell */
          574                 if(n < 0) {
          575                         fprint(2, "sort: field offset must be positive\n");
          576                         done("option");
          577                 }
          578                 *n1 = n;
          579         }
          580         if(c == '.') {
          581                 c = *s++;
          582                 if(c >= '0' && c <= '9') {
          583                         n = 0;
          584                         while(c >= '0' && c <= '9') {
          585                                 n = n*10 + (c-'0');
          586                                 c = *s++;
          587                         }
          588                         n -= off2;
          589                         if(n < 0) {
          590                                 fprint(2, "sort: character offset must be positive\n");
          591                                 done("option");
          592                         }
          593                         *n2 = n;
          594                 }
          595         }
          596         while(c != 0) {
          597                 setfield(args.nfield, c);
          598                 c = *s++;
          599         }
          600 }
          601 
          602 void
          603 printargs(void)
          604 {
          605         int i, n;
          606         Field *f;
          607         char *prefix;
          608 
          609         fprint(2, "sort");
          610         for(i=0; i<=args.nfield; i++) {
          611                 f = &args.field[i];
          612                 prefix = " -";
          613                 if(i) {
          614                         n = f->beg1;
          615                         if(n >= 0)
          616                                 fprint(2, " +%d", n);
          617                         else
          618                                 fprint(2, " +*");
          619                         n = f->beg2;
          620                         if(n >= 0)
          621                                 fprint(2, ".%d", n);
          622                         else
          623                                 fprint(2, ".*");
          624 
          625                         if(f->flags & B1flag)
          626                                 fprint(2, "b");
          627 
          628                         n = f->end1;
          629                         if(n >= 0)
          630                                 fprint(2, " -%d", n);
          631                         else
          632                                 fprint(2, " -*");
          633                         n = f->end2;
          634                         if(n >= 0)
          635                                 fprint(2, ".%d", n);
          636                         else
          637                                 fprint(2, ".*");
          638                         prefix = "";
          639                 }
          640                 if(f->flags & Bflag)
          641                         fprint(2, "%sb", prefix);
          642                 if(f->flags & Dflag)
          643                         fprint(2, "%sd", prefix);
          644                 if(f->flags & Fflag)
          645                         fprint(2, "%sf", prefix);
          646                 if(f->flags & Gflag)
          647                         fprint(2, "%sg", prefix);
          648                 if(f->flags & Iflag)
          649                         fprint(2, "%si", prefix);
          650                 if(f->flags & Mflag)
          651                         fprint(2, "%sM", prefix);
          652                 if(f->flags & Nflag)
          653                         fprint(2, "%sn", prefix);
          654                 if(f->flags & Rflag)
          655                         fprint(2, "%sr", prefix);
          656                 if(f->flags & Wflag)
          657                         fprint(2, "%sw", prefix);
          658         }
          659         if(args.cflag)
          660                 fprint(2, " -c");
          661         if(args.uflag)
          662                 fprint(2, " -u");
          663         if(args.ofile)
          664                 fprint(2, " -o %s", args.ofile);
          665         if(args.mline != Nline)
          666                 fprint(2, " -l %ld", args.mline);
          667         fprint(2, "\n");
          668 }
          669 
          670 void
          671 newfield(void)
          672 {
          673         int n;
          674         Field *f;
          675 
          676         n = args.nfield + 1;
          677         if(n >= Nfield) {
          678                 fprint(2, "sort: too many fields specified\n");
          679                 done("option");
          680         }
          681         args.nfield = n;
          682         f = &args.field[n];
          683         f->beg1 = -1;
          684         f->beg2 = -1;
          685         f->end1 = -1;
          686         f->end2 = -1;
          687 }
          688 
          689 void
          690 doargs(int argc, char *argv[])
          691 {
          692         int i, c, hadplus;
          693         char *s, *p, *q;
          694         Field *f;
          695 
          696         hadplus = 0;
          697         args.mline = Nline;
          698         for(i=1; i<argc; i++) {
          699                 s = argv[i];
          700                 c = *s++;
          701                 if(c == '-') {
          702                         c = *s;
          703                         if(c == 0)                /* forced end of arg marker */
          704                                 break;
          705                         argv[i] = 0;                /* clobber args processed */
          706                         if(c == '.' || (c >= '0' && c <= '9')) {
          707                                 if(!hadplus)
          708                                         newfield();
          709                                 f = &args.field[args.nfield];
          710                                 dofield(s, &f->end1, &f->end2, 0, 0);
          711                                 hadplus = 0;
          712                                 continue;
          713                         }
          714 
          715                         while(c = *s++)
          716                         switch(c) {
          717                         case '-':        /* end of options */
          718                                 i = argc;
          719                                 continue;
          720                         case 'T':        /* temp directory */
          721                                 if(*s == 0) {
          722                                         i++;
          723                                         if(i < argc) {
          724                                                 args.tname = argv[i];
          725                                                 argv[i] = 0;
          726                                         }
          727                                 } else
          728                                         args.tname = s;
          729                                 s = strchr(s, 0);
          730                                 break;
          731                         case 'o':        /* output file */
          732                                 if(*s == 0) {
          733                                         i++;
          734                                         if(i < argc) {
          735                                                 args.ofile = argv[i];
          736                                                 argv[i] = 0;
          737                                         }
          738                                 } else
          739                                         args.ofile = s;
          740                                 s = strchr(s, 0);
          741                                 break;
          742                         case 'k':        /* posix key (what were they thinking?) */
          743                                 p = 0;
          744                                 if(*s == 0) {
          745                                         i++;
          746                                         if(i < argc) {
          747                                                 p = argv[i];
          748                                                 argv[i] = 0;
          749                                         }
          750                                 } else
          751                                         p = s;
          752                                 s = strchr(s, 0);
          753                                 if(p == 0)
          754                                         break;
          755 
          756                                 newfield();
          757                                 q = strchr(p, ',');
          758                                 if(q)
          759                                         *q++ = 0;
          760                                 f = &args.field[args.nfield];
          761                                 dofield(p, &f->beg1, &f->beg2, 1, 1);
          762                                 if(f->flags & Bflag) {
          763                                         f->flags |= B1flag;
          764                                         f->flags &= ~Bflag;
          765                                 }
          766                                 if(q) {
          767                                         dofield(q, &f->end1, &f->end2, 1, 0);
          768                                         if(f->end2 <= 0)
          769                                                 f->end1++;
          770                                 }
          771                                 hadplus = 0;
          772                                 break;
          773                         case 't':        /* tab character */
          774                                 if(*s == 0) {
          775                                         i++;
          776                                         if(i < argc) {
          777                                                 chartorune(&args.tabchar, argv[i]);
          778                                                 argv[i] = 0;
          779                                         }
          780                                 } else
          781                                         s += chartorune(&args.tabchar, s);
          782                                 if(args.tabchar == '\n') {
          783                                         fprint(2, "aw come on, rob\n");
          784                                         done("rob");
          785                                 }
          786                                 break;
          787                         case 'c':        /* check order */
          788                                 args.cflag = 1;
          789                                 break;
          790                         case 'u':        /* unique */
          791                                 args.uflag = 1;
          792                                 break;
          793                         case 'v':        /* debugging noise */
          794                                 args.vflag = 1;
          795                                 break;
          796                         case 'l':
          797                                 if(*s == 0) {
          798                                         i++;
          799                                         if(i < argc) {
          800                                                 args.mline = atol(argv[i]);
          801                                                 argv[i] = 0;
          802                                         }
          803                                 } else
          804                                         args.mline = atol(s);
          805                                 s = strchr(s, 0);
          806                                 break;
          807 
          808                         case 'M':        /* month */
          809                         case 'b':        /* skip blanks */
          810                         case 'd':        /* directory order */
          811                         case 'f':        /* fold case */
          812                         case 'g':        /* floating numbers */
          813                         case 'i':        /* ignore non-ascii */
          814                         case 'n':        /* numbers */
          815                         case 'r':        /* reverse */
          816                         case 'w':        /* ignore white */
          817                                 if(args.nfield > 0)
          818                                         fprint(2, "sort: global field set after -k\n");
          819                                 setfield(0, c);
          820                                 break;
          821                         case 'm':
          822                                 /* option m silently ignored but required by posix */
          823                                 break;
          824                         default:
          825                                 fprint(2, "sort: unknown option: -%C\n", c);
          826                                 done("option");
          827                         }
          828                         continue;
          829                 }
          830                 if(c == '+') {
          831                         argv[i] = 0;                /* clobber args processed */
          832                         c = *s;
          833                         if(c == '.' || (c >= '0' && c <= '9')) {
          834                                 newfield();
          835                                 f = &args.field[args.nfield];
          836                                 dofield(s, &f->beg1, &f->beg2, 0, 0);
          837                                 if(f->flags & Bflag) {
          838                                         f->flags |= B1flag;
          839                                         f->flags &= ~Bflag;
          840                                 }
          841                                 hadplus = 1;
          842                                 continue;
          843                         }
          844                         fprint(2, "sort: unknown option: +%C\n", c);
          845                         done("option");
          846                 }
          847                 args.nfile++;
          848         }
          849 
          850         for(i=0; i<=args.nfield; i++) {
          851                 f = &args.field[i];
          852 
          853                 /*
          854                  * global options apply to fields that
          855                  * specify no options
          856                  */
          857                 if(f->flags == 0) {
          858                         f->flags = args.field[0].flags;
          859                         if(args.field[0].flags & Bflag)
          860                                 f->flags |= B1flag;
          861                 }
          862 
          863 
          864                 /*
          865                  * build buildkey specification
          866                  */
          867                 switch(f->flags & ~(Bflag|B1flag)) {
          868                 default:
          869                         fprint(2, "sort: illegal combination of flags: %lx\n", f->flags);
          870                         done("option");
          871                 case 0:
          872                         f->dokey = dokey_;
          873                         break;
          874                 case Rflag:
          875                         f->dokey = dokey_r;
          876                         break;
          877                 case Gflag:
          878                 case Nflag:
          879                 case Gflag|Nflag:
          880                 case Gflag|Rflag:
          881                 case Nflag|Rflag:
          882                 case Gflag|Nflag|Rflag:
          883                         f->dokey = dokey_gn;
          884                         break;
          885                 case Mflag:
          886                 case Mflag|Rflag:
          887                         f->dokey = dokey_m;
          888                         makemapm(f);
          889                         break;
          890                 case Dflag:
          891                 case Dflag|Fflag:
          892                 case Dflag|Fflag|Iflag:
          893                 case Dflag|Fflag|Iflag|Rflag:
          894                 case Dflag|Fflag|Iflag|Rflag|Wflag:
          895                 case Dflag|Fflag|Iflag|Wflag:
          896                 case Dflag|Fflag|Rflag:
          897                 case Dflag|Fflag|Rflag|Wflag:
          898                 case Dflag|Fflag|Wflag:
          899                 case Dflag|Iflag:
          900                 case Dflag|Iflag|Rflag:
          901                 case Dflag|Iflag|Rflag|Wflag:
          902                 case Dflag|Iflag|Wflag:
          903                 case Dflag|Rflag:
          904                 case Dflag|Rflag|Wflag:
          905                 case Dflag|Wflag:
          906                 case Fflag:
          907                 case Fflag|Iflag:
          908                 case Fflag|Iflag|Rflag:
          909                 case Fflag|Iflag|Rflag|Wflag:
          910                 case Fflag|Iflag|Wflag:
          911                 case Fflag|Rflag:
          912                 case Fflag|Rflag|Wflag:
          913                 case Fflag|Wflag:
          914                 case Iflag:
          915                 case Iflag|Rflag:
          916                 case Iflag|Rflag|Wflag:
          917                 case Iflag|Wflag:
          918                 case Wflag:
          919                         f->dokey = dokey_dfi;
          920                         makemapd(f);
          921                         break;
          922                 }
          923         }
          924 
          925         /*
          926          * random spot checks
          927          */
          928         if(args.nfile > 1 && args.cflag) {
          929                 fprint(2, "sort: -c can have at most one input file\n");
          930                 done("option");
          931         }
          932         return;
          933 }
          934 
          935 uchar*
          936 skip(uchar *l, int n1, int n2, int bflag, int endfield)
          937 {
          938         int i, c, tc;
          939         Rune r;
          940 
          941         if(endfield && n1 < 0)
          942                 return 0;
          943 
          944         c = *l++;
          945         tc = args.tabchar;
          946         if(tc) {
          947                 if(tc < Runeself) {
          948                         for(i=n1; i>0; i--) {
          949                                 while(c != tc) {
          950                                         if(c == '\n')
          951                                                 return 0;
          952                                         c = *l++;
          953                                 }
          954                                 if(!(endfield && i == 1))
          955                                         c = *l++;
          956                         }
          957                 } else {
          958                         l--;
          959                         l += chartorune(&r, (char*)l);
          960                         for(i=n1; i>0; i--) {
          961                                 while(r != tc) {
          962                                         if(r == '\n')
          963                                                 return 0;
          964                                         l += chartorune(&r, (char*)l);
          965                                 }
          966                                 if(!(endfield && i == 1))
          967                                         l += chartorune(&r, (char*)l);
          968                         }
          969                         c = r;
          970                 }
          971         } else {
          972                 for(i=n1; i>0; i--) {
          973                         while(c == ' ' || c == '\t')
          974                                 c = *l++;
          975                         while(c != ' ' && c != '\t') {
          976                                 if(c == '\n')
          977                                         return 0;
          978                                 c = *l++;
          979                         }
          980                 }
          981         }
          982 
          983         if(bflag)
          984                 while(c == ' ' || c == '\t')
          985                         c = *l++;
          986 
          987         l--;
          988         for(i=n2; i>0; i--) {
          989                 c = *l;
          990                 if(c < Runeself) {
          991                         if(c == '\n')
          992                                 return 0;
          993                         l++;
          994                         continue;
          995                 }
          996                 l += chartorune(&r, (char*)l);
          997         }
          998         return l;
          999 }
         1000 
         1001 void
         1002 dokey_gn(Key *k, uchar *lp, uchar *lpe, Field *f)
         1003 {
         1004         uchar *kp;
         1005         int c, cl, dp;
         1006         int state, nzero, exp, expsign, rflag;
         1007 
         1008         cl = k->klen + 3;
         1009         kp = k->key + cl;        /* skip place for sign, exponent[2] */
         1010 
         1011         nzero = 0;                /* number of trailing zeros */
         1012         exp = 0;                /* value of the exponent */
         1013         expsign = 0;                /* sign of the exponent */
         1014         dp = 0x4040;                /* location of decimal point */
         1015         rflag = f->flags&Rflag;        /* xor of rflag and - sign */
         1016         state = NSstart;
         1017 
         1018         for(;; lp++) {
         1019                 if(lp >= lpe)
         1020                         break;
         1021                 c = *lp;
         1022 
         1023                 if(c == ' ' || c == '\t') {
         1024                         switch(state) {
         1025                         case NSstart:
         1026                         case NSsign:
         1027                                 continue;
         1028                         }
         1029                         break;
         1030                 }
         1031                 if(c == '+' || c == '-') {
         1032                         switch(state) {
         1033                         case NSstart:
         1034                                 state = NSsign;
         1035                                 if(c == '-')
         1036                                         rflag = !rflag;
         1037                                 continue;
         1038                         case NSexp:
         1039                                 state = NSexpsign;
         1040                                 if(c == '-')
         1041                                         expsign = 1;
         1042                                 continue;
         1043                         }
         1044                         break;
         1045                 }
         1046                 if(c == '0') {
         1047                         switch(state) {
         1048                         case NSdigit:
         1049                                 if(rflag)
         1050                                         c = ~c;
         1051                                 *kp++ = c;
         1052                                 cl++;
         1053                                 nzero++;
         1054                                 dp++;
         1055                                 state = NSdigit;
         1056                                 continue;
         1057                         case NSfract:
         1058                                 if(rflag)
         1059                                         c = ~c;
         1060                                 *kp++ = c;
         1061                                 cl++;
         1062                                 nzero++;
         1063                                 state = NSfract;
         1064                                 continue;
         1065                         case NSstart:
         1066                         case NSsign:
         1067                         case NSzero:
         1068                                 state = NSzero;
         1069                                 continue;
         1070                         case NSzerofract:
         1071                         case NSpoint:
         1072                                 dp--;
         1073                                 state = NSzerofract;
         1074                                 continue;
         1075                         case NSexpsign:
         1076                         case NSexp:
         1077                         case NSexpdigit:
         1078                                 exp = exp*10 + (c - '0');
         1079                                 state = NSexpdigit;
         1080                                 continue;
         1081                         }
         1082                         break;
         1083                 }
         1084                 if(c >= '1' && c <= '9') {
         1085                         switch(state) {
         1086                         case NSzero:
         1087                         case NSstart:
         1088                         case NSsign:
         1089                         case NSdigit:
         1090                                 if(rflag)
         1091                                         c = ~c;
         1092                                 *kp++ = c;
         1093                                 cl++;
         1094                                 nzero = 0;
         1095                                 dp++;
         1096                                 state = NSdigit;
         1097                                 continue;
         1098                         case NSzerofract:
         1099                         case NSpoint:
         1100                         case NSfract:
         1101                                 if(rflag)
         1102                                         c = ~c;
         1103                                 *kp++ = c;
         1104                                 cl++;
         1105                                 nzero = 0;
         1106                                 state = NSfract;
         1107                                 continue;
         1108                         case NSexpsign:
         1109                         case NSexp:
         1110                         case NSexpdigit:
         1111                                 exp = exp*10 + (c - '0');
         1112                                 state = NSexpdigit;
         1113                                 continue;
         1114                         }
         1115                         break;
         1116                 }
         1117                 if(c == '.') {
         1118                         switch(state) {
         1119                         case NSstart:
         1120                         case NSsign:
         1121                                 state = NSpoint;
         1122                                 continue;
         1123                         case NSzero:
         1124                                 state = NSzerofract;
         1125                                 continue;
         1126                         case NSdigit:
         1127                                 state = NSfract;
         1128                                 continue;
         1129                         }
         1130                         break;
         1131                 }
         1132                 if((f->flags & Gflag) && (c == 'e' || c == 'E')) {
         1133                         switch(state) {
         1134                         case NSdigit:
         1135                         case NSfract:
         1136                                 state = NSexp;
         1137                                 continue;
         1138                         }
         1139                         break;
         1140                 }
         1141                 break;
         1142         }
         1143 
         1144         switch(state) {
         1145         /*
         1146          * result is zero
         1147          */
         1148         case NSstart:
         1149         case NSsign:
         1150         case NSzero:
         1151         case NSzerofract:
         1152         case NSpoint:
         1153                 kp = k->key + k->klen;
         1154                 k->klen += 2;
         1155                 kp[0] = 0x20;        /* between + and - */
         1156                 kp[1] = 0;
         1157                 return;
         1158         /*
         1159          * result has exponent
         1160          */
         1161         case NSexpsign:
         1162         case NSexp:
         1163         case NSexpdigit:
         1164                 if(expsign)
         1165                         exp = -exp;
         1166                 dp += exp;
         1167 
         1168         /*
         1169          * result is fixed point number
         1170          */
         1171         case NSdigit:
         1172         case NSfract:
         1173                 kp -= nzero;
         1174                 cl -= nzero;
         1175                 break;
         1176         }
         1177 
         1178         /*
         1179          * end of number
         1180          */
         1181         c = 0;
         1182         if(rflag)
         1183                 c = ~c;
         1184         *kp = c;
         1185 
         1186         /*
         1187          * sign and exponent
         1188          */
         1189         c = 0x30;
         1190         if(rflag) {
         1191                 c = 0x10;
         1192                 dp = ~dp;
         1193         }
         1194         kp = k->key + k->klen;
         1195         kp[0] = c;
         1196         kp[1] = (dp >> 8);
         1197         kp[2] = dp;
         1198         k->klen = cl+1;
         1199 }
         1200 
         1201 void
         1202 dokey_m(Key *k, uchar *lp, uchar *lpe, Field *f)
         1203 {
         1204         uchar *kp;
         1205         Rune r, place[3];
         1206         int c, cl, pc;
         1207         int rflag;
         1208 
         1209         rflag = f->flags&Rflag;
         1210         pc = 0;
         1211 
         1212         cl = k->klen;
         1213         kp = k->key + cl;
         1214 
         1215         for(;;) {
         1216                 /*
         1217                  * get the character
         1218                  */
         1219                 if(lp >= lpe)
         1220                         break;
         1221                 c = *lp;
         1222                 if(c >= Runeself) {
         1223                         lp += chartorune(&r, (char*)lp);
         1224                         c = r;
         1225                 } else
         1226                         lp++;
         1227 
         1228                 if(c < nelem(f->mapto)) {
         1229                         c = f->mapto[c];
         1230                         if(c == 0)
         1231                                 continue;
         1232                 }
         1233                 place[pc++] = c;
         1234                 if(pc < 3)
         1235                         continue;
         1236                 for(c=11; c>=0; c--)
         1237                         if(memcmp(month[c], place, sizeof(place)) == 0)
         1238                                 break;
         1239                 c += 10;
         1240                 if(rflag)
         1241                         c = ~c;
         1242                 *kp++ = c;
         1243                 cl++;
         1244                 break;
         1245         }
         1246 
         1247         c = 0;
         1248         if(rflag)
         1249                 c = ~c;
         1250         *kp = c;
         1251         k->klen = cl+1;
         1252 }
         1253 
         1254 void
         1255 dokey_dfi(Key *k, uchar *lp, uchar *lpe, Field *f)
         1256 {
         1257         uchar *kp;
         1258         Rune r;
         1259         int c, cl, n, rflag;
         1260 
         1261         cl = k->klen;
         1262         kp = k->key + cl;
         1263         rflag = f->flags & Rflag;
         1264 
         1265         for(;;) {
         1266                 /*
         1267                  * get the character
         1268                  */
         1269                 if(lp >= lpe)
         1270                         break;
         1271                 c = *lp;
         1272                 if(c >= Runeself) {
         1273                         lp += chartorune(&r, (char*)lp);
         1274                         c = r;
         1275                 } else
         1276                         lp++;
         1277 
         1278                 /*
         1279                  * do the various mappings.
         1280                  * the common case is handled
         1281                  * completely by the table.
         1282                  */
         1283                 if(c != 0 && c < Runeself) {
         1284                         c = f->mapto[c];
         1285                         if(c) {
         1286                                 *kp++ = c;
         1287                                 cl++;
         1288                         }
         1289                         continue;
         1290                 }
         1291 
         1292                 /*
         1293                  * for characters out of range,
         1294                  * the table does not do Rflag.
         1295                  * ignore is based on mapto[255]
         1296                  */
         1297                 if(c != 0 && c < nelem(f->mapto)) {
         1298                         c = f->mapto[c];
         1299                         if(c == 0)
         1300                                 continue;
         1301                 } else
         1302                         if(f->mapto[nelem(f->mapto)-1] == 0)
         1303                                 continue;
         1304 
         1305                 /*
         1306                  * put it in the key
         1307                  */
         1308                 r = c;
         1309                 n = runetochar((char*)kp, &r);
         1310                 kp += n;
         1311                 cl += n;
         1312                 if(rflag)
         1313                         while(n > 0) {
         1314                                 kp[-n] = ~kp[-n];
         1315                                 n--;
         1316                         }
         1317         }
         1318 
         1319         /*
         1320          * end of key
         1321          */
         1322         k->klen = cl+1;
         1323         if(rflag) {
         1324                 *kp = ~0;
         1325                 return;
         1326         }
         1327         *kp = 0;
         1328 }
         1329 
         1330 void
         1331 dokey_r(Key *k, uchar *lp, uchar *lpe, Field *f)
         1332 {
         1333         int cl, n;
         1334         uchar *kp;
         1335 
         1336         USED(f);
         1337         n = lpe - lp;
         1338         if(n < 0)
         1339                 n = 0;
         1340         cl = k->klen;
         1341         kp = k->key + cl;
         1342         k->klen = cl+n+1;
         1343 
         1344         lpe -= 3;
         1345         while(lp < lpe) {
         1346                 kp[0] = ~lp[0];
         1347                 kp[1] = ~lp[1];
         1348                 kp[2] = ~lp[2];
         1349                 kp[3] = ~lp[3];
         1350                 kp += 4;
         1351                 lp += 4;
         1352         }
         1353 
         1354         lpe += 3;
         1355         while(lp < lpe)
         1356                 *kp++ = ~*lp++;
         1357         *kp = ~0;
         1358 }
         1359 
         1360 void
         1361 dokey_(Key *k, uchar *lp, uchar *lpe, Field *f)
         1362 {
         1363         int n, cl;
         1364         uchar *kp;
         1365 
         1366         USED(f);
         1367         n = lpe - lp;
         1368         if(n < 0)
         1369                 n = 0;
         1370         cl = k->klen;
         1371         kp = k->key + cl;
         1372         k->klen = cl+n+1;
         1373         memmove(kp, lp, n);
         1374         kp[n] = 0;
         1375 }
         1376 
         1377 void
         1378 buildkey(Line *l)
         1379 {
         1380         Key *k;
         1381         uchar *lp, *lpe;
         1382         int ll, kl, cl, i, n;
         1383         Field *f;
         1384 
         1385         ll = l->llen - 1;
         1386         kl = 0;                        /* allocated length */
         1387         cl = 0;                        /* current length */
         1388         k = 0;
         1389 
         1390         for(i=1; i<=args.nfield; i++) {
         1391                 f = &args.field[i];
         1392                 lp = skip(l->line, f->beg1, f->beg2, f->flags&B1flag, 0);
         1393                 if(lp == 0)
         1394                         lp = l->line + ll;
         1395                 lpe = skip(l->line, f->end1, f->end2, f->flags&Bflag, 1);
         1396                 if(lpe == 0)
         1397                         lpe = l->line + ll;
         1398                 n = (lpe - lp) + 1;
         1399                 if(n <= 0)
         1400                         n = 1;
         1401                 if(cl+(n+4) > kl) {
         1402                         kl = cl+(n+4);
         1403                         k = realloc(k, sizeof(Key) +
         1404                                 (kl-1)*sizeof(k->key[0]));
         1405                         if(k == 0)
         1406                                 nomem();
         1407                 }
         1408                 k->klen = cl;
         1409                 (*f->dokey)(k, lp, lpe, f);
         1410                 cl = k->klen;
         1411         }
         1412 
         1413         /*
         1414          * global comparisons
         1415          */
         1416         if(!(args.uflag && cl > 0)) {
         1417                 f = &args.field[0];
         1418                 if(cl+(ll+4) > kl) {
         1419                         kl = cl+(ll+4);
         1420                         k = realloc(k, sizeof(Key) +
         1421                                 (kl-1)*sizeof(k->key[0]));
         1422                         if(k == 0)
         1423                                 nomem();
         1424                 }
         1425                 k->klen = cl;
         1426                 (*f->dokey)(k, l->line, l->line+ll, f);
         1427                 cl = k->klen;
         1428         }
         1429 
         1430         l->key = k;
         1431         k->klen = cl;
         1432 
         1433         if(args.vflag) {
         1434                 write(2, l->line, l->llen);
         1435                 for(i=0; i<k->klen; i++) {
         1436                         fprint(2, " %.2x", k->key[i]);
         1437                         if(k->key[i] == 0x00 || k->key[i] == 0xff)
         1438                                 fprint(2, "\n");
         1439                 }
         1440         }
         1441 }
         1442 
         1443 void
         1444 makemapm(Field *f)
         1445 {
         1446         int i, c;
         1447 
         1448         for(i=0; i<nelem(f->mapto); i++) {
         1449                 c = 1;
         1450                 if(i == ' ' || i == '\t')
         1451                         c = 0;
         1452                 if(i >= 'a' && i <= 'z')
         1453                         c = i + ('A' - 'a');
         1454                 if(i >= 'A' && i <= 'Z')
         1455                         c = i;
         1456                 f->mapto[i] = c;
         1457                 if(args.vflag) {
         1458                         if((i & 15) == 0)
         1459                                 fprint(2, "        ");
         1460                         fprint(2, " %.2x", c);
         1461                         if((i & 15) == 15)
         1462                                 fprint(2, "\n");
         1463                 }
         1464         }
         1465 }
         1466 
         1467 void
         1468 makemapd(Field *f)
         1469 {
         1470         int i, j, c;
         1471 
         1472         for(i=0; i<nelem(f->mapto); i++) {
         1473                 c = i;
         1474                 if(f->flags & Iflag)
         1475                         if(c < 040 || c > 0176)
         1476                                 c = -1;
         1477                 if((f->flags & Wflag) && c >= 0)
         1478                         if(c == ' ' || c == '\t')
         1479                                 c = -1;
         1480                 if((f->flags & Dflag) && c >= 0)
         1481                         if(!(c == ' ' || c == '\t' ||
         1482                             (c >= 'a' && c <= 'z') ||
         1483                             (c >= 'A' && c <= 'Z') ||
         1484                             (c >= '0' && c <= '9'))) {
         1485                                 for(j=0; latinmap[j]; j+=3)
         1486                                         if(c == latinmap[j+0] ||
         1487                                            c == latinmap[j+1])
         1488                                                 break;
         1489                                 if(latinmap[j] == 0)
         1490                                         c = -1;
         1491                         }
         1492                 if((f->flags & Fflag) && c >= 0) {
         1493                         if(c >= 'a' && c <= 'z')
         1494                                 c += 'A' - 'a';
         1495                         for(j=0; latinmap[j]; j+=3)
         1496                                 if(c == latinmap[j+0] ||
         1497                                    c == latinmap[j+1]) {
         1498                                         c = latinmap[j+2];
         1499                                         break;
         1500                                 }
         1501                 }
         1502                 if((f->flags & Rflag) && c >= 0 && i > 0 && i < Runeself)
         1503                         c = ~c & 0xff;
         1504                 if(c < 0)
         1505                         c = 0;
         1506                 f->mapto[i] = c;
         1507                 if(args.vflag) {
         1508                         if((i & 15) == 0)
         1509                                 fprint(2, "        ");
         1510                         fprint(2, " %.2x", c);
         1511                         if((i & 15) == 15)
         1512                                 fprint(2, "\n");
         1513                 }
         1514         }
         1515 }
         1516 
         1517 int        latinmap[] =
         1518 {
         1519 /*        lcase        ucase        fold        */
         1520         0xe0,        0xc0,        0x41,                /*         L'à',        L'À',        L'A',         */
         1521         0xe1,        0xc1,        0x41,                /*         L'á',        L'Á',        L'A',         */
         1522         0xe2,        0xc2,        0x41,                /*         L'â',        L'Â',        L'A',         */
         1523         0xe4,        0xc4,        0x41,                /*         L'ä',        L'Ä',        L'A',         */
         1524         0xe3,        0xc3,        0x41,                /*         L'ã',        L'Ã',        L'A',         */
         1525         0xe5,        0xc5,        0x41,                /*         L'å',        L'Å',        L'A',         */
         1526         0xe8,        0xc8,        0x45,                /*         L'è',        L'È',        L'E',         */
         1527         0xe9,        0xc9,        0x45,                /*         L'é',        L'É',        L'E',         */
         1528         0xea,        0xca,        0x45,                /*         L'ê',        L'Ê',        L'E',         */
         1529         0xeb,        0xcb,        0x45,                /*         L'ë',        L'Ë',        L'E',         */
         1530         0xec,        0xcc,        0x49,                /*         L'ì',        L'Ì',        L'I',         */
         1531         0xed,        0xcd,        0x49,                /*         L'í',        L'Í',        L'I',         */
         1532         0xee,        0xce,        0x49,                /*         L'î',        L'Î',        L'I',         */
         1533         0xef,        0xcf,        0x49,                /*         L'ï',        L'Ï',        L'I',         */
         1534         0xf2,        0xd2,        0x4f,                /*         L'ò',        L'Ò',        L'O',         */
         1535         0xf3,        0xd3,        0x4f,                /*         L'ó',        L'Ó',        L'O',         */
         1536         0xf4,        0xd4,        0x4f,                /*         L'ô',        L'Ô',        L'O',         */
         1537         0xf6,        0xd6,        0x4f,                /*         L'ö',        L'Ö',        L'O',         */
         1538         0xf5,        0xd5,        0x4f,                /*         L'õ',        L'Õ',        L'O',         */
         1539         0xf8,        0xd8,        0x4f,                /*         L'ø',        L'Ø',        L'O',         */
         1540         0xf9,        0xd9,        0x55,                /*         L'ù',        L'Ù',        L'U',         */
         1541         0xfa,        0xda,        0x55,                /*         L'ú',        L'Ú',        L'U',         */
         1542         0xfb,        0xdb,        0x55,                /*         L'û',        L'Û',        L'U',         */
         1543         0xfc,        0xdc,        0x55,                /*         L'ü',        L'Ü',        L'U',         */
         1544         0xe6,        0xc6,        0x41,                /*         L'æ',        L'Æ',        L'A',         */
         1545         0xf0,        0xd0,        0x44,                /*         L'ð',        L'Ð',        L'D',         */
         1546         0xf1,        0xd1,        0x4e,                /*         L'ñ',        L'Ñ',        L'N',         */
         1547         0xfd,        0xdd,        0x59,                /*         L'ý',        L'Ý',        L'Y',         */
         1548         0xe7,        0xc7,        0x43,                /*         L'ç',        L'Ç',        L'C',         */
         1549         0,
         1550 };
         1551 
         1552 Rune LJAN[] = { 'J', 'A', 'N', 0 };
         1553 Rune LFEB[] = { 'F', 'E', 'B', 0 };
         1554 Rune LMAR[] = { 'M', 'A', 'R', 0 };
         1555 Rune LAPR[] = { 'A', 'P', 'R', 0 };
         1556 Rune LMAY[] = { 'M', 'A', 'Y', 0 };
         1557 Rune LJUN[] = { 'J', 'U', 'N', 0 };
         1558 Rune LJUL[] = { 'J', 'U', 'L', 0 };
         1559 Rune LAUG[] = { 'A', 'U', 'G', 0 };
         1560 Rune LSEP[] = { 'S', 'E', 'P', 0 };
         1561 Rune LOCT[] = { 'O', 'C', 'T', 0 };
         1562 Rune LNOV[] = { 'N', 'O', 'V', 0 };
         1563 Rune LDEC[] = { 'D', 'E', 'C', 0 };
         1564 
         1565 Rune*        month[12] =
         1566 {
         1567         LJAN,
         1568         LFEB,
         1569         LMAR,
         1570         LAPR,
         1571         LMAY,
         1572         LJUN,
         1573         LJUL,
         1574         LAUG,
         1575         LSEP,
         1576         LOCT,
         1577         LNOV,
         1578         LDEC,
         1579 };
         1580 
         1581 /************** radix sort ***********/
         1582 
         1583 enum
         1584 {
         1585         Threshold        = 14
         1586 };
         1587 
         1588 void        rsort4(Key***, ulong, int);
         1589 void        bsort4(Key***, ulong, int);
         1590 
         1591 void
         1592 sort4(void *a, ulong n)
         1593 {
         1594         if(n > Threshold)
         1595                 rsort4((Key***)a, n, 0);
         1596         else
         1597                 bsort4((Key***)a, n, 0);
         1598 }
         1599 
         1600 void
         1601 rsort4(Key ***a, ulong n, int b)
         1602 {
         1603         Key ***ea, ***t, ***u, **t1, **u1, *k;
         1604         Key ***part[257];
         1605         static long count[257];
         1606         long clist[257+257], *cp, *cp1;
         1607         int c, lowc, higc;
         1608 
         1609         /*
         1610          * pass 1 over all keys,
         1611          * count the number of each key[b].
         1612          * find low count and high count.
         1613          */
         1614         lowc = 256;
         1615         higc = 0;
         1616         ea = a+n;
         1617         for(t=a; t<ea; t++) {
         1618                 k = **t;
         1619                 n = k->klen;
         1620                 if(b >= n) {
         1621                         count[256]++;
         1622                         continue;
         1623                 }
         1624                 c = k->key[b];
         1625                 n = count[c]++;
         1626                 if(n == 0) {
         1627                         if(c < lowc)
         1628                                 lowc = c;
         1629                         if(c > higc)
         1630                                 higc = c;
         1631                 }
         1632         }
         1633 
         1634         /*
         1635          * pass 2 over all counts,
         1636          * put partition pointers in part[c].
         1637          * save compacted indexes and counts
         1638          * in clist[].
         1639          */
         1640         t = a;
         1641         n = count[256];
         1642         clist[0] = n;
         1643         part[256] = t;
         1644         t += n;
         1645 
         1646         cp1 = clist+1;
         1647         cp = count+lowc;
         1648         for(c=lowc; c<=higc; c++,cp++) {
         1649                 n = *cp;
         1650                 if(n) {
         1651                         cp1[0] = n;
         1652                         cp1[1] = c;
         1653                         cp1 += 2;
         1654                         part[c] = t;
         1655                         t += n;
         1656                 }
         1657         }
         1658         *cp1 = 0;
         1659 
         1660         /*
         1661          * pass 3 over all counts.
         1662          * chase lowest pointer in each partition
         1663          * around a permutation until it comes
         1664          * back and is stored where it started.
         1665          * static array, count[], should be
         1666          * reduced to zero entries except maybe
         1667          * count[256].
         1668          */
         1669         for(cp1=clist+1; cp1[0]; cp1+=2) {
         1670                 c = cp1[1];
         1671                 cp = count+c;
         1672                 while(*cp) {
         1673                         t1 = *part[c];
         1674                         for(;;) {
         1675                                 k = *t1;
         1676                                 n = 256;
         1677                                 if(b < k->klen)
         1678                                         n = k->key[b];
         1679                                 u = part[n]++;
         1680                                 count[n]--;
         1681                                 u1 = *u;
         1682                                 *u = t1;
         1683                                 if(n == c)
         1684                                         break;
         1685                                 t1 = u1;
         1686                         }
         1687                 }
         1688         }
         1689 
         1690         /*
         1691          * pass 4 over all partitions.
         1692          * call recursively.
         1693          */
         1694         b++;
         1695         t = a + clist[0];
         1696         count[256] = 0;
         1697         for(cp1=clist+1; n=cp1[0]; cp1+=2) {
         1698                 if(n > Threshold)
         1699                         rsort4(t, n, b);
         1700                 else
         1701                 if(n > 1)
         1702                         bsort4(t, n, b);
         1703                 t += n;
         1704         }
         1705 }
         1706 
         1707 /*
         1708  * bubble sort to pick up
         1709  * the pieces.
         1710  */
         1711 void
         1712 bsort4(Key ***a, ulong n, int b)
         1713 {
         1714         Key ***i, ***j, ***k, ***l, **t;
         1715         Key *ka, *kb;
         1716         int n1, n2;
         1717 
         1718         l = a+n;
         1719         j = a;
         1720 
         1721 loop:
         1722         i = j;
         1723         j++;
         1724         if(j >= l)
         1725                 return;
         1726 
         1727         ka = **i;
         1728         kb = **j;
         1729         n1 = ka->klen - b;
         1730         n2 = kb->klen - b;
         1731         if(n1 > n2)
         1732                 n1 = n2;
         1733         if(n1 <= 0)
         1734                 goto loop;
         1735         n2 = ka->key[b] - kb->key[b];
         1736         if(n2 == 0)
         1737                 n2 = memcmp(ka->key+b, kb->key+b, n1);
         1738         if(n2 <= 0)
         1739                 goto loop;
         1740 
         1741         for(;;) {
         1742                 k = i+1;
         1743 
         1744                 t = *k;
         1745                 *k = *i;
         1746                 *i = t;
         1747 
         1748                 if(i <= a)
         1749                         goto loop;
         1750 
         1751                 i--;
         1752                 ka = **i;
         1753                 kb = *t;
         1754                 n1 = ka->klen - b;
         1755                 n2 = kb->klen - b;
         1756                 if(n1 > n2)
         1757                         n1 = n2;
         1758                 if(n1 <= 0)
         1759                         goto loop;
         1760                 n2 = ka->key[b] - kb->key[b];
         1761                 if(n2 == 0)
         1762                         n2 = memcmp(ka->key+b, kb->key+b, n1);
         1763                 if(n2 <= 0)
         1764                         goto loop;
         1765         }
         1766 }