parser.c - scc - simple c99 compiler
 (HTM) git clone git://git.simple-cc.org/scc
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) Submodules
 (DIR) README
 (DIR) LICENSE
       ---
       parser.c (14397B)
       ---
            1 #include <errno.h>
            2 #include <limits.h>
            3 #include <stdio.h>
            4 #include <stdlib.h>
            5 #include <string.h>
            6 
            7 #include <scc/cstd.h>
            8 #include <scc/scc.h>
            9 
           10 #include "cc2.h"
           11 
           12 #define STACKSIZ     50
           13 
           14 Type funetype = {
           15         .flags = FUNF | ELLIPS
           16 };
           17 
           18 Type funtype = {
           19         .flags = FUNF
           20 };
           21 
           22 union tokenop {
           23         void *arg;
           24         unsigned op;
           25 };
           26 
           27 Node *laststmt;
           28 static Swtch swtbl[NR_BLOCK], *swp = swtbl;
           29 static Symbol *lastfun;
           30 
           31 typedef void parsefun(char *, union tokenop);
           32 static parsefun type, symbol, getname, unary, binary, ternary, call,
           33                 constant, composed, binit, einit,
           34                 jump, oreturn, loop, assign,
           35                 ocase, bswitch, eswitch, builtin;
           36 
           37 typedef void evalfun(void);
           38 static evalfun vardecl, beginfun, endfun, endpars, stmt,
           39                array, aggregate, flddecl, labeldcl;
           40 
           41 static struct decoc {
           42         void (*eval)(void);
           43         void (*parse)(char *token, union tokenop);
           44         union tokenop u;
           45 } optbl[] = {      /*  eval     parse           args */
           46         ['A']   = {  vardecl,  symbol, .u.op  =  SAUTO<<8 | OAUTO},
           47         ['R']   = {  vardecl,  symbol, .u.op  =   SREG<<8 |  OREG},
           48         ['G']   = {  vardecl,  symbol, .u.op  =  SGLOB<<8 |  OMEM},
           49         ['X']   = {  vardecl,  symbol, .u.op  = SEXTRN<<8 |  OMEM},
           50         ['Y']   = {  vardecl,  symbol, .u.op  =  SPRIV<<8 |  OMEM},
           51         ['T']   = {  vardecl,  symbol, .u.op  = SLOCAL<<8 |  OMEM},
           52         ['M']   = {  flddecl,  symbol, .u.op  =  SMEMB<<8 |  OMEM},
           53         ['L']   = { labeldcl,  symbol, .u.op  = SLABEL<<8 | OLABEL},
           54 
           55         ['C']   = {     NULL,    type, .u.arg =    &int8type},
           56         ['I']   = {     NULL,    type, .u.arg =   &int16type},
           57         ['W']   = {     NULL,    type, .u.arg =   &int32type},
           58         ['Q']   = {     NULL,    type, .u.arg =   &int64type},
           59         ['K']   = {     NULL,    type, .u.arg =   &uint8type},
           60         ['N']   = {     NULL,    type, .u.arg =  &uint16type},
           61         ['Z']   = {     NULL,    type, .u.arg =  &uint32type},
           62         ['O']   = {     NULL,    type, .u.arg =  &uint64type},
           63         ['J']   = {     NULL,    type, .u.arg = &float32type},
           64         ['D']   = {     NULL,    type, .u.arg = &float64type},
           65         ['H']   = {     NULL,    type, .u.arg = &float80type},
           66         ['0']   = {     NULL,    type, .u.arg =    &voidtype},
           67         ['B']   = {     NULL,    type, .u.arg =    &booltype},
           68         ['P']   = {     NULL,    type, .u.arg =     &ptrtype},
           69         ['E']   = {     NULL,    type, .u.arg =    &funetype},
           70         ['1']        = {     NULL,    type, .u.arg =    &arg_type},
           71 
           72         ['F']   = {     NULL,    type, .u.arg =     &funtype},
           73         ['V']   = {    array,composed,                     0},
           74         ['U']   = {aggregate,composed,                     0},
           75         ['S']   = {aggregate,composed,                     0},
           76 
           77         ['"']   = {     NULL, getname,                     0},
           78         ['{']   = { beginfun,    NULL,                     0},
           79         ['}']   = {   endfun,    NULL,                     0},
           80         ['(']   = {     NULL,   binit,                     0},
           81         [')']   = {     NULL,   einit,                     0},
           82         ['\\']  = {  endpars,    NULL,                     0},
           83         ['\t']  = {     stmt,    NULL,                     0},
           84 
           85         ['~']   = {     NULL,   unary, .u.op =          OCPL},
           86         ['_']   = {     NULL,   unary, .u.op =         OSNEG},
           87         ['\'']  = {     NULL,   unary, .u.op =         OADDR},
           88         ['@']   = {     NULL,   unary, .u.op =          OPTR},
           89         ['g']   = {     NULL,   unary, .u.op =         OCAST},
           90         ['p']   = {     NULL,   unary, .u.op =          OPAR},
           91         ['n']   = {     NULL,   unary, .u.op =          ONEG},
           92 
           93         ['a']   = {     NULL,  binary, .u.op =          OAND},
           94         ['o']   = {     NULL,  binary, .u.op =           OOR},
           95         ['.']   = {     NULL,  binary, .u.op =        OFIELD},
           96         ['+']   = {     NULL,  binary, .u.op =          OADD},
           97         ['-']   = {     NULL,  binary, .u.op =          OSUB},
           98         ['*']   = {     NULL,  binary, .u.op =          OMUL},
           99         ['%']   = {     NULL,  binary, .u.op =          OMOD},
          100         ['/']   = {     NULL,  binary, .u.op =          ODIV},
          101         ['l']   = {     NULL,  binary, .u.op =          OSHL},
          102         ['r']   = {     NULL,  binary, .u.op =          OSHR},
          103         ['<']   = {     NULL,  binary, .u.op =           OLT},
          104         ['>']   = {     NULL,  binary, .u.op =           OGT},
          105         ['[']   = {     NULL,  binary, .u.op =           OLE},
          106         [']']   = {     NULL,  binary, .u.op =           OGE},
          107         ['=']   = {     NULL,  binary, .u.op =           OEQ},
          108         ['!']   = {     NULL,  binary, .u.op =           ONE},
          109         ['&']   = {     NULL,  binary, .u.op =         OBAND},
          110         ['|']   = {     NULL,  binary, .u.op =          OBOR},
          111         ['^']   = {     NULL,  binary, .u.op =         OBXOR},
          112         [',']   = {     NULL,  binary, .u.op =        OCOMMA},
          113         ['m']   = {     NULL,  builtin,.u.op =      OBUILTIN},
          114 
          115         [':']   = {     NULL,  assign, .u.op =        OASSIG},
          116         ['?']   = {     NULL, ternary, .u.op =          OASK},
          117         ['c']   = {     NULL,    call, .u.op =         OCALL},
          118         ['z']   = {     NULL,    call, .u.op =        OCALLE},
          119 
          120         ['#']   = {     NULL,constant, .u.op =        OCONST},
          121 
          122         ['j']   = {     NULL,    jump, .u.op =          OJMP},
          123         ['y']   = {     NULL,    jump, .u.op =       OBRANCH},
          124         ['h']   = {     NULL, oreturn, .u.op =          ORET},
          125         ['i']   = {     NULL,    NULL, .u.op =          OINC},
          126         ['d']   = {     NULL,    NULL, .u.op =          ODEC},
          127 
          128         ['b']   = {     NULL,    loop, .u.op =        OBLOOP},
          129         ['e']   = {     NULL,    loop, .u.op =        OELOOP},
          130 
          131         ['v']   = {     NULL,   ocase, .u.op =         OCASE},
          132         ['f']   = {     NULL,   ocase, .u.op =      ODEFAULT},
          133         ['t']   = {     NULL, eswitch, .u.op =      OESWITCH},
          134         ['s']   = {     NULL, bswitch, .u.op =      OBSWITCH},
          135 };
          136 
          137 static int sclass, inpars, ininit, beginf, endf, lineno;
          138 static void *stack[STACKSIZ], **sp = stack;
          139 
          140 static Node *
          141 push(void *elem)
          142 {
          143         if (sp == &stack[STACKSIZ])
          144                 error(ESTACKO);
          145         return *sp++ = elem;
          146 }
          147 
          148 static void *
          149 pop(void)
          150 {
          151         if (sp == stack)
          152                 error(ESTACKU);
          153         return *--sp;
          154 }
          155 
          156 static int
          157 empty(void)
          158 {
          159         return sp == stack;
          160 }
          161 
          162 static void
          163 type(char *token, union tokenop u)
          164 {
          165         push(u.arg);
          166 }
          167 
          168 static void
          169 composed(char *token, union tokenop u)
          170 {
          171         Symbol *sym;
          172 
          173         sym = getsym(atoi(token+1));
          174         sym->type.id = sym->id;
          175         push(&sym->type);
          176 }
          177 
          178 static void
          179 getname(char *t, union tokenop u)
          180 {
          181         push((*++t) ? xstrdup(t) : NULL);
          182 }
          183 
          184 static void
          185 symbol(char *token, union tokenop u)
          186 {
          187         Node *np = node(u.op & 0xFF);
          188         Symbol *sym = getsym(atoi(token+1));
          189 
          190         sclass = u.op >> 8;
          191         np->u.sym = sym;
          192         np->type = sym->type;
          193         push(np);
          194 }
          195 
          196 static Type *
          197 gettype(char *token)
          198 {
          199         struct decoc *dp;
          200 
          201         dp = &optbl[*token];
          202         if (!dp->parse)
          203                 error(ESYNTAX);
          204         (*dp->parse)(token, dp->u);
          205         return pop();
          206 }
          207 
          208 static void
          209 constant(char *token, union tokenop u)
          210 {
          211         static char letters[] = "0123456789ABCDEF";
          212         Node *np;
          213         TUINT v;
          214         unsigned c;
          215 
          216         ++token;
          217         if (*token == '"') {
          218                 ++token;
          219                 np = node(OSTRING);
          220                 np->type.flags = STRF;
          221                 np->type.size = strlen(token);
          222                 np->type.align = int8type.align;
          223                 np->u.s = xstrdup(token);
          224         } else {
          225                 np = node(OCONST);
          226                 np->type = *gettype(token++);
          227                 for (v = 0; c = *token++; v += c) {
          228                         v <<= 4;
          229                         c = strchr(letters, c) - letters;
          230                 }
          231                 np->u.i = v;
          232         }
          233         push(np);
          234 }
          235 
          236 static void
          237 assign(char *token, union tokenop u)
          238 {
          239         int subop;
          240         Node *np = node(u.op);
          241 
          242         switch (subop = *++token) {
          243         case '+':
          244         case '-':
          245         case '*':
          246         case '%':
          247         case '/':
          248         case 'l':
          249         case 'r':
          250         case '&':
          251         case '|':
          252         case '^':
          253         case 'i':
          254         case 'd':
          255                 ++token;
          256                 subop = optbl[subop].u.op;
          257                 break;
          258         default:
          259                 subop = 0;
          260                 break;
          261         }
          262 
          263         np->u.subop = subop;
          264         np->type = *gettype(token);
          265         np->right = pop();
          266         np->left = pop();
          267         push(np);
          268 }
          269 
          270 static void
          271 ternary(char *token, union tokenop u)
          272 {
          273         Node *ask = node(OASK), *colon = node(OCOLON);
          274         Type *tp = gettype(token+1);
          275 
          276         colon->right = pop();
          277         colon->left = pop();
          278 
          279         ask->type = *tp;
          280         ask->left = pop();
          281         ask->right = colon;
          282         push(ask);
          283 }
          284 
          285 static void
          286 eval(char *tok)
          287 {
          288         struct decoc *dp;
          289 
          290         do {
          291                 dp = &optbl[*tok];
          292                 if (!dp->parse)
          293                         break;
          294                 (*dp->parse)(tok, dp->u);
          295         } while (tok = strtok(NULL, "\t\n"));
          296 }
          297 
          298 static int
          299 nextline(void)
          300 {
          301         static char line[LINESIZ];
          302         size_t len;
          303         int c;
          304         void (*fun)(void);
          305 
          306 repeat:
          307         ++lineno;
          308         if (!fgets(line, sizeof(line), stdin))
          309                 return 0;
          310         if ((len = strlen(line)) == 0 || line[0] == '\n')
          311                 goto repeat;
          312         if (line[len-1] != '\n')
          313                 error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE);
          314         line[len-1] = '\0';
          315 
          316         c = *line;
          317         eval(strtok(line, "\t\n"));
          318         if ((fun = *optbl[c].eval) != NULL)
          319                 (*fun)();
          320         if (sp != stack)
          321                 error(ESTACKA);
          322         return 1;
          323 }
          324 
          325 static void
          326 oreturn(char *token, union tokenop u)
          327 {
          328         Node *np = node(u.op);
          329 
          330         if (token = strtok(NULL, "\t\n"))
          331                 eval(token);
          332         if (!empty())
          333                 np->left = pop();
          334         push(np);
          335 }
          336 
          337 static void
          338 addcase(Node *np)
          339 {
          340         TINT val;
          341         Node **bp;
          342         Swtch *cur;
          343 
          344         if (swp == swtbl)
          345                 error(EWTACKU);
          346 
          347         cur = swp - 1;
          348         if (np->op == ODEFAULT) {
          349                 cur->defnode = np;
          350         } else {
          351                 cur->nr++;
          352                 bp = cur->cases;
          353                 bp = xrealloc(bp, cur->nr * sizeof(Node *));
          354                 bp[cur->nr-1] = np;
          355                 cur->cases = bp;
          356 
          357                 val = np->left->u.i;
          358                 if (val < cur->min)
          359                         cur->min = val;
          360                 if (val > cur->max)
          361                         cur->max = val;
          362         }
          363 }
          364 
          365 static void
          366 bswitch(char *token, union tokenop u)
          367 {
          368         Swtch *cur;
          369         Node *np = node(u.op);
          370 
          371         if (swp == &swtbl[NR_BLOCK])
          372                 error(EWTACKO);
          373         cur = swp++;
          374         cur->nr = 0;
          375         cur->min = TINT_MAX;
          376         cur->max = TINT_MIN;
          377         cur->defnode = NULL;
          378         cur->cases = NULL;
          379         eval(strtok(NULL, "\t\n"));
          380         np->left = pop();
          381 
          382         push(cur->bswtch = np);
          383 }
          384 
          385 static int
          386 cmpcase(const void *p1, const void *p2)
          387 {
          388         TINT d;
          389         Node *np1, *np2;
          390 
          391         np1 = *(Node **) p1;
          392         np2 = *(Node **) p2;
          393         d = np1->left->u.i - np2->left->u.i;
          394         if (d < 0)
          395                 return -1;
          396         if (d > 0)
          397                 return 1;
          398         return 0;
          399 }
          400 
          401 static void
          402 eswitch(char *token, union tokenop u)
          403 {
          404         Node *p;
          405         Swtch *cur;
          406 
          407         if (swp == swtbl)
          408                 error(EWTACKU);
          409         jump(token, u);
          410 
          411         cur = --swp;
          412         cur->eswtch = pop();
          413         cur->bswtch->u.swtch = newswtch(cur);
          414         qsort(cur->cases, cur->nr, sizeof(Node *), cmpcase);
          415 }
          416 
          417 static void
          418 ocase(char *token, union tokenop u)
          419 {
          420         jump(token, u);
          421         addcase(pop());
          422 }
          423 
          424 static void
          425 jump(char *token, union tokenop u)
          426 {
          427         Symbol *label;
          428         Node *aux, *np = node(u.op);
          429 
          430         eval(strtok(NULL, "\t\n"));
          431 
          432         if (u.op == OBRANCH || u.op == OCASE)
          433                 np->left = pop();
          434         aux = pop();
          435         label = aux->u.sym;
          436         label->refcnt++;
          437         np->u.sym = label;
          438         delnode(aux);
          439         push(np);
          440 }
          441 
          442 static void
          443 loop(char *token, union tokenop u)
          444 {
          445         push(node(u.op));
          446 }
          447 
          448 static void
          449 unary(char *token, union tokenop u)
          450 {
          451         Node *np = node(u.op);
          452 
          453         np->type = *gettype(token+1);
          454         np->left = pop();
          455         np->right = NULL;
          456         push(np);
          457 }
          458 
          459 static void
          460 call(char *token, union tokenop u)
          461 {
          462         Node *np, *par, *fun = node(u.op);
          463 
          464         for (par = NULL;; par = np) {
          465                 np = pop();
          466                 if (np->op != OPAR)
          467                         break;
          468                 np->right = par;
          469         }
          470 
          471         fun->type = *gettype(token+1);
          472         fun->left = np;
          473         fun->right = par;
          474         push(fun);
          475 }
          476 
          477 static void
          478 builtin(char *token, union tokenop u)
          479 {
          480         Node *np = node(u.op);
          481         char *name;
          482         unsigned subop, nchilds;
          483 
          484         np->type = *gettype(token+1);
          485         name = pop();
          486 
          487         if (!strcmp("__builtin_va_arg", name)) {
          488                 nchilds = 1;
          489                 subop = BVA_ARG;
          490         } else if (!strcmp("__builtin_va_start", name)) {
          491                 nchilds = 2;
          492                 subop = BVA_START;
          493         } else if (!strcmp("__builtin_va_end", name)) {
          494                 nchilds = 1;
          495                 subop = BVA_END;
          496         } else if (!strcmp("__builtin_va_copy", name)) {
          497                 nchilds = 2;
          498                 subop = BVA_COPY;
          499         } else {
          500                 error(EBBUILT);
          501         }
          502 
          503         np->u.subop = subop;
          504         np->right = (nchilds == 2) ? pop() : NULL;
          505         np->left = (nchilds != 0) ? pop() : NULL;
          506 
          507         free(name);
          508         push(np);
          509 }
          510 
          511 static void
          512 binary(char *token, union tokenop u)
          513 {
          514         Node *np = node(u.op);
          515 
          516         np->type = *gettype(token+1);
          517         np->right = pop();
          518         np->left = pop();
          519         push(np);
          520 }
          521 
          522 static void
          523 binit(char *token, union tokenop u)
          524 {
          525         ininit = 1;
          526 }
          527 
          528 static void
          529 einit(char *token, union tokenop u)
          530 {
          531         ininit = 0;
          532         endinit();
          533 }
          534 
          535 static void
          536 endpars(void)
          537 {
          538         if (!inpars)
          539                 error(ESYNTAX);
          540         inpars = 0;
          541 }
          542 
          543 static void
          544 aggregate(void)
          545 {
          546         Node *align, *size;
          547         char *name;
          548         Type *tp;
          549         Symbol *sym;
          550 
          551         align = pop();
          552         size = pop();
          553         name = pop();
          554         tp = pop();
          555 
          556         tp->size = size->u.i;
          557         tp->align = align->u.i;
          558         tp->flags = AGGRF;
          559         /*
          560          * type is the first field of Symbol so we can obtain the
          561          * address of the symbol from the address of the type.
          562          * We have to do this because composed returns the pointer
          563          * to the type, but in this function we also need the
          564          * symbol to store the name.
          565          */
          566         sym = (Symbol *) tp;
          567         sym->name = name;
          568         deftype(tp);
          569 
          570         delnode(align);
          571         delnode(size);
          572 }
          573 
          574 static void
          575 array(void)
          576 {
          577         Type *tp, *base;
          578         Node *size;
          579 
          580         size = pop();
          581         base = pop();
          582         tp = pop();
          583         tp->flags = ARRF;
          584 
          585         if (size->u.i > LONG_MAX/base->size)
          586                 error(EOVERFL);
          587 
          588         tp->size = size->u.i * base->size;
          589         tp->align = base->align;
          590 
          591         delnode(size);
          592 }
          593 
          594 static void
          595 decl(Symbol *sym)
          596 {
          597         Type *tp = &sym->type;
          598 
          599         if (tp->flags & FUNF) {
          600                 lastfun = sym;
          601         } else {
          602                 switch (sym->kind) {
          603                 case SEXTRN:
          604                 case SGLOB:
          605                 case SPRIV:
          606                 case SLOCAL:
          607                         defglobal(sym);
          608                         break;
          609                 case SAUTO:
          610                 case SREG:
          611                         if (!beginf)
          612                                 error(ESYNTAX);
          613                         ((inpars) ? defpar : defvar)(sym);
          614                         break;
          615                 default:
          616                         abort();
          617                 }
          618         }
          619 }
          620 
          621 static void
          622 vardecl(void)
          623 {
          624         Type *tp, *rp;
          625         Node *np;
          626         Symbol *sym;
          627         char *name;
          628 
          629         name = pop();
          630         tp = pop();
          631         if (tp->flags & FUNF)
          632                 rp = pop();
          633         np = pop();
          634 
          635         sym = np->u.sym;
          636         /*
          637          * We have to free sym->name because in tentative declarations
          638          * we can have multiple declarations of the same symbol, and in
          639          * this case our parser will allocate twice the memory
          640          */
          641         free(sym->name);
          642         sym->name = name;
          643         sym->type = *tp;
          644         if (tp->flags & FUNF)
          645                 sym->rtype = *rp;
          646         sym->kind = sclass;
          647 
          648         if (ininit)
          649                 sym->type.flags |= INITF;
          650         decl(sym);
          651         delnode(np);
          652 }
          653 
          654 static void
          655 flddecl(void)
          656 {
          657         Node *off, *np;
          658         char *name;
          659         Type *tp;
          660         Symbol *sym;
          661 
          662         off = pop();
          663         name = pop();
          664         tp = pop();
          665         np = pop();
          666 
          667         sym = np->u.sym;
          668         sym->u.off = off->u.i;
          669         sym->name = name;
          670         sym->type = *tp;
          671 
          672         delnode(np);
          673         delnode(off);
          674 }
          675 
          676 static void
          677 labeldcl(void)
          678 {
          679         Node *np;
          680         Symbol *sym;
          681 
          682         np = pop();
          683         np->op = ONOP;
          684         sym = np->u.sym;
          685         sym->kind = SLABEL;
          686         labelstmt(np, sym);
          687         addstmt(np);
          688 }
          689 
          690 static void
          691 stmt(void)
          692 {
          693         Node *np;
          694 
          695         if (empty())
          696                 return;
          697         np = pop();
          698         if (ininit) {
          699                 data(np);
          700                 deltree(np);
          701                 return;
          702         }
          703         addstmt(np);
          704 }
          705 
          706 static void
          707 beginfun(void)
          708 {
          709         newfun(lastfun, node(OBFUN));
          710         beginf = inpars = 1;
          711         pushctx();
          712 }
          713 
          714 static void
          715 endfun(void)
          716 {
          717         endf = 1;
          718         laststmt = addstmt(node(OEFUN));
          719 }
          720 
          721 void
          722 parse(void)
          723 {
          724         /* clean from previous function */
          725         cleanswtch();
          726         cleancfg();
          727         cleannodes();
          728         popctx();
          729 
          730         inpars = ininit = beginf = endf = 0;
          731 
          732         while (!endf && nextline())
          733                 ;
          734 
          735         if (ferror(stdin))
          736                 error(EFERROR, strerror(errno));
          737 
          738         if (beginf && !endf)
          739                 error(EBAFFUN);
          740 
          741         PRTREE("after parsing");
          742 }