parser.c - scc - simple c99 compiler
(HTM) git clone git://git.simple-cc.org/scc
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Submodules
(DIR) README
(DIR) LICENSE
---
parser.c (14397B)
---
1 #include <errno.h>
2 #include <limits.h>
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6
7 #include <scc/cstd.h>
8 #include <scc/scc.h>
9
10 #include "cc2.h"
11
12 #define STACKSIZ 50
13
14 Type funetype = {
15 .flags = FUNF | ELLIPS
16 };
17
18 Type funtype = {
19 .flags = FUNF
20 };
21
22 union tokenop {
23 void *arg;
24 unsigned op;
25 };
26
27 Node *laststmt;
28 static Swtch swtbl[NR_BLOCK], *swp = swtbl;
29 static Symbol *lastfun;
30
31 typedef void parsefun(char *, union tokenop);
32 static parsefun type, symbol, getname, unary, binary, ternary, call,
33 constant, composed, binit, einit,
34 jump, oreturn, loop, assign,
35 ocase, bswitch, eswitch, builtin;
36
37 typedef void evalfun(void);
38 static evalfun vardecl, beginfun, endfun, endpars, stmt,
39 array, aggregate, flddecl, labeldcl;
40
41 static struct decoc {
42 void (*eval)(void);
43 void (*parse)(char *token, union tokenop);
44 union tokenop u;
45 } optbl[] = { /* eval parse args */
46 ['A'] = { vardecl, symbol, .u.op = SAUTO<<8 | OAUTO},
47 ['R'] = { vardecl, symbol, .u.op = SREG<<8 | OREG},
48 ['G'] = { vardecl, symbol, .u.op = SGLOB<<8 | OMEM},
49 ['X'] = { vardecl, symbol, .u.op = SEXTRN<<8 | OMEM},
50 ['Y'] = { vardecl, symbol, .u.op = SPRIV<<8 | OMEM},
51 ['T'] = { vardecl, symbol, .u.op = SLOCAL<<8 | OMEM},
52 ['M'] = { flddecl, symbol, .u.op = SMEMB<<8 | OMEM},
53 ['L'] = { labeldcl, symbol, .u.op = SLABEL<<8 | OLABEL},
54
55 ['C'] = { NULL, type, .u.arg = &int8type},
56 ['I'] = { NULL, type, .u.arg = &int16type},
57 ['W'] = { NULL, type, .u.arg = &int32type},
58 ['Q'] = { NULL, type, .u.arg = &int64type},
59 ['K'] = { NULL, type, .u.arg = &uint8type},
60 ['N'] = { NULL, type, .u.arg = &uint16type},
61 ['Z'] = { NULL, type, .u.arg = &uint32type},
62 ['O'] = { NULL, type, .u.arg = &uint64type},
63 ['J'] = { NULL, type, .u.arg = &float32type},
64 ['D'] = { NULL, type, .u.arg = &float64type},
65 ['H'] = { NULL, type, .u.arg = &float80type},
66 ['0'] = { NULL, type, .u.arg = &voidtype},
67 ['B'] = { NULL, type, .u.arg = &booltype},
68 ['P'] = { NULL, type, .u.arg = &ptrtype},
69 ['E'] = { NULL, type, .u.arg = &funetype},
70 ['1'] = { NULL, type, .u.arg = &arg_type},
71
72 ['F'] = { NULL, type, .u.arg = &funtype},
73 ['V'] = { array,composed, 0},
74 ['U'] = {aggregate,composed, 0},
75 ['S'] = {aggregate,composed, 0},
76
77 ['"'] = { NULL, getname, 0},
78 ['{'] = { beginfun, NULL, 0},
79 ['}'] = { endfun, NULL, 0},
80 ['('] = { NULL, binit, 0},
81 [')'] = { NULL, einit, 0},
82 ['\\'] = { endpars, NULL, 0},
83 ['\t'] = { stmt, NULL, 0},
84
85 ['~'] = { NULL, unary, .u.op = OCPL},
86 ['_'] = { NULL, unary, .u.op = OSNEG},
87 ['\''] = { NULL, unary, .u.op = OADDR},
88 ['@'] = { NULL, unary, .u.op = OPTR},
89 ['g'] = { NULL, unary, .u.op = OCAST},
90 ['p'] = { NULL, unary, .u.op = OPAR},
91 ['n'] = { NULL, unary, .u.op = ONEG},
92
93 ['a'] = { NULL, binary, .u.op = OAND},
94 ['o'] = { NULL, binary, .u.op = OOR},
95 ['.'] = { NULL, binary, .u.op = OFIELD},
96 ['+'] = { NULL, binary, .u.op = OADD},
97 ['-'] = { NULL, binary, .u.op = OSUB},
98 ['*'] = { NULL, binary, .u.op = OMUL},
99 ['%'] = { NULL, binary, .u.op = OMOD},
100 ['/'] = { NULL, binary, .u.op = ODIV},
101 ['l'] = { NULL, binary, .u.op = OSHL},
102 ['r'] = { NULL, binary, .u.op = OSHR},
103 ['<'] = { NULL, binary, .u.op = OLT},
104 ['>'] = { NULL, binary, .u.op = OGT},
105 ['['] = { NULL, binary, .u.op = OLE},
106 [']'] = { NULL, binary, .u.op = OGE},
107 ['='] = { NULL, binary, .u.op = OEQ},
108 ['!'] = { NULL, binary, .u.op = ONE},
109 ['&'] = { NULL, binary, .u.op = OBAND},
110 ['|'] = { NULL, binary, .u.op = OBOR},
111 ['^'] = { NULL, binary, .u.op = OBXOR},
112 [','] = { NULL, binary, .u.op = OCOMMA},
113 ['m'] = { NULL, builtin,.u.op = OBUILTIN},
114
115 [':'] = { NULL, assign, .u.op = OASSIG},
116 ['?'] = { NULL, ternary, .u.op = OASK},
117 ['c'] = { NULL, call, .u.op = OCALL},
118 ['z'] = { NULL, call, .u.op = OCALLE},
119
120 ['#'] = { NULL,constant, .u.op = OCONST},
121
122 ['j'] = { NULL, jump, .u.op = OJMP},
123 ['y'] = { NULL, jump, .u.op = OBRANCH},
124 ['h'] = { NULL, oreturn, .u.op = ORET},
125 ['i'] = { NULL, NULL, .u.op = OINC},
126 ['d'] = { NULL, NULL, .u.op = ODEC},
127
128 ['b'] = { NULL, loop, .u.op = OBLOOP},
129 ['e'] = { NULL, loop, .u.op = OELOOP},
130
131 ['v'] = { NULL, ocase, .u.op = OCASE},
132 ['f'] = { NULL, ocase, .u.op = ODEFAULT},
133 ['t'] = { NULL, eswitch, .u.op = OESWITCH},
134 ['s'] = { NULL, bswitch, .u.op = OBSWITCH},
135 };
136
137 static int sclass, inpars, ininit, beginf, endf, lineno;
138 static void *stack[STACKSIZ], **sp = stack;
139
140 static Node *
141 push(void *elem)
142 {
143 if (sp == &stack[STACKSIZ])
144 error(ESTACKO);
145 return *sp++ = elem;
146 }
147
148 static void *
149 pop(void)
150 {
151 if (sp == stack)
152 error(ESTACKU);
153 return *--sp;
154 }
155
156 static int
157 empty(void)
158 {
159 return sp == stack;
160 }
161
162 static void
163 type(char *token, union tokenop u)
164 {
165 push(u.arg);
166 }
167
168 static void
169 composed(char *token, union tokenop u)
170 {
171 Symbol *sym;
172
173 sym = getsym(atoi(token+1));
174 sym->type.id = sym->id;
175 push(&sym->type);
176 }
177
178 static void
179 getname(char *t, union tokenop u)
180 {
181 push((*++t) ? xstrdup(t) : NULL);
182 }
183
184 static void
185 symbol(char *token, union tokenop u)
186 {
187 Node *np = node(u.op & 0xFF);
188 Symbol *sym = getsym(atoi(token+1));
189
190 sclass = u.op >> 8;
191 np->u.sym = sym;
192 np->type = sym->type;
193 push(np);
194 }
195
196 static Type *
197 gettype(char *token)
198 {
199 struct decoc *dp;
200
201 dp = &optbl[*token];
202 if (!dp->parse)
203 error(ESYNTAX);
204 (*dp->parse)(token, dp->u);
205 return pop();
206 }
207
208 static void
209 constant(char *token, union tokenop u)
210 {
211 static char letters[] = "0123456789ABCDEF";
212 Node *np;
213 TUINT v;
214 unsigned c;
215
216 ++token;
217 if (*token == '"') {
218 ++token;
219 np = node(OSTRING);
220 np->type.flags = STRF;
221 np->type.size = strlen(token);
222 np->type.align = int8type.align;
223 np->u.s = xstrdup(token);
224 } else {
225 np = node(OCONST);
226 np->type = *gettype(token++);
227 for (v = 0; c = *token++; v += c) {
228 v <<= 4;
229 c = strchr(letters, c) - letters;
230 }
231 np->u.i = v;
232 }
233 push(np);
234 }
235
236 static void
237 assign(char *token, union tokenop u)
238 {
239 int subop;
240 Node *np = node(u.op);
241
242 switch (subop = *++token) {
243 case '+':
244 case '-':
245 case '*':
246 case '%':
247 case '/':
248 case 'l':
249 case 'r':
250 case '&':
251 case '|':
252 case '^':
253 case 'i':
254 case 'd':
255 ++token;
256 subop = optbl[subop].u.op;
257 break;
258 default:
259 subop = 0;
260 break;
261 }
262
263 np->u.subop = subop;
264 np->type = *gettype(token);
265 np->right = pop();
266 np->left = pop();
267 push(np);
268 }
269
270 static void
271 ternary(char *token, union tokenop u)
272 {
273 Node *ask = node(OASK), *colon = node(OCOLON);
274 Type *tp = gettype(token+1);
275
276 colon->right = pop();
277 colon->left = pop();
278
279 ask->type = *tp;
280 ask->left = pop();
281 ask->right = colon;
282 push(ask);
283 }
284
285 static void
286 eval(char *tok)
287 {
288 struct decoc *dp;
289
290 do {
291 dp = &optbl[*tok];
292 if (!dp->parse)
293 break;
294 (*dp->parse)(tok, dp->u);
295 } while (tok = strtok(NULL, "\t\n"));
296 }
297
298 static int
299 nextline(void)
300 {
301 static char line[LINESIZ];
302 size_t len;
303 int c;
304 void (*fun)(void);
305
306 repeat:
307 ++lineno;
308 if (!fgets(line, sizeof(line), stdin))
309 return 0;
310 if ((len = strlen(line)) == 0 || line[0] == '\n')
311 goto repeat;
312 if (line[len-1] != '\n')
313 error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE);
314 line[len-1] = '\0';
315
316 c = *line;
317 eval(strtok(line, "\t\n"));
318 if ((fun = *optbl[c].eval) != NULL)
319 (*fun)();
320 if (sp != stack)
321 error(ESTACKA);
322 return 1;
323 }
324
325 static void
326 oreturn(char *token, union tokenop u)
327 {
328 Node *np = node(u.op);
329
330 if (token = strtok(NULL, "\t\n"))
331 eval(token);
332 if (!empty())
333 np->left = pop();
334 push(np);
335 }
336
337 static void
338 addcase(Node *np)
339 {
340 TINT val;
341 Node **bp;
342 Swtch *cur;
343
344 if (swp == swtbl)
345 error(EWTACKU);
346
347 cur = swp - 1;
348 if (np->op == ODEFAULT) {
349 cur->defnode = np;
350 } else {
351 cur->nr++;
352 bp = cur->cases;
353 bp = xrealloc(bp, cur->nr * sizeof(Node *));
354 bp[cur->nr-1] = np;
355 cur->cases = bp;
356
357 val = np->left->u.i;
358 if (val < cur->min)
359 cur->min = val;
360 if (val > cur->max)
361 cur->max = val;
362 }
363 }
364
365 static void
366 bswitch(char *token, union tokenop u)
367 {
368 Swtch *cur;
369 Node *np = node(u.op);
370
371 if (swp == &swtbl[NR_BLOCK])
372 error(EWTACKO);
373 cur = swp++;
374 cur->nr = 0;
375 cur->min = TINT_MAX;
376 cur->max = TINT_MIN;
377 cur->defnode = NULL;
378 cur->cases = NULL;
379 eval(strtok(NULL, "\t\n"));
380 np->left = pop();
381
382 push(cur->bswtch = np);
383 }
384
385 static int
386 cmpcase(const void *p1, const void *p2)
387 {
388 TINT d;
389 Node *np1, *np2;
390
391 np1 = *(Node **) p1;
392 np2 = *(Node **) p2;
393 d = np1->left->u.i - np2->left->u.i;
394 if (d < 0)
395 return -1;
396 if (d > 0)
397 return 1;
398 return 0;
399 }
400
401 static void
402 eswitch(char *token, union tokenop u)
403 {
404 Node *p;
405 Swtch *cur;
406
407 if (swp == swtbl)
408 error(EWTACKU);
409 jump(token, u);
410
411 cur = --swp;
412 cur->eswtch = pop();
413 cur->bswtch->u.swtch = newswtch(cur);
414 qsort(cur->cases, cur->nr, sizeof(Node *), cmpcase);
415 }
416
417 static void
418 ocase(char *token, union tokenop u)
419 {
420 jump(token, u);
421 addcase(pop());
422 }
423
424 static void
425 jump(char *token, union tokenop u)
426 {
427 Symbol *label;
428 Node *aux, *np = node(u.op);
429
430 eval(strtok(NULL, "\t\n"));
431
432 if (u.op == OBRANCH || u.op == OCASE)
433 np->left = pop();
434 aux = pop();
435 label = aux->u.sym;
436 label->refcnt++;
437 np->u.sym = label;
438 delnode(aux);
439 push(np);
440 }
441
442 static void
443 loop(char *token, union tokenop u)
444 {
445 push(node(u.op));
446 }
447
448 static void
449 unary(char *token, union tokenop u)
450 {
451 Node *np = node(u.op);
452
453 np->type = *gettype(token+1);
454 np->left = pop();
455 np->right = NULL;
456 push(np);
457 }
458
459 static void
460 call(char *token, union tokenop u)
461 {
462 Node *np, *par, *fun = node(u.op);
463
464 for (par = NULL;; par = np) {
465 np = pop();
466 if (np->op != OPAR)
467 break;
468 np->right = par;
469 }
470
471 fun->type = *gettype(token+1);
472 fun->left = np;
473 fun->right = par;
474 push(fun);
475 }
476
477 static void
478 builtin(char *token, union tokenop u)
479 {
480 Node *np = node(u.op);
481 char *name;
482 unsigned subop, nchilds;
483
484 np->type = *gettype(token+1);
485 name = pop();
486
487 if (!strcmp("__builtin_va_arg", name)) {
488 nchilds = 1;
489 subop = BVA_ARG;
490 } else if (!strcmp("__builtin_va_start", name)) {
491 nchilds = 2;
492 subop = BVA_START;
493 } else if (!strcmp("__builtin_va_end", name)) {
494 nchilds = 1;
495 subop = BVA_END;
496 } else if (!strcmp("__builtin_va_copy", name)) {
497 nchilds = 2;
498 subop = BVA_COPY;
499 } else {
500 error(EBBUILT);
501 }
502
503 np->u.subop = subop;
504 np->right = (nchilds == 2) ? pop() : NULL;
505 np->left = (nchilds != 0) ? pop() : NULL;
506
507 free(name);
508 push(np);
509 }
510
511 static void
512 binary(char *token, union tokenop u)
513 {
514 Node *np = node(u.op);
515
516 np->type = *gettype(token+1);
517 np->right = pop();
518 np->left = pop();
519 push(np);
520 }
521
522 static void
523 binit(char *token, union tokenop u)
524 {
525 ininit = 1;
526 }
527
528 static void
529 einit(char *token, union tokenop u)
530 {
531 ininit = 0;
532 endinit();
533 }
534
535 static void
536 endpars(void)
537 {
538 if (!inpars)
539 error(ESYNTAX);
540 inpars = 0;
541 }
542
543 static void
544 aggregate(void)
545 {
546 Node *align, *size;
547 char *name;
548 Type *tp;
549 Symbol *sym;
550
551 align = pop();
552 size = pop();
553 name = pop();
554 tp = pop();
555
556 tp->size = size->u.i;
557 tp->align = align->u.i;
558 tp->flags = AGGRF;
559 /*
560 * type is the first field of Symbol so we can obtain the
561 * address of the symbol from the address of the type.
562 * We have to do this because composed returns the pointer
563 * to the type, but in this function we also need the
564 * symbol to store the name.
565 */
566 sym = (Symbol *) tp;
567 sym->name = name;
568 deftype(tp);
569
570 delnode(align);
571 delnode(size);
572 }
573
574 static void
575 array(void)
576 {
577 Type *tp, *base;
578 Node *size;
579
580 size = pop();
581 base = pop();
582 tp = pop();
583 tp->flags = ARRF;
584
585 if (size->u.i > LONG_MAX/base->size)
586 error(EOVERFL);
587
588 tp->size = size->u.i * base->size;
589 tp->align = base->align;
590
591 delnode(size);
592 }
593
594 static void
595 decl(Symbol *sym)
596 {
597 Type *tp = &sym->type;
598
599 if (tp->flags & FUNF) {
600 lastfun = sym;
601 } else {
602 switch (sym->kind) {
603 case SEXTRN:
604 case SGLOB:
605 case SPRIV:
606 case SLOCAL:
607 defglobal(sym);
608 break;
609 case SAUTO:
610 case SREG:
611 if (!beginf)
612 error(ESYNTAX);
613 ((inpars) ? defpar : defvar)(sym);
614 break;
615 default:
616 abort();
617 }
618 }
619 }
620
621 static void
622 vardecl(void)
623 {
624 Type *tp, *rp;
625 Node *np;
626 Symbol *sym;
627 char *name;
628
629 name = pop();
630 tp = pop();
631 if (tp->flags & FUNF)
632 rp = pop();
633 np = pop();
634
635 sym = np->u.sym;
636 /*
637 * We have to free sym->name because in tentative declarations
638 * we can have multiple declarations of the same symbol, and in
639 * this case our parser will allocate twice the memory
640 */
641 free(sym->name);
642 sym->name = name;
643 sym->type = *tp;
644 if (tp->flags & FUNF)
645 sym->rtype = *rp;
646 sym->kind = sclass;
647
648 if (ininit)
649 sym->type.flags |= INITF;
650 decl(sym);
651 delnode(np);
652 }
653
654 static void
655 flddecl(void)
656 {
657 Node *off, *np;
658 char *name;
659 Type *tp;
660 Symbol *sym;
661
662 off = pop();
663 name = pop();
664 tp = pop();
665 np = pop();
666
667 sym = np->u.sym;
668 sym->u.off = off->u.i;
669 sym->name = name;
670 sym->type = *tp;
671
672 delnode(np);
673 delnode(off);
674 }
675
676 static void
677 labeldcl(void)
678 {
679 Node *np;
680 Symbol *sym;
681
682 np = pop();
683 np->op = ONOP;
684 sym = np->u.sym;
685 sym->kind = SLABEL;
686 labelstmt(np, sym);
687 addstmt(np);
688 }
689
690 static void
691 stmt(void)
692 {
693 Node *np;
694
695 if (empty())
696 return;
697 np = pop();
698 if (ininit) {
699 data(np);
700 deltree(np);
701 return;
702 }
703 addstmt(np);
704 }
705
706 static void
707 beginfun(void)
708 {
709 newfun(lastfun, node(OBFUN));
710 beginf = inpars = 1;
711 pushctx();
712 }
713
714 static void
715 endfun(void)
716 {
717 endf = 1;
718 laststmt = addstmt(node(OEFUN));
719 }
720
721 void
722 parse(void)
723 {
724 /* clean from previous function */
725 cleanswtch();
726 cleancfg();
727 cleannodes();
728 popctx();
729
730 inpars = ininit = beginf = endf = 0;
731
732 while (!endf && nextline())
733 ;
734
735 if (ferror(stdin))
736 error(EFERROR, strerror(errno));
737
738 if (beginf && !endf)
739 error(EBAFFUN);
740
741 PRTREE("after parsing");
742 }