parser.c - scc - simple c99 compiler
(HTM) git clone git://git.simple-cc.org/scc
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Submodules
(DIR) README
(DIR) LICENSE
---
parser.c (16205B)
---
1 #include <assert.h>
2 #include <ctype.h>
3 #include <errno.h>
4 #include <limits.h>
5 #include <stdarg.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9
10 #include "make.h"
11
/* capacity of the "repl" and "to" buffers of a $(NAME:repl=to) expansion */
#define MAXREPL		30
/* number of buckets of the macro table; lookup() masks with TABSIZ-1 */
#define TABSIZ		64
/* capacity of the token buffer and of a macro name inside an expansion */
#define MAXTOKEN	FILENAME_MAX
/* token code returned by the lexer for a word (see item()) */
#define ITEM		128
16
typedef struct macro Macro;

/* kind of an entry of the input stack */
enum inputype {
	FTFILE = 0,	/* lines read from a stream (or a location marker) */
	FTEXPAN = 1,	/* text pushed back by an expansion */
};

/* states of the automaton used by expansion() to parse $(...) and ${...} */
enum {
	STBEGIN = 0,	/* reading the macro name */
	STINTERNAL = 1,	/* an internal macro (@ ? * <) was read */
	STREPLACE = 2,	/* reading the pattern after ':' */
	STTO = 3,	/* reading the substitution after '=' */
	STEND = 4,	/* closing delimiter found */
};
31
32 struct input {
33 int siz;
34 int type;
35
36 FILE *fp;
37 struct loc loc;
38
39 int pos;
40 char *buf;
41
42 struct input *prev;
43 };
44
/* A macro definition stored in a bucket of the hash table. */
struct macro {
	struct macro *next;	/* next macro in the same bucket */

	char *name;		/* heap copy of the macro name */
	char *value;		/* heap copy of the unexpanded value */
	int where;		/* origin of the value (UNDEF, MAKEFILE, ...) */
};
52
53 static struct input *input;
54 static char token[MAXTOKEN];
55 static int tok;
56 static Macro *htab[TABSIZ];
57
58 void
59 dumpmacros(void)
60 {
61 Macro **pp, *p;
62
63 for (pp = htab; pp < &htab[TABSIZ]; ++pp) {
64 for (p = *pp; p; p = p->next)
65 printf("%s = %s\n", p->name, getmacro(p->name));
66 }
67 }
68
69 static Macro *
70 lookup(char *name)
71 {
72 Macro *mp;
73 int h = hash(name) & TABSIZ-1;
74
75 for (mp = htab[h]; mp && strcmp(mp->name, name); mp = mp->next)
76 ;
77
78 if (mp)
79 return mp;
80
81 mp = emalloc(sizeof(*mp));
82 mp->name = estrdup(name);
83 mp->value = estrdup("");
84 mp->next = htab[h];
85 mp->where = UNDEF;
86 htab[h] = mp;
87
88 return mp;
89 }
90
91 static char *
92 macroinfo(char *name, int *pwhere, Macro **mpp)
93 {
94 char *s, *t;
95 int hide, where;
96 Macro *mp = lookup(name);
97
98 hide = 0;
99 if (!strcmp(name, "SHELL") || !strcmp(name, "MAKEFLAGS"))
100 hide = 1;
101
102 s = mp->value;
103 where = mp->where;
104
105 if (!hide && (where == UNDEF || where == INTERNAL || eflag)) {
106 t = getenv(name);
107 if (t) {
108 where = ENVIRON;
109 s = t;
110 }
111 }
112
113 if (pwhere)
114 *pwhere = where;
115 if (mpp)
116 *mpp = mp;
117
118 return s;
119 }
120
/*
 * Return the unexpanded value of the macro name as seen by
 * macroinfo(). The string is not a copy: it points into the macro
 * table or into the environment.
 */
char *
getmacro(char *name)
{
	char *val;

	val = macroinfo(name, NULL, NULL);

	return val;
}
126
127 void
128 setmacro(char *name, char *val, int where, int export)
129 {
130 int owhere, set;
131 char *s;
132 Macro *mp;
133
134 assert(where != ENVIRON);
135
136 s = macroinfo(name, &owhere, &mp);
137
138 /*
139 * Default values are defined before anything else, and marked
140 * as INTERNAL because they are injected as parseable text, and
141 * MAKEFILE and INTERNAL variables are always overriden. ENVIRON
142 * macros are generated in macroinfo() and this is why this function
143 * should not receive a where == ENVIRON ever.
144 */
145 switch (owhere) {
146 case UNDEF:
147 case INTERNAL:
148 case MAKEFILE:
149 set = 1;
150 break;
151 case ENVIRON:
152 set = (where == MAKEFLAGS || where == CMDLINE);
153 set |= (where == MAKEFILE && !eflag);
154 break;
155 case MAKEFLAGS:
156 set = (where == CMDLINE || where == MAKEFLAGS);
157 break;
158 case CMDLINE:
159 set = (where == CMDLINE);
160 break;
161 default:
162 abort();
163 }
164
165 if (!set) {
166 debug("hidding override of %s from '%s' to '%s'", name, s, val);
167 } else {
168 debug("override %s from '%s' to '%s'", name, s, val);
169 free(mp->value);
170 mp->value = estrdup(val);
171 mp->where = where;
172
173 if (export && strcmp(name, "SHELL") != 0) {
174 debug("exporting macro %s", name);
175 exportvar(name, val);
176 }
177 }
178 }
179
180 void
181 freeloc(struct loc *loc)
182 {
183 free(loc->fname);
184 }
185
186 static struct loc *
187 getloc(void)
188 {
189 struct input *ip;
190
191 for (ip = input; ip && ip->type != FTFILE; ip = ip->prev)
192 ;
193 if (!ip)
194 return NULL;
195
196 return &ip->loc;
197 }
198
199
200 void
201 error(char *fmt, ...)
202 {
203 va_list va;
204 struct loc *loc;
205
206 fprintf(stderr, "make: error: ");
207 if ((loc = getloc()) != NULL)
208 fprintf(stderr, "%s:%d: ", loc->fname, loc->lineno);
209
210 va_start(va, fmt);
211 vfprintf(stderr, fmt, va);
212 va_end(va);
213 putc('\n', stderr);
214
215 exit(EXIT_FAILURE);
216 }
217
218 void
219 warning(char *fmt, ...)
220 {
221 va_list va;
222 struct loc *loc;
223
224 fprintf(stderr, "make: warning: ");
225 if ((loc = getloc()) != NULL)
226 fprintf(stderr, "%s:%d: ", loc->fname, loc->lineno);
227
228 va_start(va, fmt);
229 vfprintf(stderr, fmt, va);
230 va_end(va);
231 putc('\n', stderr);
232 }
233
234 static void
235 pop(void)
236 {
237 struct input *ip = input->prev;
238
239 if (input->type == FTFILE) {
240 if (input->fp)
241 fclose(input->fp);
242 freeloc(&input->loc);
243 }
244 free(input->buf);
245 free(input);
246
247 input = ip;
248 }
249
250 static void
251 push(int type, ...)
252 {
253 int line, len, pos;
254 FILE *fp = NULL;
255 char *buf, *s, *fname = NULL;
256 va_list va;
257 struct input *ip;
258
259 va_start(va, type);
260 switch (type) {
261 case FTFILE:
262 fp = va_arg(va, FILE *);
263 s = va_arg(va, char *);
264 line = va_arg(va, int);
265 fname = estrdup(s);
266 buf = emalloc(BUFSIZ);
267 pos = len = BUFSIZ;
268 break;
269 case FTEXPAN:
270 s = va_arg(va, char *);
271 buf = estrdup(s);
272 line = pos = 0;
273 len = strlen(s);
274 break;
275 }
276 va_end(va);
277
278 ip = emalloc(sizeof(*ip));
279 ip->siz = len;
280 ip->buf = buf;
281 ip->type = type;
282 ip->fp = fp;
283 ip->loc.fname = fname;
284 ip->loc.lineno = line;
285 ip->pos = pos;
286 ip->prev = input;
287
288 input = ip;
289 }
290
/*
 * Strip leading and trailing white space from s. Trailing blanks are
 * overwritten with NUL in place and the returned pointer is the
 * first non-blank character inside s, so it must not be passed to
 * free().
 *
 * Characters are converted to unsigned char before being handed to
 * isspace(), because passing a negative char value to the <ctype.h>
 * functions is undefined behavior.
 */
static char *
trim(char *s)
{
	size_t len;

	while (isspace((unsigned char) *s))
		s++;

	len = strlen(s);
	while (len > 0 && isspace((unsigned char) s[len-1]))
		s[--len] = '\0';

	return s;
}
304
305 static void
306 include(char *s)
307 {
308 int len;
309 FILE *fp;
310 char *fil, *t;
311
312 s = trim(s);
313 fil = expandstring(s, NULL, getloc());
314
315 t = trim(fil);
316 if (strlen(t) != 0) {
317 debug("including '%s'", t);
318 if ((fp = fopen(t, "r")) == NULL)
319 error("opening %s:%s", t, strerror(errno));
320 push(FTFILE, fp, t, 0);
321 }
322
323 free(fil);
324 }
325
326 static char *
327 nextline(void)
328 {
329 int c;
330 FILE *fp;
331 char *s, *lim;
332
333 assert(input->type == FTFILE);
334
335 repeat:
336 fp = input->fp;
337 if (!fp || feof(fp))
338 return NULL;
339
340 lim = &input->buf[input->siz];
341 for (s = input->buf; s < lim; *s++ = c) {
342 c = getc(fp);
343 if (c == '\n' || c == EOF) {
344 input->loc.lineno++;
345 *s++ = '\n';
346 break;
347 }
348 if (c > UCHAR_MAX || c < 0)
349 error("invalid character '%c' (%d)", c, c);
350 }
351
352
353 if (s == lim)
354 error("too long line");
355 if (ferror(fp))
356 error(strerror(errno));
357 *s = '\0';
358
359 if (!strcmp(input->buf, ""))
360 goto repeat;
361
362 if (!strncmp(input->buf, "include", 7) && isblank(input->buf[7])) {
363 input->pos = input->siz;
364 include(input->buf+7);
365 goto repeat;
366 }
367
368 input->pos = 0;
369
370
371 return input->buf;
372 }
373
374 static int
375 empty(struct input *ip)
376 {
377 return ip->pos == ip->siz || ip->buf[ip->pos] == '\0';
378 }
379
380 static int
381 moreinput(void)
382 {
383 while (input) {
384 if (!empty(input))
385 break;
386
387 switch (input->type) {
388 case FTEXPAN:
389 pop();
390 break;
391 case FTFILE:
392 if (!nextline())
393 pop();
394 break;
395 }
396 }
397
398 return input != NULL;
399 }
400
401 static int
402 nextc(void)
403 {
404 if (!moreinput())
405 return EOF;
406
407 return input->buf[input->pos++];
408 }
409
410 /*
411 * This function only can be called after a call to nextc
412 * that didn't return EOF. It can return '\0', but as
413 * it is used only to check against '$' then it is not
414 * a problem.
415 */
416 static int
417 ahead(void)
418 {
419 return input->buf[input->pos];
420 }
421
422 static int
423 back(int c)
424 {
425 if (c == EOF)
426 return c;
427 assert(input->pos > 0);
428 return input->buf[--input->pos] = c;
429 }
430
/*
 * Discard the rest of a comment, up to and including the newline
 * that ends it or until the input is exhausted. The character that
 * follows a backslash is skipped too, so an escaped newline does not
 * finish the comment.
 */
static void
comment(void)
{
	int c;

	for (;;) {
		c = nextc();
		if (c == EOF || c == '\n')
			return;
		if (c == '\\' && nextc() == EOF)
			return;
	}
}
441
/*
 * Consume spaces and tabs from the input. The first character that
 * is not a blank is pushed back so the caller reads it next.
 */
static void
skipspaces(void)
{
	int c;

	do
		c = nextc();
	while (c == ' ' || c == '\t');

	back(c);
}
451
/*
 * Return 1 when c can be part of a word (target, prerequisite or
 * macro name) and 0 otherwise. Valid characters are '.', '/', '_',
 * '-' and the alphanumeric ones; EOF is never valid.
 *
 * c is converted to unsigned char before calling isalnum() because
 * it may come from a plain char, and negative values other than EOF
 * are undefined behavior for the <ctype.h> functions.
 */
static int
validchar(int c)
{
	if (c == EOF)
		return 0;
	if (c == '.' || c == '/' || c == '_' || c == '-')
		return 1;

	return isalnum((unsigned char) c) != 0;
}
459
/*
 * Return the fully expanded value of the macro name as a string
 * produced by expandstring(), which the callers release with free().
 */
static char *
expandmacro(char *name)
{
	char *value, *expanded;

	value = getmacro(name);
	expanded = expandstring(value, NULL, getloc());
	debug("macro %s expanded to '%s'", name, expanded);

	return expanded;
}
470
471 static void
472 replace(char *line, char *repl, char *to)
473 {
474 int siz, at, len, replsiz, tosiz, sep, pos;
475 char *oline, *s, *cur, *buf;
476
477 debug("replacing '%s', with '%s' to '%s'", line, repl, to);
478 oline = line;
479 tosiz = strlen(to);
480 replsiz = strlen(repl);
481
482 buf = NULL;
483 for (pos = 0; *line; pos += siz) {
484 cur = NULL;
485 siz = 0;
486
487 for (siz = 0; *line == ' ' || *line == '\t'; ++siz) {
488 cur = erealloc(cur, siz+1);
489 cur[siz] = *line++;
490 }
491
492 len = strcspn(line, " \t");
493 at = len - replsiz;
494 if (at < 0 || memcmp(line + at, repl, replsiz)) {
495 cur = erealloc(cur, siz + len);
496 memcpy(cur + siz, line, len);
497 siz += len;
498 } else {
499 cur = erealloc(cur, siz + at + tosiz);
500 memcpy(cur + siz, line, at);
501 memcpy(cur + siz + at, to, tosiz);
502 siz += at + tosiz;
503 }
504
505 line += len;
506 buf = erealloc(buf, pos + siz);
507 memcpy(buf + pos, cur, siz);
508 free(cur);
509 }
510
511 if (pos > 0) {
512 buf = erealloc(buf, pos + 1);
513 buf[pos] = '\0';
514 debug("\treplace '%s' with '%s'", oline, buf);
515 push(FTEXPAN, buf);
516 }
517
518 free(buf);
519 }
520
521 static void
522 expandsimple(Target *tp)
523 {
524 char *s;
525 Target **p;
526 int len, c;
527
528 switch (c = nextc()) {
529 case '@':
530 if (!tp || !tp->target)
531 return;
532 push(FTEXPAN, tp->target);
533 break;
534 case '<':
535 if (!tp || !tp->req)
536 return;
537 push(FTEXPAN, tp->req);
538 break;
539 case '*':
540 if (!tp || !tp->target)
541 return;
542 s = strrchr(tp->target, '.');
543 if (!s) {
544 push(FTEXPAN, tp->target);
545 return;
546 }
547
548 len = s - tp->target;
549 s = emalloc(len+1);
550 memcpy(s, tp->target, len);
551 s[len] = '\0';
552 push(FTEXPAN, s);
553 free(s);
554 break;
555 case '?':
556 if (!tp)
557 return;
558
559 if (tp->req && stamp(tp->req) > tp->stamp) {
560 push(FTEXPAN, " ");
561 push(FTEXPAN, tp->req);
562 }
563
564 for (p = tp->deps; p && *p; ++p) {
565 if (stamp((*p)->name) > tp->stamp) {
566 push(FTEXPAN, " ");
567 push(FTEXPAN, (*p)->name);
568 }
569 }
570 break;
571 default:
572 token[0] = c;
573 token[1] = '\0';
574 s = expandmacro(token);
575 push(FTEXPAN, s);
576 free(s);
577 break;
578 }
579 }
580
/*
 * Return 1 when ch is the name of one of the internal macros
 * ($@, $?, $* and $<) and 0 otherwise.
 */
static int
internal(int ch)
{
	if (ch == '@' || ch == '?')
		return 1;
	if (ch == '*' || ch == '<')
		return 1;

	return 0;
}
594
595 static void
596 expansion(Target *tp)
597 {
598 int delim, c, repli, toi, namei, st;
599 char name[MAXTOKEN], repl[MAXREPL], to[MAXREPL];
600 char *s, *erepl;
601
602 c = nextc();
603 if (c == '(')
604 delim = ')';
605 else if (c == '{')
606 delim = '}';
607 else
608 delim = 0;
609
610 if (!delim) {
611 back(c);
612 expandsimple(tp);
613 return;
614 }
615
616 s = NULL;
617 namei = repli = toi = 0;
618 st = STBEGIN;
619
620 while (st != STEND && (c = nextc()) != EOF) {
621 switch (st) {
622 case STBEGIN:
623 if (c == ':') {
624 st = STREPLACE;
625 name[namei] = '\0';
626 s = expandmacro(name);
627 break;
628 }
629 if (c == delim) {
630 name[namei] = '\0';
631 s = expandmacro(name);
632 goto no_replace;
633 }
634 if (namei == MAXTOKEN-1)
635 error("expansion text too long");
636
637 if (namei == 0 && internal(c)) {
638 name[namei++] = '$';
639 name[namei++] = c;
640 name[namei] = '\0';
641 st = STINTERNAL;
642 s = expandstring(name, tp, getloc());
643 break;
644 }
645
646 if (!validchar(c))
647 error("invalid macro name in expansion");
648 name[namei++] = c;
649 break;
650 case STINTERNAL:
651 if (c == delim)
652 goto no_replace;
653 if (c != ':')
654 error("invalid internal macro in expansion");
655 st = STREPLACE;
656 break;
657 case STREPLACE:
658 if (c == '=') {
659 st = STTO;
660 break;
661 }
662 if (c == delim)
663 error("invalid replacement pattern in expansion");
664 if (repli == MAXREPL-1)
665 error("macro replacement too big");
666 repl[repli++] = c;
667 break;
668 case STTO:
669 if (c == delim) {
670 st = STEND;
671 break;
672 }
673
674 if (toi == MAXREPL-1)
675 error("macro substiturion too big");
676 to[toi++] = c;
677 break;
678 }
679 }
680
681 if (c == EOF)
682 error("found eof while parsing expansion");
683
684 repl[repli] = '\0';
685 to[toi] = '\0';
686
687 erepl = expandstring(repl, tp, getloc());
688 replace(s, erepl, to);
689
690 free(erepl);
691 free(s);
692 return;
693
694 no_replace:
695 push(FTEXPAN, s);
696 free(s);
697 }
698
699 /*
700 * Horrible hack to do string expansion.
701 * We cannot use normal push and nextc because that
702 * would consume characters of the current file too.
703 * For that reason it cleans the input and it recovers
704 * it later.
705 */
706 char *
707 expandstring(char *line, Target *tp, struct loc *loc)
708 {
709 int c, n;
710 char *s;
711 struct input *ip = input;
712
713 input = NULL;
714 push(FTFILE, NULL, loc->fname, loc->lineno);
715 push(FTEXPAN, line);
716
717 n = 0;
718 s = NULL;
719 while ((c = nextc()) != EOF) {
720 if (c != '$') {
721 s = erealloc(s, ++n);
722 s[n-1] = c;
723 continue;
724 }
725
726 if ((c = nextc()) == '$') {
727 s = erealloc(s, n += 2);
728 s[n-2] = '$';
729 s[n-1] = '$';
730 } else {
731 back(c);
732 expansion(tp);
733 }
734 }
735
736 s = erealloc(s, n+1);
737 s[n] = '\0';
738 input = ip;
739
740 return s;
741 }
742
743 static int
744 item(void)
745 {
746 int c;
747 char *s;
748 char buf[MAXTOKEN];
749
750 for (s = buf; s < &buf[MAXTOKEN] - 1; ) {
751 c = nextc();
752 if (c == '$' && ahead() != '$')
753 expansion(NULL);
754 else if (validchar(c))
755 *s++ = c;
756 else
757 break;
758 }
759 back(c);
760
761 if (s >= &buf[MAXTOKEN] - 1)
762 error("token too long");
763 if (s == buf)
764 error("invalid empty token");
765 *s++ = '\0';
766 memcpy(token, buf, s - buf);
767
768 return ITEM;
769 }
770
771 static int
772 next(void)
773 {
774 int c;
775
776 repeat:
777 /*
778 * It is better to avoid skipspaces() here, because
779 * it can generate the need for 2 calls to back(),
780 * and we need the character anyway.
781 */
782 c = nextc();
783 if (c == ' ' || c == '\t')
784 goto repeat;
785
786 if (c == '\\') {
787 if ((c = nextc()) == '\n')
788 goto repeat;
789 back(c);
790 c = '\\';
791 }
792
793 switch (c) {
794 case EOF:
795 strcpy(token, "<EOF>");
796 tok = EOF;
797 break;
798 case '$':
799 if ((c = nextc()) == '$')
800 goto single;
801 back(c);
802 expansion(NULL);
803 goto repeat;
804 case '#':
805 comment();
806 c = '\n';
807 case ';':
808 case ':':
809 case '=':
810 case '\n':
811 single:
812 token[0] = c;
813 token[1] = '\0';
814 tok = c;
815 break;
816 default:
817 if (!validchar(c))
818 error("unexpected character '%c'", c);
819 back(c);
820 tok = item();
821 break;
822 }
823
824 return tok;
825 }
826
/*
 * Read the value of a macro definition, which extends to the end of
 * the line or to the beginning of a comment, and return it in a
 * newly allocated string. A backslash followed by a newline joins
 * the next line: both characters and the blanks that begin the next
 * line are replaced by one space. Running out of input before the
 * end of the line is a fatal error.
 */
static char *
readmacrodef(void)
{
	int c, len;
	char *def;

	def = NULL;
	len = 0;
	for (;;) {
		c = nextc();
		if (c == EOF)
			break;

		/* keep room for this character or for the final NUL */
		def = erealloc(def, len + 1);
		if (c == '\n')
			break;
		if (c == '#') {
			comment();
			break;
		}
		if (c == '\\') {
			c = nextc();
			if (c == '\n') {
				skipspaces();
				c = ' ';
			} else {
				back(c);
				c = '\\';
			}
		}

		def[len++] = c;
	}
	if (c == EOF)
		error("EOF while looking for end of line");
	def[len] = '\0';

	return def;
}
861
862 static struct action
863 readcmd(void)
864 {
865 int n, c;
866 struct loc *loc;
867 struct action act;
868
869 skipspaces();
870
871 loc = getloc();
872 act.loc.fname = estrdup(loc->fname);
873 act.loc.lineno = loc->lineno;
874
875 n = 0;
876 act.line = NULL;
877 while ((c = nextc()) != EOF) {
878 act.line = erealloc(act.line, n+1);
879 if (c == '\n')
880 break;
881 if (c == '\\') {
882 if ((c = nextc()) == '\n') {
883 if ((c = nextc()) != '\t')
884 back(c);
885 continue;
886 }
887 back(c);
888 c = '\\';
889 }
890 act.line[n++] = c;
891 }
892 if (c == EOF)
893 error("EOF while looking for end of command");
894 act.line[n] = '\0';
895
896 return act;
897 }
898
899 static void
900 rule(char *targets[], int ntargets)
901 {
902 int c, i, j, ndeps, nactions;
903 struct action *acts;
904 char **deps = NULL;
905
906 if (ntargets == 0)
907 error("missing target");
908
909 for (ndeps = 0; next() == ITEM; ++ndeps) {
910 deps = erealloc(deps, (ndeps+1) * sizeof(char *));
911 deps[ndeps] = estrdup(token);
912 }
913
914 if (tok != '\n' && tok != ';')
915 error("garbage at the end of the line");
916
917 nactions = 0;
918 acts = NULL;
919 if (tok == ';') {
920 nactions++;
921 acts = erealloc(acts, nactions * sizeof(*acts));
922 acts[nactions-1] = readcmd();
923 }
924
925 for (;;) {
926 if ((c = nextc()) == '#') {
927 comment();
928 continue;
929 }
930 if (c != '\t')
931 break;
932 nactions++;
933 acts = erealloc(acts, nactions * sizeof(*acts));
934 acts[nactions-1] = readcmd();
935 }
936 back(c);
937
938 for (i = 0; i < ntargets; i++) {
939 addtarget(targets[i], ndeps);
940 for (j = 0; j < ndeps; j++)
941 adddep(targets[i], deps[j]);
942 if (nactions > 0)
943 addrule(targets[i], acts, nactions);
944 }
945
946 for (i = 0; i < ndeps; i++)
947 free(deps[i]);
948 free(deps);
949
950 for (i = 0; i < nactions; i++) {
951 free(acts[i].line);
952 freeloc(&acts[i].loc);
953 }
954 free(acts);
955 }
956
957 static void
958 assign(char *macros[], int where, int n)
959 {
960 char *defs;
961
962 if (n != 1)
963 error("invalid macro definition");
964
965 skipspaces();
966 defs = readmacrodef();
967 setmacro(*macros, defs, where, NOEXPORT);
968 free(defs);
969 }
970
971 void
972 parseinput(int where)
973 {
974 int i, n;
975 char **targets;
976
977 while (moreinput()) {
978 n = 0;
979 targets = NULL;
980
981 next();
982 if (tok == '\n')
983 continue;
984
985 while (tok == ITEM) {
986 n++;
987 targets = erealloc(targets, n * sizeof(char *));
988 targets[n-1] = estrdup(token);
989 next();
990 }
991
992 switch (tok) {
993 case ':':
994 rule(targets, n);
995 break;
996 case '=':
997 assign(targets, where, n);
998 break;
999 default:
1000 error("unexpected token '%s'(%d)", token, tok);
1001 }
1002
1003 for (i = 0; i < n; i++)
1004 free(targets[i]);
1005 free(targets);
1006 }
1007 }
1008
1009 int
1010 parse(char *fname)
1011 {
1012 FILE *fp;
1013
1014 if (!fname) {
1015 fp = stdin;
1016 fname = "<stdin>";
1017 } else if ((fp = fopen(fname, "r")) == NULL) {
1018 return 0;
1019 }
1020
1021 debug("parsing %s", fname);
1022 push(FTFILE, fp, fname, 0);
1023 parseinput(MAKEFILE);
1024
1025 return 1;
1026 }
1027
1028 void
1029 inject(char *s)
1030 {
1031 push(FTFILE, NULL, "<internal>", 0);
1032 push(FTEXPAN, s);
1033 parseinput(INTERNAL);
1034 }