parser.c - scc - simple c99 compiler
(HTM) git clone git://git.simple-cc.org/scc
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Submodules
(DIR) README
(DIR) LICENSE
---
parser.c (6917B)
---
1 #include <assert.h>
2 #include <ctype.h>
3 #include <errno.h>
4 #include <limits.h>
5 #include <setjmp.h>
6 #include <stdarg.h>
7 #include <stdio.h>
8 #include <stdlib.h>
9 #include <string.h>
10
11 #include <scc/cstd.h>
12 #include <scc/mach.h>
13 #include <scc/scc.h>
14
15 #include "as.h"
16
/* number of slots in the static argument vector filled by getargs() */
#define NARGS 20
/* number of slots in the inputs[] stack of open files */
#define NR_INPUTS 10
/* size in bytes of the line buffer used by nextline() and getln() */
#define MAXLINE 100
20
/* One entry of the stack of input files (see inputs[] and isp). */
struct input {
	char *fname;     /* copy of the file name made by addinput() */
	unsigned lineno; /* incremented by nextline() for every line read */
	FILE *fp;        /* stream opened by addinput(), closed by delinput() */
};
26
/* number of messages reported through error() */
int nerrors;
/* jump buffer error() longjmps to after printing its message */
jmp_buf recover;
/* text of the last token, NUL terminated */
char yytext[INTIDENTSIZ+1];
/* last token returned by next() */
int yytoken;
/* number of characters stored in yytext */
size_t yylen;
/* value attached to the last token (the scanner sets yylval.sym) */
union yylval yylval;

/* textp: start of the token being scanned; endp: current scan position */
static char *textp, *endp;
/* when non zero, next() scans '%' as the start of an identifier */
static int regmode;
/* stack of open input files; isp points to the first free slot */
static struct input inputs[NR_INPUTS], *isp = inputs;
37
38 static int
39 follow(int expect1, int expect2, int ifyes1, int ifyes2, int ifno)
40 {
41 int c;
42
43 if ((c = *++textp) == expect1)
44 return ifyes1;
45 if (c == expect2)
46 return ifyes2;
47 --textp;
48 return ifno;
49 }
50
51 static void
52 tok2str(void)
53 {
54 if ((yylen = endp - textp) > INTIDENTSIZ) {
55 error("token too big");
56 yylen = INTIDENTSIZ;
57 }
58 memcpy(yytext, textp, yylen);
59 yytext[yylen] = '\0';
60 textp = endp;
61 }
62
63 static int
64 iden(void)
65 {
66 int c;
67 char *p;
68
69 for ( ; c = *endp; ++endp) {
70 if (isalnum(c))
71 continue;
72 switch (c) {
73 case '\'':
74 case '_':
75 case '.':
76 case '$':
77 continue;
78 default:
79 goto out_loop;
80 }
81 }
82
83 out_loop:
84 tok2str();
85 yylval.sym = lookup(yytext);
86
87 return (yylval.sym->flags & FREG) ? REG : IDEN;
88 }
89
90 static int
91 number(void)
92 {
93 int c, base = 10;
94 char *p;
95 TUINT n;
96 static char digits[] = "0123456789ABCDEF";
97
98 if (*endp == '0') {
99 base = 8;
100 ++endp;
101 if (*endp == 'x') {
102 base = 16;
103 ++endp;
104 }
105 }
106 for (n = 0; (c = *endp) && isxdigit(c); n += c) {
107 p = strchr(digits, toupper(c));
108 c = p - digits;
109 if (c > base)
110 error("invalid digit in number");
111 if (n >= TUINT_MAX/base - c)
112 error("overflow in number");
113 n *= base;
114 endp++;
115 }
116 tok2str();
117 yylval.sym = tmpsym(n);
118
119 return NUMBER;
120 }
121
122 static int
123 character(void)
124 {
125 int c;
126 char *p;
127
128 while (*endp != '\'')
129 ++endp;
130 return NUMBER;
131 }
132
133 static int
134 string(void)
135 {
136 int c;
137 size_t l;
138 char *s;
139 Symbol *sym = tmpsym(0);
140
141 for (++endp; *endp != '"'; ++endp)
142 ;
143 ++endp;
144 tok2str();
145 yylval.sym = sym;
146 /* FIXME: this memory is not freed ever */
147 l = yylen-2;
148 s = memcpy(xmalloc(l+1), yytext+1, l);
149 s[l] = '\0';
150 sym->name = s;
151
152 return STRING;
153 }
154
155 static int
156 operator(void)
157 {
158 int c;
159
160 ++endp;
161 if ((c = *textp) == '>')
162 c = follow('=', '>', LE, SHL, '>');
163 else if (c == '<')
164 c = follow('=', '<', GE, SHR, '>');
165 tok2str();
166
167 return c;
168 }
169
170 int
171 ahead(void)
172 {
173 while (isspace(*textp))
174 ++textp;
175
176 if (*textp != '\0')
177 return *textp;
178 return EOS;
179 }
180
181 int
182 next(void)
183 {
184 int c;
185
186 while (isspace(*textp))
187 ++textp;
188
189 endp = textp;
190
191 switch (c = *textp) {
192 case '\0':
193 strcpy(yytext, "EOS");
194 yylen = 3;
195 c = EOS;
196 break;
197 case '"':
198 c = string();
199 break;
200 case '\'':
201 c = character();
202 break;
203 case '%':
204 c = (regmode ? iden : operator)();
205 break;
206 case '.':
207 case '_':
208 c = iden();
209 break;
210 default:
211 if (isdigit(c))
212 c = number();
213 else if (isalpha(c))
214 c = iden();
215 else
216 c = operator();
217 break;
218 }
219 return yytoken = c;
220 }
221
222 void
223 expect(int token)
224 {
225 if (yytoken != token)
226 unexpected();
227 next();
228 }
229
230 void
231 unexpected(void)
232 {
233 error("unexpected '%s'", yytext);
234 }
235
236 void
237 error(char *msg, ...)
238 {
239 va_list va;
240 struct input *ip;
241
242 assert(isp > inputs);
243 ip = &isp[-1];
244
245 va_start(va, msg);
246 fprintf(stderr, "as:%s:%u: ", ip->fname, ip->lineno);
247 vfprintf(stderr, msg, va);
248 putc('\n', stderr);
249 nerrors++;
250 va_end(va);
251
252 if (nerrors == 10)
253 die("as: too many errors");
254 longjmp(recover, 1);
255 }
256
257 Node *
258 getreg(void)
259 {
260 Node *np;
261
262 np = node(REG, NULL, NULL);
263 np->sym = yylval.sym;
264 np->addr = AREG;
265 expect(REG);
266 return np;
267 }
268
269 void
270 regctx(int mode)
271 {
272 regmode = mode;
273 }
274
275 Node *
276 operand(char **strp)
277 {
278 int c, imm = 0;
279 Node *np;
280
281 textp = *strp;
282 regctx(1);
283 switch (next()) {
284 case EOS:
285 np = NULL;
286 break;
287 case REG:
288 np = getreg();
289 break;
290 case STRING:
291 np = node(yytoken, NULL, NULL);
292 np->sym = yylval.sym;
293 np->addr = ASTR;
294 next();
295 break;
296 case IDEN:
297 c = ahead();
298 if (c != EOS && c != ',')
299 goto expression;
300 np = node(IDEN, NULL, NULL);
301 np->sym = yylval.sym;
302 np->addr = ANUMBER;
303 next();
304 break;
305 case '$':
306 next();
307 imm = 1;
308 default:
309 expression:
310 if (!imm) {
311 np = moperand();
312 } else {
313 np = expr();
314 np->addr = AIMM;
315 }
316 }
317 if (yytoken != ',' && yytoken != EOS)
318 error("trailing characters in expression '%s'", textp);
319 *strp = endp;
320
321 return np;
322 }
323
324 Node **
325 getargs(char *s)
326 {
327 Node **ap;
328 static Node *args[NARGS];
329
330 if (!s)
331 return NULL;
332
333 for (ap = args; ap < &args[NARGS-1]; ++ap) {
334 if ((*ap = operand(&s)) == NULL)
335 return args;
336 }
337 error("too many arguments in one instruction");
338 }
339
/*
 * Split the next tab separated field off the line at *oldp.
 *
 * Leading white space other than a tab is skipped. If the line ends
 * there, or a '#' begins a comment, the rest of the line is cut off,
 * *oldp is set to NULL and NULL is returned; a NULL *oldp makes every
 * later call return NULL as well.
 *
 * Otherwise the field extends up to the next tab or the end of the
 * line. The tab is replaced by a terminator, *oldp is moved to the
 * text after the field and *siz is decreased by the number of
 * characters consumed. White space at the end of the field is trimmed
 * working backwards from its own end, so the first character of the
 * following field is never touched.
 *
 * Returns the field, or NULL when it is empty.
 */
static char *
field(char **oldp, size_t *siz)
{
	char *s, *end, *rest, *begin;

	if ((begin = *oldp) == NULL)
		return NULL;

	/* isspace() needs a value representable as unsigned char */
	for (s = begin; *s != '\t' && isspace((unsigned char) *s); ++s)
		;
	if (*s == '\0' || *s == '#') {
		*s = '\0';
		return *oldp = NULL;
	}

	for (end = s; *end != '\0' && *end != '\t'; ++end)
		;
	rest = end;
	if (*end == '\t') {
		*end = '\0';
		rest = end + 1;
	}
	*siz -= (size_t) (rest - begin);
	*oldp = rest;

	while (end > s && isspace((unsigned char) end[-1]))
		*--end = '\0';
	return (*s != '\0') ? s : NULL;
}
367
/*
 * Check that name is a valid label: alphanumerics, '_', '.' and '$',
 * optionally ended by a single ':'. The trailing ':' is removed from
 * name in place.
 *
 * Returns 1 when the label is valid and 0 otherwise.
 *
 * NOTE(review): an empty name, also the result of a lone ":", is
 * accepted; confirm that callers never pass one.
 */
static int
validlabel(char *name)
{
	int c;
	char *p;

	/* isalnum() needs a value representable as unsigned char */
	for (p = name; (c = (unsigned char) *p) != '\0'; ++p) {
		if (isalnum(c) || c == '_' || c == '.' || c == '$')
			continue;
		if (c == ':' && p[1] == '\0') {
			*p = '\0';
			break;
		}
		return 0;
	}
	return 1;
}
392
393 static int
394 extract(char *s, size_t len, struct line *lp)
395 {
396 int r = 0;
397
398 if (lp->label = field(&s, &len))
399 r++;
400 if (lp->op = field(&s, &len))
401 r++;
402 if (lp->args = field(&s, &len))
403 r++;
404
405 if (s && *s && *s != '#')
406 error("trailing characters at the end of the line");
407 if (lp->label && !validlabel(lp->label))
408 error("incorrect label name '%s'", lp->label);
409
410 return r;
411 }
412
413 static size_t
414 getln(FILE *fp, char buff[MAXLINE])
415 {
416 int c;
417 char *bp;
418
419 for (bp = buff; (c = getc(fp)) != EOF; *bp++ = c) {
420 if (c == '\n')
421 break;
422
423 if (c > UCHAR_MAX)
424 error("invalid character '%x'", c);
425
426 if (bp == &buff[MAXLINE-1])
427 error("line too long");
428 }
429 *bp = '\0';
430
431 return bp - buff;
432 }
433
434 int
435 nextline(struct line *lp)
436 {
437 struct input *ip;
438 size_t n;
439 static char buff[MAXLINE];
440
441 assert(isp > inputs);
442 repeat:
443 if (isp == inputs)
444 return 0;
445 ip = &isp[-1];
446 if (feof(ip->fp)) {
447 delinput();
448 goto repeat;
449 }
450 n = getln(ip->fp, buff);
451 if (++ip->lineno == 0)
452 die("as: %s: file too long", infile);
453 if (n == 0)
454 goto repeat;
455 if (extract(buff, n, lp) == 0)
456 goto repeat;
457 return 1;
458 }
459
460 void
461 addinput(char *fname)
462 {
463 FILE *fp;
464
465 if (isp == &inputs[NR_INPUTS])
466 die("as: too many included files");
467 if ((fp = fopen(fname, "r")) == NULL)
468 die("as: %s: %s", fname, strerror(errno));
469 isp->fname = xstrdup(fname);
470 isp->fp = fp;
471 isp->lineno = 0;
472 ++isp;
473 }
474
475 int
476 delinput(void)
477 {
478 if (isp == inputs)
479 return EOF;
480 --isp;
481 if (fclose(isp->fp) == EOF)
482 die("as: %s: %s", isp->fname, strerror(errno));
483 free(isp->fname);
484 return 0;
485 }