lex.c - 9base - revived minimalist port of Plan 9 userland to Unix
 (HTM) git clone git://git.suckless.org/9base
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       lex.c (6203B)
       ---
            1 #include "rc.h"
            2 #include "exec.h"
            3 #include "io.h"
            4 #include "getflags.h"
            5 #include "fns.h"
            6 int getnext(void);
            7 
            /*
             * Report whether c may appear inside an unquoted word: anything except
             * EOF, NUL, white space and the shell's special characters.
             */
            int
            wordchr(int c)
            {
                    static const char special[] = "\n \t#;&|^$=`'{}()<>";

                    if(c==EOF)
                            return 0;
                    /* strchr also matches the terminating NUL, so c==0 is rejected too */
                    return strchr(special, c)==0;
            }
           13 
           /*
            * Report whether c may appear in a variable name.
            * Every character code above space is accepted except the ASCII
            * punctuation listed below; '*' and '_' are deliberately not in the
            * list, so they count as name characters.
            */
           int
           idchr(int c)
           {
                   static const char punct[] = "!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~";

                   /* control characters, space and EOF are never part of a name */
                   if(c<=' ')
                           return 0;
                   return strchr(punct, c)==0;
           }
           /* one-character lookahead filled by nextc(); EOF means nothing is buffered */
           int future = EOF;
           /* nonzero when getnext() must call pprompt() before reading more input */
           int doprompt = 1;
           /* set while yylex() reads a quoted word; getnext() then leaves backslashes alone */
           int inquote;
           /* set while skipwhite() skips a comment; backslash-newline does not continue it */
           int incomm;
           28 /*
           29  * Look ahead in the input stream
           30  */
           31 
           32 int
           33 nextc(void)
           34 {
           35         if(future==EOF)
           36                 future = getnext();
           37         return future;
           38 }
           39 /*
           40  * Consume the lookahead character.
           41  */
           42 
           43 int
           44 advance(void)
           45 {
           46         int c = nextc();
           47         lastc = future;
           48         future = EOF;
           49         return c;
           50 }
           51 /*
           52  * read a character from the input stream
           53  */        
           54 
           55 int
           56 getnext(void)
           57 {
           58         int c;
           59         static int peekc = EOF;
           60         if(peekc!=EOF){
           61                 c = peekc;
           62                 peekc = EOF;
           63                 return c;
           64         }
           65         if(runq->eof)
           66                 return EOF;
           67         if(doprompt)
           68                 pprompt();
           69         c = rchr(runq->cmdfd);
           70         if(!inquote && c=='\\'){
           71                 c = rchr(runq->cmdfd);
           72                 if(c=='\n' && !incomm){                /* don't continue a comment */
           73                         doprompt = 1;
           74                         c=' ';
           75                 }
           76                 else{
           77                         peekc = c;
           78                         c='\\';
           79                 }
           80         }
           81         doprompt = doprompt || c=='\n' || c==EOF;
           82         if(c==EOF)
           83                 runq->eof++;
           84         else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
           85         return c;
           86 }
           87 
           88 void
           89 pprompt(void)
           90 {
           91         var *prompt;
           92         if(runq->iflag){
           93                 pstr(err, promptstr);
           94                 flush(err);
           95                 prompt = vlook("prompt");
           96                 if(prompt->val && prompt->val->next)
           97                         promptstr = prompt->val->next->word;
           98                 else
           99                         promptstr="\t";
          100         }
          101         runq->lineno++;
          102         doprompt = 0;
          103 }
          104 
          105 void
          106 skipwhite(void)
          107 {
          108         int c;
          109         for(;;){
          110                 c = nextc();
          111                 /* Why did this used to be  if(!inquote && c=='#') ?? */
          112                 if(c=='#'){
          113                         incomm = 1;
          114                         for(;;){
          115                                 c = nextc();
          116                                 if(c=='\n' || c==EOF) {
          117                                         incomm = 0;
          118                                         break;
          119                                 }
          120                                 advance();
          121                         }
          122                 }
          123                 if(c==' ' || c=='\t')
          124                         advance();
          125                 else return;
          126         }
          127 }
          128 
          /*
           * Skip white space, comments and any number of newlines, stopping with
           * the lookahead on the first character that is none of these.
           */
          void
          skipnl(void)
          {
                  for(skipwhite(); nextc()=='\n'; skipwhite())
                          advance();
          }
          141 
          /*
           * If the lookahead character equals c, consume it and return 1;
           * otherwise leave the input untouched and return 0.
           */
          int
          nextis(int c)
          {
                  if(nextc()!=c)
                          return 0;
                  advance();
                  return 1;
          }
          151 
          152 char*
          153 addtok(char *p, int val)
          154 {
          155         if(p==0)
          156                 return 0;
          157         if(p==&tok[NTOK-1]){
          158                 *p = 0;
          159                 yyerror("token buffer too short");
          160                 return 0;
          161         }
          162         *p++=val;
          163         return p;
          164 }
          165 
          /*
           * Append c to the token at p.  When c starts a two- or three-byte
           * sequence (per twobyte()/threebyte()), also consume and append the one
           * or two bytes that follow.  Returns the new end of the token, or 0
           * once addtok has reported an overflow.
           */
          char*
          addutf(char *p, int c)
          {
                  int more;

                  if(twobyte(c))
                          more = 1;
                  else if(threebyte(c))
                          more = 2;
                  else
                          more = 0;
                  p = addtok(p, c);
                  while(more-- > 0)
                          p = addtok(p, advance());
                  return p;
          }
          178 int lastdol;        /* was the last token read '$' or '$#' or '"'? */
          179 int lastword;        /* was the last token read a word or compound word terminator? */
          180 
          181 int
          182 yylex(void)
          183 {
          184         int c, d = nextc();
          185         char *w = tok;
          186         struct tree *t;
          187         yylval.tree = 0;
          188         /*
          189          * Embarassing sneakiness:  if the last token read was a quoted or unquoted
          190          * WORD then we alter the meaning of what follows.  If the next character
          191          * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
          192          * if the next character is the first character of a simple or compound word,
          193          * we insert a `^' before it.
          194          */
          195         if(lastword){
          196                 lastword = 0;
          197                 if(d=='('){
          198                         advance();
          199                         strcpy(tok, "( [SUB]");
          200                         return SUB;
          201                 }
          202                 if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
          203                         strcpy(tok, "^");
          204                         return '^';
          205                 }
          206         }
          207         inquote = 0;
          208         skipwhite();
          209         switch(c = advance()){
          210         case EOF:
          211                 lastdol = 0;
          212                 strcpy(tok, "EOF");
          213                 return EOF;
          214         case '$':
          215                 lastdol = 1;
          216                 if(nextis('#')){
          217                         strcpy(tok, "$#");
          218                         return COUNT;
          219                 }
          220                 if(nextis('"')){
          221                         strcpy(tok, "$\"");
          222                         return '"';
          223                 }
          224                 strcpy(tok, "$");
          225                 return '$';
          226         case '&':
          227                 lastdol = 0;
          228                 if(nextis('&')){
          229                         skipnl();
          230                         strcpy(tok, "&&");
          231                         return ANDAND;
          232                 }
          233                 strcpy(tok, "&");
          234                 return '&';
          235         case '|':
          236                 lastdol = 0;
          237                 if(nextis(c)){
          238                         skipnl();
          239                         strcpy(tok, "||");
          240                         return OROR;
          241                 }
          242         case '<':
          243         case '>':
          244                 lastdol = 0;
          245                 /*
          246                  * funny redirection tokens:
          247                  *        redir:        arrow | arrow '[' fd ']'
          248                  *        arrow:        '<' | '<<' | '>' | '>>' | '|'
          249                  *        fd:        digit | digit '=' | digit '=' digit
          250                  *        digit:        '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
          251                  * some possibilities are nonsensical and get a message.
          252                  */
          253                 *w++=c;
          254                 t = newtree();
          255                 switch(c){
          256                 case '|':
          257                         t->type = PIPE;
          258                         t->fd0 = 1;
          259                         t->fd1 = 0;
          260                         break;
          261                 case '>':
          262                         t->type = REDIR;
          263                         if(nextis(c)){
          264                                 t->rtype = APPEND;
          265                                 *w++=c;
          266                         }
          267                         else t->rtype = WRITE;
          268                         t->fd0 = 1;
          269                         break;
          270                 case '<':
          271                         t->type = REDIR;
          272                         if(nextis(c)){
          273                                 t->rtype = HERE;
          274                                 *w++=c;
          275                         } else if (nextis('>')){
          276                                 t->rtype = RDWR;
          277                                 *w++=c;
          278                         } else t->rtype = READ;
          279                         t->fd0 = 0;
          280                         break;
          281                 }
          282                 if(nextis('[')){
          283                         *w++='[';
          284                         c = advance();
          285                         *w++=c;
          286                         if(c<'0' || '9'<c){
          287                         RedirErr:
          288                                 *w = 0;
          289                                 yyerror(t->type==PIPE?"pipe syntax"
          290                                                 :"redirection syntax");
          291                                 return EOF;
          292                         }
          293                         t->fd0 = 0;
          294                         do{
          295                                 t->fd0 = t->fd0*10+c-'0';
          296                                 *w++=c;
          297                                 c = advance();
          298                         }while('0'<=c && c<='9');
          299                         if(c=='='){
          300                                 *w++='=';
          301                                 if(t->type==REDIR)
          302                                         t->type = DUP;
          303                                 c = advance();
          304                                 if('0'<=c && c<='9'){
          305                                         t->rtype = DUPFD;
          306                                         t->fd1 = t->fd0;
          307                                         t->fd0 = 0;
          308                                         do{
          309                                                 t->fd0 = t->fd0*10+c-'0';
          310                                                 *w++=c;
          311                                                 c = advance();
          312                                         }while('0'<=c && c<='9');
          313                                 }
          314                                 else{
          315                                         if(t->type==PIPE)
          316                                                 goto RedirErr;
          317                                         t->rtype = CLOSE;
          318                                 }
          319                         }
          320                         if(c!=']'
          321                         || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
          322                                 goto RedirErr;
          323                         *w++=']';
          324                 }
          325                 *w='\0';
          326                 yylval.tree = t;
          327                 if(t->type==PIPE)
          328                         skipnl();
          329                 return t->type;
          330         case '\'':
          331                 lastdol = 0;
          332                 lastword = 1;
          333                 inquote = 1;
          334                 for(;;){
          335                         c = advance();
          336                         if(c==EOF)
          337                                 break;
          338                         if(c=='\''){
          339                                 if(nextc()!='\'')
          340                                         break;
          341                                 advance();
          342                         }
          343                         w = addutf(w, c);
          344                 }
          345                 if(w!=0)
          346                         *w='\0';
          347                 t = token(tok, WORD);
          348                 t->quoted = 1;
          349                 yylval.tree = t;
          350                 return t->type;
          351         }
          352         if(!wordchr(c)){
          353                 lastdol = 0;
          354                 tok[0] = c;
          355                 tok[1]='\0';
          356                 return c;
          357         }
          358         for(;;){
          359                 /* next line should have (char)c==GLOB, but ken's compiler is broken */
          360                 if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
          361                         w = addtok(w, GLOB);
          362                 w = addutf(w, c);
          363                 c = nextc();
          364                 if(lastdol?!idchr(c):!wordchr(c)) break;
          365                 advance();
          366         }
          367 
          368         lastword = 1;
          369         lastdol = 0;
          370         if(w!=0)
          371                 *w='\0';
          372         t = klook(tok);
          373         if(t->type!=WORD)
          374                 lastword = 0;
          375         t->quoted = 0;
          376         yylval.tree = t;
          377         return t->type;
          378 }