awkgram.y - 9base - revived minimalist port of Plan 9 userland to Unix
 (HTM) git clone git://git.suckless.org/9base
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
 (DIR) LICENSE
       ---
       awkgram.y (13931B)
       ---
            1 /****************************************************************
            2 Copyright (C) Lucent Technologies 1997
            3 All Rights Reserved
            4 
            5 Permission to use, copy, modify, and distribute this software and
            6 its documentation for any purpose and without fee is hereby
            7 granted, provided that the above copyright notice appear in all
            8 copies and that both that the copyright notice and this
            9 permission notice and warranty disclaimer appear in supporting
           10 documentation, and that the name Lucent Technologies or any of
           11 its entities not be used in advertising or publicity pertaining
           12 to distribution of the software without specific, written prior
           13 permission.
           14 
           15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
           16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
           17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
           18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
           19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
           20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
           21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
           22 THIS SOFTWARE.
           23 ****************************************************************/
           24 
           25 %{
              /* C prologue: headers, a helper macro and the parser-wide state used by the actions below. */
           26 #include <stdio.h>
           27 #include <string.h>
           28 #include "awk.h"
           29 
              /* makedfa(a,b) expands to compre(a); the second argument is dropped. */
           30 #define        makedfa(a,b)        compre(a)
           31 
           32 void checkdup(Node *list, Cell *item);
              /* Always returns 1. */
           33 int yywrap(void) { return(1); }
           34 
              /* beginloc/endloc are extended with linkum() by the XBEGIN/XEND actions in pa_stat. */
           35 Node        *beginloc = 0;
           36 Node        *endloc = 0;
           37 int        infunc        = 0;        /* = 1 if in arglist or body of func */
           38 int        inloop        = 0;        /* = 1 if in while, for, do */
           39 char        *curfname = 0;        /* current function name */
           40 Node        *arglist = 0;        /* list of args for current function */
           41 %}
           42 
           43 %union {
              /* Semantic value of a grammar symbol: a Node pointer, a Cell pointer, an int
               * or a string, selected by the <p>, <cp>, <i> and <s> tags used below. */
           44         Node        *p;
           45         Cell        *cp;
           46         int        i;
           47         char        *s;
           48 }
           49 
           50 %token        <i>        FIRSTTOKEN        /* must be first */
              /* Token declarations; the tag names the %union member holding each token's value. */
           51 %token        <p>        PROGRAM PASTAT PASTAT2 XBEGIN XEND
           52 %token        <i>        NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
           53 %token        <i>        ARRAY
           54 %token        <i>        MATCH NOTMATCH MATCHOP
           55 %token        <i>        FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS
           56 %token        <i>        AND BOR APPEND EQ GE GT LE LT NE IN
           57 %token        <i>        ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC 
           58 %token        <i>        SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
           59 %token        <i>        ADD MINUS MULT DIVIDE MOD
           60 %token        <i>        ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
           61 %token        <i>        PRINT PRINTF SPRINTF
           62 %token        <p>        ELSE INTEST CONDEXPR
           63 %token        <i>        POSTINCR PREINCR POSTDECR PREDECR
           64 %token        <cp>        VAR IVAR VARNF CALL NUMBER STRING
           65 %token        <s>        REGEXPR
           66 
              /* Value types of the nonterminals defined in the rules section. */
           67 %type        <p>        pas pattern ppattern plist pplist patlist prarg term re
           68 %type        <p>        pa_pat pa_stat pa_stats
           69 %type        <s>        reg_expr
           70 %type        <p>        simple_stmt opt_simple_stmt stmt stmtlist
           71 %type        <p>        var varname funcname varlist
           72 %type        <p>        for if else while
           73 %type        <i>        do st
           74 %type        <i>        pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
           75 %type        <i>        subop print
           76 
           77 %right        ASGNOP
              /* Precedence and associativity; in yacc, names on later lines bind more tightly.
               * GETLINE, RETURN, SPLIT, SUBSTR, WHILE, CAT, NOT, UMINUS, POWER, DECR, INCR and
               * INDIRECT appear only in these lines, not in the %token declarations above. */
           78 %right        '?'
           79 %right        ':'
           80 %left        BOR
           81 %left        AND
           82 %left        GETLINE
           83 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
           84 %left        ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC 
           85 %left        GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
           86 %left        PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
           87 %left        REGEXPR VAR VARNF IVAR WHILE '('
           88 %left        CAT
           89 %left        '+' '-'
           90 %left        '*' '/' '%'
           91 %left        NOT UMINUS
           92 %right        POWER
           93 %right        DECR INCR
           94 %left        INDIRECT
           95 %token        LASTTOKEN        /* must be last */
           96 
           97 %%
           98 
           99 program:
              /* Top-level rule. If errorflag is 0, winner becomes the PROGRAM node built
               * from beginloc, the pattern-action list ($1) and endloc. On a parse error
               * the lookahead is discarded, bracecheck() runs and "bailing out" is reported. */
          100           pas        { if (errorflag==0)
          101                         winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
          102         | error        { yyclearin; bracecheck(); SYNTAX("bailing out"); }
          103         ;
          104 
          105 and:
              /* Each of the next five rules accepts its token followed by any number of NL tokens. */
          106           AND | and NL
          107         ;
          108 
          109 bor:
          110           BOR | bor NL
          111         ;
          112 
          113 comma:
          114           ',' | comma NL
          115         ;
          116 
          117 do:
          118           DO | do NL
          119         ;
          120 
          121 else:
          122           ELSE | else NL
          123         ;
          124 
          125 for:
              /* Three loop forms: for(init; cond; step), for(init; ; step) and for (v in arr).
               * The mid-rule action {inloop++;} occupies a $n slot of its own, which is why
               * the body is $12, $10 and $8 respectively; inloop is decremented again once
               * the body has been parsed. The condition, when present, goes through notnull(). */
          126           FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
          127                 { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
          128         | FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
          129                 { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
          130         | FOR '(' varname IN varname rparen {inloop++;} stmt
          131                 { --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
          132         ;
          133 
          134 funcname:
              /* Name in a function definition: either token kind is passed to setfname().
               * The actions do not assign $$, so yacc's default $$ = $1 supplies the value. */
          135           VAR        { setfname($1); }
          136         | CALL        { setfname($1); }
          137         ;
          138 
          139 if:
              /* Head of an if statement; its value is the condition passed through notnull(). */
          140           IF '(' pattern rparen                { $$ = notnull($3); }
          141         ;
          142 
          143 lbrace:
              /* '{' followed by any number of NL tokens. */
          144           '{' | lbrace NL
          145         ;
          146 
          147 nl:
              /* One or more NL tokens. */
          148           NL | nl NL
          149         ;
          150 
          151 opt_nl:
              /* The opt_* rules below yield 0 when their optional part is absent. */
          152           /* empty */        { $$ = 0; }
          153         | nl
          154         ;
          155 
          156 opt_pst:
          157           /* empty */        { $$ = 0; }
          158         | pst
          159         ;
          160 
          161 
          162 opt_simple_stmt:
          163           /* empty */                        { $$ = 0; }
          164         | simple_stmt
          165         ;
          166 
          167 pas:
              /* Whole pattern-action section: 0 when only separators are present,
               * otherwise the pa_stats list; surrounding separators are dropped. */
          168           opt_pst                        { $$ = 0; }
          169         | opt_pst pa_stats opt_pst        { $$ = $2; }
          170         ;
          171 
          172 pa_pat:
              /* A pattern used as a rule guard, passed through notnull(). */
          173           pattern        { $$ = notnull($1); }
          174         ;
          175 
          176 pa_stat:
              /* One pattern-action item.
               *  - A pattern (or pattern pair) without a body gets a PRINT of rectonode().
               *  - Pattern pairs are built with pa2stat().
               *  - XBEGIN/XEND bodies are appended to beginloc/endloc and yield 0.
               *  - A function definition bumps infunc in a mid-rule action (so the body is
               *    $8), then restores it, clears curfname, calls defn() and yields 0. */
          177           pa_pat                        { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
          178         | pa_pat lbrace stmtlist '}'        { $$ = stat2(PASTAT, $1, $3); }
          179         | pa_pat ',' pa_pat                { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
          180         | pa_pat ',' pa_pat lbrace stmtlist '}'        { $$ = pa2stat($1, $3, $5); }
          181         | lbrace stmtlist '}'                { $$ = stat2(PASTAT, NIL, $2); }
          182         | XBEGIN lbrace stmtlist '}'
          183                 { beginloc = linkum(beginloc, $3); $$ = 0; }
          184         | XEND lbrace stmtlist '}'
          185                 { endloc = linkum(endloc, $3); $$ = 0; }
          186         | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
          187                 { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
          188         ;
          189 
          190 pa_stats:
              /* One or more pa_stat items, optionally separated by pst, chained with linkum(). */
          191           pa_stat
          192         | pa_stats opt_pst pa_stat        { $$ = linkum($1, $3); }
          193         ;
          194 
          195 patlist:
              /* One or more comma-separated patterns, chained with linkum(). */
          196           pattern
          197         | patlist comma pattern                { $$ = linkum($1, $3); }
          198         ;
          199 
          200 ppattern:
              /* Counterpart of `pattern` that omits the comparison operators (EQ, GE, GT,
               * LE, LT, NE) and the '|' GETLINE forms; it is what pplist is built from.
               * NOTE(review): presumably this leaves GT and '|' after print arguments free
               * for the redirection forms in simple_stmt -- confirm.
               * For MATCHOP, a constant right operand (constnode) is compiled immediately
               * with makedfa(strnode(...)); otherwise (Node *)1 is passed as the first op3
               * argument and the operand node itself is kept. */
          201           var ASGNOP ppattern                { $$ = op2($2, $1, $3); }
          202         | ppattern '?' ppattern ':' ppattern %prec '?'
          203                  { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
          204         | ppattern bor ppattern %prec BOR
          205                 { $$ = op2(BOR, notnull($1), notnull($3)); }
          206         | ppattern and ppattern %prec AND
          207                 { $$ = op2(AND, notnull($1), notnull($3)); }
          208         | ppattern MATCHOP reg_expr        { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
          209         | ppattern MATCHOP ppattern
          210                 { if (constnode($3))
          211                         $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
          212                   else
          213                         $$ = op3($2, (Node *)1, $1, $3); }
          214         | ppattern IN varname                { $$ = op2(INTEST, $1, makearr($3)); }
          215         | '(' plist ')' IN varname        { $$ = op2(INTEST, $2, makearr($5)); }
          216         | ppattern term %prec CAT        { $$ = op2(CAT, $1, $2); }
          217         | re
          218         | term
          219         ;
          220 
          221 pattern:
              /* General expression. Operands of '?', BOR and AND that act as conditions are
               * passed through notnull(). Comparison and assignment nodes take their
               * operator from the token value ($2). Two adjacent operands (pattern term)
               * form a CAT node. */
          222           var ASGNOP pattern                { $$ = op2($2, $1, $3); }
          223         | pattern '?' pattern ':' pattern %prec '?'
          224                  { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
          225         | pattern bor pattern %prec BOR
          226                 { $$ = op2(BOR, notnull($1), notnull($3)); }
          227         | pattern and pattern %prec AND
          228                 { $$ = op2(AND, notnull($1), notnull($3)); }
          229         | pattern EQ pattern                { $$ = op2($2, $1, $3); }
          230         | pattern GE pattern                { $$ = op2($2, $1, $3); }
          231         | pattern GT pattern                { $$ = op2($2, $1, $3); }
          232         | pattern LE pattern                { $$ = op2($2, $1, $3); }
          233         | pattern LT pattern                { $$ = op2($2, $1, $3); }
          234         | pattern NE pattern                { $$ = op2($2, $1, $3); }
          235         | pattern MATCHOP reg_expr        { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
              /* Constant right operand: compile it now; otherwise keep the node and pass (Node *)1 first. */
          236         | pattern MATCHOP pattern
          237                 { if (constnode($3))
          238                         $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
          239                   else
          240                         $$ = op3($2, (Node *)1, $1, $3); }
          241         | pattern IN varname                { $$ = op2(INTEST, $1, makearr($3)); }
          242         | '(' plist ')' IN varname        { $$ = op2(INTEST, $2, makearr($5)); }
              /* cmd | getline: when `safe` is set only an error is reported and the action assigns no $$. */
          243         | pattern '|' GETLINE var        { 
          244                         if (safe) SYNTAX("cmd | getline is unsafe");
          245                         else $$ = op3(GETLINE, $4, itonp($2), $1); }
          246         | pattern '|' GETLINE                { 
          247                         if (safe) SYNTAX("cmd | getline is unsafe");
          248                         else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
          249         | pattern term %prec CAT        { $$ = op2(CAT, $1, $2); }
          250         | re
          251         | term
          252         ;
          253 
          254 plist:
              /* Two or more comma-separated patterns, chained with linkum(). */
          255           pattern comma pattern                { $$ = linkum($1, $3); }
          256         | plist comma pattern                { $$ = linkum($1, $3); }
          257         ;
          258 
          259 pplist:
              /* One or more comma-separated ppatterns, chained with linkum(). */
          260           ppattern
          261         | pplist comma ppattern                { $$ = linkum($1, $3); }
          262         ;
          263 
          264 prarg:
              /* Arguments of print/printf: nothing (rectonode() is used), a pplist,
               * or a parenthesized plist. */
          265           /* empty */                        { $$ = rectonode(); }
          266         | pplist
          267         | '(' plist ')'                        { $$ = $2; }
          268         ;
          269 
          270 print:
              /* Either output keyword; the token value is later used as the statement type in simple_stmt. */
          271           PRINT | PRINTF
          272         ;
          273 
          274 pst:
              /* One or more separators, each an NL or a ';'. */
          275           NL | ';' | pst NL | pst ';'
          276         ;
          277 
          278 rbrace:
              /* '}' followed by any number of NL tokens. */
          279           '}' | rbrace NL
          280         ;
          281 
          282 re:
              /* A bare regular expression becomes a MATCH against rectonode();
               * NOT re negates the notnull() form of the operand. */
          283            reg_expr
          284                 { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
          285         | NOT re        { $$ = op1(NOT, notnull($2)); }
          286         ;
          287 
          288 reg_expr:
              /* startreg() runs as a mid-rule action right after the opening '/'; that
               * action takes the $2 slot, so the REGEXPR string is $3. */
          289           '/' {startreg();} REGEXPR '/'                { $$ = $3; }
          290         ;
          291 
          292 rparen:
              /* ')' followed by any number of NL tokens. */
          293           ')' | rparen NL
          294         ;
          295 
          296 simple_stmt:
              /* print/printf with optional redirection ('|', APPEND or GT followed by a
               * term), delete, or an expression used as a statement (via exptostat()).
               * With `safe` set, the redirected forms only report an error and assign
               * no $$. The redirection token value ($3) is passed on through itonp(). */
          297           print prarg '|' term                { 
          298                         if (safe) SYNTAX("print | is unsafe");
          299                         else $$ = stat3($1, $2, itonp($3), $4); }
          300         | print prarg APPEND term        {
          301                         if (safe) SYNTAX("print >> is unsafe");
          302                         else $$ = stat3($1, $2, itonp($3), $4); }
          303         | print prarg GT term                {
          304                         if (safe) SYNTAX("print > is unsafe");
          305                         else $$ = stat3($1, $2, itonp($3), $4); }
          306         | print prarg                        { $$ = stat3($1, $2, NIL, NIL); }
          307         | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
          308         | DELETE varname                 { $$ = stat2(DELETE, makearr($2), 0); }
          309         | pattern                        { $$ = exptostat($1); }
          310         | error                                { yyclearin; SYNTAX("illegal statement"); }
          311         ;
          312 
          313 st:
              /* Statement terminator: newline(s), or ';' optionally followed by newlines. */
          314           nl
          315         | ';' opt_nl
          316         ;
          317 
          318 stmt:
              /* A full statement.
               *  - BREAK/CONTINUE outside a loop (inloop == 0) and NEXT/NEXTFILE inside a
               *    function (infunc != 0) report an error, but the node is still built.
               *  - do-while has two mid-rule actions, so the body is $3 and the condition $7.
               *  - while has one mid-rule action, so the body is $3.
               *  - A lone ';' yields 0. */
          319           BREAK st                { if (!inloop) SYNTAX("break illegal outside of loops");
          320                                   $$ = stat1(BREAK, NIL); }
          321         | CLOSE pattern st        { $$ = stat1(CLOSE, $2); }
          322         | CONTINUE st                {  if (!inloop) SYNTAX("continue illegal outside of loops");
          323                                   $$ = stat1(CONTINUE, NIL); }
          324         | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
          325                 { $$ = stat2(DO, $3, notnull($7)); }
          326         | EXIT pattern st        { $$ = stat1(EXIT, $2); }
          327         | EXIT st                { $$ = stat1(EXIT, NIL); }
          328         | for
          329         | if stmt else stmt        { $$ = stat3(IF, $1, $2, $4); }
          330         | if stmt                { $$ = stat3(IF, $1, $2, NIL); }
          331         | lbrace stmtlist rbrace { $$ = $2; }
          332         | NEXT st        { if (infunc)
          333                                 SYNTAX("next is illegal inside a function");
          334                           $$ = stat1(NEXT, NIL); }
          335         | NEXTFILE st        { if (infunc)
          336                                 SYNTAX("nextfile is illegal inside a function");
          337                           $$ = stat1(NEXTFILE, NIL); }
          338         | RETURN pattern st        { $$ = stat1(RETURN, $2); }
          339         | RETURN st                { $$ = stat1(RETURN, NIL); }
          340         | simple_stmt st
          341         | while {inloop++;} stmt        { --inloop; $$ = stat2(WHILE, $1, $3); }
          342         | ';' opt_nl                { $$ = 0; }
          343         ;
          344 
          345 stmtlist:
              /* One or more statements, chained with linkum(). */
          346           stmt
          347         | stmtlist stmt                { $$ = linkum($1, $2); }
          348         ;
          349 
          350 subop:
              /* Either substitution keyword; its token value becomes the op4 node type in term. */
          351           SUB | GSUB
          352         ;
          353 
          354 term:
              /* Primary and arithmetic expressions.
               *  - term '/' ASGNOP term builds a DIVEQ node.
               *    NOTE(review): presumably covers "/=" arriving as '/' then ASGNOP -- confirm
               *    against the lexer.
               *  - Unary '+' yields its operand unchanged.
               *  - BLTIN with no argument list, or an empty one, is applied to rectonode().
               *  - MATCHFCN and subop compile a constant pattern argument immediately with
               *    makedfa(); otherwise they pass (Node *)1 first and keep the argument node.
               *  - subop without a third argument targets rectonode().
               *  - SPLIT's last op4 argument is a token code (STRING or REGEXPR) cast to Node *. */
          355            term '/' ASGNOP term                { $$ = op2(DIVEQ, $1, $4); }
          356          | term '+' term                        { $$ = op2(ADD, $1, $3); }
          357         | term '-' term                        { $$ = op2(MINUS, $1, $3); }
          358         | term '*' term                        { $$ = op2(MULT, $1, $3); }
          359         | term '/' term                        { $$ = op2(DIVIDE, $1, $3); }
          360         | term '%' term                        { $$ = op2(MOD, $1, $3); }
          361         | term POWER term                { $$ = op2(POWER, $1, $3); }
          362         | '-' term %prec UMINUS                { $$ = op1(UMINUS, $2); }
          363         | '+' term %prec UMINUS                { $$ = $2; }
          364         | NOT term %prec UMINUS                { $$ = op1(NOT, notnull($2)); }
          365         | BLTIN '(' ')'                        { $$ = op2(BLTIN, itonp($1), rectonode()); }
          366         | BLTIN '(' patlist ')'                { $$ = op2(BLTIN, itonp($1), $3); }
          367         | BLTIN                                { $$ = op2(BLTIN, itonp($1), rectonode()); }
          368         | CALL '(' ')'                        { $$ = op2(CALL, celltonode($1,CVAR), NIL); }
          369         | CALL '(' patlist ')'                { $$ = op2(CALL, celltonode($1,CVAR), $3); }
          370         | DECR var                        { $$ = op1(PREDECR, $2); }
          371         | INCR var                        { $$ = op1(PREINCR, $2); }
          372         | var DECR                        { $$ = op1(POSTDECR, $1); }
          373         | var INCR                        { $$ = op1(POSTINCR, $1); }
          374         | GETLINE var LT term                { $$ = op3(GETLINE, $2, itonp($3), $4); }
          375         | GETLINE LT term                { $$ = op3(GETLINE, NIL, itonp($2), $3); }
          376         | GETLINE var                        { $$ = op3(GETLINE, $2, NIL, NIL); }
          377         | GETLINE                        { $$ = op3(GETLINE, NIL, NIL, NIL); }
          378         | INDEX '(' pattern comma pattern ')'
          379                 { $$ = op2(INDEX, $3, $5); }
              /* A regular expression as index()'s second argument is reported as an error. */
          380         | INDEX '(' pattern comma reg_expr ')'
          381                 { SYNTAX("index() doesn't permit regular expressions");
          382                   $$ = op2(INDEX, $3, (Node*)$5); }
          383         | '(' pattern ')'                { $$ = $2; }
          384         | MATCHFCN '(' pattern comma reg_expr ')'
          385                 { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
          386         | MATCHFCN '(' pattern comma pattern ')'
          387                 { if (constnode($5))
          388                         $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
          389                   else
          390                         $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
          391         | NUMBER                        { $$ = celltonode($1, CCON); }
          392         | SPLIT '(' pattern comma varname comma pattern ')'     /* string */
          393                 { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
          394         | SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
          395                 { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
          396         | SPLIT '(' pattern comma varname ')'
          397                 { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
          398         | SPRINTF '(' patlist ')'        { $$ = op1($1, $3); }
          399         | STRING                         { $$ = celltonode($1, CCON); }
          400         | subop '(' reg_expr comma pattern ')'
          401                 { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
          402         | subop '(' pattern comma pattern ')'
          403                 { if (constnode($3))
          404                         $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
          405                   else
          406                         $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
          407         | subop '(' reg_expr comma pattern comma var ')'
          408                 { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
          409         | subop '(' pattern comma pattern comma var ')'
          410                 { if (constnode($3))
          411                         $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
          412                   else
          413                         $$ = op4($1, (Node *)1, $3, $5, $7); }
          414         | SUBSTR '(' pattern comma pattern comma pattern ')'
          415                 { $$ = op3(SUBSTR, $3, $5, $7); }
          416         | SUBSTR '(' pattern comma pattern ')'
          417                 { $$ = op3(SUBSTR, $3, $5, NIL); }
          418         | var
          419         ;
          420 
          421 var:
              /* An assignable reference: a plain name, an ARRAY element node, or an
               * INDIRECT node built from an IVAR cell or from INDIRECT applied to a term. */
          422           varname
          423         | varname '[' patlist ']'        { $$ = op2(ARRAY, makearr($1), $3); }
          424         | IVAR                                { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
          425         | INDIRECT term                         { $$ = op1(INDIRECT, $2); }
          426         ;        
          427 
          428 varlist:
              /* Parameter list of a function definition. Every alternative also stores the
               * list built so far in the global arglist; each added name is first checked
               * against the existing list with checkdup(). */
          429           /* nothing */                { arglist = $$ = 0; }
          430         | VAR                        { arglist = $$ = celltonode($1,CVAR); }
          431         | varlist comma VAR        {
          432                         checkdup($1, $3);
          433                         arglist = $$ = linkum($1,celltonode($3,CVAR)); }
          434         ;
          435 
          436 varname:
              /* A name token turned into a node: VAR via celltonode(), ARG and VARNF via op1(). */
          437           VAR                        { $$ = celltonode($1, CVAR); }
          438         | ARG                         { $$ = op1(ARG, itonp($1)); }
          439         | VARNF                        { $$ = op1(VARNF, (Node *) $1); }
          440         ;
          441 
          442 
          443 while:
              /* Head of a while loop; its value is the condition passed through notnull(). */
          444           WHILE '(' pattern rparen        { $$ = notnull($3); }
          445         ;
          446 
          447 %%
          448 
          449 void setfname(Cell *p)
          450 {
          451         if (isarr(p))
          452                 SYNTAX("%s is an array, not a function", p->nval);
          453         else if (isfcn(p))
          454                 SYNTAX("you can't define function %s more than once", p->nval);
          455         curfname = p->nval;
          456 }
          457 
          458 int constnode(Node *p)
          459 {
          460         return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
          461 }
          462 
          463 char *strnode(Node *p)
          464 {
          465         return ((Cell *)(p->narg[0]))->sval;
          466 }
          467 
          468 Node *notnull(Node *n)
          469 {
          470         switch (n->nobj) {
          471         case LE: case LT: case EQ: case NE: case GT: case GE:
          472         case BOR: case AND: case NOT:
          473                 return n;
          474         default:
          475                 return op2(NE, n, nullnode);
          476         }
          477 }
          478 
          479 void checkdup(Node *vl, Cell *cp)        /* check if name already in list */
          480 {
          481         char *s = cp->nval;
          482         for ( ; vl; vl = vl->nnext) {
          483                 if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
          484                         SYNTAX("duplicate argument %s", s);
          485                         break;
          486                 }
          487         }
          488 }
          489