diff options
Diffstat (limited to 'tools/lemon/lemon.c')
-rw-r--r-- | tools/lemon/lemon.c | 281 |
1 files changed, 183 insertions, 98 deletions
diff --git a/tools/lemon/lemon.c b/tools/lemon/lemon.c index 650a55c383..8eb5cbc487 100644 --- a/tools/lemon/lemon.c +++ b/tools/lemon/lemon.c @@ -391,6 +391,12 @@ struct lemon { int nrule; /* Number of rules */ int nsymbol; /* Number of terminal and nonterminal symbols */ int nterminal; /* Number of terminal symbols */ + int minShiftReduce; /* Minimum shift-reduce action value */ + int errAction; /* Error action value */ + int accAction; /* Accept action value */ + int noAction; /* No-op action value */ + int minReduce; /* Minimum reduce action */ + int maxAction; /* Maximum action value of any kind */ struct symbol **symbols; /* Sorted array of pointers to symbols */ int errorcnt; /* Number of errors */ struct symbol *errsym; /* The error symbol */ @@ -415,6 +421,7 @@ struct lemon { char *tokenprefix; /* A prefix added to token names in the .h file */ int nconflict; /* Number of parsing conflicts */ int nactiontab; /* Number of entries in the yy_action[] table */ + int nlookaheadtab; /* Number of entries in yy_lookahead[] */ int tablesize; /* Total table size of all tables in bytes */ int basisflag; /* Print only basis configurations */ int has_fallback; /* True if any %fallback is seen in the grammar */ @@ -591,10 +598,12 @@ struct acttab { int mxLookahead; /* Maximum aLookahead[].lookahead */ int nLookahead; /* Used slots in aLookahead[] */ int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ + int nterminal; /* Number of terminal symbols */ + int nsymbol; /* total number of symbols */ }; /* Return the number of entries in the yy_action table */ -#define acttab_size(X) ((X)->nAction) +#define acttab_lookahead_size(X) ((X)->nAction) /* The value for the N-th entry in yy_action */ #define acttab_yyaction(X,N) ((X)->aAction[N].action) @@ -612,14 +621,16 @@ PRIVATE void acttab_free(acttab *p){ #endif /* Allocate a new acttab structure */ -PRIVATE acttab *acttab_alloc(void){ - acttab *p = (acttab *) calloc( 1, sizeof(*p) ); - if( p==0 ){ - fprintf(stderr,"Unable to allocate memory for a new acttab."); - exit(1); - } - memset(p, 0, sizeof(*p)); - return p; +PRIVATE acttab *acttab_alloc(int nsymbol, int nterminal) { + acttab *p = (acttab *)calloc(1, sizeof(*p)); + if (p == 0) { + fprintf(stderr, "Unable to allocate memory for a new acttab."); + exit(1); + } + memset(p, 0, sizeof(*p)); + p->nsymbol = nsymbol; + p->nterminal = nterminal; + return p; } /* Add a new action to the current transaction set. @@ -659,16 +670,24 @@ PRIVATE void acttab_action(acttab *p, int lookahead, int action){ ** to an empty set in preparation for a new round of acttab_action() calls. ** ** Return the offset into the action table of the new transaction. +** +** If the makeItSafe parameter is true, then the offset is chosen so that +** it is impossible to overread the yy_lookaside[] table regardless of +** the lookaside token. This is done for the terminal symbols, as they +** come from external inputs and can contain syntax errors. When makeItSafe +** is false, there is more flexibility in selecting offsets, resulting in +** a smaller table. For non-terminal symbols, which are never syntax errors, +** makeItSafe can be false. */ -PRIVATE int acttab_insert(acttab *p){ - int i, j, k, n; - assert( p->nLookahead>0 ); +PRIVATE int acttab_insert(acttab *p, int makeItSafe) { + int i, j, k, n, end; + assert(p->nLookahead>0); /* Make sure we have enough space to hold the expanded action table ** in the worst case. The worst case occurs if the transaction set ** must be appended to the current action table */ - n = p->mxLookahead + 1; + n = p->nsymbol + 1; if( p->nAction + n >= p->nActionAlloc ){ int oldAlloc = p->nActionAlloc; p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; @@ -690,7 +709,8 @@ PRIVATE int acttab_insert(acttab *p){ ** ** i is the index in p->aAction[] where p->mnLookahead is inserted. */ - for(i=p->nAction-1; i>=0; i--){ + end = makeItSafe ? p->mnLookahead : 0; + for (i = p->nAction - 1; i >= end; i--) { if( p->aAction[i].lookahead==p->mnLookahead ){ /* All lookaheads and actions in the aLookahead[] transaction ** must match against the candidate aAction[i] entry. */ @@ -720,12 +740,13 @@ PRIVATE int acttab_insert(acttab *p){ ** an empty offset in the aAction[] table in which we can add the ** aLookahead[] transaction. */ - if( i<0 ){ + if (i<end) { /* Look for holes in the aAction[] table that fit the current ** aLookahead[] transaction. Leave i set to the offset of the hole. ** If no holes are found, i is left at p->nAction, which means the ** transaction will be appended. */ - for(i=0; i<p->nActionAlloc - p->mxLookahead; i++){ + i = makeItSafe ? p->mnLookahead : 0; + for (; i<p->nActionAlloc - p->mxLookahead; i++) { if( p->aAction[i].lookahead<0 ){ for(j=0; j<p->nLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; @@ -743,11 +764,19 @@ PRIVATE int acttab_insert(acttab *p){ } } /* Insert transaction set at index i. */ +#if 0 + printf("Acttab:"); + for (j = 0; j<p->nLookahead; j++) { + printf(" %d", p->aLookahead[j].lookahead); + } + printf(" inserted at %d\n", i); +#endif for(j=0; j<p->nLookahead; j++){ k = p->aLookahead[j].lookahead - p->mnLookahead + i; p->aAction[k] = p->aLookahead[j]; if( k>=p->nAction ) p->nAction = k+1; } + if (makeItSafe && i + p->nterminal >= p->nAction) p->nAction = i + p->nterminal + 1; p->nLookahead = 0; /* Return the offset that is added to the lookahead in order to get the @@ -755,6 +784,16 @@ PRIVATE int acttab_insert(acttab *p){ return i - p->mnLookahead; } +/* +** Return the size of the action table without the trailing syntax error +** entries. +*/ +int acttab_action_size(acttab *p) { + int n = p->nAction; + while (n>0 && p->aAction[n - 1].lookahead<0) { n--; } + return n; +} + /********************** From the file "build.c" *****************************/ /* ** Routines to construction the finite state machine for the LEMON @@ -1772,6 +1811,7 @@ int main(int argc _U_, char **argv) stats_line("states", lem.nxstate); stats_line("conflicts", lem.nconflict); stats_line("action table entries", lem.nactiontab); + stats_line("lookahead table entries", lem.nlookaheadtab); stats_line("total table size (bytes)", lem.tablesize); } if( lem.nconflict > 0 ){ @@ -2209,7 +2249,8 @@ enum e_state { WAITING_FOR_FALLBACK_ID, WAITING_FOR_WILDCARD_ID, WAITING_FOR_CLASS_ID, - WAITING_FOR_CLASS_TOKEN + WAITING_FOR_CLASS_TOKEN, + WAITING_FOR_TOKEN_NAME }; struct pstate { char *filename; /* Name of the input file */ @@ -2525,6 +2566,8 @@ to follow the previous rule."); }else if( strcmp(x,"fallback")==0 ){ psp->fallback = 0; psp->state = WAITING_FOR_FALLBACK_ID; + } else if (strcmp(x, "token") == 0) { + psp->state = WAITING_FOR_TOKEN_NAME; }else if( strcmp(x,"wildcard")==0 ){ psp->state = WAITING_FOR_WILDCARD_ID; }else if( strcmp(x,"token_class")==0 ){ @@ -2680,6 +2723,26 @@ to follow the previous rule."); } } break; + case WAITING_FOR_TOKEN_NAME: + /* Tokens do not have to be declared before use. But they can be + ** in order to control their assigned integer number. The number for + ** each token is assigned when it is first seen. So by including + ** + ** %token ONE TWO THREE + ** + ** early in the grammar file, that assigns small consecutive values + ** to each of the tokens ONE TWO and THREE. + */ + if (x[0] == '.') { + psp->state = WAITING_FOR_DECL_OR_RULE; + } else if (!ISUPPER(x[0])) { + ErrorMsg(psp->filename, psp->tokenlineno, + "%%token argument \"%s\" should be a token", x); + psp->errorcnt++; + } else { + (void)Symbol_new(x); + } + break; case WAITING_FOR_WILDCARD_ID: if( x[0]=='.' ){ psp->state = WAITING_FOR_DECL_OR_RULE; @@ -3067,6 +3130,27 @@ PRIVATE FILE *file_open( return fp; } +/* Print the text of a rule +*/ +void rule_print(FILE *out, struct rule *rp) { + int i, j; + fprintf(out, "%s", rp->lhs->name); + /* if( rp->lhsalias ) fprintf(out,"(%s)",rp->lhsalias); */ + fprintf(out, " ::="); + for (i = 0; i<rp->nrhs; i++) { + struct symbol *sp = rp->rhs[i]; + if (sp->type == MULTITERMINAL) { + fprintf(out, " %s", sp->subsym[0]->name); + for (j = 1; j<sp->nsubsym; j++) { + fprintf(out, "|%s", sp->subsym[j]->name); + } + } else { + fprintf(out, " %s", sp->name); + } + /* if( rp->rhsalias[i] ) fprintf(out,"(%s)",rp->rhsalias[i]); */ + } +} + /* Duplicate the input file without comments and without actions ** on rules */ void Reprint(struct lemon *lemp) @@ -3094,21 +3178,7 @@ void Reprint(struct lemon *lemp) printf("\n"); } for(rp=lemp->rule; rp; rp=rp->next){ - printf("%s",rp->lhs->name); - /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ - printf(" ::="); - for(i=0; i<rp->nrhs; i++){ - sp = rp->rhs[i]; - if( sp->type==MULTITERMINAL ){ - printf(" %s", sp->subsym[0]->name); - for(j=1; j<sp->nsubsym; j++){ - printf("|%s", sp->subsym[j]->name); - } - }else{ - printf(" %s", sp->name); - } - /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ - } + rule_print(stdout, rp); printf("."); if( rp->precsym ) printf(" [%s]",rp->precsym->name); /* if( rp->code ) printf("\n %s",rp->code); */ @@ -3369,23 +3439,26 @@ PRIVATE char *pathsearch(char *argv0, char *name, int modemask) */ PRIVATE int compute_action(struct lemon *lemp, struct action *ap) { - int act; - switch( ap->type ){ - case SHIFT: act = ap->x.stp->statenum; break; - case SHIFTREDUCE: { - act = ap->x.rp->iRule + lemp->nstate; - /* Since a SHIFT is inherient after a prior REDUCE, convert any - ** SHIFTREDUCE action with a nonterminal on the LHS into a simple - ** REDUCE action: */ - if (ap->sp->index >= lemp->nterminal) act += lemp->nrule; - break; - } - case REDUCE: act = ap->x.rp->iRule + lemp->nstate+lemp->nrule; break; - case ERROR: act = lemp->nstate + lemp->nrule*2; break; - case ACCEPT: act = lemp->nstate + lemp->nrule*2 + 1; break; + int act; + switch (ap->type) { + case SHIFT: act = ap->x.stp->statenum; break; + case SHIFTREDUCE: { + /* Since a SHIFT is inherient after a prior REDUCE, convert any + ** SHIFTREDUCE action with a nonterminal on the LHS into a simple + ** REDUCE action: */ + if (ap->sp->index >= lemp->nterminal) { + act = lemp->minReduce + ap->x.rp->iRule; + } else { + act = lemp->minShiftReduce + ap->x.rp->iRule; + } + break; + } + case REDUCE: act = lemp->minReduce + ap->x.rp->iRule; break; + case ERROR: act = lemp->errAction; break; + case ACCEPT: act = lemp->accAction; break; default: act = -1; break; - } - return act; + } + return act; } #define LINESIZE 1000 @@ -4092,6 +4165,13 @@ void ReportTable( int mnNtOfst, mxNtOfst; struct axset *ax; + lemp->minShiftReduce = lemp->nstate; + lemp->errAction = lemp->minShiftReduce + lemp->nrule; + lemp->accAction = lemp->errAction + 1; + lemp->noAction = lemp->accAction + 1; + lemp->minReduce = lemp->noAction + 1; + lemp->maxAction = lemp->minReduce + lemp->nrule; + in = tplt_open(lemp); if( in==0 ) return; out = file_open(lemp,".c","wb"); @@ -4130,7 +4210,7 @@ void ReportTable( minimum_size_type(0, lemp->nsymbol+1, &szCodeType)); lineno++; fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++; fprintf(out,"#define YYACTIONTYPE %s\n", - minimum_size_type(0,lemp->nstate+lemp->nrule*2+5,&szActionType)); lineno++; + minimum_size_type(0, lemp->maxAction, &szActionType)); lineno++; if( lemp->wildcard ){ fprintf(out,"#define YYWILDCARD %d\n", lemp->wildcard->index); lineno++; @@ -4198,7 +4278,7 @@ void ReportTable( ** of placing the largest action sets first */ for(i=0; i<lemp->nxstate*2; i++) ax[i].iOrder = i; qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare); - pActtab = acttab_alloc(); + pActtab = acttab_alloc(lemp->nsymbol, lemp->nterminal); for(i=0; i<lemp->nxstate*2 && ax[i].nAction>0; i++){ stp = ax[i].stp; if( ax[i].isTkn ){ @@ -4209,7 +4289,7 @@ void ReportTable( if( action<0 ) continue; acttab_action(pActtab, ap->sp->index, action); } - stp->iTknOfst = acttab_insert(pActtab); + stp->iTknOfst = acttab_insert(pActtab, 1); if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst; if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; }else{ @@ -4221,7 +4301,7 @@ void ReportTable( if( action<0 ) continue; acttab_action(pActtab, ap->sp->index, action); } - stp->iNtOfst = acttab_insert(pActtab); + stp->iNtOfst = acttab_insert(pActtab, 0); if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst; if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; } @@ -4252,19 +4332,21 @@ void ReportTable( /* Finish rendering the constants now that the action table has ** been computed */ - fprintf(out,"#define YYNSTATE %d\n",lemp->nxstate); lineno++; - fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; - fprintf(out,"#define YY_MAX_SHIFT %d\n",lemp->nxstate-1); lineno++; - fprintf(out,"#define YY_MIN_SHIFTREDUCE %d\n",lemp->nstate); lineno++; - i = lemp->nstate + lemp->nrule; - fprintf(out,"#define YY_MAX_SHIFTREDUCE %d\n", i-1); lineno++; - fprintf(out,"#define YY_MIN_REDUCE %d\n", i); lineno++; - i = lemp->nstate + lemp->nrule*2; - fprintf(out,"#define YY_MAX_REDUCE %d\n", i-1); lineno++; - fprintf(out,"#define YY_ERROR_ACTION %d\n", i); lineno++; - fprintf(out,"#define YY_ACCEPT_ACTION %d\n", i+1); lineno++; - fprintf(out,"#define YY_NO_ACTION %d\n", i+2); lineno++; - tplt_xfer(lemp->name,in,out,&lineno); + fprintf(out, "#define YYNSTATE %d\n", lemp->nxstate); lineno++; + fprintf(out, "#define YYNRULE %d\n", lemp->nrule); lineno++; + fprintf(out, "#define YYNTOKEN %d\n", lemp->nterminal); lineno++; + fprintf(out, "#define YY_MAX_SHIFT %d\n", lemp->nxstate - 1); lineno++; + i = lemp->minShiftReduce; + fprintf(out, "#define YY_MIN_SHIFTREDUCE %d\n", i); lineno++; + i += lemp->nrule; + fprintf(out, "#define YY_MAX_SHIFTREDUCE %d\n", i - 1); lineno++; + fprintf(out, "#define YY_ERROR_ACTION %d\n", lemp->errAction); lineno++; + fprintf(out, "#define YY_ACCEPT_ACTION %d\n", lemp->accAction); lineno++; + fprintf(out, "#define YY_NO_ACTION %d\n", lemp->noAction); lineno++; + fprintf(out, "#define YY_MIN_REDUCE %d\n", lemp->minReduce); lineno++; + i = lemp->minReduce + lemp->nrule; + fprintf(out, "#define YY_MAX_REDUCE %d\n", i - 1); lineno++; + tplt_xfer(lemp->name, in, out, &lineno); /* Now output the action table and its associates: ** @@ -4279,25 +4361,26 @@ void ReportTable( */ /* Output the yy_action table */ - lemp->nactiontab = n = acttab_size(pActtab); + lemp->nactiontab = n = acttab_action_size(pActtab); lemp->tablesize += n*szActionType; fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++; fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; - for(i=j=0; i<n; i++){ - int action = acttab_yyaction(pActtab, i); - if( action<0 ) action = lemp->nstate + lemp->nrule + 2; - if( j==0 ) fprintf(out," /* %5d */ ", i); - fprintf(out, " %4d,", action); - if( j==9 || i==n-1 ){ - fprintf(out, "\n"); lineno++; - j = 0; - }else{ - j++; - } + for (i = j = 0; i<n; i++) { + int action = acttab_yyaction(pActtab, i); + if (action<0) action = lemp->noAction; + if (j == 0) fprintf(out, " /* %5d */ ", i); + fprintf(out, " %4d,", action); + if (j == 9 || i == n - 1) { + fprintf(out, "\n"); lineno++; + j = 0; + } else { + j++; + } } fprintf(out, "};\n"); lineno++; /* Output the yy_lookahead table */ + lemp->nlookaheadtab = n = acttab_lookahead_size(pActtab); lemp->tablesize += n*szCodeType; fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; for(i=j=0; i<n; i++){ @@ -4317,7 +4400,6 @@ void ReportTable( /* Output the yy_shift_ofst[] table */ n = lemp->nxstate; while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; - fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", lemp->nactiontab); lineno++; fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++; fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++; fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++; @@ -4342,7 +4424,6 @@ void ReportTable( fprintf(out, "};\n"); lineno++; /* Output the yy_reduce_ofst[] table */ - fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; n = lemp->nxstate; while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++; @@ -4371,16 +4452,20 @@ void ReportTable( fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; n = lemp->nxstate; lemp->tablesize += n*szActionType; - for(i=j=0; i<n; i++){ - stp = lemp->sorted[i]; - if( j==0 ) fprintf(out," /* %5d */ ", i); - fprintf(out, " %4d,", stp->iDfltReduce+lemp->nstate+lemp->nrule); - if( j==9 || i==n-1 ){ - fprintf(out, "\n"); lineno++; - j = 0; - }else{ - j++; - } + for (i = j = 0; i<n; i++) { + stp = lemp->sorted[i]; + if (j == 0) fprintf(out, " /* %5d */ ", i); + if (stp->iDfltReduce<0) { + fprintf(out, " %4d,", lemp->errAction); + } else { + fprintf(out, " %4d,", stp->iDfltReduce + lemp->minReduce); + } + if (j == 9 || i == n - 1) { + fprintf(out, "\n"); lineno++; + j = 0; + } else { + j++; + } } fprintf(out, "};\n"); lineno++; tplt_xfer(lemp->name,in,out,&lineno); @@ -4406,12 +4491,10 @@ void ReportTable( /* Generate a table containing the symbolic name of every symbol */ - for(i=0; i<lemp->nsymbol; i++){ - lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name); - fprintf(out," %-15s",line); - if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } + for (i = 0; i<lemp->nsymbol; i++) { + lemon_sprintf(line, "\"%s\",", lemp->symbols[i]->name); + fprintf(out, " /* %4d */ \"%s\",\n", i, lemp->symbols[i]->name); lineno++; } - if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); /* Generate a table containing a text string that describes every @@ -4498,8 +4581,10 @@ void ReportTable( ** Note: This code depends on the fact that rules are number ** sequentually beginning with 0. */ - for(rp=lemp->rule; rp; rp=rp->next){ - fprintf(out, " { %d, %d },\n", rp->lhs->index, -rp->nrhs); lineno++; + for (i = 0, rp = lemp->rule; rp; rp = rp->next, i++) { + fprintf(out, " { %4d, %4d }, /* (%d) ", rp->lhs->index, -rp->nrhs, i); + rule_print(out, rp); + fprintf(out, " */\n"); lineno++; } tplt_xfer(lemp->name,in,out,&lineno); @@ -4765,7 +4850,7 @@ void ResortStates(struct lemon *lemp) for(i=0; i<lemp->nstate; i++){ stp = lemp->sorted[i]; stp->nTknAct = stp->nNtAct = 0; - stp->iDfltReduce = lemp->nrule; /* Init dflt action to "syntax error" */ + stp->iDfltReduce = -1; /* Init dflt action to "syntax error" */ stp->iTknOfst = NO_OFFSET; stp->iNtOfst = NO_OFFSET; for(ap=stp->ap; ap; ap=ap->next){ @@ -4777,7 +4862,7 @@ void ResortStates(struct lemon *lemp) stp->nNtAct++; }else{ assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp ); - stp->iDfltReduce = iAction - lemp->nstate - lemp->nrule; + stp->iDfltReduce = iAction; } } } |