aboutsummaryrefslogtreecommitdiffstats
path: root/tools/lemon/lemon.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/lemon/lemon.c')
-rw-r--r--tools/lemon/lemon.c281
1 files changed, 183 insertions, 98 deletions
diff --git a/tools/lemon/lemon.c b/tools/lemon/lemon.c
index 650a55c383..8eb5cbc487 100644
--- a/tools/lemon/lemon.c
+++ b/tools/lemon/lemon.c
@@ -391,6 +391,12 @@ struct lemon {
int nrule; /* Number of rules */
int nsymbol; /* Number of terminal and nonterminal symbols */
int nterminal; /* Number of terminal symbols */
+ int minShiftReduce; /* Minimum shift-reduce action value */
+ int errAction; /* Error action value */
+ int accAction; /* Accept action value */
+ int noAction; /* No-op action value */
+ int minReduce; /* Minimum reduce action */
+ int maxAction; /* Maximum action value of any kind */
struct symbol **symbols; /* Sorted array of pointers to symbols */
int errorcnt; /* Number of errors */
struct symbol *errsym; /* The error symbol */
@@ -415,6 +421,7 @@ struct lemon {
char *tokenprefix; /* A prefix added to token names in the .h file */
int nconflict; /* Number of parsing conflicts */
int nactiontab; /* Number of entries in the yy_action[] table */
+ int nlookaheadtab; /* Number of entries in yy_lookahead[] */
int tablesize; /* Total table size of all tables in bytes */
int basisflag; /* Print only basis configurations */
int has_fallback; /* True if any %fallback is seen in the grammar */
@@ -591,10 +598,12 @@ struct acttab {
int mxLookahead; /* Maximum aLookahead[].lookahead */
int nLookahead; /* Used slots in aLookahead[] */
int nLookaheadAlloc; /* Slots allocated in aLookahead[] */
+ int nterminal; /* Number of terminal symbols */
+ int nsymbol; /* total number of symbols */
};
/* Return the number of entries in the yy_action table */
-#define acttab_size(X) ((X)->nAction)
+#define acttab_lookahead_size(X) ((X)->nAction)
/* The value for the N-th entry in yy_action */
#define acttab_yyaction(X,N) ((X)->aAction[N].action)
@@ -612,14 +621,16 @@ PRIVATE void acttab_free(acttab *p){
#endif
/* Allocate a new acttab structure */
-PRIVATE acttab *acttab_alloc(void){
- acttab *p = (acttab *) calloc( 1, sizeof(*p) );
- if( p==0 ){
- fprintf(stderr,"Unable to allocate memory for a new acttab.");
- exit(1);
- }
- memset(p, 0, sizeof(*p));
- return p;
+PRIVATE acttab *acttab_alloc(int nsymbol, int nterminal) {
+ acttab *p = (acttab *)calloc(1, sizeof(*p));
+ if (p == 0) {
+ fprintf(stderr, "Unable to allocate memory for a new acttab.");
+ exit(1);
+ }
+ memset(p, 0, sizeof(*p));
+ p->nsymbol = nsymbol;
+ p->nterminal = nterminal;
+ return p;
}
/* Add a new action to the current transaction set.
@@ -659,16 +670,24 @@ PRIVATE void acttab_action(acttab *p, int lookahead, int action){
** to an empty set in preparation for a new round of acttab_action() calls.
**
** Return the offset into the action table of the new transaction.
+**
+** If the makeItSafe parameter is true, then the offset is chosen so that
+** it is impossible to overread the yy_lookaside[] table regardless of
+** the lookaside token. This is done for the terminal symbols, as they
+** come from external inputs and can contain syntax errors. When makeItSafe
+** is false, there is more flexibility in selecting offsets, resulting in
+** a smaller table. For non-terminal symbols, which are never syntax errors,
+** makeItSafe can be false.
*/
-PRIVATE int acttab_insert(acttab *p){
- int i, j, k, n;
- assert( p->nLookahead>0 );
+PRIVATE int acttab_insert(acttab *p, int makeItSafe) {
+ int i, j, k, n, end;
+ assert(p->nLookahead>0);
/* Make sure we have enough space to hold the expanded action table
** in the worst case. The worst case occurs if the transaction set
** must be appended to the current action table
*/
- n = p->mxLookahead + 1;
+ n = p->nsymbol + 1;
if( p->nAction + n >= p->nActionAlloc ){
int oldAlloc = p->nActionAlloc;
p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20;
@@ -690,7 +709,8 @@ PRIVATE int acttab_insert(acttab *p){
**
** i is the index in p->aAction[] where p->mnLookahead is inserted.
*/
- for(i=p->nAction-1; i>=0; i--){
+ end = makeItSafe ? p->mnLookahead : 0;
+ for (i = p->nAction - 1; i >= end; i--) {
if( p->aAction[i].lookahead==p->mnLookahead ){
/* All lookaheads and actions in the aLookahead[] transaction
** must match against the candidate aAction[i] entry. */
@@ -720,12 +740,13 @@ PRIVATE int acttab_insert(acttab *p){
** an empty offset in the aAction[] table in which we can add the
** aLookahead[] transaction.
*/
- if( i<0 ){
+ if (i<end) {
/* Look for holes in the aAction[] table that fit the current
** aLookahead[] transaction. Leave i set to the offset of the hole.
** If no holes are found, i is left at p->nAction, which means the
** transaction will be appended. */
- for(i=0; i<p->nActionAlloc - p->mxLookahead; i++){
+ i = makeItSafe ? p->mnLookahead : 0;
+ for (; i<p->nActionAlloc - p->mxLookahead; i++) {
if( p->aAction[i].lookahead<0 ){
for(j=0; j<p->nLookahead; j++){
k = p->aLookahead[j].lookahead - p->mnLookahead + i;
@@ -743,11 +764,19 @@ PRIVATE int acttab_insert(acttab *p){
}
}
/* Insert transaction set at index i. */
+#if 0
+ printf("Acttab:");
+ for (j = 0; j<p->nLookahead; j++) {
+ printf(" %d", p->aLookahead[j].lookahead);
+ }
+ printf(" inserted at %d\n", i);
+#endif
for(j=0; j<p->nLookahead; j++){
k = p->aLookahead[j].lookahead - p->mnLookahead + i;
p->aAction[k] = p->aLookahead[j];
if( k>=p->nAction ) p->nAction = k+1;
}
+ if (makeItSafe && i + p->nterminal >= p->nAction) p->nAction = i + p->nterminal + 1;
p->nLookahead = 0;
/* Return the offset that is added to the lookahead in order to get the
@@ -755,6 +784,16 @@ PRIVATE int acttab_insert(acttab *p){
return i - p->mnLookahead;
}
+/*
+** Return the size of the action table without the trailing syntax error
+** entries.
+*/
+int acttab_action_size(acttab *p) {
+ int n = p->nAction;
+ while (n>0 && p->aAction[n - 1].lookahead<0) { n--; }
+ return n;
+}
+
/********************** From the file "build.c" *****************************/
/*
** Routines to construction the finite state machine for the LEMON
@@ -1772,6 +1811,7 @@ int main(int argc _U_, char **argv)
stats_line("states", lem.nxstate);
stats_line("conflicts", lem.nconflict);
stats_line("action table entries", lem.nactiontab);
+ stats_line("lookahead table entries", lem.nlookaheadtab);
stats_line("total table size (bytes)", lem.tablesize);
}
if( lem.nconflict > 0 ){
@@ -2209,7 +2249,8 @@ enum e_state {
WAITING_FOR_FALLBACK_ID,
WAITING_FOR_WILDCARD_ID,
WAITING_FOR_CLASS_ID,
- WAITING_FOR_CLASS_TOKEN
+ WAITING_FOR_CLASS_TOKEN,
+ WAITING_FOR_TOKEN_NAME
};
struct pstate {
char *filename; /* Name of the input file */
@@ -2525,6 +2566,8 @@ to follow the previous rule.");
}else if( strcmp(x,"fallback")==0 ){
psp->fallback = 0;
psp->state = WAITING_FOR_FALLBACK_ID;
+ } else if (strcmp(x, "token") == 0) {
+ psp->state = WAITING_FOR_TOKEN_NAME;
}else if( strcmp(x,"wildcard")==0 ){
psp->state = WAITING_FOR_WILDCARD_ID;
}else if( strcmp(x,"token_class")==0 ){
@@ -2680,6 +2723,26 @@ to follow the previous rule.");
}
}
break;
+ case WAITING_FOR_TOKEN_NAME:
+ /* Tokens do not have to be declared before use. But they can be
+ ** in order to control their assigned integer number. The number for
+ ** each token is assigned when it is first seen. So by including
+ **
+ ** %token ONE TWO THREE
+ **
+ ** early in the grammar file, that assigns small consecutive values
+ ** to each of the tokens ONE TWO and THREE.
+ */
+ if (x[0] == '.') {
+ psp->state = WAITING_FOR_DECL_OR_RULE;
+ } else if (!ISUPPER(x[0])) {
+ ErrorMsg(psp->filename, psp->tokenlineno,
+ "%%token argument \"%s\" should be a token", x);
+ psp->errorcnt++;
+ } else {
+ (void)Symbol_new(x);
+ }
+ break;
case WAITING_FOR_WILDCARD_ID:
if( x[0]=='.' ){
psp->state = WAITING_FOR_DECL_OR_RULE;
@@ -3067,6 +3130,27 @@ PRIVATE FILE *file_open(
return fp;
}
+/* Print the text of a rule
+*/
+void rule_print(FILE *out, struct rule *rp) {
+ int i, j;
+ fprintf(out, "%s", rp->lhs->name);
+ /* if( rp->lhsalias ) fprintf(out,"(%s)",rp->lhsalias); */
+ fprintf(out, " ::=");
+ for (i = 0; i<rp->nrhs; i++) {
+ struct symbol *sp = rp->rhs[i];
+ if (sp->type == MULTITERMINAL) {
+ fprintf(out, " %s", sp->subsym[0]->name);
+ for (j = 1; j<sp->nsubsym; j++) {
+ fprintf(out, "|%s", sp->subsym[j]->name);
+ }
+ } else {
+ fprintf(out, " %s", sp->name);
+ }
+ /* if( rp->rhsalias[i] ) fprintf(out,"(%s)",rp->rhsalias[i]); */
+ }
+}
+
/* Duplicate the input file without comments and without actions
** on rules */
void Reprint(struct lemon *lemp)
@@ -3094,21 +3178,7 @@ void Reprint(struct lemon *lemp)
printf("\n");
}
for(rp=lemp->rule; rp; rp=rp->next){
- printf("%s",rp->lhs->name);
- /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */
- printf(" ::=");
- for(i=0; i<rp->nrhs; i++){
- sp = rp->rhs[i];
- if( sp->type==MULTITERMINAL ){
- printf(" %s", sp->subsym[0]->name);
- for(j=1; j<sp->nsubsym; j++){
- printf("|%s", sp->subsym[j]->name);
- }
- }else{
- printf(" %s", sp->name);
- }
- /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */
- }
+ rule_print(stdout, rp);
printf(".");
if( rp->precsym ) printf(" [%s]",rp->precsym->name);
/* if( rp->code ) printf("\n %s",rp->code); */
@@ -3369,23 +3439,26 @@ PRIVATE char *pathsearch(char *argv0, char *name, int modemask)
*/
PRIVATE int compute_action(struct lemon *lemp, struct action *ap)
{
- int act;
- switch( ap->type ){
- case SHIFT: act = ap->x.stp->statenum; break;
- case SHIFTREDUCE: {
- act = ap->x.rp->iRule + lemp->nstate;
- /* Since a SHIFT is inherient after a prior REDUCE, convert any
- ** SHIFTREDUCE action with a nonterminal on the LHS into a simple
- ** REDUCE action: */
- if (ap->sp->index >= lemp->nterminal) act += lemp->nrule;
- break;
- }
- case REDUCE: act = ap->x.rp->iRule + lemp->nstate+lemp->nrule; break;
- case ERROR: act = lemp->nstate + lemp->nrule*2; break;
- case ACCEPT: act = lemp->nstate + lemp->nrule*2 + 1; break;
+ int act;
+ switch (ap->type) {
+ case SHIFT: act = ap->x.stp->statenum; break;
+ case SHIFTREDUCE: {
+ /* Since a SHIFT is inherient after a prior REDUCE, convert any
+ ** SHIFTREDUCE action with a nonterminal on the LHS into a simple
+ ** REDUCE action: */
+ if (ap->sp->index >= lemp->nterminal) {
+ act = lemp->minReduce + ap->x.rp->iRule;
+ } else {
+ act = lemp->minShiftReduce + ap->x.rp->iRule;
+ }
+ break;
+ }
+ case REDUCE: act = lemp->minReduce + ap->x.rp->iRule; break;
+ case ERROR: act = lemp->errAction; break;
+ case ACCEPT: act = lemp->accAction; break;
default: act = -1; break;
- }
- return act;
+ }
+ return act;
}
#define LINESIZE 1000
@@ -4092,6 +4165,13 @@ void ReportTable(
int mnNtOfst, mxNtOfst;
struct axset *ax;
+ lemp->minShiftReduce = lemp->nstate;
+ lemp->errAction = lemp->minShiftReduce + lemp->nrule;
+ lemp->accAction = lemp->errAction + 1;
+ lemp->noAction = lemp->accAction + 1;
+ lemp->minReduce = lemp->noAction + 1;
+ lemp->maxAction = lemp->minReduce + lemp->nrule;
+
in = tplt_open(lemp);
if( in==0 ) return;
out = file_open(lemp,".c","wb");
@@ -4130,7 +4210,7 @@ void ReportTable(
minimum_size_type(0, lemp->nsymbol+1, &szCodeType)); lineno++;
fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++;
fprintf(out,"#define YYACTIONTYPE %s\n",
- minimum_size_type(0,lemp->nstate+lemp->nrule*2+5,&szActionType)); lineno++;
+ minimum_size_type(0, lemp->maxAction, &szActionType)); lineno++;
if( lemp->wildcard ){
fprintf(out,"#define YYWILDCARD %d\n",
lemp->wildcard->index); lineno++;
@@ -4198,7 +4278,7 @@ void ReportTable(
** of placing the largest action sets first */
for(i=0; i<lemp->nxstate*2; i++) ax[i].iOrder = i;
qsort(ax, lemp->nxstate*2, sizeof(ax[0]), axset_compare);
- pActtab = acttab_alloc();
+ pActtab = acttab_alloc(lemp->nsymbol, lemp->nterminal);
for(i=0; i<lemp->nxstate*2 && ax[i].nAction>0; i++){
stp = ax[i].stp;
if( ax[i].isTkn ){
@@ -4209,7 +4289,7 @@ void ReportTable(
if( action<0 ) continue;
acttab_action(pActtab, ap->sp->index, action);
}
- stp->iTknOfst = acttab_insert(pActtab);
+ stp->iTknOfst = acttab_insert(pActtab, 1);
if( stp->iTknOfst<mnTknOfst ) mnTknOfst = stp->iTknOfst;
if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst;
}else{
@@ -4221,7 +4301,7 @@ void ReportTable(
if( action<0 ) continue;
acttab_action(pActtab, ap->sp->index, action);
}
- stp->iNtOfst = acttab_insert(pActtab);
+ stp->iNtOfst = acttab_insert(pActtab, 0);
if( stp->iNtOfst<mnNtOfst ) mnNtOfst = stp->iNtOfst;
if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst;
}
@@ -4252,19 +4332,21 @@ void ReportTable(
/* Finish rendering the constants now that the action table has
** been computed */
- fprintf(out,"#define YYNSTATE %d\n",lemp->nxstate); lineno++;
- fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++;
- fprintf(out,"#define YY_MAX_SHIFT %d\n",lemp->nxstate-1); lineno++;
- fprintf(out,"#define YY_MIN_SHIFTREDUCE %d\n",lemp->nstate); lineno++;
- i = lemp->nstate + lemp->nrule;
- fprintf(out,"#define YY_MAX_SHIFTREDUCE %d\n", i-1); lineno++;
- fprintf(out,"#define YY_MIN_REDUCE %d\n", i); lineno++;
- i = lemp->nstate + lemp->nrule*2;
- fprintf(out,"#define YY_MAX_REDUCE %d\n", i-1); lineno++;
- fprintf(out,"#define YY_ERROR_ACTION %d\n", i); lineno++;
- fprintf(out,"#define YY_ACCEPT_ACTION %d\n", i+1); lineno++;
- fprintf(out,"#define YY_NO_ACTION %d\n", i+2); lineno++;
- tplt_xfer(lemp->name,in,out,&lineno);
+ fprintf(out, "#define YYNSTATE %d\n", lemp->nxstate); lineno++;
+ fprintf(out, "#define YYNRULE %d\n", lemp->nrule); lineno++;
+ fprintf(out, "#define YYNTOKEN %d\n", lemp->nterminal); lineno++;
+ fprintf(out, "#define YY_MAX_SHIFT %d\n", lemp->nxstate - 1); lineno++;
+ i = lemp->minShiftReduce;
+ fprintf(out, "#define YY_MIN_SHIFTREDUCE %d\n", i); lineno++;
+ i += lemp->nrule;
+ fprintf(out, "#define YY_MAX_SHIFTREDUCE %d\n", i - 1); lineno++;
+ fprintf(out, "#define YY_ERROR_ACTION %d\n", lemp->errAction); lineno++;
+ fprintf(out, "#define YY_ACCEPT_ACTION %d\n", lemp->accAction); lineno++;
+ fprintf(out, "#define YY_NO_ACTION %d\n", lemp->noAction); lineno++;
+ fprintf(out, "#define YY_MIN_REDUCE %d\n", lemp->minReduce); lineno++;
+ i = lemp->minReduce + lemp->nrule;
+ fprintf(out, "#define YY_MAX_REDUCE %d\n", i - 1); lineno++;
+ tplt_xfer(lemp->name, in, out, &lineno);
/* Now output the action table and its associates:
**
@@ -4279,25 +4361,26 @@ void ReportTable(
*/
/* Output the yy_action table */
- lemp->nactiontab = n = acttab_size(pActtab);
+ lemp->nactiontab = n = acttab_action_size(pActtab);
lemp->tablesize += n*szActionType;
fprintf(out,"#define YY_ACTTAB_COUNT (%d)\n", n); lineno++;
fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++;
- for(i=j=0; i<n; i++){
- int action = acttab_yyaction(pActtab, i);
- if( action<0 ) action = lemp->nstate + lemp->nrule + 2;
- if( j==0 ) fprintf(out," /* %5d */ ", i);
- fprintf(out, " %4d,", action);
- if( j==9 || i==n-1 ){
- fprintf(out, "\n"); lineno++;
- j = 0;
- }else{
- j++;
- }
+ for (i = j = 0; i<n; i++) {
+ int action = acttab_yyaction(pActtab, i);
+ if (action<0) action = lemp->noAction;
+ if (j == 0) fprintf(out, " /* %5d */ ", i);
+ fprintf(out, " %4d,", action);
+ if (j == 9 || i == n - 1) {
+ fprintf(out, "\n"); lineno++;
+ j = 0;
+ } else {
+ j++;
+ }
}
fprintf(out, "};\n"); lineno++;
/* Output the yy_lookahead table */
+ lemp->nlookaheadtab = n = acttab_lookahead_size(pActtab);
lemp->tablesize += n*szCodeType;
fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++;
for(i=j=0; i<n; i++){
@@ -4317,7 +4400,6 @@ void ReportTable(
/* Output the yy_shift_ofst[] table */
n = lemp->nxstate;
while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--;
- fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", lemp->nactiontab); lineno++;
fprintf(out, "#define YY_SHIFT_COUNT (%d)\n", n-1); lineno++;
fprintf(out, "#define YY_SHIFT_MIN (%d)\n", mnTknOfst); lineno++;
fprintf(out, "#define YY_SHIFT_MAX (%d)\n", mxTknOfst); lineno++;
@@ -4342,7 +4424,6 @@ void ReportTable(
fprintf(out, "};\n"); lineno++;
/* Output the yy_reduce_ofst[] table */
- fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++;
n = lemp->nxstate;
while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--;
fprintf(out, "#define YY_REDUCE_COUNT (%d)\n", n-1); lineno++;
@@ -4371,16 +4452,20 @@ void ReportTable(
fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++;
n = lemp->nxstate;
lemp->tablesize += n*szActionType;
- for(i=j=0; i<n; i++){
- stp = lemp->sorted[i];
- if( j==0 ) fprintf(out," /* %5d */ ", i);
- fprintf(out, " %4d,", stp->iDfltReduce+lemp->nstate+lemp->nrule);
- if( j==9 || i==n-1 ){
- fprintf(out, "\n"); lineno++;
- j = 0;
- }else{
- j++;
- }
+ for (i = j = 0; i<n; i++) {
+ stp = lemp->sorted[i];
+ if (j == 0) fprintf(out, " /* %5d */ ", i);
+ if (stp->iDfltReduce<0) {
+ fprintf(out, " %4d,", lemp->errAction);
+ } else {
+ fprintf(out, " %4d,", stp->iDfltReduce + lemp->minReduce);
+ }
+ if (j == 9 || i == n - 1) {
+ fprintf(out, "\n"); lineno++;
+ j = 0;
+ } else {
+ j++;
+ }
}
fprintf(out, "};\n"); lineno++;
tplt_xfer(lemp->name,in,out,&lineno);
@@ -4406,12 +4491,10 @@ void ReportTable(
/* Generate a table containing the symbolic name of every symbol
*/
- for(i=0; i<lemp->nsymbol; i++){
- lemon_sprintf(line,"\"%s\",",lemp->symbols[i]->name);
- fprintf(out," %-15s",line);
- if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; }
+ for (i = 0; i<lemp->nsymbol; i++) {
+ lemon_sprintf(line, "\"%s\",", lemp->symbols[i]->name);
+ fprintf(out, " /* %4d */ \"%s\",\n", i, lemp->symbols[i]->name); lineno++;
}
- if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; }
tplt_xfer(lemp->name,in,out,&lineno);
/* Generate a table containing a text string that describes every
@@ -4498,8 +4581,10 @@ void ReportTable(
** Note: This code depends on the fact that rules are number
** sequentually beginning with 0.
*/
- for(rp=lemp->rule; rp; rp=rp->next){
- fprintf(out, " { %d, %d },\n", rp->lhs->index, -rp->nrhs); lineno++;
+ for (i = 0, rp = lemp->rule; rp; rp = rp->next, i++) {
+ fprintf(out, " { %4d, %4d }, /* (%d) ", rp->lhs->index, -rp->nrhs, i);
+ rule_print(out, rp);
+ fprintf(out, " */\n"); lineno++;
}
tplt_xfer(lemp->name,in,out,&lineno);
@@ -4765,7 +4850,7 @@ void ResortStates(struct lemon *lemp)
for(i=0; i<lemp->nstate; i++){
stp = lemp->sorted[i];
stp->nTknAct = stp->nNtAct = 0;
- stp->iDfltReduce = lemp->nrule; /* Init dflt action to "syntax error" */
+ stp->iDfltReduce = -1; /* Init dflt action to "syntax error" */
stp->iTknOfst = NO_OFFSET;
stp->iNtOfst = NO_OFFSET;
for(ap=stp->ap; ap; ap=ap->next){
@@ -4777,7 +4862,7 @@ void ResortStates(struct lemon *lemp)
stp->nNtAct++;
}else{
assert( stp->autoReduce==0 || stp->pDfltReduce==ap->x.rp );
- stp->iDfltReduce = iAction - lemp->nstate - lemp->nrule;
+ stp->iDfltReduce = iAction;
}
}
}