author Tomas Kukosa <tomas.kukosa@siemens.com> 2006-10-09 06:24:03 +0000
committer Tomas Kukosa <tomas.kukosa@siemens.com> 2006-10-09 06:24:03 +0000
commit 410830c4e3e47fb506c2d792ab10f9ddd38e747d (patch)
tree 2f401b4d2d304d85bf6ed7dd8a28df2fa5a0ed7e
parent cf201496162307772c31c19bba651bcbbf6e5be4 (diff)
Ply updated to version 2.1
svn path=/trunk/; revision=19459
-rwxr-xr-x  tools/lex.py    398
-rwxr-xr-x  tools/yacc.py  1057
2 files changed, 653 insertions, 802 deletions
diff --git a/tools/lex.py b/tools/lex.py
index 15af21357b..beaace02df 100755
--- a/tools/lex.py
+++ b/tools/lex.py
@@ -5,8 +5,6 @@
#
# Copyright (C) 2001-2006, David M. Beazley
#
-# $Header: /cvs/projects/PLY/lex.py,v 1.1.1.1 2004/05/21 15:34:10 beazley Exp $
-#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
@@ -41,20 +39,6 @@
# - The interface is geared towards LALR(1) and LR(1) parser
# generators. That is, tokens are generated one at a time
# rather than being generated in advance all in one step.
-#
-# There are a few limitations of this module
-#
-# - The module interface makes it somewhat awkward to support more
-# than one lexer at a time. Although somewhat inelegant from a
-# design perspective, this is rarely a practical concern for
-# most compiler projects.
-#
-# - The lexer requires that the entire input text be read into
-# a string before scanning. I suppose that most machines have
-# enough memory to make this a minor issues, but it makes
-# the lexer somewhat difficult to use in interactive sessions
-# or with streaming data.
-#
#-----------------------------------------------------------------------------
r"""
@@ -186,18 +170,19 @@ scanner you have defined.
# -----------------------------------------------------------------------------
-__version__ = "1.8"
+__version__ = "2.1"
import re, types, sys, copy
# Available instance types. This is used when lexers are defined by a class.
-# it's a little funky because I want to preserve backwards compatibility
+# It's a little funky because I want to preserve backwards compatibility
# with Python 2.0 where types.ObjectType is undefined.
try:
_INSTANCETYPE = (types.InstanceType, types.ObjectType)
except AttributeError:
_INSTANCETYPE = types.InstanceType
+ class object: pass # Note: needed if no new-style classes present
# Exception thrown when invalid token encountered and no default
class LexError(Exception):
@@ -206,9 +191,9 @@ class LexError(Exception):
self.text = s
# Token class
-class LexToken:
+class LexToken(object):
def __str__(self):
- return "LexToken(%s,%r,%d)" % (self.type,self.value,self.lineno)
+ return "LexToken(%s,%r,%d,%d)" % (self.type,self.value,self.lineno,self.lexpos)
def __repr__(self):
return str(self)
def skip(self,n):
@@ -220,42 +205,66 @@ class LexToken:
# -----------------------------------------------------------------------------
# Lexer class
#
+# This class encapsulates all of the methods and data associated with a lexer.
+#
# input() - Store a new string in the lexer
# token() - Get the next token
# -----------------------------------------------------------------------------
class Lexer:
def __init__(self):
- self.lexre = None # Master regular expression
- self.lexreflags = 0 # Option re compile flags
- self.lexdata = None # Actual input data (as a string)
- self.lexpos = 0 # Current position in input text
- self.lexlen = 0 # Length of the input text
- self.lexindexfunc = [ ] # Reverse mapping of groups to functions and types
- self.lexerrorf = None # Error rule (if any)
- self.lextokens = None # List of valid tokens
- self.lexignore = None # Ignored characters
- self.lineno = 1 # Current line number
- self.debug = 0 # Debugging mode
- self.optimize = 0 # Optimized mode
- self.token = self.errtoken
-
- def __copy__(self):
+ self.lexre = None # Master regular expression. This is a list of
+ # tuples (re,findex) where re is a compiled
+ # regular expression and findex is a list
+ # mapping regex group numbers to rules
+
+ self.lexreflags = 0 # Optional re compile flags
+ self.lexdata = None # Actual input data (as a string)
+ self.lexpos = 0 # Current position in input text
+ self.lexlen = 0 # Length of the input text
+ self.lexerrorf = None # Error rule (if any)
+ self.lextokens = None # List of valid tokens
+ self.lexignore = None # Ignored characters
+ self.lexliterals = "" # Literal characters that can be passed through
+ self.lexmodule = None # Module
+ self.lineno = 1 # Current line number
+ self.lexdebug = 0 # Debugging mode
+ self.lexoptimize = 0 # Optimized mode
+
+ def clone(self,object=None):
c = Lexer()
c.lexre = self.lexre
- c.lexreflags = self.lexreflags
+ c.lexreflags = self.lexreflags
c.lexdata = self.lexdata
c.lexpos = self.lexpos
c.lexlen = self.lexlen
- c.lexindexfunc = self.lexindexfunc
c.lexerrorf = self.lexerrorf
c.lextokens = self.lextokens
c.lexignore = self.lexignore
- c.debug = self.debug
+ c.lexdebug = self.lexdebug
c.lineno = self.lineno
- c.optimize = self.optimize
- c.token = c.realtoken
- return c
+ c.lexoptimize = self.lexoptimize
+ c.lexliterals = self.lexliterals
+ c.lexmodule = self.lexmodule
+
+ # If the object parameter has been supplied, it means we are attaching the
+ # lexer to a new object. In this case, we have to rebind the methods
+
+ if object:
+ newre = []
+ for cre, findex in c.lexre:
+ # Loop over findex and adjust methods
+ newfindex = []
+ for f in findex:
+ if not f or not f[0]:
+ newfindex.append(f)
+ continue
+ newfindex.append((getattr(object,f[0].__name__),f[1]))
+ newre.append((cre,newfindex))
+ c.lexre = newre
+ c.lexmodule = object
+
+ return c
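(Illustration only, not part of the patch: clone() produces an independent lexer that shares the compiled rule tables, which is useful for scanning a second string without disturbing the main lexer. The variable names below are hypothetical.)

    # Hypothetical sketch, assuming 'lexer' was returned by lex.lex():
    sublexer = lexer.clone()          # shares lexre/lextokens, copies position state
    sublexer.input("some other text")
    tok = sublexer.token()            # does not advance the original lexer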
# ------------------------------------------------------------
# input() - Push a new string into the lexer
@@ -266,33 +275,21 @@ class Lexer:
self.lexdata = s
self.lexpos = 0
self.lexlen = len(s)
- self.token = self.realtoken
-
- # Change the token routine to point to realtoken()
- global token
- if token == self.errtoken:
- token = self.token
# ------------------------------------------------------------
- # errtoken() - Return error if token is called with no data
- # ------------------------------------------------------------
- def errtoken(self):
- raise RuntimeError, "No input string given with input()"
-
- # ------------------------------------------------------------
# token() - Return the next token from the Lexer
#
# Note: This function has been carefully implemented to be as fast
# as possible. Don't make changes unless you really know what
# you are doing
# ------------------------------------------------------------
- def realtoken(self):
+ def token(self):
# Make local copies of frequently referenced attributes
lexpos = self.lexpos
lexlen = self.lexlen
lexignore = self.lexignore
lexdata = self.lexdata
-
+
while lexpos < lexlen:
# This code provides some short-circuit code for whitespace, tabs, and other ignored characters
if lexdata[lexpos] in lexignore:
@@ -300,61 +297,78 @@ class Lexer:
continue
# Look for a regular expression match
- m = self.lexre.match(lexdata,lexpos)
- if m:
- i = m.lastindex
- lexpos = m.end()
+ for lexre,lexindexfunc in self.lexre:
+ m = lexre.match(lexdata,lexpos)
+ if not m: continue
+
tok = LexToken()
tok.value = m.group()
tok.lineno = self.lineno
+ tok.lexpos = lexpos
tok.lexer = self
- func,tok.type = self.lexindexfunc[i]
+
+ lexpos = m.end()
+ i = m.lastindex
+ func,tok.type = lexindexfunc[i]
+ self.lexpos = lexpos
+
if not func:
- self.lexpos = lexpos
return tok
# If token is processed by a function, call it
- self.lexpos = lexpos
newtok = func(tok)
- self.lineno = tok.lineno # Update line number
# Every function must return a token, if nothing, we just move to next token
- if not newtok: continue
+ if not newtok:
+ lexpos = self.lexpos # This is here in case user has updated lexpos.
+ break
# Verify type of the token. If not in the token map, raise an error
- if not self.optimize:
+ if not self.lexoptimize:
if not self.lextokens.has_key(newtok.type):
raise LexError, ("%s:%d: Rule '%s' returned an unknown token type '%s'" % (
func.func_code.co_filename, func.func_code.co_firstlineno,
func.__name__, newtok.type),lexdata[lexpos:])
return newtok
-
- # No match. Call t_error() if defined.
- if self.lexerrorf:
- tok = LexToken()
- tok.value = self.lexdata[lexpos:]
- tok.lineno = self.lineno
- tok.type = "error"
- tok.lexer = self
- oldpos = lexpos
- newtok = self.lexerrorf(tok)
- lexpos += getattr(tok,"_skipn",0)
- if oldpos == lexpos:
- # Error method didn't change text position at all. This is an error.
+ else:
+ # No match, see if in literals
+ if lexdata[lexpos] in self.lexliterals:
+ tok = LexToken()
+ tok.value = lexdata[lexpos]
+ tok.lineno = self.lineno
+ tok.lexer = self
+ tok.type = tok.value
+ tok.lexpos = lexpos
+ self.lexpos = lexpos + 1
+ return tok
+
+ # No match. Call t_error() if defined.
+ if self.lexerrorf:
+ tok = LexToken()
+ tok.value = self.lexdata[lexpos:]
+ tok.lineno = self.lineno
+ tok.type = "error"
+ tok.lexer = self
+ tok.lexpos = lexpos
+ oldpos = lexpos
+ newtok = self.lexerrorf(tok)
+ lexpos += getattr(tok,"_skipn",0)
+ if oldpos == lexpos:
+ # Error method didn't change text position at all. This is an error.
+ self.lexpos = lexpos
+ raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
+ if not newtok: continue
self.lexpos = lexpos
- raise LexError, ("Scanning error. Illegal character '%s'" % (lexdata[lexpos]), lexdata[lexpos:])
- if not newtok: continue
- self.lexpos = lexpos
- return newtok
+ return newtok
- self.lexpos = lexpos
- raise LexError, ("No match found", lexdata[lexpos:])
+ self.lexpos = lexpos
+ raise LexError, ("No match found", lexdata[lexpos:])
- # No more input data
self.lexpos = lexpos + 1
+ if self.lexdata is None:
+ raise RuntimeError, "No input string given with input()"
return None
-
# -----------------------------------------------------------------------------
# validate_file()
@@ -404,34 +418,74 @@ def validate_file(filename):
def _read_lextab(lexer, fdict, module):
exec "import %s as lextab" % module
- lexer.lexre = re.compile(lextab._lexre, re.VERBOSE | lextab._lexreflags)
+ lexre = []
+ for regex,ltab in lextab._lexre:
+ ftab = []
+ for t in ltab:
+ if t and t[0]:
+ ftab.append((fdict[t[0]], t[1]))
+ else:
+ ftab.append(t)
+ lexre.append((re.compile(regex, re.VERBOSE | lextab._lexreflags), ftab))
+ lexer.lexre = lexre
lexer.lexreflags = lextab._lexreflags
- lexer.lexindexfunc = lextab._lextab
- for i in range(len(lextab._lextab)):
- t = lexer.lexindexfunc[i]
- if t:
- if t[0]:
- lexer.lexindexfunc[i] = (fdict[t[0]],t[1])
lexer.lextokens = lextab._lextokens
lexer.lexignore = lextab._lexignore
if lextab._lexerrorf:
lexer.lexerrorf = fdict[lextab._lexerrorf]
-
+ lexer.lexliterals = lextab._lexliterals
+
+# -----------------------------------------------------------------------------
+# _form_master_re()
+#
+# This function takes a list of all of the regex components and attempts to
+# form the master regular expression. Given limitations in the Python re
+# module, it may be necessary to break the master regex into separate expressions.
+# -----------------------------------------------------------------------------
+
+def _form_master_re(relist,reflags,ldict):
+ if not relist: return []
+ regex = "|".join(relist)
+ try:
+ lexre = re.compile(regex,re.VERBOSE | reflags)
+
+ # Build the index to function map for the matching engine
+ lexindexfunc = [ None ] * (max(lexre.groupindex.values())+1)
+ for f,i in lexre.groupindex.items():
+ handle = ldict.get(f,None)
+ if type(handle) in (types.FunctionType, types.MethodType):
+ lexindexfunc[i] = (handle,handle.__name__[2:])
+ elif handle is not None:
+ # If rule was specified as a string, we build an anonymous
+ # callback function to carry out the action
+ lexindexfunc[i] = (None,f[2:])
+
+ return [(lexre,lexindexfunc)],[regex]
+ except Exception,e:
+ m = int(len(relist)/2)
+ if m == 0: m = 1
+ llist, lre = _form_master_re(relist[:m],reflags,ldict)
+ rlist, rre = _form_master_re(relist[m:],reflags,ldict)
+ return llist+rlist, lre+rre
+
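(Illustration only: the usual reason the master pattern must be split is that older versions of Python's re module cap the number of groups allowed in a single compiled pattern, so a grammar with enough rules makes the combined regex fail to compile and it is recursively halved. A hypothetical call might look like this; the rule names are made up.)

    # Hypothetical sketch of combining three string rules into master patterns.
    ldict  = { "t_PLUS": r"\+", "t_MINUS": r"-", "t_NUMBER": r"\d+" }
    relist = [ "(?P<%s>%s)" % (name, pat) for name, pat in ldict.items() ]
    masters, patterns = _form_master_re(relist, 0, ldict)
    # masters is a list of (compiled regex, group-index table) pairs and
    # patterns the corresponding source strings; normally just one of each.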
# -----------------------------------------------------------------------------
# lex(module)
#
# Build all of the regular expression rules from definitions in the supplied module
# -----------------------------------------------------------------------------
-def lex(module=None,debug=0,optimize=0,lextab="lextab",reflags=0):
+def lex(module=None,object=None,debug=0,optimize=0,lextab="lextab",reflags=0):
+ global lexer
ldict = None
- regex = ""
+ regex_list = []
error = 0
files = { }
- lexer = Lexer()
- lexer.debug = debug
- lexer.optimize = optimize
+ lexobj = Lexer()
+ lexobj.lexdebug = debug
+ lexobj.lexoptimize = optimize
global token,input
+ if object: module = object
+
if module:
# User supplied a module object.
if isinstance(module, types.ModuleType):
@@ -443,6 +497,7 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab",reflags=0):
ldict[i] = v
else:
raise ValueError,"Expected a module or instance"
+ lexobj.lexmodule = module
else:
# No module given. We might be able to get information from the caller.
@@ -456,11 +511,12 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab",reflags=0):
if optimize and lextab:
try:
- _read_lextab(lexer,ldict, lextab)
- if not lexer.lexignore: lexer.lexignore = ""
- token = lexer.token
- input = lexer.input
- return lexer
+ _read_lextab(lexobj,ldict, lextab)
+ if not lexobj.lexignore: lexobj.lexignore = ""
+ token = lexobj.token
+ input = lexobj.input
+ lexer = lexobj
+ return lexobj
except ImportError:
pass
@@ -480,7 +536,7 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab",reflags=0):
raise SyntaxError,"lex: tokens must be a list or tuple."
# Build a dictionary of valid token names
- lexer.lextokens = { }
+ lexobj.lextokens = { }
if not optimize:
# Utility function for verifying tokens
@@ -493,15 +549,26 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab",reflags=0):
if not is_identifier(n):
print "lex: Bad token name '%s'" % n
error = 1
- if lexer.lextokens.has_key(n):
+ if lexobj.lextokens.has_key(n):
print "lex: Warning. Token '%s' multiply defined." % n
- lexer.lextokens[n] = None
+ lexobj.lextokens[n] = None
else:
- for n in tokens: lexer.lextokens[n] = None
+ for n in tokens: lexobj.lextokens[n] = None
if debug:
- print "lex: tokens = '%s'" % lexer.lextokens.keys()
+ print "lex: tokens = '%s'" % lexobj.lextokens.keys()
+
+ # Get literals
+ if (module and isinstance(module,_INSTANCETYPE)):
+ literals = getattr(module,"literals","")
+ else:
+ try:
+ literals = ldict["literals"]
+ except KeyError:
+ literals = ""
+
+ lexobj.lexliterals = literals
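(Illustration only, hypothetical token module, not from the patch: the new literals support lets single characters be declared once and returned as tokens whose type is the character itself whenever no t_ rule matches.)

    # Hypothetical token module fragment using the new 'literals' feature.
    tokens   = ('NUMBER',)
    literals = "+-*/()"          # each character becomes its own token type
    t_NUMBER = r'\d+'
    t_ignore = ' \t'
    def t_error(t):
        t.skip(1)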
# Get a list of symbols with the t_ prefix
tsymbols = [f for f in ldict.keys() if f[:2] == 't_']
@@ -559,15 +626,21 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab",reflags=0):
continue
if f.__name__ == 't_error':
- lexer.lexerrorf = f
+ lexobj.lexerrorf = f
continue
if f.__doc__:
if not optimize:
try:
- c = re.compile(f.__doc__, re.VERBOSE | reflags)
+ c = re.compile("(?P<%s>%s)" % (f.__name__,f.__doc__), re.VERBOSE | reflags)
+ if c.match(""):
+ print "%s:%d: Regular expression for rule '%s' matches empty string." % (file,line,f.__name__)
+ error = 1
+ continue
except re.error,e:
print "%s:%d: Invalid regular expression for rule '%s'. %s" % (file,line,f.__name__,e)
+ if '#' in f.__doc__:
+ print "%s:%d. Make sure '#' in rule '%s' is escaped with '\\#'." % (file,line, f.__name__)
error = 1
continue
@@ -577,8 +650,7 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab",reflags=0):
# Okay. The regular expression seemed okay. Let's append it to the master regular
# expression we're building
- if (regex): regex += "|"
- regex += "(?P<%s>%s)" % (f.__name__,f.__doc__)
+ regex_list.append("(?P<%s>%s)" % (f.__name__,f.__doc__))
else:
print "%s:%d: No regular expression defined for rule '%s'" % (file,line,f.__name__)
@@ -586,7 +658,7 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab",reflags=0):
for name,r in ssymbols:
if name == 't_ignore':
- lexer.lexignore = r
+ lexobj.lexignore = r
continue
if not optimize:
@@ -595,41 +667,38 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab",reflags=0):
error = 1
continue
- if not lexer.lextokens.has_key(name[2:]):
+ if not lexobj.lextokens.has_key(name[2:]):
print "lex: Rule '%s' defined for an unspecified token %s." % (name,name[2:])
error = 1
continue
try:
- c = re.compile(r,re.VERBOSE)
+ c = re.compile("(?P<%s>%s)" % (name,r),re.VERBOSE | reflags)
+ if (c.match("")):
+ print "lex: Regular expression for rule '%s' matches empty string." % name
+ error = 1
+ continue
except re.error,e:
print "lex: Invalid regular expression for rule '%s'. %s" % (name,e)
+ if '#' in r:
+ print "lex: Make sure '#' in rule '%s' is escaped with '\\#'." % name
+
error = 1
continue
if debug:
print "lex: Adding rule %s -> '%s'" % (name,r)
- if regex: regex += "|"
- regex += "(?P<%s>%s)" % (name,r)
+ regex_list.append("(?P<%s>%s)" % (name,r))
if not optimize:
for f in files.keys():
if not validate_file(f):
error = 1
try:
- if debug:
- print "lex: regex = '%s'" % regex
- lexer.lexre = re.compile(regex, re.VERBOSE | reflags)
- # Build the index to function map for the matching engine
- lexer.lexindexfunc = [ None ] * (max(lexer.lexre.groupindex.values())+1)
- for f,i in lexer.lexre.groupindex.items():
- handle = ldict[f]
- if type(handle) in (types.FunctionType, types.MethodType):
- lexer.lexindexfunc[i] = (handle,handle.__name__[2:])
- else:
- # If rule was specified as a string, we build an anonymous
- # callback function to carry out the action
- lexer.lexindexfunc[i] = (None,f[2:])
+ lexobj.lexre, re_groups = _form_master_re(regex_list,reflags,ldict)
+ if debug:
+ for i in range(len(re_groups)):
+ print "lex: regex[%d] = '%s'" % (i, re_groups[i])
# If a lextab was specified, we create a file containing the precomputed
# regular expression and index table
@@ -637,26 +706,32 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab",reflags=0):
if lextab and optimize:
lt = open(lextab+".py","w")
lt.write("# %s.py. This file automatically created by PLY. Don't edit.\n" % lextab)
- lt.write("_lexre = %s\n" % repr(regex))
+
+ tabre = []
+ for i in range(len(re_groups)):
+ indexf = []
+ for t in lexobj.lexre[i][1]:
+ if t:
+ if t[0]:
+ indexf.append((t[0].__name__,t[1]))
+ else:
+ indexf.append((None,t[1]))
+ else:
+ indexf.append(None)
+ tabre.append((re_groups[i],indexf))
+
+ lt.write("_lexre = %s\n" % repr(tabre))
+
+ # Create an alternative lexre representation
+
lt.write("_lexreflags = %d\n" % reflags)
- lt.write("_lextab = [\n");
- for i in range(0,len(lexer.lexindexfunc)):
- t = lexer.lexindexfunc[i]
- if t:
- if t[0]:
- lt.write(" ('%s',%s),\n"% (t[0].__name__, repr(t[1])))
- else:
- lt.write(" (None,%s),\n" % repr(t[1]))
- else:
- lt.write(" None,\n")
-
- lt.write("]\n");
- lt.write("_lextokens = %s\n" % repr(lexer.lextokens))
- lt.write("_lexignore = %s\n" % repr(lexer.lexignore))
- if (lexer.lexerrorf):
- lt.write("_lexerrorf = %s\n" % repr(lexer.lexerrorf.__name__))
+ lt.write("_lextokens = %s\n" % repr(lexobj.lextokens))
+ lt.write("_lexignore = %s\n" % repr(lexobj.lexignore))
+ if (lexobj.lexerrorf):
+ lt.write("_lexerrorf = %s\n" % repr(lexobj.lexerrorf.__name__))
else:
lt.write("_lexerrorf = None\n")
+ lt.write("_lexliterals = %s\n" % repr(lexobj.lexliterals))
lt.close()
except re.error,e:
@@ -664,16 +739,17 @@ def lex(module=None,debug=0,optimize=0,lextab="lextab",reflags=0):
error = 1
if error:
raise SyntaxError,"lex: Unable to build lexer."
- if not lexer.lexerrorf:
+ if not lexobj.lexerrorf:
print "lex: Warning. no t_error rule is defined."
- if not lexer.lexignore: lexer.lexignore = ""
+ if not lexobj.lexignore: lexobj.lexignore = ""
# Create global versions of the token() and input() functions
- token = lexer.token
- input = lexer.input
+ token = lexobj.token
+ input = lexobj.input
+ lexer = lexobj
- return lexer
+ return lexobj
# -----------------------------------------------------------------------------
# run()
@@ -705,8 +781,22 @@ def runmain(lexer=None,data=None):
while 1:
tok = _token()
if not tok: break
- print "(%s,%r,%d)" % (tok.type, tok.value, tok.lineno)
+ print "(%s,%r,%d,%d)" % (tok.type, tok.value, tok.lineno,tok.lexpos)
+
+# -----------------------------------------------------------------------------
+# @TOKEN(regex)
+#
+# This decorator function can be used to set the regular expression of a rule
+# function when its docstring needs to be constructed in some other way
+# -----------------------------------------------------------------------------
+
+def TOKEN(r):
+ def set_doc(f):
+ f.__doc__ = r
+ return f
+ return set_doc
+
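(Illustration only: with the decorator above, a rule's pattern can be computed at runtime instead of being written out as a docstring literal. The names digit and t_NUMBER below are hypothetical.)

    digit = r'[0-9]'

    @TOKEN(digit + r'+')
    def t_NUMBER(t):
        # TOKEN stores the pattern in t_NUMBER.__doc__, which is where
        # lex() looks for the rule's regular expression.
        t.value = int(t.value)
        return t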
diff --git a/tools/yacc.py b/tools/yacc.py
index cc5840c139..bd63d8d949 100755
--- a/tools/yacc.py
+++ b/tools/yacc.py
@@ -5,8 +5,6 @@
#
# Copyright (C) 2001-2006, David M. Beazley
#
-# $Header: /cvs/projects/PLY/yacc.py,v 1.6 2004/05/26 20:51:34 beazley Exp $
-#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
@@ -25,8 +23,10 @@
#
#
# This implements an LR parser that is constructed from grammar rules defined
-# as Python functions. Roughly speaking, this module is a cross between
-# John Aycock's Spark system and the GNU bison utility.
+# as Python functions. The grammar is specified by supplying the BNF inside
+# Python documentation strings. The inspiration for this technique was borrowed
+# from John Aycock's Spark parsing system. PLY might be viewed as a cross between
+# Spark and the GNU bison utility.
#
# The current implementation is only somewhat object-oriented. The
# LR parser itself is defined in terms of an object (which allows multiple
@@ -36,8 +36,10 @@
# time using threads (in which case they should have their head examined).
#
# This implementation supports both SLR and LALR(1) parsing. LALR(1)
-# support was implemented by Elias Ioup (ezioup@alumni.uchicago.edu)
-# and hacked abit by Dave to run faster.
+# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
+# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
+# Techniques, and Tools" (The Dragon Book). LALR(1) has since been replaced
+# by the more efficient DeRemer and Pennello algorithm.
#
# :::::::: WARNING :::::::
#
@@ -48,7 +50,7 @@
# own risk!
# ----------------------------------------------------------------------------
-__version__ = "1.8"
+__version__ = "2.1"
import types
@@ -63,7 +65,7 @@ yaccdebug = 1 # Debugging mode. If set, yacc generates a
debug_file = 'parser.out' # Default name of the debugging file
tab_module = 'parsetab' # Default name of the table module
-default_lr = 'SLR' # Default LR table generation method
+default_lr = 'LALR' # Default LR table generation method
error_count = 3 # Number of symbols that must be shifted to leave recovery mode
@@ -100,13 +102,15 @@ class YaccSymbol:
# for a symbol.
class YaccProduction:
- def __init__(self,s):
+ def __init__(self,s,stack=None):
self.slice = s
self.pbstack = []
+ self.stack = stack
def __getitem__(self,n):
if type(n) == types.IntType:
- return self.slice[n].value
+ if n >= 0: return self.slice[n].value
+ else: return self.stack[n].value
else:
return [s.value for s in self.slice[n.start:n.stop:n.step]]
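(Illustration only: the stack reference added above is what makes negative indices work; p[-1] is the value of the symbol sitting just below the current rule's right-hand side on the parser stack. The rule names are hypothetical.)

    def p_seen_A(p):
        'seen_A :'
        # Empty marker rule embedded in e.g. "statement : A seen_A B";
        # when it reduces, p[-1] is the value of the A already on the stack.
        print "saw A with value", p[-1]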
@@ -161,7 +165,7 @@ class Parser:
del self.statestack[:]
del self.symstack[:]
sym = YaccSymbol()
- sym.type = '$'
+ sym.type = '$end'
self.symstack.append(sym)
self.statestack.append(0)
@@ -177,7 +181,8 @@ class Parser:
# If no lexer was given, we will try to use the lex module
if not lexer:
- import lex as lexer
+ import lex
+ lexer = lex.lexer
pslice.lexer = lexer
@@ -193,12 +198,13 @@ class Parser:
symstack = [ ] # Stack of grammar symbols
self.symstack = symstack
+ pslice.stack = symstack # Put in the production
errtoken = None # Err token
- # The start state is assumed to be (0,$)
+ # The start state is assumed to be (0,$end)
statestack.append(0)
sym = YaccSymbol()
- sym.type = '$'
+ sym.type = '$end'
symstack.append(sym)
while 1:
@@ -214,7 +220,7 @@ class Parser:
lookahead = lookaheadstack.pop()
if not lookahead:
lookahead = YaccSymbol()
- lookahead.type = '$'
+ lookahead.type = '$end'
if debug:
errorlead = ("%s . %s" % (" ".join([xx.type for xx in symstack][1:]), str(lookahead))).lstrip()
@@ -225,14 +231,10 @@ class Parser:
if debug > 1:
print 'action', t
- if t is None:
- t = actions.get((s,'#'),None)
- if debug > 1:
- print 'default action', t
if t is not None:
if t > 0:
# shift a symbol on the stack
- if ltype == '$':
+ if ltype == '$end':
# Error, end of input
sys.stderr.write("yacc: Parse error. EOF\n")
return
@@ -311,7 +313,7 @@ class Parser:
if not self.errorcount:
self.errorcount = error_count
errtoken = lookahead
- if errtoken.type == '$':
+ if errtoken.type == '$end':
errtoken = None # End of file!
if self.errorfunc:
global errok,token,restart
@@ -347,7 +349,7 @@ class Parser:
# entire parse has been rolled back and we're completely hosed. The token is
# discarded and we just keep going.
- if len(statestack) <= 1 and lookahead.type != '$':
+ if len(statestack) <= 1 and lookahead.type != '$end':
lookahead = None
errtoken = None
# Nuke the pushback stack
@@ -358,7 +360,7 @@ class Parser:
# at the end of the file. nuke the top entry and generate an error token
# Start nuking entries on the stack
- if lookahead.type == '$':
+ if lookahead.type == '$end':
# Whoa. We're really hosed here. Bail out
return
@@ -465,9 +467,6 @@ def initialize_vars():
global Nonterminals, First, Follow, Precedence, LRitems
global Errorfunc, Signature, Requires
- # LALR(1) globals
- global Prodempty, TReductions, NTReductions, GotoSetNum, Canonical
-
Productions = [None] # A list of all of the productions. The first
# entry is always reserved for the purpose of
# building an augmented grammar
@@ -502,22 +501,6 @@ def initialize_vars():
Requires = { } # Requires list
- # LALR(1) Initialization
- Prodempty = { } # A dictionary of all productions that have an empty rule
- # of the form P : <empty>
-
- TReductions = { } # A dictionary of precomputer reductions from
- # nonterminals to terminals
-
- NTReductions = { } # A dictionary of precomputed reductions from
- # nonterminals to nonterminals
-
- GotoSetNum = { } # A dictionary that remembers goto sets based on
- # the state number and symbol
-
- Canonical = { } # A list of LR item sets. A LR item set is a list of LR
- # items that represent the state of the parser
-
# File objects used when creating the parser.out debugging file
global _vf, _vfc
_vf = cStringIO.StringIO()
@@ -635,7 +618,20 @@ def add_production(f,file,line,prodname,syms):
sys.stderr.write("%s:%d: Illegal rule name '%s'\n" % (file,line,prodname))
return -1
- for s in syms:
+ for x in range(len(syms)):
+ s = syms[x]
+ if s[0] in "'\"":
+ try:
+ c = eval(s)
+ if (len(c) > 1):
+ sys.stderr.write("%s:%d: Literal token %s in rule '%s' may only be a single character\n" % (file,line,s, prodname))
+ return -1
+ if not Terminals.has_key(c):
+ Terminals[c] = []
+ syms[x] = c
+ continue
+ except SyntaxError:
+ pass
if not is_identifier(s) and s != '%prec':
sys.stderr.write("%s:%d: Illegal name '%s' in rule '%s'\n" % (file,line,s, prodname))
return -1
@@ -766,8 +762,12 @@ def add_function(f):
if assign != ':' and assign != '::=':
sys.stderr.write("%s:%d: Syntax error. Expected ':'\n" % (file,dline))
return -1
+
+
e = add_production(f,file,dline,prodname,syms)
error += e
+
+
except StandardError:
sys.stderr.write("%s:%d: Syntax error in rule '%s'\n" % (file,dline,ps))
error -= 1
@@ -823,7 +823,7 @@ def compute_terminates():
for t in Terminals.keys():
Terminates[t] = 1
- Terminates['$'] = 1
+ Terminates['$end'] = 1
# Nonterminals:
@@ -1064,14 +1064,14 @@ def first(beta):
# that might follow it. Dragon book, p. 189.
def compute_follow(start=None):
- # Add '$' to the follow list of the start symbol
+ # Add '$end' to the follow list of the start symbol
for k in Nonterminals.keys():
Follow[k] = [ ]
if not start:
start = Productions[1].name
- Follow[start] = [ '$' ]
+ Follow[start] = [ '$end' ]
while 1:
didadd = 0
@@ -1113,7 +1113,7 @@ def compute_first1():
for t in Terminals.keys():
First[t] = [t]
- First['$'] = ['$']
+ First['$end'] = ['$end']
First['#'] = ['#'] # what's this for?
# Nonterminals:
@@ -1215,55 +1215,16 @@ def lr0_goto(I,x):
s[id(n)] = s1
gs.append(n)
s = s1
- g = s.get('$',None)
+ g = s.get('$end',None)
if not g:
if gs:
g = lr0_closure(gs)
- s['$'] = g
+ s['$end'] = g
else:
- s['$'] = gs
+ s['$end'] = gs
_lr_goto_cache[(id(I),x)] = g
return g
-# Added for LALR(1)
-
-# Given a setnumber of an lr0 state and a symbol return the setnumber of the goto state
-def lr0_goto_setnumber(I_setnumber, x):
- global Canonical
- global GotoSetNum
-
- if GotoSetNum.has_key((I_setnumber, x)):
- setnumber = GotoSetNum[(I_setnumber, x)]
- else:
- gset = lr0_goto(Canonical[I_setnumber], x)
- if not gset:
- return -1
- else:
- gsetlen = len(gset)
- for i in xrange(len(gset[0].setnumbers)):
- inall = 1
- for item in gset:
- if not item.setnumbers[i]:
- inall = 0
- break
- if inall and len(Canonical[i]) == gsetlen:
- setnumber = i
- break # Note: DB. I added this to improve performance.
- # Not sure if this breaks the algorithm (it doesn't appear to).
-
- GotoSetNum[(I_setnumber, x)] = setnumber
-
- return setnumber
-
-# Compute the kernel of a set of LR(0) items
-def lr0_kernel(I):
- KI = [ ]
- for p in I:
- if p.name == "S'" or p.lr_index > 0 or p.len == 0:
- KI.append(p)
-
- return KI
-
_lr0_cidhash = { }
# Compute the LR(0) sets of item function
@@ -1297,468 +1258,382 @@ def lr0_items():
return C
# -----------------------------------------------------------------------------
-# slr_parse_table()
+# ==== LALR(1) Parsing ====
+#
+# LALR(1) parsing is almost exactly the same as SLR except that instead of
+# relying upon Follow() sets when performing reductions, a more selective
+# lookahead set that incorporates the state of the LR(0) machine is utilized.
+# Thus, we mainly just have to focus on calculating the lookahead sets.
+#
+# The method used here is due to DeRemer and Pennello (1982).
+#
+# DeRemer, F. L., and T. J. Pennello: "Efficient Computation of LALR(1)
+# Lookahead Sets", ACM Transactions on Programming Languages and Systems,
+# Vol. 4, No. 4, Oct. 1982, pp. 615-649
#
-# This function constructs an SLR table.
+# Further details can also be found in:
+#
+# J. Tremblay and P. Sorenson, "The Theory and Practice of Compiler Writing",
+# McGraw-Hill Book Company, (1985).
+#
+# Note: This implementation is a complete replacement of the LALR(1)
+# implementation in PLY-1.x releases. That version was based on
+# a less efficient algorithm and it had bugs in its implementation.
# -----------------------------------------------------------------------------
-def slr_parse_table():
- global _lr_method
- goto = _lr_goto # Goto array
- action = _lr_action # Action array
- actionp = { } # Action production array (temporary)
-
- _lr_method = "SLR"
-
- n_srconflict = 0
- n_rrconflict = 0
- if yaccdebug:
- sys.stderr.write("yacc: Generating SLR parsing table...\n")
- _vf.write("\n\nParsing method: SLR\n\n")
-
- # Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
- # This determines the number of states
-
- C = lr0_items()
+# -----------------------------------------------------------------------------
+# compute_nullable_nonterminals()
+#
+# Creates a dictionary containing all of the non-terminals that might produce
+# an empty production.
+# -----------------------------------------------------------------------------
- # Build the parser table, state by state
- st = 0
- for I in C:
- # Loop over each production in I
- actlist = [ ] # List of actions
-
- if yaccdebug:
- _vf.write("\nstate %d\n\n" % st)
- for p in I:
- _vf.write(" (%d) %s\n" % (p.number, str(p)))
- _vf.write("\n")
+def compute_nullable_nonterminals():
+ nullable = {}
+ num_nullable = 0
+ while 1:
+ for p in Productions[1:]:
+ if p.len == 0:
+ nullable[p.name] = 1
+ continue
+ for t in p.prod:
+ if not nullable.has_key(t): break
+ else:
+ nullable[p.name] = 1
+ if len(nullable) == num_nullable: break
+ num_nullable = len(nullable)
+ return nullable
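(Illustration only, hypothetical grammar: the fixed point above marks a nonterminal nullable if it has an empty production or a production whose symbols are all already nullable.)

    # empty    :                     -> nullable (zero-length production)
    # opt_sign : '+'
    # opt_sign : empty               -> nullable (every RHS symbol is nullable)
    # number   : opt_sign DIGITS     -> not nullable (DIGITS is a terminal)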
- for p in I:
- try:
- if p.prod[-1] == ".":
- if p.name == "S'":
- # Start symbol. Accept!
- action[st,"$"] = 0
- actionp[st,"$"] = p
- else:
- # We are at the end of a production. Reduce!
- for a in Follow[p.name]:
- actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
- r = action.get((st,a),None)
- if r is not None:
- # Whoa. Have a shift/reduce or reduce/reduce conflict
- if r > 0:
- # Need to decide on shift or reduce here
- # By default we favor shifting. Need to add
- # some precedence rules here.
- sprec,slevel = Productions[actionp[st,a].number].prec
- rprec,rlevel = Precedence.get(a,('right',0))
- if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
- # We really need to reduce here.
- action[st,a] = -p.number
- actionp[st,a] = p
- if not slevel and not rlevel:
- _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
- _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
- n_srconflict += 1
- elif (slevel == rlevel) and (rprec == 'nonassoc'):
- action[st,a] = None
- else:
- # Hmmm. Guess we'll keep the shift
- if not slevel and not rlevel:
- _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
- _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
- n_srconflict +=1
- elif r < 0:
- # Reduce/reduce conflict. In this case, we favor the rule
- # that was defined first in the grammar file
- oldp = Productions[-r]
- pp = Productions[p.number]
- if oldp.line > pp.line:
- action[st,a] = -p.number
- actionp[st,a] = p
- # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st)
- n_rrconflict += 1
- _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
- _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
- else:
- sys.stderr.write("Unknown conflict in state %d\n" % st)
- else:
- action[st,a] = -p.number
- actionp[st,a] = p
- else:
- i = p.lr_index
- a = p.prod[i+1] # Get symbol right after the "."
- if Terminals.has_key(a):
- g = lr0_goto(I,a)
- j = _lr0_cidhash.get(id(g),-1)
- if j >= 0:
- # We are in a shift state
- actlist.append((a,p,"shift and go to state %d" % j))
- r = action.get((st,a),None)
- if r is not None:
- # Whoa have a shift/reduce or shift/shift conflict
- if r > 0:
- if r != j:
- sys.stderr.write("Shift/shift conflict in state %d\n" % st)
- elif r < 0:
- # Do a precedence check.
- # - if precedence of reduce rule is higher, we reduce.
- # - if precedence of reduce is same and left assoc, we reduce.
- # - otherwise we shift
- rprec,rlevel = Productions[actionp[st,a].number].prec
- sprec,slevel = Precedence.get(a,('right',0))
- if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
- # We decide to shift here... highest precedence to shift
- action[st,a] = j
- actionp[st,a] = p
- if not slevel and not rlevel:
- n_srconflict += 1
- _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
- _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
- elif (slevel == rlevel) and (rprec == 'nonassoc'):
- action[st,a] = None
- else:
- # Hmmm. Guess we'll keep the reduce
- if not slevel and not rlevel:
- n_srconflict +=1
- _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
- _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
-
- else:
- sys.stderr.write("Unknown conflict in state %d\n" % st)
- else:
- action[st,a] = j
- actionp[st,a] = p
-
- except StandardError,e:
- raise YaccError, "Hosed in slr_parse_table", e
+# -----------------------------------------------------------------------------
+# find_nonterminal_trans(C)
+#
+# Given a set of LR(0) items, this function finds all of the non-terminal
+# transitions. These are transitions in which a dot appears immediately before
+# a non-terminal. Returns a list of tuples of the form (state,N) where state
+# is the state number and N is the nonterminal symbol.
+#
+# The input C is the set of LR(0) items.
+# -----------------------------------------------------------------------------
- # Print the actions associated with each terminal
- if yaccdebug:
- _actprint = { }
- for a,p,m in actlist:
- if action.has_key((st,a)):
- if p is actionp[st,a]:
- _vf.write(" %-15s %s\n" % (a,m))
- _actprint[(a,m)] = 1
- _vf.write("\n")
- for a,p,m in actlist:
- if action.has_key((st,a)):
- if p is not actionp[st,a]:
- if not _actprint.has_key((a,m)):
- _vf.write(" ! %-15s [ %s ]\n" % (a,m))
- _actprint[(a,m)] = 1
-
- # Construct the goto table for this state
- if yaccdebug:
- _vf.write("\n")
- nkeys = { }
- for ii in I:
- for s in ii.usyms:
- if Nonterminals.has_key(s):
- nkeys[s] = None
- for n in nkeys.keys():
- g = lr0_goto(I,n)
- j = _lr0_cidhash.get(id(g),-1)
- if j >= 0:
- goto[st,n] = j
- if yaccdebug:
- _vf.write(" %-30s shift and go to state %d\n" % (n,j))
+def find_nonterminal_transitions(C):
+ trans = []
+ for state in range(len(C)):
+ for p in C[state]:
+ if p.lr_index < p.len - 1:
+ t = (state,p.prod[p.lr_index+1])
+ if Nonterminals.has_key(t[1]):
+ if t not in trans: trans.append(t)
+ state = state + 1
+ return trans
- st += 1
+# -----------------------------------------------------------------------------
+# dr_relation()
+#
+# Computes the DR(p,A) relationships for non-terminal transitions. The input
+# is a tuple (state,N) where state is a number and N is a nonterminal symbol.
+#
+# Returns a list of terminals.
+# -----------------------------------------------------------------------------
- if yaccdebug:
- if n_srconflict == 1:
- sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict)
- if n_srconflict > 1:
- sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict)
- if n_rrconflict == 1:
- sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict)
- if n_rrconflict > 1:
- sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict)
+def dr_relation(C,trans,nullable):
+ dr_set = { }
+ state,N = trans
+ terms = []
+ g = lr0_goto(C[state],N)
+ for p in g:
+ if p.lr_index < p.len - 1:
+ a = p.prod[p.lr_index+1]
+ if Terminals.has_key(a):
+ if a not in terms: terms.append(a)
+ # This extra bit is to handle the start state
+ if state == 0 and N == Productions[0].prod[0]:
+ terms.append('$end')
+
+ return terms
# -----------------------------------------------------------------------------
-# ==== LALR(1) Parsing ====
-# FINISHED! 5/20/2003 by Elias Ioup
+# reads_relation()
+#
+# Computes the READS() relation (p,A) READS (t,C).
# -----------------------------------------------------------------------------
+def reads_relation(C, trans, empty):
+ # Look for empty transitions
+ rel = []
+ state, N = trans
-# Compute the lr1_closure of a set I. I is a list of productions and setnumber
-# is the state that you want the lr items that are made from the to come from.
+ g = lr0_goto(C[state],N)
+ j = _lr0_cidhash.get(id(g),-1)
+ for p in g:
+ if p.lr_index < p.len - 1:
+ a = p.prod[p.lr_index + 1]
+ if empty.has_key(a):
+ rel.append((j,a))
-_lr1_add_count = 0
+ return rel
-def lr1_closure(I, setnumber = 0):
- global _add_count
- global Nonterminals
+# -----------------------------------------------------------------------------
+# compute_lookback_includes()
+#
+# Determines the lookback and includes relations
+#
+# LOOKBACK:
+#
+# This relation is determined by running the LR(0) state machine forward.
+# For example, starting with a production "N : . A B C", we run it forward
+# to obtain "N : A B C ." We then build a relationship between this final
+# state and the starting state. These relationships are stored in a dictionary
+# lookdict.
+#
+# INCLUDES:
+#
+# Computes the INCLUDE() relation (p,A) INCLUDES (p',B).
+#
+# This relation is used to determine non-terminal transitions that occur
+# inside of other non-terminal transition states. (p,A) INCLUDES (p', B)
+# if the following holds:
+#
+# B -> LAT, where T -> epsilon and p' -L-> p
+#
+# L is essentially a prefix (which may be empty), T is a suffix that must be
+# able to derive an empty string. State p' must lead to state p with the string L.
+#
+# -----------------------------------------------------------------------------
- _add_count += 1
- prodlist = Productions
+def compute_lookback_includes(C,trans,nullable):
+
+ lookdict = {} # Dictionary of lookback relations
+ includedict = {} # Dictionary of include relations
- # Add everything in I to J
- J = I[:]
- Jhash = { }
- for j in J:
- Jhash[id(j)] = 1
+ # Make a dictionary of non-terminal transitions
+ dtrans = {}
+ for t in trans:
+ dtrans[t] = 1
+
+ # Loop over all transitions and compute lookbacks and includes
+ for state,N in trans:
+ lookb = []
+ includes = []
+ for p in C[state]:
+ if p.name != N: continue
- didadd = 1
- while didadd:
- didadd = 0
- for j in J:
- jprod = j.prod
- jlr_index = j.lr_index
- jprodslice = jprod[jlr_index+2:]
-
- if jlr_index < len(jprod) - 1 and Nonterminals.has_key(jprod[jlr_index+1]):
- first_syms = []
-
- if j.lk_added.setdefault(setnumber, 0) < len(j.lookaheads[setnumber]):
- for a in j.lookaheads[setnumber][j.lk_added[setnumber]:]:
- # find b in FIRST(Xa) if j = [A->a.BX,a]
- temp_first_syms = first(jprodslice + (a,))
- for x in temp_first_syms:
- if x not in first_syms:
- first_syms.append(x)
-
- j.lk_added[setnumber] = len(j.lookaheads[setnumber])
-
- for x in j.lrafter:
-
- # Add B --> .G to J
- if x.lr_next.lookaheads.has_key(setnumber):
- _xlook = x.lr_next.lookaheads[setnumber]
- for s in first_syms:
- if s not in _xlook:
- _xlook.append(s)
- didadd = 1
- else:
- x.lr_next.lookaheads[setnumber] = first_syms
- didadd = 1
-
- nid = id(x.lr_next)
- if not Jhash.has_key(nid):
- J.append(x.lr_next)
- Jhash[nid] = 1
-
- return J
+ # Okay, we have a name match. We now follow the production all the way
+ # through the state machine until we get the . on the right hand side
+
+ lr_index = p.lr_index
+ j = state
+ while lr_index < p.len - 1:
+ lr_index = lr_index + 1
+ t = p.prod[lr_index]
+
+ # Check to see if this symbol and state are a non-terminal transition
+ if dtrans.has_key((j,t)):
+ # Yes. Okay, there is some chance that this is an includes relation
+ # the only way to know for certain is whether the rest of the
+ # production derives empty
+
+ li = lr_index + 1
+ while li < p.len:
+ if Terminals.has_key(p.prod[li]): break # No forget it
+ if not nullable.has_key(p.prod[li]): break
+ li = li + 1
+ else:
+ # Appears to be a relation between (j,t) and (state,N)
+ includes.append((j,t))
+
+ g = lr0_goto(C[j],t) # Go to next set
+ j = _lr0_cidhash.get(id(g),-1) # Go to next state
+
+ # When we get here, j is the final state, now we have to locate the production
+ for r in C[j]:
+ if r.name != p.name: continue
+ if r.len != p.len: continue
+ i = 0
+ # This loop is comparing a production ". A B C" with "A B C ."
+ while i < r.lr_index:
+ if r.prod[i] != p.prod[i+1]: break
+ i = i + 1
+ else:
+ lookb.append((j,r))
+ for i in includes:
+ if not includedict.has_key(i): includedict[i] = []
+ includedict[i].append((state,N))
+ lookdict[(state,N)] = lookb
+
+ return lookdict,includedict
-def add_lookaheads(K):
- spontaneous = []
- propogate = []
-
- for setnumber in range(len(K)):
- for kitem in K[setnumber]:
- kitem.lookaheads[setnumber] = ['#']
- J = lr1_closure([kitem], setnumber)
-
- # find the lookaheads that are spontaneously created from closures
- # and the propogations of lookaheads between lr items
- for item in J:
- if item.lr_index < len(item.prod)-1:
- for lookahead in item.lookaheads[setnumber]:
- goto_setnumber = lr0_goto_setnumber(setnumber, item.prod[item.lr_index+1])
- next = None
- if lookahead != '#':
- if item.lr_next in K[goto_setnumber]:
- next = item.lr_next
- if next:
- spontaneous.append((next, (lookahead, goto_setnumber)))
- else:
- if goto_setnumber > -1:
- if item.lr_next in K[goto_setnumber]:
- next = item.lr_next
-
- if next:
- propogate.append(((kitem, setnumber), (next, goto_setnumber)))
+# -----------------------------------------------------------------------------
+# digraph()
+# traverse()
+#
+# The following two functions are used to compute set valued functions
+# of the form:
+#
+# F(x) = F'(x) U U{F(y) | x R y}
+#
+# This is used to compute the values of Read() sets as well as FOLLOW sets
+# in LALR(1) generation.
+#
+# Inputs: X - An input set
+# R - A relation
+# FP - Set-valued function
+# ------------------------------------------------------------------------------
+
+def digraph(X,R,FP):
+ N = { }
+ for x in X:
+ N[x] = 0
+ stack = []
+ F = { }
+ for x in X:
+ if N[x] == 0: traverse(x,N,stack,F,X,R,FP)
+ return F
+
+def traverse(x,N,stack,F,X,R,FP):
+ stack.append(x)
+ d = len(stack)
+ N[x] = d
+ F[x] = FP(x) # F(X) <- F'(x)
+
+ rel = R(x) # Get y's related to x
+ for y in rel:
+ if N[y] == 0:
+ traverse(y,N,stack,F,X,R,FP)
+ N[x] = min(N[x],N[y])
+ for a in F.get(y,[]):
+ if a not in F[x]: F[x].append(a)
+ if N[x] == d:
+ N[stack[-1]] = sys.maxint
+ F[stack[-1]] = F[x]
+ element = stack.pop()
+ while element != x:
+ N[stack[-1]] = sys.maxint
+ F[stack[-1]] = F[x]
+ element = stack.pop()
-
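(Illustration only, toy inputs: digraph() computes F(x) = F'(x) U U{ F(y) | x R y } by a depth-first traversal that also merges values around cycles; the Read and Follow computations below call it with the relations defined above.)

    # Hypothetical toy run: 'a' relates to 'b', 'b' relates to 'c'.
    X  = ['a', 'b', 'c']
    R  = lambda x: {'a': ['b'], 'b': ['c'], 'c': []}[x]
    FP = lambda x: {'a': [1], 'b': [2], 'c': [3]}[x]
    F  = digraph(X, R, FP)
    # F == {'a': [1, 2, 3], 'b': [2, 3], 'c': [3]}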
+# -----------------------------------------------------------------------------
+# compute_read_sets()
+#
+# Given a set of LR(0) items, this function computes the read sets.
+#
+# Inputs: C = Set of LR(0) items
+# ntrans = Set of nonterminal transitions
+# nullable = Set of empty transitions
+#
+# Returns a set containing the read sets
+# -----------------------------------------------------------------------------
- for x in K[setnumber]:
- x.lookaheads[setnumber] = []
+def compute_read_sets(C, ntrans, nullable):
+ FP = lambda x: dr_relation(C,x,nullable)
+ R = lambda x: reads_relation(C,x,nullable)
+ F = digraph(ntrans,R,FP)
+ return F
- for x in spontaneous:
- if x[1][0] not in x[0].lookaheads[x[1][1]]:
- x[0].lookaheads[x[1][1]].append(x[1][0])
+# -----------------------------------------------------------------------------
+# compute_follow_sets()
+#
+# Given a set of LR(0) items, a set of non-terminal transitions, a readset,
+# and an include set, this function computes the follow sets
+#
+# Follow(p,A) = Read(p,A) U U {Follow(p',B) | (p,A) INCLUDES (p',B)}
+#
+# Inputs:
+# ntrans = Set of nonterminal transitions
+# readsets = Readset (previously computed)
+# inclsets = Include sets (previously computed)
+#
+# Returns a set containing the follow sets
+# -----------------------------------------------------------------------------
- K[0][0].lookaheads[0] = ['$']
+def compute_follow_sets(ntrans,readsets,inclsets):
+ FP = lambda x: readsets[x]
+ R = lambda x: inclsets.get(x,[])
+ F = digraph(ntrans,R,FP)
+ return F
- pitems = {}
- for x in propogate:
- if pitems.has_key(x[0]):
- pitems[x[0]].append(x[1])
- else:
- pitems[x[0]] = []
- pitems[x[0]].append(x[1])
-
- # propogate the lookaheads that were spontaneously generated
- # based on the propogations produced above
- stop = 0
-
- while not stop:
- stop = 1
- kindex = 0
- for set in K:
- for item in set:
- pkey = (item, kindex)
- if pitems.has_key(pkey):
- for propogation in pitems[pkey]:
- gitem = propogation[0]
- gsetnumber = propogation[1]
- glookaheads = gitem.lookaheads[gsetnumber]
- for lookahead in item.lookaheads[kindex]:
- if lookahead not in glookaheads:
- glookaheads.append(lookahead)
- stop = 0
- kindex += 1
-
-def ReduceNonterminals():
- global Nonterminals
-
- global TReductions
- global NTReductions
-
- for nt in Nonterminals.keys():
- TReductions[nt] = []
- NTReductions[nt] = []
-
- for nt in Nonterminals.keys():
- terms = ReduceToTerminals(nt)
- TReductions[nt].extend(terms)
- if not NTReductions.has_key(nt):
- ReduceToNonterminals(nt)
-
+# -----------------------------------------------------------------------------
+# add_lookaheads()
+#
+# Attaches the lookahead symbols to grammar rules.
+#
+# Inputs: lookbacks - Set of lookback relations
+# followset - Computed follow set
+#
+# This function directly attaches the lookaheads to productions contained
+# in the lookbacks set
+# -----------------------------------------------------------------------------
+def add_lookaheads(lookbacks,followset):
+ for trans,lb in lookbacks.items():
+ # Loop over productions in lookback
+ for state,p in lb:
+ if not p.lookaheads.has_key(state):
+ p.lookaheads[state] = []
+ f = followset.get(trans,[])
+ for a in f:
+ if a not in p.lookaheads[state]: p.lookaheads[state].append(a)
+
+# -----------------------------------------------------------------------------
+# add_lalr_lookaheads()
+#
+# This function does all of the work of adding lookahead information for use
+# with LALR parsing
+# -----------------------------------------------------------------------------
-def ReduceToTerminals(nt,cyclemap=None):
- global Prodnames
- global Terminals
- reducedterminals = []
- if not cyclemap: cyclemap = {}
+def add_lalr_lookaheads(C):
+ # Determine all of the nullable nonterminals
+ nullable = compute_nullable_nonterminals()
- if cyclemap.has_key(nt): return []
- cyclemap[nt] = 1
+ # Find all non-terminal transitions
+ trans = find_nonterminal_transitions(C)
- for p in Prodnames[nt]:
- if len(p.prod) > 0:
- if Terminals.has_key(p.prod[0]):
- if p.prod[0] not in reducedterminals:
- reducedterminals.append(p.prod[0])
- else:
- if p.prod[0] != nt:
- terms = ReduceToTerminals(p.prod[0],cyclemap)
- for t in terms:
- if t not in reducedterminals:
- reducedterminals.append(t)
- del cyclemap[nt]
- return reducedterminals
+ # Compute read sets
+ readsets = compute_read_sets(C,trans,nullable)
-
-def ReduceToNonterminals(nt):
- global Prodnames
- global Nonterminals
- global NTReductions
- reducednonterminals = []
-
- for p in Prodnames[nt]:
- if len(p.prod) > 0:
- if Nonterminals.has_key(p.prod[0]):
- if p.prod[0] not in reducednonterminals:
- reducednonterminals.append(p.prod[0])
- if p.prod[0] != nt:
- if not NTReductions.has_key(p.prod[0]):
- ReduceToNonterminals(p.prod[0])
-
- nterms = NTReductions[p.prod[0]]
- for nt in nterms:
- if nt not in reducednonterminals:
- reducednonterminals.append(nt)
-
+ # Compute lookback/includes relations
+ lookd, included = compute_lookback_includes(C,trans,nullable)
- NTReductions[nt] = reducednonterminals
+ # Compute LALR FOLLOW sets
+ followsets = compute_follow_sets(trans,readsets,included)
+
+ # Add all of the lookaheads
+ add_lookaheads(lookd,followsets)
# -----------------------------------------------------------------------------
-# lalr_parse_table()
+# lr_parse_table()
#
-# This function constructs an LALR table.
+# This function constructs the parse tables for SLR or LALR
# -----------------------------------------------------------------------------
-def lalr_parse_table():
+def lr_parse_table(method):
global _lr_method
goto = _lr_goto # Goto array
action = _lr_action # Action array
actionp = { } # Action production array (temporary)
- goto_cache = _lr_goto_cache
- cid_hash = _lr0_cidhash
-
- _lr_method = "LALR"
+
+ _lr_method = method
n_srconflict = 0
n_rrconflict = 0
if yaccdebug:
- sys.stderr.write("yacc: Generating LALR(1) parsing table...\n")
- _vf.write("\n\nParsing method: LALR(1)\n\n")
+ sys.stderr.write("yacc: Generating %s parsing table...\n" % method)
+ _vf.write("\n\nParsing method: %s\n\n" % method)
# Step 1: Construct C = { I0, I1, ... IN}, collection of LR(0) items
# This determines the number of states
-
+
C = lr0_items()
- global Canonical
- Canonical = C
-
- ###
- # Create the kernel states.
- ###
- K = []
- setC = [0]*len(C)
- for x in C:
- K.append(lr0_kernel(x))
- for y in x:
- y.setnumbers = setC[:]
-
- _cindex = 0
- for x in C:
- for y in x:
- y.lookaheads[_cindex] = []
- y.setnumbers[_cindex] = 1
- _cindex = _cindex + 1
-
- ###
- # Add lookaheads to the lr items
- ###
-
- add_lookaheads(K)
-
- ###
- # Do the reductions for parsing first and keep them in globals
- ###
-
- ReduceNonterminals()
-
- global TReductions
- global NTReductions
- global Prodempty
-
- EmptyAncestors = {}
- for y in Prodempty.keys():
- EmptyAncestors[y] = []
- for x in NTReductions.items():
- for y in x[1]:
- if Prodempty.has_key(y):
- EmptyAncestors[y].append(x[0])
-
+ if method == 'LALR':
+ add_lalr_lookaheads(C)
# Build the parser table, state by state
st = 0
for I in C:
# Loop over each production in I
actlist = [ ] # List of actions
- acthash = { }
-
- idI = id(I)
if yaccdebug:
_vf.write("\nstate %d\n\n" % st)
@@ -1766,86 +1641,20 @@ def lalr_parse_table():
_vf.write(" (%d) %s\n" % (p.number, str(p)))
_vf.write("\n")
- global First
for p in I:
try:
if p.prod[-1] == ".":
if p.name == "S'":
# Start symbol. Accept!
- action[st,"$"] = 0
- actionp[st,"$"] = p
- elif len(p.prod) == 0:
- ancestors = EmptyAncestors[p.name]
- for i in ancestors:
- for s in K:
- if i in s:
- input_list = []
- plist = Productions[i.name]
- for x in plist:
- if len(x.prod) > 0 and x.prod[0] == p.name:
- n = p.prod[1:]
- d = x.prod[lr_index+2:]
- for l in x.lookaheads.items():
- flist = First[tuple(n+d+[l])]
- for f in flist:
- if f not in input_list and f in p.lookaheads[st]:
- input_list.append(f)
-
- # We are at the end of a production. Reduce!
- #print "input_list: %s" % input_list
- #print "Follow[p.name]: %s" % Follow[p.name]
- for a in input_list:
- actlist.append((a,p,"reduce using rule %d (%s) " % (p.number,p)))
- r = action.get((st,a),None)
- if r is not None:
- # Whoa. Have a shift/reduce or reduce/reduce conflict
- if r > 0:
- # Need to decide on shift or reduce here
- # By default we favor shifting. Need to add
- # some precedence rules here.
- sprec,slevel = Productions[actionp[st,a].number].prec
- rprec,rlevel = Precedence.get(a,('right',0))
- if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
- # We really need to reduce here.
- action[st,a] = -p.number
- actionp[st,a] = p
- if not slevel and not rlevel:
- _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
- _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
- n_srconflict += 1
- elif (slevel == rlevel) and (rprec == 'nonassoc'):
- action[st,a] = None
- else:
- # Hmmm. Guess we'll keep the shift
- if not slevel and not rlevel:
- _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
- _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
- n_srconflict +=1
- elif r < 0:
- # Reduce/reduce conflict. In this case, we favor the rule
- # that was defined first in the grammar file
- oldp = Productions[-r]
- pp = Productions[p.number]
- if oldp.line > pp.line:
- action[st,a] = -p.number
- actionp[st,a] = p
- # print "Reduce/reduce conflict in state %d" % st
- n_rrconflict += 1
- _vfc.write("reduce/reduce conflict in state %d resolved using rule %d.\n" % (st, actionp[st,a].number))
- _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d.\n" % (a,actionp[st,a].number))
- else:
- sys.stderr.write("Unknown conflict in state %d\n" % st)
- else:
- action[st,a] = -p.number
- actionp[st,a] = p
-
- break # break out of the for s in K loop because we only want to make
- # sure that a production is in the Kernel
-
+ action[st,"$end"] = 0
+ actionp[st,"$end"] = p
else:
# We are at the end of a production. Reduce!
-
- for a in p.lookaheads[st]:
+ if method == 'LALR':
+ laheads = p.lookaheads[st]
+ else:
+ laheads = Follow[p.name]
+ for a in laheads:
actlist.append((a,p,"reduce using rule %d (%s)" % (p.number,p)))
r = action.get((st,a),None)
if r is not None:
@@ -1855,7 +1664,7 @@ def lalr_parse_table():
# By default we favor shifting. Need to add
# some precedence rules here.
sprec,slevel = Productions[actionp[st,a].number].prec
- rprec,rlevel = Precedence.get(a,('right',0))
+ rprec,rlevel = Precedence.get(a,('right',0))
if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
# We really need to reduce here.
action[st,a] = -p.number
@@ -1880,12 +1689,12 @@ def lalr_parse_table():
if oldp.line > pp.line:
action[st,a] = -p.number
actionp[st,a] = p
- # print "Reduce/reduce conflict in state %d" % st
+ # sys.stderr.write("Reduce/reduce conflict in state %d\n" % st)
n_rrconflict += 1
- _vfc.write("reduce/reduce conflict in state %d resolved using rule %d.\n" % (st, actionp[st,a].number))
- _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d.\n" % (a,actionp[st,a].number))
+ _vfc.write("reduce/reduce conflict in state %d resolved using rule %d (%s).\n" % (st, actionp[st,a].number, actionp[st,a]))
+ _vf.write(" ! reduce/reduce conflict for %s resolved using rule %d (%s).\n" % (a,actionp[st,a].number, actionp[st,a]))
else:
- print "Unknown conflict in state %d" % st
+ sys.stderr.write("Unknown conflict in state %d\n" % st)
else:
action[st,a] = -p.number
actionp[st,a] = p
@@ -1893,14 +1702,11 @@ def lalr_parse_table():
i = p.lr_index
a = p.prod[i+1] # Get symbol right after the "."
if Terminals.has_key(a):
- g = goto_cache[(idI,a)]
- j = cid_hash.get(id(g),-1)
+ g = lr0_goto(I,a)
+ j = _lr0_cidhash.get(id(g),-1)
if j >= 0:
# We are in a shift state
- _k = (a,j)
- if not acthash.has_key(_k):
- actlist.append((a,p,"shift and go to state %d" % j))
- acthash[_k] = 1
+ actlist.append((a,p,"shift and go to state %d" % j))
r = action.get((st,a),None)
if r is not None:
# Whoa have a shift/reduce or shift/shift conflict
@@ -1936,102 +1742,54 @@ def lalr_parse_table():
else:
action[st,a] = j
actionp[st,a] = p
- else:
- nonterminal = a
- term_list = TReductions[nonterminal]
- # DB: This loop gets executed a lot. Try to optimize
- for a in term_list:
- g = goto_cache[(idI,a)]
- j = cid_hash[id(g)]
- if j >= 0:
- # We are in a shift state
- # Don't put repeated shift actions on action list (performance hack)
- _k = (a,j)
- if not acthash.has_key(_k):
- actlist.append((a,p,"shift and go to state "+str(j)))
- acthash[_k] = 1
-
- r = action.get((st,a),None)
- if r is not None:
- # Whoa have a shift/reduce or shift/shift conflict
- if r > 0:
- if r != j:
- sys.stderr.write("Shift/shift conflict in state %d\n" % st)
- continue
- elif r < 0:
- # Do a precedence check.
- # - if precedence of reduce rule is higher, we reduce.
- # - if precedence of reduce is same and left assoc, we reduce.
- # - otherwise we shift
- rprec,rlevel = Productions[actionp[st,a].number].prec
- sprec,slevel = Precedence.get(a,('right',0))
- if (slevel > rlevel) or ((slevel == rlevel) and (rprec != 'left')):
- # We decide to shift here... highest precedence to shift
- action[st,a] = j
- actionp[st,a] = p
- if not slevel and not rlevel:
- n_srconflict += 1
- _vfc.write("shift/reduce conflict in state %d resolved as shift.\n" % st)
- _vf.write(" ! shift/reduce conflict for %s resolved as shift.\n" % a)
- elif (slevel == rlevel) and (rprec == 'nonassoc'):
- action[st,a] = None
- else:
- # Hmmm. Guess we'll keep the reduce
- if not slevel and not rlevel:
- n_srconflict +=1
- _vfc.write("shift/reduce conflict in state %d resolved as reduce.\n" % st)
- _vf.write(" ! shift/reduce conflict for %s resolved as reduce.\n" % a)
-
- else:
- sys.stderr.write("Unknown conflict in state %d\n" % st)
- else:
- action[st,a] = j
- actionp[st,a] = p
-
+
except StandardError,e:
- raise YaccError, "Hosed in lalr_parse_table", e
+ raise YaccError, "Hosed in lr_parse_table", e
# Print the actions associated with each terminal
if yaccdebug:
+ _actprint = { }
for a,p,m in actlist:
if action.has_key((st,a)):
if p is actionp[st,a]:
_vf.write(" %-15s %s\n" % (a,m))
+ _actprint[(a,m)] = 1
_vf.write("\n")
-
for a,p,m in actlist:
if action.has_key((st,a)):
if p is not actionp[st,a]:
- _vf.write(" ! %-15s [ %s ]\n" % (a,m))
+ if not _actprint.has_key((a,m)):
+ _vf.write(" ! %-15s [ %s ]\n" % (a,m))
+ _actprint[(a,m)] = 1
# Construct the goto table for this state
+ if yaccdebug:
+ _vf.write("\n")
nkeys = { }
for ii in I:
for s in ii.usyms:
if Nonterminals.has_key(s):
nkeys[s] = None
-
- # Construct the goto table for this state
for n in nkeys.keys():
g = lr0_goto(I,n)
- j = cid_hash.get(id(g),-1)
+ j = _lr0_cidhash.get(id(g),-1)
if j >= 0:
goto[st,n] = j
if yaccdebug:
_vf.write(" %-30s shift and go to state %d\n" % (n,j))
st += 1
+
if yaccdebug:
if n_srconflict == 1:
sys.stderr.write("yacc: %d shift/reduce conflict\n" % n_srconflict)
if n_srconflict > 1:
- sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict)
+ sys.stderr.write("yacc: %d shift/reduce conflicts\n" % n_srconflict)
if n_rrconflict == 1:
sys.stderr.write("yacc: %d reduce/reduce conflict\n" % n_rrconflict)
if n_rrconflict > 1:
sys.stderr.write("yacc: %d reduce/reduce conflicts\n" % n_rrconflict)
-
# -----------------------------------------------------------------------------
# ==== LR Utility functions ====
# -----------------------------------------------------------------------------
@@ -2145,6 +1903,7 @@ del _lr_goto_items
else:
f.write(" None,\n")
f.write("]\n")
+
f.close()
except IOError,e:
@@ -2193,9 +1952,6 @@ def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module,
files = { }
error = 0
- # Add starting symbol to signature
- if start:
- Signature.update(start)
# Add parsing method to signature
Signature.update(method)
@@ -2227,6 +1983,12 @@ def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module,
f = f.f_back # Walk out to our calling function
ldict = f.f_globals # Grab its globals dictionary
+ # Add starting symbol to signature
+ if not start:
+ start = ldict.get("start",None)
+ if start:
+ Signature.update(start)
+
# If running in optimized mode. We're going to
if (optimize and lr_read_tables(tabmodule,1)):
@@ -2374,10 +2136,8 @@ def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module,
compute_first1()
compute_follow(start)
- if method == 'SLR':
- slr_parse_table()
- elif method == 'LALR':
- lalr_parse_table()
+ if method in ['SLR','LALR']:
+ lr_parse_table(method)
else:
raise YaccError, "Unknown parsing method '%s'" % method
@@ -2408,6 +2168,9 @@ def yacc(method=default_lr, debug=yaccdebug, module=None, tabmodule=tab_module,
global parse
parse = p.parse
+ global parser
+ parser = p
+
# Clean up all of the globals we created
if (not optimize):
yacc_cleanup()
@@ -2423,12 +2186,10 @@ def yacc_cleanup():
global Productions, Prodnames, Prodmap, Terminals
global Nonterminals, First, Follow, Precedence, LRitems
global Errorfunc, Signature, Requires
- global Prodempty, TReductions, NTReductions, GotoSetNum, Canonical
del Productions, Prodnames, Prodmap, Terminals
del Nonterminals, First, Follow, Precedence, LRitems
del Errorfunc, Signature, Requires
- del Prodempty, TReductions, NTReductions, GotoSetNum, Canonical
global _vf, _vfc
del _vf, _vfc