YACC: Update yacc.py to 3.11

author: Anders Broman <a.broman58@gmail.com> 2023-10-03 12:48:40 +0200
committer: Anders Broman <a.broman58@gmail.com> 2023-10-03 12:48:40 +0200
commit: 14f9bcacc2007d8bdfb2348e68a4d63559d76600 (patch)
tree: affe14bf8b931105004521a31234e084a6805331
parent: c38e6ecbf6d5e637070453cbfca49b3b104f0094 (diff)
1 files changed, 84 insertions, 73 deletions
diff --git a/tools/yacc.py b/tools/yacc.py
index 1352e96334..ef843ba7d2 100644
--- a/tools/yacc.py
+++ b/tools/yacc.py
@@ -1,31 +1,11 @@
 # -----------------------------------------------------------------------------
 # ply: yacc.py
 #
-# Copyright (C) 2001-2015,
+# Copyright (C) 2001-2018
 # David M. Beazley (Dabeaz LLC)
 # All rights reserved.
 #
 # SPDX-License-Identifier: BSD-3-Clause
-# -----------------------------------------------------------------------------
-#
-# This implements an LR parser that is constructed from grammar rules defined
-# as Python functions. The grammer is specified by supplying the BNF inside
-# Python documentation strings.  The inspiration for this technique was borrowed
-# from John Aycock's Spark parsing system.  PLY might be viewed as cross between
-# Spark and the GNU bison utility.
-#
-# The current implementation is only somewhat object-oriented. The
-# LR parser itself is defined in terms of an object (which allows multiple
-# parsers to co-exist).  However, most of the variables used during table
-# construction are defined in terms of global variables.  Users shouldn't
-# notice unless they are trying to define multiple parsers at the same
-# time using threads (in which case they should have their head examined).
-#
-# This implementation supports both SLR and LALR(1) parsing.  LALR(1)
-# support was originally implemented by Elias Ioup (ezioup@alumni.uchicago.edu),
-# using the algorithm found in Aho, Sethi, and Ullman "Compilers: Principles,
-# Techniques, and Tools" (The Dragon Book).  LALR(1) has since been replaced
-# by the more efficient DeRemer and Pennello algorithm.
 #
 # :::::::: WARNING :::::::
 #
@@ -41,11 +21,10 @@ import types
 import sys
 import os.path
 import inspect
-import base64
 import warnings
 
-__version__    = '3.8'
-__tabversion__ = '3.8'
+__version__    = '3.11'
+__tabversion__ = '3.10'
 
 #-----------------------------------------------------------------------------
 #                     === User configurable parameters ===
@@ -245,6 +224,9 @@ class YaccProduction:
     def lexpos(self, n):
         return getattr(self.slice[n], 'lexpos', 0)
 
+    def set_lexpos(self, n, lexpos):
+        self.slice[n].lexpos = lexpos
+
     def lexspan(self, n):
         startpos = getattr(self.slice[n], 'lexpos', 0)
         endpos = getattr(self.slice[n], 'endlexpos', startpos)
@@ -286,7 +268,7 @@ class LRParser:
     # certain kinds of advanced parsing situations where the lexer and parser interact with
     # each other or change states (i.e., manipulation of scope, lexer states, etc.).
     #
-    # See:  https://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
+    # See:  http://www.gnu.org/software/bison/manual/html_node/Default-Reductions.html#Default-Reductions
     def set_defaulted_states(self):
         self.defaulted_states = {}
         for state, actions in self.action.items():
@@ -474,8 +456,9 @@ class LRParser:
                         try:
                             # Call the grammar rule with our special slice object
                             del symstack[-plen:]
-                            del statestack[-plen:]
+                            self.state = state
                             p.callable(pslice)
+                            del statestack[-plen:]
                             #--! DEBUG
                             debug.info('Result : %s', format_result(pslice[0]))
                             #--! DEBUG
@@ -484,14 +467,16 @@ class LRParser:
                             statestack.append(state)
                         except SyntaxError:
                             # If an error was set. Enter error recovery state
-                            lookaheadstack.append(lookahead)
-                            symstack.pop()
-                            statestack.pop()
+                            lookaheadstack.append(lookahead)    # Save the current lookahead token
+                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
+                            statestack.pop()                    # Pop back one state (before the reduce)
                             state = statestack[-1]
                             sym.type = 'error'
+                            sym.value = 'error'
                             lookahead = sym
                             errorcount = error_count
                             self.errorok = False
+
                         continue
                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
@@ -514,6 +499,7 @@ class LRParser:
 
                         try:
                             # Call the grammar rule with our special slice object
+                            self.state = state
                             p.callable(pslice)
                             #--! DEBUG
                             debug.info('Result : %s', format_result(pslice[0]))
@@ -523,14 +509,15 @@ class LRParser:
                             statestack.append(state)
                         except SyntaxError:
                             # If an error was set. Enter error recovery state
-                            lookaheadstack.append(lookahead)
-                            symstack.pop()
-                            statestack.pop()
+                            lookaheadstack.append(lookahead)    # Save the current lookahead token
+                            statestack.pop()                    # Pop back one state (before the reduce)
                             state = statestack[-1]
                             sym.type = 'error'
+                            sym.value = 'error'
                             lookahead = sym
                             errorcount = error_count
                             self.errorok = False
+
                         continue
                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
@@ -556,7 +543,7 @@ class LRParser:
                 # If there are any synchronization rules, they may
                 # catch it.
                 #
-                # In addition to pushing the error token, we call
+                # In addition to pushing the error token, we call call
                 # the user defined p_error() function if this is the
                 # first syntax error.  This function is only called if
                 # errorcount == 0.
@@ -569,6 +556,7 @@ class LRParser:
                     if self.errorfunc:
                         if errtoken and not hasattr(errtoken, 'lexer'):
                             errtoken.lexer = lexer
+                        self.state = state
                         tok = call_errorfunc(self.errorfunc, errtoken, self)
                         if self.errorok:
                             # User must have done some kind of panic
@@ -788,21 +776,24 @@ class LRParser:
                         try:
                             # Call the grammar rule with our special slice object
                             del symstack[-plen:]
-                            del statestack[-plen:]
+                            self.state = state
                             p.callable(pslice)
+                            del statestack[-plen:]
                             symstack.append(sym)
                             state = goto[statestack[-1]][pname]
                             statestack.append(state)
                         except SyntaxError:
                             # If an error was set. Enter error recovery state
-                            lookaheadstack.append(lookahead)
-                            symstack.pop()
-                            statestack.pop()
+                            lookaheadstack.append(lookahead)    # Save the current lookahead token
+                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
+                            statestack.pop()                    # Pop back one state (before the reduce)
                             state = statestack[-1]
                             sym.type = 'error'
+                            sym.value = 'error'
                             lookahead = sym
                             errorcount = error_count
                             self.errorok = False
+
                         continue
                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
@@ -825,20 +816,22 @@ class LRParser:
 
                         try:
                             # Call the grammar rule with our special slice object
+                            self.state = state
                             p.callable(pslice)
                             symstack.append(sym)
                             state = goto[statestack[-1]][pname]
                             statestack.append(state)
                         except SyntaxError:
                             # If an error was set. Enter error recovery state
-                            lookaheadstack.append(lookahead)
-                            symstack.pop()
-                            statestack.pop()
+                            lookaheadstack.append(lookahead)    # Save the current lookahead token
+                            statestack.pop()                    # Pop back one state (before the reduce)
                             state = statestack[-1]
                             sym.type = 'error'
+                            sym.value = 'error'
                             lookahead = sym
                             errorcount = error_count
                             self.errorok = False
+
                         continue
                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
@@ -856,7 +849,7 @@ class LRParser:
                 # If there are any synchronization rules, they may
                 # catch it.
                 #
-                # In addition to pushing the error token, we call
+                # In addition to pushing the error token, we call call
                 # the user defined p_error() function if this is the
                 # first syntax error.  This function is only called if
                 # errorcount == 0.
@@ -869,6 +862,7 @@ class LRParser:
                     if self.errorfunc:
                         if errtoken and not hasattr(errtoken, 'lexer'):
                             errtoken.lexer = lexer
+                        self.state = state
                         tok = call_errorfunc(self.errorfunc, errtoken, self)
                         if self.errorok:
                             # User must have done some kind of panic
@@ -1079,21 +1073,24 @@ class LRParser:
                         try:
                             # Call the grammar rule with our special slice object
                             del symstack[-plen:]
-                            del statestack[-plen:]
+                            self.state = state
                             p.callable(pslice)
+                            del statestack[-plen:]
                             symstack.append(sym)
                             state = goto[statestack[-1]][pname]
                             statestack.append(state)
                         except SyntaxError:
                             # If an error was set. Enter error recovery state
-                            lookaheadstack.append(lookahead)
-                            symstack.pop()
-                            statestack.pop()
+                            lookaheadstack.append(lookahead)    # Save the current lookahead token
+                            symstack.extend(targ[1:-1])         # Put the production slice back on the stack
+                            statestack.pop()                    # Pop back one state (before the reduce)
                             state = statestack[-1]
                             sym.type = 'error'
+                            sym.value = 'error'
                             lookahead = sym
                             errorcount = error_count
                             self.errorok = False
+
                         continue
                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
@@ -1111,20 +1108,22 @@ class LRParser:
 
                         try:
                             # Call the grammar rule with our special slice object
+                            self.state = state
                             p.callable(pslice)
                             symstack.append(sym)
                             state = goto[statestack[-1]][pname]
                             statestack.append(state)
                         except SyntaxError:
                             # If an error was set. Enter error recovery state
-                            lookaheadstack.append(lookahead)
-                            symstack.pop()
-                            statestack.pop()
+                            lookaheadstack.append(lookahead)    # Save the current lookahead token
+                            statestack.pop()                    # Pop back one state (before the reduce)
                             state = statestack[-1]
                             sym.type = 'error'
+                            sym.value = 'error'
                             lookahead = sym
                             errorcount = error_count
                             self.errorok = False
+
                         continue
                         # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 
@@ -1142,7 +1141,7 @@ class LRParser:
                 # If there are any synchronization rules, they may
                 # catch it.
                 #
-                # In addition to pushing the error token, we call
+                # In addition to pushing the error token, we call call
                 # the user defined p_error() function if this is the
                 # first syntax error.  This function is only called if
                 # errorcount == 0.
@@ -1155,6 +1154,7 @@ class LRParser:
                     if self.errorfunc:
                         if errtoken and not hasattr(errtoken, 'lexer'):
                             errtoken.lexer = lexer
+                        self.state = state
                         tok = call_errorfunc(self.errorfunc, errtoken, self)
                         if self.errorok:
                             # User must have done some kind of panic
@@ -1319,7 +1319,7 @@ class Production(object):
         p = LRItem(self, n)
         # Precompute the list of productions immediately following.
         try:
-            p.lr_after = Prodnames[p.prod[n+1]]
+            p.lr_after = self.Prodnames[p.prod[n+1]]
         except (IndexError, KeyError):
             p.lr_after = []
         try:
@@ -2260,7 +2260,6 @@ class LRGeneratedTable(LRTable):
     # -----------------------------------------------------------------------------
 
     def dr_relation(self, C, trans, nullable):
-        dr_set = {}
         state, N = trans
         terms = []
 
@@ -2544,8 +2543,13 @@ class LRGeneratedTable(LRTable):
                                         # Need to decide on shift or reduce here
                                         # By default we favor shifting. Need to add
                                         # some precedence rules here.
-                                        sprec, slevel = Productions[st_actionp[a].number].prec
-                                        rprec, rlevel = Precedence.get(a, ('right', 0))
+
+                                        # Shift precedence comes from the token
+                                        sprec, slevel = Precedence.get(a, ('right', 0))
+
+                                        # Reduce precedence comes from rule being reduced (p)
+                                        rprec, rlevel = Productions[p.number].prec
+
                                         if (slevel < rlevel) or ((slevel == rlevel) and (rprec == 'left')):
                                             # We really need to reduce here.
                                             st_action[a] = -p.number
@@ -2603,8 +2607,13 @@ class LRGeneratedTable(LRTable):
                                         #   -  if precedence of reduce rule is higher, we reduce.
                                         #   -  if precedence of reduce is same and left assoc, we reduce.
                                         #   -  otherwise we shift
-                                        rprec, rlevel = Productions[st_actionp[a].number].prec
+
+                                        # Shift precedence comes from the token
                                         sprec, slevel = Precedence.get(a, ('right', 0))
+
+                                        # Reduce precedence comes from the rule that could have been reduced
+                                        rprec, rlevel = Productions[st_actionp[a].number].prec
+
                                         if (slevel > rlevel) or ((slevel == rlevel) and (rprec == 'right')):
                                             # We decide to shift here... highest precedence to shift
                                             Productions[st_actionp[a].number].reduced -= 1
@@ -2684,6 +2693,7 @@ class LRGeneratedTable(LRTable):
             f.write('''
 # %s
 # This file is automatically generated. Do not edit.
+# pylint: disable=W,C,R
 _tabversion = %r
 
 _lr_method = %r
@@ -2917,28 +2927,20 @@ class ParserReflect(object):
 
     # Compute a signature over the grammar
     def signature(self):
+        parts = []
         try:
-            from hashlib import md5
-        except ImportError:
-            from md5 import md5
-        try:
-            sig = md5()
             if self.start:
-                sig.update(self.start.encode('latin-1'))
+                parts.append(self.start)
             if self.prec:
-                sig.update(''.join([''.join(p) for p in self.prec]).encode('latin-1'))
+                parts.append(''.join([''.join(p) for p in self.prec]))
             if self.tokens:
-                sig.update(' '.join(self.tokens).encode('latin-1'))
+                parts.append(' '.join(self.tokens))
             for f in self.pfuncs:
                 if f[3]:
-                    sig.update(f[3].encode('latin-1'))
+                    parts.append(f[3])
         except (TypeError, ValueError):
             pass
-
-        digest = base64.b16encode(sig.digest())
-        if sys.version_info[0] >= 3:
-            digest = digest.decode('latin-1')
-        return digest
+        return ''.join(parts)
 
     # -----------------------------------------------------------------------------
     # validate_modules()
@@ -2956,7 +2958,10 @@ class ParserReflect(object):
         fre = re.compile(r'\s*def\s+(p_[a-zA-Z_0-9]*)\(')
 
         for module in self.modules:
-            lines, linen = inspect.getsourcelines(module)
+            try:
+                lines, linen = inspect.getsourcelines(module)
+            except IOError:
+                continue
 
             counthash = {}
             for linen, line in enumerate(lines):
@@ -3026,7 +3031,7 @@ class ParserReflect(object):
             self.error = True
             return
 
-        self.tokens = tokens
+        self.tokens = sorted(tokens)
 
     # Validate the tokens
     def validate_tokens(self):
@@ -3084,7 +3089,7 @@ class ParserReflect(object):
             if not name.startswith('p_') or name == 'p_error':
                 continue
             if isinstance(item, (types.FunctionType, types.MethodType)):
-                line = item.__code__.co_firstlineno
+                line = getattr(item, 'co_firstlineno', item.__code__.co_firstlineno)
                 module = inspect.getmodule(item)
                 p_functions.append((line, module, name, item.__doc__))
 
@@ -3186,9 +3191,13 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star
     if module:
         _items = [(k, getattr(module, k)) for k in dir(module)]
         pdict = dict(_items)
-        # If no __file__ attribute is available, try to obtain it from the __module__ instead
+        # If no __file__ or __package__ attributes are available, try to obtain them
+        # from the __module__ instead
         if '__file__' not in pdict:
             pdict['__file__'] = sys.modules[pdict['__module__']].__file__
+        if '__package__' not in pdict and '__module__' in pdict:
+            if hasattr(sys.modules[pdict['__module__']], '__package__'):
+                pdict['__package__'] = sys.modules[pdict['__module__']].__package__
     else:
         pdict = get_caller_module_dict(2)
 
@@ -3262,7 +3271,7 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star
         else:
             debuglog = NullLogger()
 
-    debuglog.info('Created by PLY version %s (https://www.dabeaz.com/ply/)', __version__)
+    debuglog.info('Created by PLY version %s (http://www.dabeaz.com/ply)', __version__)
 
     errors = False
 
@@ -3430,6 +3439,8 @@ def yacc(method='LALR', debug=yaccdebug, module=None, tabmodule=tab_module, star
     if write_tables:
         try:
             lr.write_table(tabmodule, outputdir, signature)
+            if tabmodule in sys.modules:
+                del sys.modules[tabmodule]
         except IOError as e:
             errorlog.warning("Couldn't create %r. %s" % (tabmodule, e))
author	Anders Broman <a.broman58@gmail.com>	2023-10-03 12:48:40 +0200
committer	Anders Broman <a.broman58@gmail.com>	2023-10-03 12:48:40 +0200
commit	14f9bcacc2007d8bdfb2348e68a4d63559d76600 (patch)
tree	affe14bf8b931105004521a31234e084a6805331
parent	c38e6ecbf6d5e637070453cbfca49b3b104f0094 (diff)