aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peter Wu <peter@lekensteyn.nl> 2015-06-24 20:03:51 +0200
committer Peter Wu <peter@lekensteyn.nl> 2015-06-29 09:36:34 +0000
commit 149d0b7e910f99f72a74e0c1a441f4db213cf3c1 (patch)
tree 78adf0ce6830e614426984d5a0ac067fd1aba07f
parent 11410338840cde193ccf90eb3031c37838468f1f (diff)
tools/asn2wrs.py: handle windows-1252 encoding
The RRC ASN.1 definitions resulted in a decode error in Python because the file is encoded as windows-1252 instead of UTF-8. This patch makes the tool more forgiving in handling windows-1252 encodings.

Tested with Python 2.6.9, 2.7.10, 3.4.3.

Change-Id: I9c9269e1065c98b8bcfb57ab4bfd21d5e183a656
Reviewed-on: https://code.wireshark.org/review/9133
Reviewed-by: Pascal Quantin <pascal.quantin@gmail.com>
Reviewed-by: Peter Wu <peter@lekensteyn.nl>
-rwxr-xr-x tools/asn2wrs.py 15
1 files changed, 12 insertions, 3 deletions
diff --git a/tools/asn2wrs.py b/tools/asn2wrs.py
index 55bb537953..a319961084 100755
--- a/tools/asn2wrs.py
+++ b/tools/asn2wrs.py
@@ -7977,9 +7977,18 @@ def eth_main():
input_file = fn
lexer.lineno = 1
if (ectx.srcdir): fn = ectx.srcdir + '/' + fn
- f = open (fn, "r")
- ast.extend(yacc.parse(f.read(), lexer=lexer, debug=pd))
- f.close ()
+ # Read ASN.1 definition, trying one of the common encodings.
+ data = open(fn, "rb").read()
+ for encoding in ('utf-8', 'windows-1252'):
+ try:
+ data = data.decode(encoding)
+ break
+ except:
+ warnings.warn_explicit("Decoding %s as %s failed, trying next." % (fn, encoding), UserWarning, '', 0)
+ # Py2 compat, name.translate in eth_output_hf_arr fails with unicode
+ if not isinstance(data, str):
+ data = data.encode('utf-8')
+ ast.extend(yacc.parse(data, lexer=lexer, debug=pd))
ectx.eth_clean()
if (ectx.merge_modules): # common output for all module
ectx.eth_clean()