diff options
author | Peter Wu <peter@lekensteyn.nl> | 2015-06-24 20:03:51 +0200 |
---|---|---|
committer | Peter Wu <peter@lekensteyn.nl> | 2015-06-29 09:36:34 +0000 |
commit | 149d0b7e910f99f72a74e0c1a441f4db213cf3c1 (patch) | |
tree | 78adf0ce6830e614426984d5a0ac067fd1aba07f /tools/asn2wrs.py | |
parent | 11410338840cde193ccf90eb3031c37838468f1f (diff) |
tools/asn2wrs.py: handle windows-1252 encoding
The RRC ASN.1 definitions resulted in a decode error in Python because
the file is encoded as windows-1252 instead of UTF-8. This patch makes
the tool more forgiving: it tries to decode the input as UTF-8 first and
falls back to windows-1252 (with a warning) when that fails.
Tested with Python 2.6.9, 2.7.10, 3.4.3.
Change-Id: I9c9269e1065c98b8bcfb57ab4bfd21d5e183a656
Reviewed-on: https://code.wireshark.org/review/9133
Reviewed-by: Pascal Quantin <pascal.quantin@gmail.com>
Reviewed-by: Peter Wu <peter@lekensteyn.nl>
Diffstat (limited to 'tools/asn2wrs.py')
-rwxr-xr-x | tools/asn2wrs.py | 15 |
1 files changed, 12 insertions, 3 deletions
diff --git a/tools/asn2wrs.py b/tools/asn2wrs.py index 55bb537953..a319961084 100755 --- a/tools/asn2wrs.py +++ b/tools/asn2wrs.py @@ -7977,9 +7977,18 @@ def eth_main(): input_file = fn lexer.lineno = 1 if (ectx.srcdir): fn = ectx.srcdir + '/' + fn - f = open (fn, "r") - ast.extend(yacc.parse(f.read(), lexer=lexer, debug=pd)) - f.close () + # Read ASN.1 definition, trying one of the common encodings. + data = open(fn, "rb").read() + for encoding in ('utf-8', 'windows-1252'): + try: + data = data.decode(encoding) + break + except: + warnings.warn_explicit("Decoding %s as %s failed, trying next." % (fn, encoding), UserWarning, '', 0) + # Py2 compat, name.translate in eth_output_hf_arr fails with unicode + if not isinstance(data, str): + data = data.encode('utf-8') + ast.extend(yacc.parse(data, lexer=lexer, debug=pd)) ectx.eth_clean() if (ectx.merge_modules): # common output for all module ectx.eth_clean() |