diff options
author | Martin Mathieson <martin.mathieson@keysight.com> | 2020-08-03 23:44:14 +0100 |
---|---|---|
committer | Anders Broman <a.broman58@gmail.com> | 2020-08-08 09:55:18 +0000 |
commit | fd03c85d0f5c1e17337c2c1380e6018679f2e9c6 (patch) | |
tree | b6822aa78df4d9b3d098d839ae3e32fc6a1e271e /tools/check_typed_item_calls.py | |
parent | 3b47a55b0daeddb4edee6390387b0dfca1135be3 (diff) |
check_typed_item_calls.py: Look for items with the wrong type passed to APIs
Look for calls to certain proto APIs that require hf items of a certain type,
then check that the items passed in have one of the allowed types.
Currently takes around a minute to scan epan/dissectors. There are
a few issues that have not yet been fixed..
Hopefully this can be added to the PetriDish at some point.
Change-Id: Ic9eadcc3f1de03223606b5dca1cb45edcbe95e85
Reviewed-on: https://code.wireshark.org/review/38039
Petri-Dish: Martin Mathieson <martin.r.mathieson@googlemail.com>
Tested-by: Petri Dish Buildbot
Reviewed-by: Anders Broman <a.broman58@gmail.com>
Diffstat (limited to 'tools/check_typed_item_calls.py')
-rwxr-xr-x | tools/check_typed_item_calls.py | 254 |
1 files changed, 254 insertions, 0 deletions
diff --git a/tools/check_typed_item_calls.py b/tools/check_typed_item_calls.py new file mode 100755 index 0000000000..9714ab788b --- /dev/null +++ b/tools/check_typed_item_calls.py @@ -0,0 +1,254 @@ +#!/usr/bin/env python3 +# Wireshark - Network traffic analyzer +# By Gerald Combs <gerald@wireshark.org> +# Copyright 1998 Gerald Combs +# +# SPDX-License-Identifier: GPL-2.0-or-later + +import os +import re +import argparse +import signal +import subprocess + +# This utility scans the dissector code for proto_tree_add_...() calls constrain the type of the +# item added, and checks that the used item is acceptable. +# +# Note that this can only work where the hf_item variable is passed in directly - where it +# is assigned to a different variable it isn't tracked. + +# TODO: +# Attempt to check length (where literal value is given). Arg position differs among functions. +# Currently assuming we'll find call + first 2 args in same line... +# Attempt to check for allowed encoding types (most likely will be literal values |'d)? + + +# Try to exit soon after Ctrl-C is pressed. +should_exit = False + +def signal_handler(sig, frame): + global should_exit + should_exit = True + print('You pressed Ctrl+C - exiting') + +signal.signal(signal.SIGINT, signal_handler) + + +issues_found = 0 + +# A call is an individual call to an API we are interested in. +# Internal to APICheck below. +class Call: + def __init__(self, hf_name, line_number): + self.hf_name = hf_name + self.line_number = line_number + + +# A check for a particular API function. +class APICheck: + def __init__(self, fun_name, allowed_types): + self.fun_name = fun_name + self.allowed_types = allowed_types + self.calls = [] + # RE captures function name + 1st 2 args (always tree + hfindex) + self.p = re.compile('.*' + self.fun_name + '\(([a-zA-Z0-9_]+),\s*([a-zA-Z0-9_]+)') + self.file = None + + def find_calls(self, file): + self.file = file + self.calls = [] + with open(file, 'r') as f: + for line_number, line in enumerate(f, start=1): + m = self.p.match(line) + if m: + self.calls.append(Call(m.group(2), line_number)) + + def check_against_items(self, items): + for call in self.calls: + if call.hf_name in items: + if not items[call.hf_name].item_type in self.allowed_types: + # Report this issue. + print('Error: ' + self.fun_name + '(.., ' + call.hf_name + ', ...) called at ' + + self.file + ':' + str(call.line_number) + + ' with type ' + items[call.hf_name].item_type) + print(' (allowed types are', self.allowed_types, ')\n') + # Inc global count of issues found. + global issues_found + issues_found += 1 + + +# The relevant parts of an hf item. Used as value in dict where hf variable name is key. +class Item: + def __init__(self, name, item_type): + self.name = name + self.item_type = item_type + + + +# These are APIs in proto.c that check a set of types at runtime and can print '.. is not of type ..' to the console +# if the type is not suitable. +apiChecks = [] +apiChecks.append(APICheck('proto_tree_add_item_ret_uint', { 'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32'})) +apiChecks.append(APICheck('proto_tree_add_item_ret_int', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32'})) +apiChecks.append(APICheck('ptvcursor_add_ret_uint', { 'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32'})) +apiChecks.append(APICheck('ptvcursor_add_ret_int', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32'})) +apiChecks.append(APICheck('ptvcursor_add_ret_string', { 'FT_STRING', 'FT_STRINGZ', 'FT_UINT_STRING', 'FT_STRINGZPAD'})) +apiChecks.append(APICheck('ptvcursor_add_ret_boolean', { 'FT_BOOLEAN'})) +apiChecks.append(APICheck('proto_tree_add_item_ret_uint64', { 'FT_UINT40', 'FT_UINT48', 'FT_UINT56', 'FT_UINT64'})) +apiChecks.append(APICheck('proto_tree_add_item_ret_int64', { 'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64'})) +apiChecks.append(APICheck('proto_tree_add_item_ret_boolean', { 'FT_BOOLEAN'})) +apiChecks.append(APICheck('proto_tree_add_item_ret_string_and_length', { 'FT_STRING', 'FT_STRINGZ', 'FT_UINT_STRING', 'FT_STRINGZPAD'})) +apiChecks.append(APICheck('proto_tree_add_item_ret_display_string_and_length', { 'FT_STRING', 'FT_STRINGZ', 'FT_UINT_STRING', + 'FT_STRINGZPAD', 'FT_BYTES', 'FT_UINT_BYTES'})) +apiChecks.append(APICheck('proto_tree_add_item_ret_time_string', { 'FT_ABSOLUTE_TIME', 'FT_RELATIVE_TIME'})) +apiChecks.append(APICheck('proto_tree_add_uint', { 'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32', 'FT_FRAMENUM'})) +apiChecks.append(APICheck('proto_tree_add_uint_format_value', { 'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32', 'FT_FRAMENUM'})) +apiChecks.append(APICheck('proto_tree_add_uint_format', { 'FT_CHAR', 'FT_UINT8', 'FT_UINT16', 'FT_UINT24', 'FT_UINT32', 'FT_FRAMENUM'})) +apiChecks.append(APICheck('proto_tree_add_uint64', { 'FT_UINT40', 'FT_UINT48', 'FT_UINT56', 'FT_UINT64', 'FT_FRAMENUM'})) +apiChecks.append(APICheck('proto_tree_add_int64', { 'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64'})) +apiChecks.append(APICheck('proto_tree_add_int64_format_value', { 'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64'})) +apiChecks.append(APICheck('proto_tree_add_int64_format', { 'FT_INT40', 'FT_INT48', 'FT_INT56', 'FT_INT64'})) +apiChecks.append(APICheck('proto_tree_add_int', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32'})) +apiChecks.append(APICheck('proto_tree_add_int_format_value', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32'})) +apiChecks.append(APICheck('proto_tree_add_int_format', { 'FT_INT8', 'FT_INT16', 'FT_INT24', 'FT_INT32'})) +apiChecks.append(APICheck('proto_tree_add_boolean', { 'FT_BOOLEAN'})) +apiChecks.append(APICheck('proto_tree_add_boolean64', { 'FT_BOOLEAN'})) +apiChecks.append(APICheck('proto_tree_add_float', { 'FT_FLOAT'})) +apiChecks.append(APICheck('proto_tree_add_float_format', { 'FT_FLOAT'})) +apiChecks.append(APICheck('proto_tree_add_float_format_value', { 'FT_FLOAT'})) +apiChecks.append(APICheck('proto_tree_add_double', { 'FT_DOUBLE'})) +apiChecks.append(APICheck('proto_tree_add_double_format', { 'FT_DOUBLE'})) +apiChecks.append(APICheck('proto_tree_add_double_format_value', { 'FT_DOUBLE'})) +apiChecks.append(APICheck('proto_tree_add_string', { 'FT_STRING', 'FT_STRINGZ', 'FT_STRINGZPAD'})) +apiChecks.append(APICheck('proto_tree_add_string_format', { 'FT_STRING', 'FT_STRINGZ', 'FT_STRINGZPAD'})) +apiChecks.append(APICheck('proto_tree_add_string_format_value', { 'FT_STRING', 'FT_STRINGZ', 'FT_STRINGZPAD'})) +apiChecks.append(APICheck('proto_tree_add_guid', { 'FT_GUID'})) +apiChecks.append(APICheck('proto_tree_add_oid', { 'FT_OID'})) +apiChecks.append(APICheck('proto_tree_add_none_format', { 'FT_NONE'})) +# TODO: add proto_tree_add_ret_varint, eui APIs, uint64_bits, float_bits, boolean_bits? + + +def removeComments(code_string): + code_string = re.sub(re.compile("/\*.*?\*/",re.DOTALL ) ,"" ,code_string) # C-style comment + code_string = re.sub(re.compile("//.*?\n" ) ,"" ,code_string) # C++-style comment + return code_string + +# Look for hf items in a dissector file. +def find_items(filename): + items = {} + with open(filename, 'r') as f: + contents = f.read() + # Remove comments so as not to trip up RE. + contents = removeComments(contents) + matches = re.finditer(r'.*\{\s*\&(hf_.*),\s*{\s*\".+\",\s*\"([a-zA-Z0-9_\-\.]+)\",\s*([A-Z0-9_]*)', contents) + for m in matches: + # Store this item. + hf = m.group(1) + name = m.group(2) + item_type = m.group(3) + items[hf] = Item(name=name, item_type=item_type) + return items + + + +def isDissectorFile(filename): + p = re.compile('.*packet-.*\.c') + return p.match(filename) + +def findDissectorFilesInFolder(folder): + # Look at files in sorted order, to give some idea of how far through is. + files = [] + + for f in sorted(os.listdir(folder)): + if should_exit: + return + if isDissectorFile(f): + filename = os.path.join(folder, f) + files.append(filename) + return files + + +# Check the given dissector file. +def checkFile(filename): + # Find important parts of items. + items = find_items(filename) + + # Check each API + for c in apiChecks: + c.find_calls(filename) + c.check_against_items(items) + + +################################################################# +# Main logic. + +# command-line args. Controls which dissector files should be checked. +# If no args given, will just scan epan/dissectors folder. +parser = argparse.ArgumentParser(description='Check calls in dissectors') +parser.add_argument('--file', action='store', default='', + help='specify individual dissector file to test') +parser.add_argument('--commits', action='store', + help='last N commits to check') +parser.add_argument('--open', action='store_true', + help='check open files') + +args = parser.parse_args() + + +# Get files from wherever command-line args indicate. +files = [] +if args.file: + # Add single specified file.. + if not args.file.startswith('epan'): + files.append(os.path.join('epan', 'dissectors', args.file)) + else: + files.append(args.file) +elif args.commits: + # Get files affected by specified number of commits. + command = ['git', 'diff', '--name-only', 'HEAD~' + args.commits] + files = [f.decode('utf-8') + for f in subprocess.check_output(command).splitlines()] + # Will examine dissector files only + files = list(filter(lambda f : isDissectorFile(f), files)) +elif args.open: + # Unstaged changes. + command = ['git', 'diff', '--name-only'] + files = [f.decode('utf-8') + for f in subprocess.check_output(command).splitlines()] + # Only interested in dissector files. + files = list(filter(lambda f : isDissectorFile(f), files)) + # Staged changes. + command = ['git', 'diff', '--staged', '--name-only'] + files_staged = [f.decode('utf-8') + for f in subprocess.check_output(command).splitlines()] + # Only interested in dissector files. + files_staged = list(filter(lambda f : isDissectorFile(f), files_staged)) + for f in files: + files.append(f) + for f in files_staged: + if not f in files: + files.append(f) +else: + # Find all dissector files from folder. + files = findDissectorFilesInFolder(os.path.join('epan', 'dissectors')) + + +# If scanning a subset of files, list them here. +print('Examining:') +if args.file or args.commits or args.open: + if files: + print(' '.join(files), '\n') + else: + print('No files to check.\n') +else: + print('All dissector modules\n') + + +# Now check the files. +for f in files: + if should_exit: + exit(1) + checkFile(f) + +# Show summary. +print(issues_found, 'issues found') |