aboutsummaryrefslogtreecommitdiffstats
path: root/tools/check_static.py
blob: 86a384b5dd552da450ba2da2f566c74fc8dc3e0e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
#!/usr/bin/env python3
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later

import os
import re
import subprocess
import argparse
import signal

# Look for dissector symbols that could/should be static.
# This will not run on Windows (it runs 'nm' on the object files in the build folder).

# Set by the SIGINT handler below; the folder-scanning and checking loops in
# this script poll it so that they can stop soon after Ctrl-C is pressed.
should_exit = False


def signal_handler(sig, frame):
    """Record that Ctrl-C was pressed by setting the should_exit flag.

    Takes the (sig, frame) arguments that signal.signal() passes to a
    handler; neither of them is used.
    """
    global should_exit
    should_exit = True
    print('You pressed Ctrl+C - exiting')


signal.signal(signal.SIGINT, signal_handler)

# Default build folder name: the current working directory with '-build'
# appended.  Replaced by the value of --build when that option is given.
build_folder = '{}-build'.format(os.getcwd())

# Record which symbols are referred to (by a set of files).
class CalledSymbols:
    """Record which symbols are referred to by a set of source files.

    The information comes from running 'nm' on the object file that the
    build produced for each source file.
    """

    # nm lines might or might not have an address before the type letter and
    # the symbol name, so the two layouts are tried in turn.
    _with_address = re.compile(r'[0-9a-f]* ([a-zA-Z]) (.*)')
    _without_address = re.compile(r'[ ]* ([a-zA-Z]) (.*)')

    def __init__(self):
        # Names of every symbol seen as an undefined reference ('U').
        self.referred = set()

    def addCalls(self, file):
        """Add the undefined symbols of file's object file to self.referred.

        file is a source path relative to the source tree; the object file
        is looked up under the module-level build_folder.  If the object
        file does not exist a warning is printed and nothing is added.

        Raises whatever subprocess.check_output() raises if 'nm' cannot be
        run or reports failure.
        """
        source_dir = os.path.dirname(file)
        object_name = os.path.basename(file) + '.o'

        # Work out where the build put the object for this source file.
        if 'ui/cli' in file:
            # A tshark target-only file
            object_file = os.path.join(build_folder, 'CMakeFiles', 'tshark.dir', file + '.o')
        elif 'ui/qt' in file:
            object_file = os.path.join(build_folder, source_dir, 'CMakeFiles', 'qtui.dir', object_name)
        else:
            last_dir = os.path.split(source_dir)[-1]
            object_file = os.path.join(build_folder, source_dir, 'CMakeFiles', last_dir + '.dir', object_name)

        # Make sure that file is built.
        if not os.path.exists(object_file):
            print('Warning -', object_file, 'does not exist')
            return

        for raw_line in subprocess.check_output(['nm', object_file]).splitlines():
            # Decode properly rather than slicing the repr() of the bytes,
            # which would backslash-escape any unusual bytes in a name.
            line = raw_line.decode('utf-8', errors='replace')
            m = self._with_address.match(line) or self._without_address.match(line)
            if not m:
                continue
            letter, symbol_name = m.group(1), m.group(2)
            # Only interested in undefined references to symbols.
            if letter == 'U':
                self.referred.add(symbol_name)



# Record which symbols are defined in a single file.
class DefinedSymbols:
    """Record which global symbols are defined in a single dissector file."""

    # nm line layout: '<hex address> <type letter> <symbol name>'.
    _nm_line = re.compile(r'[0-9a-f]* ([a-zA-Z]) (.*)')

    # 'Common' header files that don't match the dissector file name.
    _common_mismatched_headers = (
        os.path.join('epan', 'dissectors', 'packet-ncp-int.h'),
        os.path.join('epan', 'dissectors', 'packet-mq.h'),
        os.path.join('epan', 'dissectors', 'packet-ip.h'),
        os.path.join('epan', 'dissectors', 'packet-gsm_a_common.h'),
        os.path.join('epan', 'dissectors', 'packet-epl.h'),
        os.path.join('epan', 'dissectors', 'packet-bluetooth.h'),
        os.path.join('epan', 'dissectors', 'packet-dcerpc.h'),
        os.path.join('epan', 'ip_opts.h'),
        os.path.join('epan', 'eap.h'),
    )

    def __init__(self, file):
        """Read the symbols that file's object file defines.

        file is the path of a dissector .c file.  Its object file is looked
        up in the dissectors target of the module-level build_folder; if it
        is missing a warning is printed and the instance is left with no
        symbols and no header contents.
        """
        self.filename = file
        # Maps symbol name -> the nm output line that listed it.
        self.global_dict = {}
        # Text of the header with the same base name, or None if not read.
        self.header_file_contents = None

        # Make sure that file is built.
        object_file = os.path.join(build_folder, 'epan', 'dissectors', 'CMakeFiles', 'dissectors.dir', os.path.basename(file) + '.o')
        if not os.path.exists(object_file):
            print('Warning -', object_file, 'does not exist')
            return

        # Only swap the extension; a plain str.replace() would also rewrite
        # any other '.c' in the path (e.g. a '.cache' directory).
        header_file = os.path.splitext(file)[0] + '.h'
        try:
            with open(header_file, 'r') as header:
                self.header_file_contents = header.read()
        except IOError:
            # Not every dissector has a header of its own.
            pass

        for raw_line in subprocess.check_output(['nm', object_file]).splitlines():
            # Decode properly rather than slicing the repr() of the bytes.
            line = raw_line.decode('utf-8', errors='replace')
            m = self._nm_line.match(line)
            if m:
                letter, symbol_name = m.group(1), m.group(2)
                # Locally-defined symbols.
                if letter in ('T', 'D'):
                    self.add(symbol_name, line)

    def add(self, letter, function_name):
        """Store function_name in global_dict under the key letter.

        NOTE: despite the parameter names, __init__ passes the symbol name
        as the first argument and the whole nm output line as the second,
        so global_dict maps symbol name -> nm line.
        """
        self.global_dict[letter] = function_name

    def mentionedInHeaders(self, symbol):
        """Return True if symbol occurs in the dissector's own header or in
        one of the common headers, otherwise False.

        This is a plain substring search.  Common headers that cannot be
        read are skipped.
        """
        if self.header_file_contents:
            if symbol in self.header_file_contents:
                return True
        # Also check some of the 'common' header files that don't match the dissector file name.
        # TODO: could cache the contents of these files, but it's not that slow.
        for hf in self._common_mismatched_headers:
            try:
                with open(hf) as header:
                    contents = header.read()
            except EnvironmentError:
                continue
            if symbol in contents:
                return True

        return False

    def check(self, called_symbols):
        """Report every recorded symbol that is not in called_symbols.

        One line is printed per unreferenced symbol (noting whether it is
        mentioned in a header), and the module-level issues_found counter
        is incremented for each.
        """
        global issues_found
        for symbol, nm_line in self.global_dict.items():
            if symbol not in called_symbols:
                mentioned_in_header = self.mentionedInHeaders(symbol)
                print(self.filename, '(' + nm_line + ')', 'is not referred to so could be static?', '(in header)' if mentioned_in_header else '')
                issues_found += 1



# Helper functions.

def isDissectorFile(filename):
    """Return a match object if filename is named like a dissector source.

    filename is a bare file name (no directory part).  It must have the form
    'packet-*.c' or 'file-*.c'.  The whole name has to match, so backup or
    related files such as 'packet-foo.c.orig' or 'packet-foo.cnf' are
    rejected.  Returns None when the name does not match.
    """
    return re.fullmatch(r'(packet|file)-.*\.c', filename)

# Test for whether the given dissector file was automatically generated.
def isGeneratedFile(filename):
    """Return True if the given .c file says that it was generated.

    Only files whose name ends in '.c' are opened; anything else gives
    False.  The comment saying that a file is generated is near the top, so
    only the first 11 lines are searched for one of the known marker
    phrases.

    Raises OSError if the file cannot be opened.
    """
    if not filename.endswith('.c'):
        return False

    markers = ('Generated automatically',
               'Autogenerated from',
               'is autogenerated',
               'automatically generated by Pidl',
               'Created by: The Qt Meta Object Compiler',
               'This file was generated',
               'This filter was automatically generated')

    # The context manager closes the file on every path, including errors.
    # Undecodable bytes are replaced rather than aborting the scan; the
    # markers themselves are plain ASCII.
    with open(filename, 'r', errors='replace') as f_read:
        for line_number, line in enumerate(f_read):
            # Give up once we get a few lines down.
            if line_number > 10:
                break
            if any(marker in line for marker in markers):
                return True

    # OK, looks like a hand-written file!
    return False


def findDissectorFilesInFolder(folder, include_generated):
    """Return the paths of the dissector source files found in folder.

    Files are visited in sorted order, to give some idea of how far through
    the scan is.  Generated files (as judged by isGeneratedFile()) are left
    out unless include_generated is true.

    If Ctrl-C is pressed during the scan (should_exit set) an empty list is
    returned, so that callers can always iterate over the result.
    """
    tmp_files = []

    for f in sorted(os.listdir(folder)):
        if should_exit:
            return []
        if not isDissectorFile(f):
            continue
        # Test the file in the folder being scanned, not a hard-coded one.
        filename = os.path.join(folder, f)
        if include_generated or not isGeneratedFile(filename):
            tmp_files.append(filename)
    return tmp_files

def findFilesInFolder(folder):
    """Return the paths of the .c and .cpp files found directly in folder.

    Files are visited in sorted order, to give some idea of how far through
    the scan is.  If Ctrl-C is pressed during the scan (should_exit set) an
    empty list is returned, so that callers can always iterate over the
    result.
    """
    tmp_files = []

    for f in sorted(os.listdir(folder)):
        if should_exit:
            return []
        if f.endswith(('.c', '.cpp')):
            tmp_files.append(os.path.join(folder, f))
    return tmp_files


def is_dissector_file(filename):
    """Return a match object if the path filename names a 'packet-*.c' file.

    filename may include leading directories (it is applied to paths listed
    by 'git diff --name-only').  The whole path has to match, so names that
    merely contain '.c', such as 'packet-foo.c.orig' or 'packet-foo.cnf',
    are rejected.  Returns None when the path does not match.
    """
    return re.fullmatch(r'.*packet-.*\.c', filename)


# Number of symbols reported as possibly static.  Incremented by
# DefinedSymbols.check() and printed in the summary at the end of the run.
issues_found = 0



#################################################################
# Main logic.

# Command-line options; these control which dissector files get checked.
# With none of --file, --commits or --open, the whole epan/dissectors folder
# is scanned.
parser = argparse.ArgumentParser(description='Check calls in dissectors')
parser.add_argument('--build', default='', help='build folder')
parser.add_argument('--file', default='',
                    help='specify individual dissector file to test')
parser.add_argument('--commits', help='last N commits to check')
parser.add_argument('--open', action='store_true', help='check open files')

args = parser.parse_args()


# Get files from wherever command-line args indicate.
def _dissector_files_from_git(command):
    """Run the given git command, which must print one file name per line,
    and return the dissector files among the names it lists."""
    names = [f.decode('utf-8')
             for f in subprocess.check_output(command).splitlines()]
    return [name for name in names if is_dissector_file(name)]


files = []

if args.build:
    build_folder = args.build

if args.file:
    # Add single specified file..
    if not args.file.startswith('epan'):
        files.append(os.path.join('epan', 'dissectors', args.file))
    else:
        files.append(args.file)
elif args.commits:
    # Dissector files affected by specified number of commits.
    files = _dissector_files_from_git(['git', 'diff', '--name-only', 'HEAD~' + args.commits])
elif args.open:
    # Dissector files with unstaged changes...
    files = _dissector_files_from_git(['git', 'diff', '--name-only'])
    # ...followed by any with staged changes that are not already listed.
    # (The list must not be appended to while it is itself being iterated
    # over, as that loop would never finish.)
    for f in _dissector_files_from_git(['git', 'diff', '--staged', '--name-only']):
        if f not in files:
            files.append(f)
else:
    # Find all dissector files from folder.  The scan may give None if it is
    # interrupted by Ctrl-C, so fall back to an empty list.
    files = findDissectorFilesInFolder(os.path.join('epan', 'dissectors'),
                                       include_generated=False) or []


# Say what is about to be examined.  The individual files are only listed
# when a subset was selected on the command line.
print('Examining:')
checking_subset = args.file or args.commits or args.open
if not checking_subset:
    print('All dissector modules\n')
elif files:
    print(' '.join(files), '\n')
else:
    print('No files to check.\n')


# The object files are looked up under the build folder, so it has to exist.
if not os.path.isdir(build_folder):
    print('Build directory not valid', build_folder, '- please set with --build')
    exit(1)


# Get the set of called functions and referred-to data.
# The folder scans may give None once Ctrl-C has been pressed, hence the
# 'or []' guards: iterating over None would raise a TypeError.
called = CalledSymbols()
dissectors_folder = os.path.join('epan', 'dissectors')
for d in findDissectorFilesInFolder(dissectors_folder, include_generated=True) or []:
    called.addCalls(d)
called.addCalls(os.path.join(dissectors_folder, 'dissectors.c'))
# Also check calls from GUI code (ui, ui/qt) and from tshark (ui/cli).
for gui_folder in ('ui', os.path.join('ui', 'qt'), os.path.join('ui', 'cli')):
    for d in findFilesInFolder(gui_folder) or []:
        called.addCalls(d)

# An interrupted scan leaves the set of references incomplete, which would
# make the checks below report symbols that are in fact used.
if should_exit:
    raise SystemExit(1)


# Now check identified files.  'files' may be None if the folder scan that
# produced it was interrupted, so treat that as nothing to check.
for f in files or []:
    if should_exit:
        # Raise SystemExit directly; the exit() helper is only installed by
        # the site module for interactive use.
        raise SystemExit(1)
    DefinedSymbols(f).check(called.referred)

# Show summary.
print(issues_found, 'issues found')