scripts/verify_log_statements.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137

#!/usr/bin/env python3
__doc__ = '''
With regex magic, try to pinpoint all LOG* macro calls that lack a final newline.
Also find those that have non-printable characters or extra newlines.

Usage:

  ./verify_log_statements.py [-d|--debug] [dir] [file] [...]

Without args, default to '.'
'''

import re
import sys
import codecs
import os.path

# This regex matches the entire LOGxx(...) statement over multiple lines.
# It pinpoints the format string by looking for the first arg that contains quotes.
# It then matches any number of separate quoted strings, and accepts 0 or more args after that.
# Group 2 is the run of quoted string constants, i.e. the format string.
log_statement_re = re.compile(r'^[ \t]*LOG[_A-Z]+\(([^";,]*,)*[ \t\r\n]*(("[^"]*"[^";,]*)*)(,[^;]*|)\);',
                              re.MULTILINE | re.DOTALL)
# Matches a format string that has a bare 'fmt' token after its (optional)
# leading quoted parts; check_file() uses it to skip macro definitions.
fmt_re = re.compile(r'("[^"]*".*)*fmt')
# Matches OSMO_STRINGIFY...(...) invocations sitting between string constants.
# Must be a raw string: '\(' and '\)' are regex escapes, not valid Python
# string escapes, and a plain string literal triggers an invalid-escape warning.
osmo_stringify_re = re.compile(r"OSMO_STRINGIFY[_A-Z]*\([^)]*\)")

# Debug mode is enabled by '-d' or '--debug' anywhere on the command line.
debug = ('-d' in sys.argv) or ('--debug' in sys.argv)

# Everything else on the command line is a file or directory to check;
# without any such argument, check the current directory.
args = [x for x in sys.argv[1:] if not (x == '-d' or x == '--debug')]
if not args:
  args = ['.']

class error_found:
  '''
  One finding about a log statement.

  f:       path of the file the statement was found in
  charpos: character offset of the statement within the file content
  msg:     short description of the finding
  text:    the text of the offending statement
  line:    line number of the statement; None until it gets resolved
           (check_file() assigns it from charpos)
  '''
  def __init__(self, f, charpos, msg, text):
    self.f, self.charpos, self.msg, self.text, self.line = (
      f, charpos, msg, text, None)

def make_line_idx(file_content):
  '''
  Build a lookup table from character offsets to line numbers.

  Returns a list of (pos, line_nr) tuples. The first entry is (0, 1); each
  following entry (pos, n) holds the offset at which line n-1 ends (where its
  newline char sits), so that chars at offsets greater than pos belong to
  line n or a later one.
  '''
  index = [(0, 1)]
  offset = 0
  # The first line's end marks the start of line 2, hence start=2.
  for next_line_nr, text in enumerate(file_content.split('\n'), start=2):
    offset += len(text)
    index.append((offset, next_line_nr))
    offset += 1 # step over the newline char
  return index

def char_pos_2_line(line_idx, sorted_char_positions):
  '''
  Translate character offsets to line numbers.

  line_idx: list of (pos, line_nr) tuples as returned by make_line_idx().
  sorted_char_positions: character offsets in ascending order; the table is
    walked only once, front to back, so the order matters.

  Returns a list with one line number per given offset, in the same order.
  '''
  line_numbers = []
  cursor = 0
  last = len(line_idx) - 1
  for char_pos in sorted_char_positions:
    # Advance while the next table entry still lies before this offset.
    while cursor < last and line_idx[cursor + 1][0] < char_pos:
      cursor += 1
    line_numbers.append(line_idx[cursor][1])
  return line_numbers

def check_file(f):
  '''
  Scan one C/C++ source or header file for questionable LOG* format strings.

  f: path of the file to check; files not ending in .h, .c or .cpp are skipped.

  Returns a list of error_found instances with their .line resolved. The list
  is empty if the file was skipped or nothing was found. When the global debug
  flag is set, statements that were skipped or could not be evaluated are
  reported as entries as well ('Skipping define' / 'Ignoring').
  '''
  if not f.endswith(('.h', '.c', '.cpp')):
    return []

  errors_found = []

  # Read via a context manager so that the file handle gets closed right away.
  with codecs.open(f, "r", "utf-8") as src:
    file_content = src.read()

  for log in log_statement_re.finditer(file_content):
    quoted = log.group(2)

    # Skip 'LOG("bla" fmt )' strings that typically appear as #defines.
    if fmt_re.match(quoted):
      if debug:
        errors_found.append(error_found(f, log.start(), 'Skipping define', log.group(0)))
      continue

    # Drop PRI* parts of 'LOG("bla %"PRIu64" foo")'
    for n in (16,32,64):
      quoted = quoted.replace('PRIu' + str(n), '')
      quoted = quoted.replace('PRId' + str(n), '')
    quoted = ''.join(osmo_stringify_re.split(quoted))

    # Use py eval to join separate string constants: drop any tabs/newlines
    # that are not in quotes, between separate string constants.
    # NOTE(review): eval() executes text taken from the scanned file, so this
    # script must only be run on trusted source trees.
    try:
      quoted = eval('(' + quoted + '\n)' )
    except Exception:
      # hopefully eval broke because of some '## args' macro def
      if debug:
        errors_found.append(error_found(f, log.start(), 'Ignoring', log.group(0)))
      continue

    if not isinstance(quoted, str):
      # A statement without any string constant, like 'LOG_FOO();', evaluates
      # to an empty tuple: there is no format string to check.
      if debug:
        errors_found.append(error_found(f, log.start(), 'Ignoring', log.group(0)))
      continue

    # check for errors...

    # final newline
    if not quoted.endswith('\n'):
      errors_found.append(error_found(f, log.start(), 'Missing final newline', log.group(0)))

    # disallowed chars and extra newlines
    # (the last char is left out: it is expected to be the final newline)
    for c in quoted[:-1]:
      if not c.isprintable() and c != '\t':
        if c == '\n':
          msg = 'Extraneous newline'
        else:
          msg = 'Illegal char'
        errors_found.append(error_found(f, log.start(), msg + ' %r' % c, log.group(0)))

  if not errors_found:
    return []

  # Entries were appended in match order, hence their char positions are
  # already ascending, as char_pos_2_line() requires.
  line_idx = make_line_idx(file_content)
  for r, line in zip(errors_found, char_pos_2_line(line_idx, [rr.charpos for rr in errors_found])):
    r.line = line

  return errors_found


# Check every path given on the command line and collect all findings.
all_errors_found = []
for f in args:
  if not os.path.isdir(f):
    # A plain file is checked directly.
    all_errors_found.extend(check_file(f))
    continue
  # A directory is searched recursively. Note that topdown=None is falsy,
  # so os.walk() reports subdirectories before their parents.
  for parent_path, subdirs, files in os.walk(f, None, None):
    for ff in files:
      all_errors_found.extend(check_file(os.path.join(parent_path, ff)))

def print_errors(errs):
  '''
  Print a report for each finding in errs to stdout.

  Each report consists of the message, file and line number on one line,
  the statement text below it, and a trailing blank line. A line number
  that was never resolved (None) is printed as 0.
  '''
  for err in errs:
    line_nr = err.line or 0
    report = '%s: %s:%d\n%s\n' % (err.msg, err.f, line_nr, err.text)
    print(report)

print_errors(all_errors_found)

# The number of findings is the exit status, so that any finding makes the
# caller fail. Cap it at 255: an exit status carries only 8 bits on POSIX
# systems, so e.g. exactly 256 findings would wrap around to 0 and be
# mistaken for success.
sys.exit(min(len(all_errors_found), 255))

# vim: tabstop=2 shiftwidth=2 expandtab