scripts/verify_log_statements.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140

#!/usr/bin/env python3
__doc__ = '''
With regex magic, try to pinpoint all LOG* macro calls that lack a final newline.
Also find those that have non-printable characters or extra newlines.

Usage:

  ./verify_log_statements.py [-d|--debug] [dir] [file] [...]

Without args, default to '.'
'''

import re
import sys
import codecs
import os.path

# This regex matches the entire LOGxx(...) statement over multiple lines.
# It pinpoints the format string by looking for the first arg that contains quotes.
# It then matches any number of separate quoted strings, and accepts 0 or more args after that.
# Group 2 holds the quoted format string part (possibly several adjacent string constants).
log_statement_re = re.compile(r'^[ \t]*LOG[_A-Z]+\(([^";,]*,)*[ \t\r\n]*(("[^"]*"[^";,]*)*)(,[^;]*|)\);',
                              re.MULTILINE | re.DOTALL)
# Matches text that starts with 'fmt', or with a quoted string that is followed by 'fmt'.
fmt_re = re.compile(r'("[^"]*".*)*fmt')
# Matches an OSMO_STRINGIFY...(...) macro invocation up to its first closing paren.
# Raw string: '\(' and '\)' are not valid escapes in a plain string literal.
osmo_stringify_re = re.compile(r"OSMO_STRINGIFY[_A-Z]*\([^)]*\)")

# Debug mode is on when either spelling of the flag appears anywhere on the command line.
debug = any(flag in sys.argv for flag in ('-d', '--debug'))

# Everything that is not a debug flag is a path to check; fall back to the current dir.
args = [arg for arg in sys.argv[1:] if arg not in ('-d', '--debug')] or ['.']

class error_found:
  '''
  Record of one finding in a scanned file.

  f:       path of the file the finding belongs to
  charpos: character offset of the finding within the file content
  msg:     short description of the finding
  text:    the offending source text
  line:    line number; not known at construction time, so it starts out as None
  '''
  def __init__(self, f, charpos, msg, text):
    self.f, self.charpos, self.msg, self.text = f, charpos, msg, text
    self.line = None

def make_line_idx(file_content):
  '''
  Build a lookup table from character offsets to line numbers.

  Returns a list of (offset, line_nr) tuples. The first entry is (0, 1); after
  that, one entry is added per '\\n'-separated line, holding the offset just
  past that line's text (where its newline sits) and the number of the line
  that follows.
  '''
  idx = [(0, 1)]
  offset = 0
  # Line numbers start at 2 here: each entry names the line *after* the one just measured.
  for following_nr, text in enumerate(file_content.split('\n'), start=2):
    offset += len(text)
    idx.append((offset, following_nr))
    offset += 1 # step over the newline char
  return idx

def char_pos_2_line(line_idx, sorted_char_positions):
  '''
  Translate character offsets into line numbers.

  line_idx: list of (offset, line_nr) tuples with ascending offsets.
  sorted_char_positions: character offsets in ascending order; the index into
    line_idx only ever moves forward, so unsorted input gives wrong results.

  Returns a list with one line number per given character position.
  '''
  r = []
  line_i = 0
  last_i = len(line_idx) - 1
  for char_pos in sorted_char_positions:
    # Advance while the position lies beyond the next entry's offset.
    while line_i < last_i and char_pos > line_idx[line_i+1][0]:
      line_i += 1
    r.append(line_idx[line_i][1])
  return r

def check_file(f):
  '''
  Scan one C/C++ source file for malformed LOG*() format strings.

  f: path of the file to check. Files not ending in .h, .c or .cpp are skipped.

  Returns a list of error_found instances with their .line filled in; the list
  is empty when nothing was found. In debug mode the list additionally holds
  informational 'Skipping define' and 'Ignoring' entries.

  Any exception is announced on stderr together with the file name and then
  re-raised.
  '''
  if not f.endswith(('.h', '.c', '.cpp')):
    return []

  try:
    errors_found = []

    # errors='ignore': undecodable bytes are dropped instead of aborting the scan.
    with codecs.open(f, "r", "utf-8", errors='ignore') as src:
      file_content = src.read()

    for log in log_statement_re.finditer(file_content):
      quoted = log.group(2)

      # Skip 'LOG("bla" fmt )' strings that typically appear as #defines.
      if fmt_re.match(quoted):
        if debug:
          errors_found.append(error_found(f, log.start(), 'Skipping define', log.group(0)))
        continue

      # Drop PRI* parts of 'LOG("bla %"PRIu64" foo")'
      for n in (16,32,64):
        quoted = quoted.replace('PRIu' + str(n), '')
        quoted = quoted.replace('PRId' + str(n), '')
      quoted = ''.join(osmo_stringify_re.split(quoted))

      # Use py eval to join separate string constants: drop any tabs/newlines
      # that are not in quotes, between separate string constants.
      # NOTE(review): eval() executes text taken from the scanned file; only run
      # this script on trusted source trees.
      try:
        quoted = eval('(' + quoted + '\n)' )
      except Exception:
        # hopefully eval broke because of some '## args' macro def
        quoted = None

      # A non-string result (e.g. the empty tuple for a LOG call without any
      # format string) cannot be checked either; treat it like an eval failure.
      if not isinstance(quoted, str):
        if debug:
          errors_found.append(error_found(f, log.start(), 'Ignoring', log.group(0)))
        continue

      # check for errors...

      # final newline
      if not quoted.endswith('\n'):
        errors_found.append(error_found(f, log.start(), 'Missing final newline', log.group(0)))

      # disallowed chars and extra newlines
      for c in quoted[:-1]:
        if not c.isprintable() and not c == '\t':
          if c == '\n':
            msg = 'Extraneous newline'
          else:
            msg = 'Illegal char'
          errors_found.append(error_found(f, log.start(), msg + ' %r' % c, log.group(0)))

    if not errors_found:
      return []

    # Matches were found in file order, so the char positions are already sorted.
    line_idx = make_line_idx(file_content)
    for r, line in zip(errors_found, char_pos_2_line(line_idx, [rr.charpos for rr in errors_found])):
      r.line = line

    return errors_found
  except:
    print("ERROR WHILE PROCESSING %r" % f, file=sys.stderr)
    raise

# Gather the findings for every path given on the command line.
all_errors_found = []
for f in args:
  if not os.path.isdir(f):
    # Plain path: hand it straight to check_file.
    all_errors_found.extend(check_file(f))
    continue
  # Directory: visit every file below it (bottom-up, since topdown is false).
  for parent_path, subdirs, files in os.walk(f, topdown=False, onerror=None):
    for ff in files:
      all_errors_found.extend(check_file(os.path.join(parent_path, ff)))

def print_errors(errs):
  '''
  Print each finding to stdout as "<msg>: <file>:<line>", followed by the
  offending text and a blank line. An unset line number is shown as 0.
  '''
  for finding in errs:
    line_nr = finding.line if finding.line else 0
    print('%s: %s:%d\n%s\n' % (finding.msg, finding.f, line_nr, finding.text))

print_errors(all_errors_found)

# The exit status is the number of findings, capped at 255: a process exit
# status only keeps the low 8 bits, so e.g. 256 findings would otherwise be
# reported as 0 (success).
sys.exit(min(len(all_errors_found), 255))

# vim: tabstop=2 shiftwidth=2 expandtab