blob: a32e4da4c11708dbbffd97a1467ff978b336ddd0
1 | #!/usr/bin/env python3 |
2 | |
3 | """Find Kconfig symbols that are referenced but not defined.""" |
4 | |
5 | # (c) 2014-2016 Valentin Rothberg <valentinrothberg@gmail.com> |
6 | # (c) 2014 Stefan Hengelein <stefan.hengelein@fau.de> |
7 | # |
8 | # Licensed under the terms of the GNU GPL License version 2 |
9 | |
10 | |
11 | import argparse |
12 | import difflib |
13 | import os |
14 | import re |
15 | import signal |
16 | import subprocess |
17 | import sys |
18 | from multiprocessing import Pool, cpu_count |
19 | |
20 | |
# regex expressions
# These are plain pattern strings; they are composed into larger patterns
# and compiled in the "regex objects" section below.

# Operator characters accepted inside a Kconfig expression: & ( ) | !
OPERATORS = r"&|\(|\)|\||\!"
# A run of word characters that contains at least two characters from
# [A-Z0-9] (the group, which needs one such character, must repeat twice).
SYMBOL = r"(?:\w*[A-Z0-9]\w*){2,}"
# A "config <SYMBOL>" or "menuconfig <SYMBOL>" line; group 1 is the symbol.
DEF = r"^\s*(?:menu){,1}config\s+(" + SYMBOL + r")\s*"
# One or more operators, whitespace characters or symbols.
EXPR = r"(?:" + OPERATORS + r"|\s|" + SYMBOL + r")+"
# A "default ..." clause with an optional trailing "if ..." part.
DEFAULT = r"default\s+.*?(?:if\s.+){,1}"
# A line starting with "if", "select", "depends on" or a default clause,
# followed by whitespace and an expression.
STMT = r"^\s*(?:if|select|depends\s+on|(?:" + DEFAULT + r"))\s+" + EXPR
# A CONFIG_<SYMBOL> reference with an optional leading "D"; group 1 is the
# symbol without the prefix.
# NOTE(review): the optional "D" presumably covers -DCONFIG_FOO compiler
# flags -- confirm.
SOURCE_SYMBOL = r"(?:\W|\b)+[D]{,1}CONFIG_(" + SYMBOL + r")"

# regex objects
# Paths ending in "Kconfig", optionally followed by '.', '+', '-' or word
# characters (for example a "Kconfig.debug" style suffix).
REGEX_FILE_KCONFIG = re.compile(r".*Kconfig[\.\w+\-]*$")
# SYMBOL whose start and end may not sit at a non-word-boundary position.
REGEX_SYMBOL = re.compile(r'(?!\B)' + SYMBOL + r'(?!\B)')
REGEX_SOURCE_SYMBOL = re.compile(SOURCE_SYMBOL)
REGEX_KCONFIG_DEF = re.compile(DEF)
# Compiled for completeness; not referenced by the functions visible in
# this file.
REGEX_KCONFIG_EXPR = re.compile(EXPR)
REGEX_KCONFIG_STMT = re.compile(STMT)
# An indented line consisting only of "help" or "---help---".
REGEX_KCONFIG_HELP = re.compile(r"^\s+(help|---help---)\s*$")
# Used with search(): accepts only symbols ending in an ASCII letter/digit.
REGEX_FILTER_SYMBOLS = re.compile(r"[A-Za-z0-9]$")
# Used with match(): text starting with a hexadecimal or decimal literal.
REGEX_NUMERIC = re.compile(r"0[xX][0-9a-fA-F]+|[0-9]+")
# A double-quoted string (non-greedy), removed before symbol extraction.
REGEX_QUOTES = re.compile("(\"(.*?)\")")
41 | |
42 | |
def parse_options():
    """The user interface of this module.

    Parse the command line (sys.argv) and validate the combination of
    options.  Returns the argparse namespace with the attributes commit,
    diff, find, ignore, sim, force and color.

    Exits via sys.exit() with an explanatory message when --commit and
    --diff are combined, when --diff is not of the form 'commit1..commit2',
    when a commit/diff run is requested on a dirty tree without --force, or
    when the --ignore value is not a valid Python regex.
    """
    usage = ("Run this tool to detect Kconfig symbols that are referenced but "
             "not defined in Kconfig. If no option is specified, "
             "checkkconfigsymbols defaults to check your current tree. "
             "Please note that specifying commits will 'git reset --hard' "
             "your current tree! You may save uncommitted changes to avoid "
             "losing data.")

    parser = argparse.ArgumentParser(description=usage)

    parser.add_argument('-c', '--commit', dest='commit', action='store',
                        default="",
                        help="check if the specified commit (hash) introduces "
                             "undefined Kconfig symbols")

    parser.add_argument('-d', '--diff', dest='diff', action='store',
                        default="",
                        help="diff undefined symbols between two commits "
                             "(e.g., -d commit1..commit2)")

    parser.add_argument('-f', '--find', dest='find', action='store_true',
                        default=False,
                        help="find and show commits that may cause symbols to be "
                             "missing (required to run with --diff)")

    parser.add_argument('-i', '--ignore', dest='ignore', action='store',
                        default="",
                        help="ignore files matching this Python regex "
                             "(e.g., -i '.*defconfig')")

    parser.add_argument('-s', '--sim', dest='sim', action='store', default="",
                        help="print a list of max. 10 string-similar symbols")

    parser.add_argument('--force', dest='force', action='store_true',
                        default=False,
                        help="reset current Git tree even when it's dirty")

    parser.add_argument('--no-color', dest='color', action='store_false',
                        default=True,
                        help="don't print colored output (default when not "
                             "outputting to a terminal)")

    args = parser.parse_args()

    if args.commit and args.diff:
        sys.exit("Please specify only one option at once.")

    if args.diff and not re.match(r"^[\w\-\.]+\.\.[\w\-\.]+$", args.diff):
        sys.exit("Please specify valid input in the following format: "
                 "'commit1..commit2'")

    if args.commit or args.diff:
        # Both modes run 'git reset --hard', which discards local changes.
        if not args.force and tree_is_dirty():
            sys.exit("The current Git tree is dirty (see 'git status'). "
                     "Running this script may\ndelete important data since it "
                     "calls 'git reset --hard' for some performance\nreasons. "
                     " Please run this script in a clean Git tree or pass "
                     "'--force' if you\nwant to ignore this warning and "
                     "continue.")

    if args.commit:
        # --find is only evaluated together with --diff.
        args.find = False

    if args.ignore:
        try:
            re.match(args.ignore, "this/is/just/a/test.c")
        except (re.error, OverflowError, RecursionError):
            # Only pattern-compilation failures mean "invalid regex"; a bare
            # except would also swallow KeyboardInterrupt and SystemExit.
            sys.exit("Please specify a valid Python regex.")

    return args
114 | |
115 | |
def main():
    """Main function of this module.

    Parses the options, sets the global COLOR flag, and then either prints
    symbols similar to --sim, or reports undefined Kconfig symbols for the
    current tree, for a single commit (--commit) or for a commit range
    (--diff).  In commit/diff mode the tree is hard-reset to the commits
    under inspection and reset back to the original HEAD afterwards.
    """
    global COLOR

    args = parse_options()
    COLOR = args.color and sys.stdout.isatty()

    if args.sim and not args.commit and not args.diff:
        sims = find_sims(args.sim, args.ignore)
        if sims:
            print("%s: %s" % (yel("Similar symbols"), ', '.join(sims)))
        else:
            print("%s: no similar symbols found" % yel("Similar symbols"))
        sys.exit(0)

    # dictionary of (un)defined symbols
    defined = {}
    undefined = {}

    if args.commit or args.diff:
        head = get_head()

        # get commit range
        commit_a = None
        commit_b = None
        if args.commit:
            commit_a = args.commit + "~"
            commit_b = args.commit
        elif args.diff:
            split = args.diff.split("..")
            commit_a = split[0]
            commit_b = split[1]
        undefined_a = {}
        undefined_b = {}

        try:
            # get undefined items before the commit
            reset(commit_a)
            undefined_a, _ = check_symbols(args.ignore)

            # get undefined items for the commit
            reset(commit_b)
            undefined_b, defined = check_symbols(args.ignore)
        finally:
            # Always move the tree back to where it started, including when
            # a check is interrupted or fails; otherwise the branch would
            # stay hard-reset to one of the inspected commits.
            reset(head)

        # report cases that are present for the commit but not before
        for symbol in sorted(undefined_b):
            if symbol not in undefined_a:
                # symbol has not been undefined before
                undefined[symbol] = sorted(undefined_b.get(symbol))
            else:
                # check if there are new files that reference the undefined
                # symbol
                files = sorted(undefined_b.get(symbol) -
                               undefined_a.get(symbol))
                if files:
                    undefined[symbol] = files

    # default to check the entire tree
    else:
        undefined, defined = check_symbols(args.ignore)

    # now print the output
    for symbol in sorted(undefined):
        print(red(symbol))

        files = sorted(undefined.get(symbol))
        print("%s: %s" % (yel("Referencing files"), ", ".join(files)))

        sims = find_sims(symbol, args.ignore, defined)
        sims_out = yel("Similar symbols")
        if sims:
            print("%s: %s" % (sims_out, ', '.join(sims)))
        else:
            print("%s: %s" % (sims_out, "no similar symbols found"))

        if args.find:
            print("%s:" % yel("Commits changing symbol"))
            commits = find_commits(symbol, args.diff)
            if commits:
                for commit in commits:
                    # "<abbrev-hash> <subject>"; partition() tolerates a
                    # line without a subject instead of raising IndexError.
                    sha, _, subject = commit.partition(" ")
                    print("\t- %s (\"%s\")" % (yel(sha), subject))
            else:
                print("\t- no commit found")
        print()  # new line
203 | |
204 | |
def reset(commit):
    """Hard-reset the current git tree to %commit ('git reset --hard')."""
    git_cmd = ["git", "reset", "--hard"]
    git_cmd.append(commit)
    execute(git_cmd)
208 | |
209 | |
def yel(string):
    """
    Return %string wrapped in the ANSI escape codes for yellow when the
    global COLOR flag is set, and %string itself otherwise.
    """
    if not COLOR:
        return string
    return "\033[33m%s\033[0m" % string
215 | |
216 | |
def red(string):
    """
    Return %string wrapped in the ANSI escape codes for red when the
    global COLOR flag is set, and %string itself otherwise.
    """
    if not COLOR:
        return string
    return "\033[31m%s\033[0m" % string
222 | |
223 | |
def execute(cmd):
    """Execute %cmd and return its output as a string.

    %cmd is an argument list that is run without a shell.  stderr is merged
    into stdout and the bytes are decoded with undecodable sequences
    replaced.  If the command returns a non-zero status, the program exits
    with the CalledProcessError as exit message.
    """
    try:
        stdout = subprocess.check_output(cmd, stderr=subprocess.STDOUT,
                                         shell=False)
    except subprocess.CalledProcessError as fail:
        # sys.exit() rather than the builtin exit(): the latter is a helper
        # installed by the site module and is not guaranteed to exist.
        sys.exit(fail)
    return stdout.decode(errors='replace')
232 | |
233 | |
def find_commits(symbol, diff):
    """Return the one-line 'git log' entries (abbreviated hash and subject)
    of the commits in the range %diff whose changes match %symbol."""
    git_log = ["git", "log", "--pretty=oneline", "--abbrev-commit",
               "-G", symbol, diff]
    output = execute(git_log)
    # drop the empty strings produced by splitting on the trailing newline
    return list(filter(None, output.split("\n")))
240 | |
241 | |
def tree_is_dirty():
    """Return true if the current working tree is dirty (i.e., if any file has
    been added, deleted, modified, renamed or copied but not committed).

    Only the two status columns at the start of each line of
    'git status --porcelain' are inspected, so untracked files ('??') and
    letters occurring in file names do not count as dirty.
    """
    stdout = execute(["git", "status", "--porcelain"])
    # Iterate over lines: iterating over the string itself would yield
    # single characters and flag any U/R/M/A/D/C found in a path.
    for line in stdout.splitlines():
        if re.search(r"[URMADC]", line[:2]):
            return True
    return False
250 | |
251 | |
def get_head():
    """Return the commit hash that 'git rev-parse HEAD' prints, without
    surrounding newlines."""
    rev_parse_output = execute(["git", "rev-parse", "HEAD"])
    head_hash = rev_parse_output.strip('\n')
    return head_hash
256 | |
257 | |
def partition(lst, size):
    """Distribute the items of @lst round-robin over @size lists.

    The i-th returned list holds every @size-th item of @lst starting at
    index i, so the result always has @size entries (some may be empty).
    """
    parts = []
    for offset in range(size):
        parts.append(lst[offset::size])
    return parts
261 | |
262 | |
def init_worker():
    """Make the calling process ignore SIGINT (passed to Pool() as the
    worker initializer in this file)."""
    ignore_handler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignore_handler)
266 | |
267 | |
def find_sims(symbol, ignore, defined=None):
    """Return a list of max. ten Kconfig symbols that are string-similar to
    @symbol.

    If a non-empty collection @defined is given, the candidates are taken
    from it.  Otherwise all Kconfig files of the git tree are parsed in
    parallel to collect the defined symbols; @ignore is handed through to
    parse_kconfig_files().  @defined is never modified.
    """
    if defined:
        return sorted(difflib.get_close_matches(symbol, set(defined), 10))

    kfiles = [gitfile for gitfile in get_files()
              if REGEX_FILE_KCONFIG.match(gitfile)]
    arglist = [(part, ignore) for part in partition(kfiles, cpu_count())]

    # Collect into a local set: the former shared default list grew across
    # calls, and callers may pass an (empty) set, which has no extend().
    candidates = set()
    # The context manager releases the worker processes once map() is done.
    with Pool(cpu_count(), init_worker) as pool:
        for res in pool.map(parse_kconfig_files, arglist):
            candidates.update(res[0])

    return sorted(difflib.get_close_matches(symbol, candidates, 10))
288 | |
289 | |
def get_files():
    """Return the paths printed by 'git ls-files', minus entries that
    contain ".git", "ChangeLog" or ".log", that are directories, or that
    start with "tools/"."""
    # use 'git ls-files' to get the worklist
    listing = execute(["git", "ls-files"])
    if listing.endswith("\n"):
        # drop a single trailing newline so that no empty entry is produced
        listing = listing[:-1]

    skipped_markers = (".git", "ChangeLog", ".log")
    files = []
    for path in listing.split("\n"):
        if any(marker in path for marker in skipped_markers):
            continue
        if os.path.isdir(path) or path.startswith("tools/"):
            continue
        files.append(path)
    return files
305 | |
306 | |
def check_symbols(ignore):
    """Find undefined Kconfig symbols.

    Returns a tuple (undefined, defined): undefined is a dict with the
    symbol as key and the set of referencing files as value, defined is the
    set of all defined symbols.  Files matching %ignore are not checked for
    undefined symbols.

    The work is spread over a pool of worker processes.  On a keyboard
    interrupt the workers are terminated and the program exits with
    status 1.
    """
    pool = Pool(cpu_count(), init_worker)
    try:
        result = check_symbols_helper(pool, ignore)
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        sys.exit(1)
    except BaseException:
        # do not leave worker processes behind when the check fails
        pool.terminate()
        pool.join()
        raise
    # Regular shutdown: this function may be called several times per run,
    # so the workers of a finished check must not linger.
    pool.close()
    pool.join()
    return result
318 | |
319 | |
def check_symbols_helper(pool, ignore):
    """Helper method for check_symbols().  Used to catch keyboard interrupts in
    check_symbols() in order to properly terminate running worker processes.

    Splits the files of the git tree into Kconfig files and source files,
    parses both kinds with the worker @pool and returns a tuple
    (undefined, defined_symbols): a dict {symbol: set of referencing files}
    and the set of all defined symbols.  Source files matching the @ignore
    regex are not parsed.
    """
    source_files = []
    kconfig_files = []
    defined_symbols = []
    referenced_symbols = dict()  # {file: [symbols]}

    for gitfile in get_files():
        if REGEX_FILE_KCONFIG.match(gitfile):
            kconfig_files.append(gitfile)
        else:
            # Skip files that DO match the ignore pattern (the test used to
            # be inverted, which dropped every non-matching file instead).
            if ignore and re.match(ignore, gitfile):
                continue
            # add source files that do not match the ignore pattern
            source_files.append(gitfile)

    # parse source files
    arglist = partition(source_files, cpu_count())
    for res in pool.map(parse_source_files, arglist):
        referenced_symbols.update(res)

    # parse kconfig files
    arglist = [(part, ignore)
               for part in partition(kconfig_files, cpu_count())]
    for res in pool.map(parse_kconfig_files, arglist):
        defined_symbols.extend(res[0])
        referenced_symbols.update(res[1])
    defined_symbols = set(defined_symbols)

    # inverse mapping of referenced_symbols to dict(symbol: {files})
    inv_map = dict()
    for _file, symbols in referenced_symbols.items():
        for symbol in symbols:
            inv_map.setdefault(symbol, set()).add(_file)
    referenced_symbols = inv_map

    undefined = {}  # {symbol: {files}}
    for symbol in sorted(referenced_symbols):
        # filter some false positives
        if symbol in ("FOO", "BAR", "FOO_BAR", "XXX"):
            continue
        if symbol not in defined_symbols:
            if symbol.endswith("_MODULE"):
                # avoid false positives for kernel modules
                if symbol[:-len("_MODULE")] in defined_symbols:
                    continue
            undefined[symbol] = referenced_symbols.get(symbol)
    return undefined, defined_symbols
372 | |
373 | |
def parse_source_files(source_files):
    """Map every file in @source_files to the list of Kconfig symbols that
    parse_source_file() finds in it and return that dictionary."""
    return {path: parse_source_file(path) for path in source_files}
381 | |
382 | |
def parse_source_file(sfile):
    """Return the Kconfig symbols referenced as CONFIG_* in @sfile, in order
    of appearance; an empty list if @sfile does not exist."""
    if not os.path.exists(sfile):
        return []

    with open(sfile, "r", encoding='utf-8', errors='replace') as stream:
        content = stream.readlines()

    found = []
    for text in content:
        # cheap substring test before running the regex
        if "CONFIG_" in text:
            found.extend(name
                         for name in REGEX_SOURCE_SYMBOL.findall(text)
                         if REGEX_FILTER_SYMBOLS.search(name))
    return found
404 | |
405 | |
def get_symbols_in_line(line):
    """Return all matches of REGEX_SYMBOL in @line as a list."""
    matches = REGEX_SYMBOL.findall(line)
    return matches
409 | |
410 | |
def parse_kconfig_files(args):
    """Parse several Kconfig files and return a tuple of the defined symbols
    (list) and the referenced symbols ({file: [symbols]}).

    @args is a tuple of a list of files and the @ignore pattern; it is a
    single argument so that the function can be used with Pool.map().
    Definitions are collected from every file, references only from files
    that do not match the @ignore pattern.
    """
    file_list, ignore_pattern = args[0], args[1]
    all_defined = []
    refs_by_file = {}

    for path in file_list:
        file_defined, file_refs = parse_kconfig_file(path)
        all_defined += file_defined
        if not ignore_pattern or not re.match(ignore_pattern, path):
            refs_by_file[path] = file_refs
    return (all_defined, refs_by_file)
428 | |
429 | |
def parse_kconfig_file(kfile):
    """Parse @kfile and return a tuple (defined, references) of the symbols
    it defines and the symbols it references in statements.

    Both lists are empty if @kfile does not exist.  Text after a '#' is
    ignored, as is everything between a help marker and the next symbol
    definition.
    """
    defined = []
    references = []
    skip = False

    if not os.path.exists(kfile):
        return defined, references

    with open(kfile, "r", encoding='utf-8', errors='replace') as stream:
        lines = stream.readlines()

    # An explicit index is used so that the continuation lines of a
    # multi-line statement are really consumed (advancing the variable of a
    # 'for ... in range()' loop has no effect on the iteration).
    total = len(lines)
    i = 0
    while i < total:
        line = lines[i].strip('\n')
        line = line.split("#")[0]  # ignore comments
        i += 1

        def_match = REGEX_KCONFIG_DEF.match(line)
        if def_match:
            defined.append(def_match.group(1))
            skip = False
        elif REGEX_KCONFIG_HELP.match(line):
            skip = True
        elif skip:
            # ignore content of help messages
            continue
        elif REGEX_KCONFIG_STMT.match(line):
            line = REGEX_QUOTES.sub("", line)
            symbols = get_symbols_in_line(line)
            # multi-line statements; the bound check covers a file whose
            # last line ends in a backslash
            while line.endswith("\\") and i < total:
                line = lines[i].strip('\n')
                i += 1
                symbols.extend(get_symbols_in_line(line))
            # sorted() keeps the result independent of set iteration order
            for symbol in sorted(set(symbols)):
                if REGEX_NUMERIC.match(symbol):
                    # ignore numeric values
                    continue
                references.append(symbol)

    return defined, references
473 | |
474 | |
# Run the checker only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
477 |