scripts/spdxcheck: Put excluded files and directories into a separate file

The files and directories which are excluded from scanning are currently
hard coded in the script. That's not maintainable and not accessible for
external tools.

Move the files and directories which should be excluded into a file.  The
default file is scripts/spdxexclude. This can be overridden with the
'-e $FILE' command line option.

The file format and syntax is similar to the .gitignore file.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
Thomas Gleixner 2022-05-16 12:27:29 +02:00 committed by Greg Kroah-Hartman
parent 67924b7141
commit 0509b270a3
2 changed files with 72 additions and 6 deletions

View File

@ -6,6 +6,7 @@ from argparse import ArgumentParser
from ply import lex, yacc from ply import lex, yacc
import locale import locale
import traceback import traceback
import fnmatch
import sys import sys
import git import git
import re import re
@ -106,6 +107,7 @@ class id_parser(object):
self.parser = yacc.yacc(module = self, write_tables = False, debug = False) self.parser = yacc.yacc(module = self, write_tables = False, debug = False)
self.lines_checked = 0 self.lines_checked = 0
self.checked = 0 self.checked = 0
self.excluded = 0
self.spdx_valid = 0 self.spdx_valid = 0
self.spdx_errors = 0 self.spdx_errors = 0
self.spdx_dirs = {} self.spdx_dirs = {}
@ -254,17 +256,47 @@ class id_parser(object):
di.update(fname, base, fail) di.update(fname, base, fail)
self.spdx_dirs[base] = di self.spdx_dirs[base] = di
class pattern(object):
def __init__(self, line):
self.pattern = line
self.match = self.match_file
if line == '.*':
self.match = self.match_dot
elif line.endswith('/'):
self.pattern = line[:-1]
self.match = self.match_dir
elif line.startswith('/'):
self.pattern = line[1:]
self.match = self.match_fn
def match_dot(self, fpath):
return os.path.basename(fpath).startswith('.')
def match_file(self, fpath):
return os.path.basename(fpath) == self.pattern
def match_fn(self, fpath):
return fnmatch.fnmatchcase(fpath, self.pattern)
def match_dir(self, fpath):
if self.match_fn(os.path.dirname(fpath)):
return True
return fpath.startswith(self.pattern)
def exclude_file(fpath):
for rule in exclude_rules:
if rule.match(fpath):
return True
return False
def scan_git_tree(tree, basedir, dirdepth): def scan_git_tree(tree, basedir, dirdepth):
parser.set_dirinfo(basedir, dirdepth) parser.set_dirinfo(basedir, dirdepth)
for el in tree.traverse(): for el in tree.traverse():
# Exclude stuff which would make pointless noise
# FIXME: Put this somewhere more sensible
if el.path.startswith("LICENSES"):
continue
if el.path.find("license-rules.rst") >= 0:
continue
if not os.path.isfile(el.path): if not os.path.isfile(el.path):
continue continue
if exclude_file(el.path):
parser.excluded += 1
continue
with open(el.path, 'rb') as fd: with open(el.path, 'rb') as fd:
parser.parse_lines(fd, args.maxlines, el.path) parser.parse_lines(fd, args.maxlines, el.path)
@ -273,6 +305,20 @@ def scan_git_subtree(tree, path, dirdepth):
tree = tree[p] tree = tree[p]
scan_git_tree(tree, path.strip('/'), dirdepth) scan_git_tree(tree, path.strip('/'), dirdepth)
def read_exclude_file(fname):
rules = []
if not fname:
return rules
with open(fname) as fd:
for line in fd:
line = line.strip()
if line.startswith('#'):
continue
if not len(line):
continue
rules.append(pattern(line))
return rules
if __name__ == '__main__': if __name__ == '__main__':
ap = ArgumentParser(description='SPDX expression checker') ap = ArgumentParser(description='SPDX expression checker')
@ -281,6 +327,8 @@ if __name__ == '__main__':
help='Show [sub]directory statistics.') help='Show [sub]directory statistics.')
ap.add_argument('-D', '--depth', type=int, default=-1, ap.add_argument('-D', '--depth', type=int, default=-1,
help='Directory depth for -d statistics. Default: unlimited') help='Directory depth for -d statistics. Default: unlimited')
ap.add_argument('-e', '--exclude',
help='File containing file patterns to exclude. Default: scripts/spdxexclude')
ap.add_argument('-f', '--files', action='store_true', ap.add_argument('-f', '--files', action='store_true',
help='Show files without SPDX.') help='Show files without SPDX.')
ap.add_argument('-m', '--maxlines', type=int, default=15, ap.add_argument('-m', '--maxlines', type=int, default=15,
@ -316,6 +364,15 @@ if __name__ == '__main__':
sys.stderr.write('%s\n' %traceback.format_exc()) sys.stderr.write('%s\n' %traceback.format_exc())
sys.exit(1) sys.exit(1)
try:
fname = args.exclude
if not fname:
fname = os.path.join(os.path.dirname(__file__), 'spdxexclude')
exclude_rules = read_exclude_file(fname)
except Exception as ex:
sys.stderr.write('FAIL: Reading exclude file %s: %s\n' %(fname, ex))
sys.exit(1)
try: try:
if len(args.path) and args.path[0] == '-': if len(args.path) and args.path[0] == '-':
stdin = os.fdopen(sys.stdin.fileno(), 'rb') stdin = os.fdopen(sys.stdin.fileno(), 'rb')
@ -349,6 +406,7 @@ if __name__ == '__main__':
sys.stderr.write('License IDs %12d\n' %len(spdx.licenses)) sys.stderr.write('License IDs %12d\n' %len(spdx.licenses))
sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions)) sys.stderr.write('Exception IDs %12d\n' %len(spdx.exceptions))
sys.stderr.write('\n') sys.stderr.write('\n')
sys.stderr.write('Files excluded: %12d\n' %parser.excluded)
sys.stderr.write('Files checked: %12d\n' %parser.checked) sys.stderr.write('Files checked: %12d\n' %parser.checked)
sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked) sys.stderr.write('Lines checked: %12d\n' %parser.lines_checked)
if parser.checked: if parser.checked:

8
scripts/spdxexclude Normal file
View File

@ -0,0 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
#
# Patterns for excluding files and directories
# Ignore the license directory and the licensing documentation which would
# create lots of noise for no value
LICENSES/
license-rules.rst