#!/usr/bin/python3
"""
This script should find duplicates e.g. specific template is same as shared one
"""
import sys
import os
import re
import glob
import argparse
def recursive_globi(mask):
"""
Simple replacement of glob.globi(mask, recursive=true)
Reason: Older Python versions support
"""
parts = mask.split("**/")
    if len(parts) != 2:
        raise NotImplementedError("exactly one '**/' is expected in the mask")
    search_root = parts[0]
    # escape regex metacharacters, then turn the glob '*' into regex '.*'
    path_mask = re.escape(parts[1]).replace(r"\*", ".*")
    re_path_mask = re.compile(path_mask + "$")
for root, dirnames, filenames in os.walk(search_root):
dirnames.sort()
filenames.sort()
paths = filenames + dirnames
for path in paths:
full_path = os.path.join(root, path)
if re_path_mask.search(full_path):
yield full_path
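
# Illustrative example for recursive_globi (hypothetical paths):
#
#   mask = os.path.join("root", "**", "fixes", "bash")
#
# splits into search_root="root/" and path_mask="fixes/bash", so the walk
# yields e.g. "root/rhel7/fixes/bash" but skips "root/rhel7/fixes/ansible".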
class DuplicatesFinder(object):
def __init__(self, root_dir, specific_dirs_mask, shared_dir, shared_files_mask):
self._root_dir = root_dir
self._specific_dirs_mask = os.path.join(root_dir, specific_dirs_mask)
self._shared_dir = os.path.join(root_dir, shared_dir)
self._clear_normalized()
self._shared_files_mask = shared_files_mask
def _clear_normalized(self):
self._normalized = {}
def _get_normalized(self, file_path):
"""
Return cached normalized content of file
:param file_path:
:return:
"""
if file_path in self._normalized:
return self._normalized[file_path]
with open(file_path, 'r') as content_file:
content = content_file.read()
normalized = self._normalize_content(content)
self._normalized[file_path] = normalized
return normalized
def _compare_files(self, shared_filename, specific_filename):
if not os.path.isfile(specific_filename):
return False
shared_normalized = self._get_normalized(shared_filename)
specific_normalized = self._get_normalized(specific_filename)
return shared_normalized == specific_normalized
def _print_match(self, first_filename, second_filename):
print("Duplicate found! {}\t=>\t{}".format(first_filename, second_filename))
def search(self):
"""
:return: True if any duplicate found
"""
found = False
self._clear_normalized()
specific_dirs = list(self._specific_dirs())
# Walk all shared files
shared_files_mask = os.path.join(self._shared_dir, self._shared_files_mask)
for shared_filename in sorted(glob.glob(shared_files_mask)):
basename = os.path.basename(shared_filename)
# Walk all specific dirs
for specific_dir in specific_dirs:
# Get file to compare
specific_filename = os.path.join(specific_dir, basename)
# Compare
if self._compare_files(shared_filename, specific_filename):
found = True
self._print_match(shared_filename, specific_filename)
return found
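
    # Illustrative run (hypothetical file names): a shared file such as
    # shared/fixes/bash/sshd_enable.sh is compared, by basename, against
    # e.g. rhel7/fixes/bash/sshd_enable.sh in every specific directory;
    # each pair with equal normalized content is reported via _print_match().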
def _specific_dirs(self):
for static_path in recursive_globi(self._specific_dirs_mask):
if not static_path.startswith(self._shared_dir):
yield static_path
def _normalize_content(self, content):
return content
class BashDuplicatesFinder(DuplicatesFinder):
def __init__(self, root_dir, specific_dirs_mask, shared_dir, shared_files_mask="*.sh"):
        super().__init__(root_dir, specific_dirs_mask, shared_dir, shared_files_mask)
def _normalize_content(self, content):
        # remove full-line comments
        # naive implementation (todo): inline comments are kept
        content = re.sub(r"^\s*#.*", "", content, flags=re.MULTILINE)
# remove empty lines
content = "\n".join([s for s in content.split("\n") if s])
return content
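
    # Illustrative example (made-up snippet): normalization maps both
    #   "#!/bin/bash\n# enable the service\nsystemctl enable sshd\n"
    # and
    #   "systemctl enable sshd"
    # to the same string, so the two files would be reported as duplicates.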
class OvalDuplicatesFinder(DuplicatesFinder):
def __init__(self, root_dir, specific_dirs_mask, shared_dir, shared_files_mask="*.xml"):
        super().__init__(root_dir, specific_dirs_mask, shared_dir, shared_files_mask)
def _normalize_content(self, content):
        # remove comments
        # naive implementation (todo)
        content = re.sub(r"^\s*#.*", "", content, flags=re.MULTILINE)  # bash style comments - due to #platform
        content = re.sub(r"<!--.*?-->", "", content, flags=re.DOTALL)  # xml comments
# remove empty lines
content = "\n".join([s for s in content.split("\n") if s])
return content
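
    # Illustrative example (made-up snippet): both
    #   "# platform = multi_platform_all\n<!-- old check -->\n<def/>\n"
    # and
    #   "<def/>"
    # normalize to "<def/>": the bash-style #platform line, the XML comment
    # and the resulting empty lines are all stripped before comparison.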
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("root_ssg_directory", help="Path to root of ssg git repository")
return parser.parse_args()
def main():
"""
main function
"""
args = parse_args()
root_dir = args.root_ssg_directory
without_duplicates = True
# Static bash scripts
print("Static bash files:")
static_bash_finder = BashDuplicatesFinder(
root_dir,
os.path.join("**", "fixes", "bash"),
os.path.join("shared", "fixes", "bash")
)
if static_bash_finder.search():
without_duplicates = False
    # Bash template scripts
print("Bash templates:")
template_bash_finder = BashDuplicatesFinder(
root_dir,
os.path.join("**", "templates"),
os.path.join("shared", "templates"),
"template_BASH_*"
)
if template_bash_finder.search():
without_duplicates = False
    # Static OVAL files
    print("Static OVAL files:")
static_oval_finder = OvalDuplicatesFinder(
root_dir,
os.path.join("**", "checks", "oval"),
os.path.join("shared", "checks", "oval")
)
if static_oval_finder.search():
without_duplicates = False
    # OVAL template files
    print("OVAL templates:")
templates_oval_finder = OvalDuplicatesFinder(
root_dir,
os.path.join("**", "templates"),
os.path.join("shared", "templates"),
"template_OVAL_*"
)
if templates_oval_finder.search():
without_duplicates = False
# Scan results
if without_duplicates:
print("No duplicates found")
sys.exit(0)
else:
print("Duplicates found!")
sys.exit(1)
if __name__ == "__main__":
main()