1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
|
#!/usr/bin/python3
from collections import defaultdict
import io
import re
import subprocess
# --------------------------------------------------------------------------------------------
# globals
# --------------------------------------------------------------------------------------------
definitionSet = set() # set of method_name
definitionToSourceLocationMap = dict()
# for the "unused methods" analysis
callDict = defaultdict(set) # map of from_method_name -> set(method_name)
# clang does not always use exactly the same numbers in the type-parameter vars it generates
# so I need to substitute them to ensure we can match correctly.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
def normalizeTypeParams( line ):
line = normalizeTypeParamsRegex.sub("type-parameter-?-?", line)
# make some of the types a little prettier
line = line.replace("std::__debug", "std::")
line = line.replace("class ", "")
line = line.replace("struct ", "")
line = line.replace("_Bool", "bool")
return line
# --------------------------------------------------------------------------------------------
# primary input loop
# --------------------------------------------------------------------------------------------
cnt = 0
with io.open("workdir/loplugin.methodcycles.log", "r", buffering=1024*1024) as txt:
for line in txt:
tokens = line.strip().split("\t")
if tokens[0] == "definition:":
returnType = tokens[1]
nameAndParams = tokens[2]
sourceLocation = tokens[3]
funcInfo = (normalizeTypeParams(returnType) + " " + normalizeTypeParams(nameAndParams)).strip()
definitionSet.add(funcInfo)
definitionToSourceLocationMap[funcInfo] = sourceLocation
elif tokens[0] == "call:":
returnTypeFrom = tokens[1]
nameAndParamsFrom = tokens[2]
returnTypeTo = tokens[3]
nameAndParamsTo = tokens[4]
caller = (normalizeTypeParams(returnTypeFrom) + " " + normalizeTypeParams(nameAndParamsFrom)).strip()
callee = (normalizeTypeParams(returnTypeTo) + " " + normalizeTypeParams(nameAndParamsTo)).strip()
callDict[caller].add(callee)
else:
print( "unknown line: " + line)
cnt = cnt + 1
#if cnt > 100000: break
# sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
return [int(text) if text.isdigit() else text.lower()
for text in re.split(_nsre, s)]
# sort by both the source-line and the datatype, so the output file ordering is stable
# when we have multiple items on the same source line
def v_sort_key(v):
return natural_sort_key(v[1]) + [v[0]]
def sort_set_by_natural_key(s):
return sorted(s, key=lambda v: v_sort_key(v))
# --------------------------------------------------------------------------------------------
# analysis
# --------------------------------------------------------------------------------------------
# follow caller-callee chains, removing all methods reachable from a root method
def remove_reachable(callDict, startCaller):
worklist = list()
worklist.append(startCaller)
while len(worklist) > 0:
caller = worklist.pop()
if caller not in callDict:
continue
calleeSet = callDict[caller]
del callDict[caller]
if caller in definitionSet:
definitionSet.remove(caller)
for c in calleeSet:
worklist.append(c)
# look for all the external entry points and remove code called from there
to_be_removed = set()
to_be_removed.add("int main(int,char **)")
# random dynload entrypoints that we don't otherwise find
to_be_removed.add("bool TestImportOLE2(SvStream &)")
to_be_removed.add("void SbiRuntime::StepREDIMP()")
to_be_removed.add("_object * (anonymous namespace)::createUnoStructHelper(_object *,_object *,_object *)")
for caller in definitionSet:
if caller not in definitionToSourceLocationMap:
to_be_removed.append(caller)
continue
location = definitionToSourceLocationMap[caller]
if "include/com/" in location \
or "include/cppu/" in location \
or "include/cppuhelper/" in location \
or "include/osl/" in location \
or "include/rtl/" in location \
or "include/sal/" in location \
or "include/salhelper/" in location \
or "include/typelib/" in location \
or "include/uno/" in location \
or "workdir/UnpackedTarball/" in location \
or "workdir/UnoApiHeadersTarget/" in location \
or "workdir/CustomTarget/officecfg/" in location \
or "workdir/LexTarget/" in location \
or "workdir/CustomTarget/i18npool/localedata/" in location \
or "workdir/SdiTarget/" in location \
or "/qa/" in location \
or "include/test/" in location:
to_be_removed.add(caller)
# TODO calls to destructors are not mentioned in the AST, so we'll just have to assume they get called,
# which is not ideal
if "::~" in caller:
to_be_removed.add(caller)
# dyload entry points for VCL builder
if "(VclPtr<vcl::Window> & rRet, const VclPtr<vcl::Window> & pParent, VclBuilder::stringmap & rMap)" in caller:
to_be_removed.add(caller)
if "(VclPtr<vcl::Window> &,const VclPtr<vcl::Window> &,std::::map<rtl::OString, rtl::OUString, std::less<rtl::OString>, std::allocator<std::pair<const rtl::OString, rtl::OUString> > > &)" in caller:
to_be_removed.add(caller)
# find all the UNO load-by-symbol-name entrypoints
uno_constructor_entrypoints = set()
git_grep_process = subprocess.Popen("git grep -h 'constructor=' -- *.component", stdout=subprocess.PIPE, shell=True)
with git_grep_process.stdout as txt:
for line in txt:
idx1 = line.find(b"\"")
idx2 = line.find(b"\"", idx1 + 1)
func = line[idx1+1 : idx2]
uno_constructor_entrypoints.add(func.decode('utf-8'))
for caller in callDict:
if "(com::sun::star::uno::XComponentContext *,const com::sun::star::uno::Sequence<com::sun::star::uno::Any> &)" in caller:
for func in uno_constructor_entrypoints:
if func in caller:
to_be_removed.add(caller)
# remove everything reachable from the found entry points
for caller in to_be_removed:
remove_reachable(callDict, caller)
for caller in callDict:
callDict[caller] -= to_be_removed
# create a reverse call graph
inverseCallDict = defaultdict(set) # map of from_method_name -> set(method_name)
for caller in callDict:
for callee in callDict[caller]:
inverseCallDict[callee].add(caller)
print_tree_recurse_set = set() # protect against cycles
def print_tree(f, callDict, caller, depth):
if depth == 0:
f.write("\n") # add an empty line before each tree
print_tree_recurse_set.clear()
# protect against cycles
if caller in print_tree_recurse_set:
return
# when printing out trees, things that are not in the map are things that are reachable,
# so we're not interested in them
if caller not in callDict:
return
print_tree_recurse_set.add(caller)
f.write(" " * depth + caller + "\n")
f.write(" " * depth + definitionToSourceLocationMap[caller] + "\n")
calleeSet = callDict[caller]
for c in calleeSet:
print_tree(f, callDict, c, depth+1)
# find possible roots (ie. entrypoints) by looking for methods that are not called
def dump_possible_roots():
possibleRootList = list()
for caller in callDict:
if caller not in inverseCallDict and caller in definitionToSourceLocationMap:
possibleRootList.append(caller)
possibleRootList.sort()
# print out first 100 trees of caller->callees
count = 0
with open("compilerplugins/clang/methodcycles.roots", "wt") as f:
f.write("callDict size " + str(len(callDict)) + "\n")
f.write("possibleRootList size " + str(len(possibleRootList)) + "\n")
f.write("\n")
for caller in possibleRootList:
f.write(caller + "\n")
f.write(" " + definitionToSourceLocationMap[caller] + "\n")
#print_tree(f, caller, 0)
count = count + 1
#if count>1000: break
# Look for cycles in a directed graph
# Adapted from:
# https://codereview.stackexchange.com/questions/86021/check-if-a-directed-graph-contains-a-cycle
def print_cycles():
with open("compilerplugins/clang/methodcycles.results", "wt") as f:
path = set()
visited = set()
def printPath(path):
if len(path) < 2:
return
# we may have found a cycle, but if the cycle is called from outside the cycle
# the code is still in use.
for p in path:
for caller in inverseCallDict[p]:
if caller not in path:
return
f.write("found cycle\n")
for p in path:
f.write(" " + p + "\n")
f.write(" " + definitionToSourceLocationMap[p] + "\n")
f.write("\n")
def checkCyclic(vertex):
if vertex in visited:
return
visited.add(vertex)
path.add(vertex)
if vertex in callDict:
for neighbour in callDict[vertex]:
if neighbour in path:
printPath(path)
break
else:
checkCyclic(neighbour)
path.remove(vertex)
for caller in callDict:
checkCyclic(caller)
print_cycles()
# print partitioned sub-graphs
def print_partitions():
callDict2 = callDict
# Remove anything with no callees, and that is itself not called.
# After this stage, we should only be left with closed sub-graphs ie. partitions
while True:
to_be_removed.clear()
for caller in callDict2:
if len(callDict2[caller]) == 0 \
or caller not in inverseCallDict[caller]:
to_be_removed.add(caller)
if len(to_be_removed) == 0:
break
for caller in to_be_removed:
remove_reachable(callDict2, caller)
for caller in callDict2:
callDict2[caller] -= to_be_removed
count = 0
with open("compilerplugins/clang/methodcycles.partition.results", "wt") as f:
f.write("callDict size " + str(len(callDict2)) + "\n")
f.write("\n")
while len(callDict2) > 0:
print_tree(f, callDict2, next(iter(callDict2)), 0)
for c in print_tree_recurse_set:
callDict2.pop(c, None)
count = count + 1
if count>1000:
break
print_partitions()
|