1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747
|
#!/usr/bin/env python3
# SPDX-License-Identifier: MPL-2.0
# SPDX-FileCopyrightText: 2013-2018 Andrew McCreight <continuation@gmail.com>
# SPDX-FileCopyrightText: 2018 Andy Holmes <andrew.g.r.holmes@gmail.com>
#
# heapgraph.py - Top-level script for interpreting Garbage Collector heaps
import argparse
import copy
from collections import namedtuple
from collections import deque
import os
import re
import sys
try:
from heapdot import (output_dot_file, add_dot_graph_path,
add_dot_graph_unreachable)
except ImportError:
sys.stderr.write('DOT graph output not available\n')
NAME_ANNOTATION = '__heapgraph_name'
########################################################
# Command line arguments.
########################################################
parser = argparse.ArgumentParser(description='Find what is rooting or preventing an object from being collected in a GJS heap using a shortest-path breadth-first algorithm.')
parser.add_argument('heap_file', metavar='FILE',
help='Garbage collector heap from System.dumpHeap()')
parser.add_argument('targets', metavar='TARGET', nargs='*',
help='Heap address (eg. 0x7fa814054d00) or type prefix (eg. Array, Object, GObject, Function...)')
### Target Options
targ_opts = parser.add_argument_group('Target options')
targ_opts.add_argument('--edge', '-e', dest='edge_targets',
action='append', default=[],
help='Add an edge label to the list of targets')
targ_opts.add_argument('--function', '-f', dest='func_targets',
action='append', default=[],
help='Add a function name to the list of targets')
targ_opts.add_argument('--string', '-s', dest='string_targets',
action='append', default=[],
help='Add a string literal or String() to the list of targets')
targ_opts.add_argument('--annotation', '-a', dest='annotation_targets',
action='append', default=[],
help=f'Add a {NAME_ANNOTATION} annotation to the list of targets')
### Output Options
out_opts = parser.add_argument_group('Output Options')
out_opts.add_argument('--count', '-c', dest='count', action='store_true',
default=False,
help='Only count the matches for TARGET')
out_opts.add_argument('--dot-graph', '-d', dest='dot_graph',
action='store_true', default=False,
help='Output a DOT graph to FILE.dot')
out_opts.add_argument('--no-addr', '-na', dest='no_addr',
action='store_true', default=False,
help='Don\'t show addresses')
### Node and Root Filtering
filt_opts = parser.add_argument_group('Node/Root Filtering')
filt_opts.add_argument('--diff-heap', '-dh', dest='diff_heap', action='store',
metavar='FILE',
help='Don\'t show roots common to the heap FILE')
filt_opts.add_argument('--no-gray-roots', '-ng', dest='no_gray_roots',
action='store_true', default=False,
help='Don\'t show gray roots (marked to be collected)')
filt_opts.add_argument('--show-unreachable', '-u', action='store_true',
help="Show objects that have no path to a root but are not collected yet")
filt_opts.add_argument('--no-weak-maps', '-nwm', dest='no_weak_maps',
action='store_true', default=False,
help='Don\'t show WeakMaps')
filt_opts.add_argument('--show-global', '-g', dest='show_global',
action='store_true', default=False,
help='Show the global object (eg. globalThis/GjsGlobal)')
filt_opts.add_argument('--show-imports', '-i', dest='show_imports',
action='store_true', default=False,
help='Show import and module nodes (eg. imports.foo)')
filt_opts.add_argument('--hide-addr', '-ha', dest='hide_addrs', action='append',
metavar='ADDR', default=[],
help='Don\'t show roots with the heap address ADDR')
filt_opts.add_argument('--hide-node', '-hn', dest='hide_nodes', action='append',
metavar='LABEL', default=['self-hosting-global',
'GIRepositoryNamespace',
'GjsFileImporter',
'GjsGlobal',
'GjsInternalGlobal',
'GjsModule'],
help='Don\'t show nodes with labels containing LABEL')
filt_opts.add_argument('--hide-edge', '-he', dest='hide_edges', action='append',
metavar='LABEL', default=[NAME_ANNOTATION],
help="Don't show edges labeled LABEL")
###############################################################################
# Heap Patterns
###############################################################################
GraphAttribs = namedtuple('GraphAttribs',
'edge_labels node_labels roots root_labels weakMapEntries annotations')
WeakMapEntry = namedtuple('WeakMapEntry', 'weakMap key keyDelegate value')
addr_regex = re.compile(r'[A-F0-9]+$|0x[a-f0-9]+$')
node_regex = re.compile(r'((?:0x)?[a-fA-F0-9]+) (?:(B|G|W) )?([^\r\n]*)\r?$')
edge_regex = re.compile(r'> ((?:0x)?[a-fA-F0-9]+) (?:(B|G|W) )?([^\r\n]*)\r?$')
wme_regex = re.compile(r'WeakMapEntry map=((?:0x)?[a-zA-Z0-9]+|\(nil\)) key=((?:0x)?[a-zA-Z0-9]+|\(nil\)) keyDelegate=((?:0x)?[a-zA-Z0-9]+|\(nil\)) value=((?:0x)?[a-zA-Z0-9]+)')
func_regex = re.compile(r'Function(?: ([^/]+)(?:/([<|\w]+))?)?')
priv_regex = re.compile(r'([^ ]+) (0x[a-fA-F0-9]+$)')
string_regex = re.compile(r'^(?:sub)?string <(?:dependent|(?:permanent )?atom|(?:fat )?inline|linear): length (?:\d+)> (.*)\r?$')
###############################################################################
# Heap Parsing
###############################################################################
def parse_roots(fobj):
"""Parse the roots portion of a garbage collector heap."""
roots = {}
root_labels = {}
weakMapEntries = []
for line in fobj:
node = node_regex.match(line)
if node:
addr = node.group(1)
color = node.group(2)
label = node.group(3)
# Only overwrite an existing root with a black root.
if addr not in roots or color == 'B':
roots[addr] = (color == 'B')
# It would be classier to save all the root labels, though then
# we have to worry about gray vs black.
root_labels[addr] = label
else:
wme = wme_regex.match(line)
if wme:
weakMapEntries.append(WeakMapEntry(weakMap=wme.group(1),
key=wme.group(2),
keyDelegate=wme.group(3),
value=wme.group(4)))
# Skip comments, arenas, realms and zones
elif line[0] == '#':
continue
# Marks the end of the roots section
elif line[:10] == '==========':
break
else:
sys.stderr.write('Error: unknown line {}\n'.format(line))
exit(-1)
return [roots, root_labels, weakMapEntries]
def parse_graph(fobj):
"""Parse the node and edges of a garbage collector heap."""
edges = {}
edge_labels = {}
node_labels = {}
annotations = {}
def addNode (addr, node_label):
edges[addr] = {}
edge_labels[addr] = {}
if node_label != '':
node_labels[addr] = node_label
def addEdge(source, target, edge_label):
edges[source][target] = edges[source].get(target, 0) + 1
if edge_label != '':
edge_labels[source].setdefault(target, []).append(edge_label)
node_addr = None
second_pass_lines = []
for line in fobj:
if edge_regex.match(line):
second_pass_lines.append(line)
continue
node = node_regex.match(line)
if node:
second_pass_lines.append(line)
node_addr = node.group(1)
node_color = node.group(2)
node_label = node.group(3)
# Don't hide strings matching hide_nodes, as they may be labels
if string_regex.match(node_label) is not None:
addNode(node_addr, node_label)
continue
# Use this opportunity to map hide_nodes to addresses
for hide_node in args.hide_nodes:
if hide_node in node_label:
args.hide_addrs.append(node_addr)
break
else:
addNode(node_addr, node_label)
# Skip comments, arenas, realms and zones
elif line[0] == '#':
continue
else:
sys.stderr.write('Error: Unknown line: {}\n'.format(line[:-1]))
for line in second_pass_lines:
e = edge_regex.match(line)
if e:
target, edge_label = e.group(1, 3)
if edge_label == NAME_ANNOTATION:
s = string_regex.match(node_labels[target])
if s:
annotations[node_addr] = s.group(1)
if (node_addr not in args.hide_addrs and
edge_label not in args.hide_edges):
addEdge(node_addr, target, edge_label)
else:
node = node_regex.match(line)
if node:
node_addr = node.group(1)
# yar, should pass the root crud in and wedge it in here, or somewhere
return [edges, edge_labels, node_labels, annotations]
def parse_heap(fname):
"""Parse a garbage collector heap."""
try:
fobj = open(fname, 'r')
except:
sys.stderr.write('Error opening file {}\n'.format(fname))
exit(-1)
[roots, root_labels, weakMapEntries] = parse_roots(fobj)
[edges, edge_labels, node_labels, annotations] = parse_graph(fobj)
fobj.close()
graph = GraphAttribs(edge_labels=edge_labels, node_labels=node_labels,
roots=roots, root_labels=root_labels,
weakMapEntries=weakMapEntries, annotations=annotations)
return (edges, graph)
def find_nodes(fname):
"""Parse a garbage collector heap and return a list of node addresses."""
addrs = [];
try:
fobj = open(fname, 'r')
sys.stderr.write('Parsing {0}...'.format(fname))
except:
sys.stderr.write('Error opening file {}\n'.format(fname))
exit(-1)
# Whizz past the roots
for line in fobj:
if '==========\n' in line:
break
for line in fobj:
node = node_regex.match(line)
if node:
addrs.append(node.group(1))
fobj.close()
sys.stderr.write('done\n')
sys.stderr.flush()
return addrs
# Some applications may not care about multiple edges.
# They can instead use a single graph, which is represented as a map
# from a source node to a set of its destinations.
def to_single_graph(edges):
single_graph = {}
for origin, destinations in edges.items():
d = set([])
for destination, distance in destinations.items():
d.add(destination)
single_graph[origin] = d
return single_graph
def load_graph(fname):
sys.stderr.write('Parsing {0}...'.format(fname))
(edges, graph) = parse_heap(fname)
edges = to_single_graph(edges)
sys.stderr.write('done\n')
sys.stderr.flush()
return (edges, graph)
###############################################################################
# Path printing
###############################################################################
tree_graph_paths = {}
tree_graph_unreachables = set()
class style:
BOLD = '\033[1m'
ITALIC = '\033[3m'
UNDERLINE = '\033[4m'
PURPLE = '\033[0;36m'
END = '\033[0m'
def get_edge_label(graph, origin, destination):
elabel = lambda l: l[0] if len(l) == 2 else l
labels = graph.edge_labels.get(origin, {}).get(destination, [])
if len(labels) == 1:
label = labels[0]
if label == 'signal connection':
return 'GSignal'
else:
return label
elif len(labels) > 1:
return ', '.join([elabel(l) for l in labels])
else:
return ''
def get_node_label(graph, addr):
label = graph.node_labels[addr]
if label.endswith(' <no private>'):
label = label[:-13]
if label.startswith('Function '):
fm = func_regex.match(label)
if fm.group(2) == '<':
return 'Function via {}'.format(fm.group(1))
elif fm.group(2):
return 'Function {} in {}'.format(fm.group(2), fm.group(1))
else:
return label
if label.startswith('script'):
label = label[7:].split('/')[-1]
elif label.startswith('WeakMap'):
label = 'WeakMap'
elif label == 'base_shape':
label = 'shape'
elif label == 'type_object':
label = 'type'
return label[:50]
def get_node_annotation(graph, addr):
return graph.annotations.get(addr, None)
def format_node(graph, addr, parent=''):
node = get_node_label(graph, addr)
annotation = get_node_annotation(graph, addr)
has_priv = priv_regex.match(node)
# Color/Style
if os.isatty(1):
orig = style.UNDERLINE + 'jsobj@' + addr + style.END
if has_priv:
node = style.BOLD + has_priv.group(1) + style.END
orig += ' ' + style.UNDERLINE + 'priv@' + has_priv.group(2) + style.END
else:
node = style.BOLD + node + style.END
else:
orig = 'jsobj@' + addr
if has_priv:
node = has_priv.group(1)
orig += ' priv@' + has_priv.group(2)
# Add the annotation
if annotation:
if os.isatty(1):
node += ' "' + style.PURPLE + annotation + style.END + '"'
else:
node += ' "' + annotation + '"'
if args.no_addr:
return node
return node + ' ' + orig
def output_tree_graph(graph, data, base='', parent=''):
while data:
addr, children = data.popitem()
node = format_node(graph, addr, base)
# Labels
if parent:
edge = get_edge_label(graph, parent, addr)
else:
edge = graph.root_labels[addr]
# Color/Style
if os.isatty(1):
if parent:
edge = style.ITALIC + edge + style.END
else:
edge = style.BOLD + edge + style.END
# Print the path segment
if data:
print('{0}├─[{1}]─➤ [{2}]'.format(base, edge, node))
else:
print('{0}╰─[{1}]─➤ [{2}]'.format(base, edge, node))
# Print child segments
if children:
if data:
output_tree_graph(graph, children, base + '│ ', addr)
else:
output_tree_graph(graph, children, base + ' ', addr)
else:
if data:
print(base + '│ ')
else:
print(base + ' ')
def output_tree_unreachables(graph, data):
while data:
addr = data.pop()
node = format_node(graph, addr)
print(' • Unreachable: [{}]'.format(node))
def add_tree_graph_path(owner, path):
o = owner.setdefault(path.pop(0), {})
if path:
add_tree_graph_path(o, path)
def add_path(args, graph, path):
if args.dot_graph:
add_dot_graph_path(path)
else:
add_tree_graph_path(tree_graph_paths, path)
def add_unreachable(args, node):
if args.dot_graph:
add_dot_graph_unreachable(node)
else:
tree_graph_unreachables.add(node)
###############################################################################
# Breadth-first shortest path finding.
###############################################################################
def find_roots_bfs(args, edges, graph, target):
workList = deque()
distances = {}
def traverseWeakMapEntry(dist, k, m, v, label):
if not k in distances or not m in distances:
# Haven't found either the key or map yet.
return
if distances[k][0] > dist or distances[m][0] > dist:
# Either the key or the weak map is farther away, so we
# must wait for the farther one before processing it.
return
if v in distances:
return
distances[v] = (dist + 1, k, m, label)
workList.append(v)
# For now, ignore keyDelegates.
weakData = {}
for wme in graph.weakMapEntries:
weakData.setdefault(wme.weakMap, set([])).add(wme)
weakData.setdefault(wme.key, set([])).add(wme)
if wme.keyDelegate != '0x0':
weakData.setdefault(wme.keyDelegate, set([])).add(wme)
distances[startObject] = (-1, None)
workList.append(startObject)
# Search the graph.
while workList:
origin = workList.popleft()
dist = distances[origin][0]
# Found the target, stop digging
if origin == target:
continue
# origin does not point to any other nodes.
if not origin in edges:
continue
for destination in edges[origin]:
if destination not in distances:
distances[destination] = (dist + 1, origin)
workList.append(destination)
if origin in weakData:
for wme in weakData[origin]:
traverseWeakMapEntry(dist, wme.key, wme.weakMap, wme.value,
"value in WeakMap " + wme.weakMap)
traverseWeakMapEntry(dist, wme.keyDelegate, wme.weakMap, wme.key,
"key delegate in WeakMap " + wme.weakMap)
# Print out the paths by unwinding backwards to generate a path,
# then print the path. Accumulate any weak maps found during this
# process into the printWorkList queue, and print out what keeps
# them alive. Only print out why each map is alive once.
printWorkList = deque()
printWorkList.append(target)
printedThings = set([target])
while printWorkList:
p = printWorkList.popleft()
path = []
while p in distances:
path.append(p)
dist = distances[p]
if len(dist) == 2:
[_, p] = dist
else:
# The weak map key is probably more interesting,
# so follow it, and worry about the weak map later.
[_, k, m, label] = dist
graph.edge_labels[k].setdefault(p, []).append(label)
p = k
if not m in printedThings and not args.no_weak_maps:
printWorkList.append(m)
printedThings.add(m)
if path:
path.pop()
path.reverse()
add_path(args, graph, path)
elif args.show_unreachable:
# No path to a root. This object is eligible for collection on the
# next GC, but is still in an arena.
add_unreachable(args, target)
########################################################
# Target selection
########################################################
def target_edge(graph, target):
targets = set()
for origin, destinations in graph.edge_labels.items():
for destination in destinations:
if target in graph.edge_labels[origin][destination]:
targets.add(destination)
targets.add(origin)
sys.stderr.write('Found {} objects with edge label of {}\n'.format(len(targets), target))
return targets
def target_func(graph, target):
targets = set()
for addr, label in graph.node_labels.items():
if not label[:9] == 'Function ':
continue
if label[9:] == target:
targets.add(addr)
sys.stderr.write('Found {} functions named "{}"\n'.format(len(targets), target))
return targets
def target_gobject(graph, target):
targets = set()
for addr, label in graph.node_labels.items():
if label.endswith(target):
targets.add(addr)
sys.stderr.write('Found GObject with address of {}\n'.format(target))
return targets
def target_string(graph, target):
targets = set()
for addr, label in graph.node_labels.items():
if label[:7] == 'string ' and target in label[7:]:
targets.add(addr)
elif label[:10] == 'substring ' and target in label[10:]:
targets.add(addr)
sys.stderr.write('Found {} strings containing "{}"\n'.format(len(targets), target))
return targets
def target_type(graph, target):
targets = set()
for addr in edges.keys():
if graph.node_labels.get(addr, '')[0:len(target)] == target:
targets.add(addr)
sys.stderr.write('Found {} targets with type "{}"\n'.format(len(targets), target))
return targets
def target_annotation(graph, target):
targets = {addr for addr, label in graph.annotations.items()
if label == target}
sys.stderr.write(f'Found {len(targets)} targets with annotation "{target}"\n')
return targets
def select_targets(args, edges, graph):
targets = set()
for target in args.targets:
# If target seems like an address search for a JS Object, then GObject
if addr_regex.match(target):
if target in edges:
sys.stderr.write('Found object with address "{}"\n'.format(target))
targets.add(target)
else:
targets.update(target_gobject(graph, target))
else:
# Fallback to looking for JavaScript objects by class name
targets.update(target_type(graph, target))
for target in args.edge_targets:
targets.update(target_edge(graph, target))
for target in args.func_targets:
targets.update(target_func(graph, target))
for target in args.string_targets:
targets.update(target_string(graph, target))
for target in args.annotation_targets:
targets.update(target_annotation(graph, target))
return list(targets)
if __name__ == "__main__":
args = parser.parse_args()
# Node and Root Filtering
if args.show_global:
args.hide_nodes.remove('GjsGlobal')
args.hide_nodes.remove('GjsInternalGlobal')
if args.show_imports:
args.hide_nodes.remove('GjsFileImporter')
args.hide_nodes.remove('GjsModule')
args.hide_nodes.remove('GIRepositoryNamespace')
# Make sure we don't filter an explicit target
for target in args.targets:
if target in args.hide_nodes:
args.hide_nodes.remove(target)
# Heap diffing; we do these addrs separately due to the sheer amount
diff_addrs = []
if args.diff_heap:
diff_addrs = find_nodes(args.diff_heap)
# Load the graph
(edges, graph) = load_graph(args.heap_file)
targets = select_targets(args, edges, graph)
if len(targets) == 0:
sys.stderr.write('No targets found.\n')
sys.exit(-1)
elif args.count:
sys.exit(-1);
# Unlike JavaScript objects, GObjects can be "rooted" by their refcount so
# we have to use a fake root (repetitively)
rootEdges = set([])
for addr, isBlack in graph.roots.items():
if isBlack or not args.no_gray_roots:
rootEdges.add(addr)
startObject = 'FAKE START OBJECT'
edges[startObject] = rootEdges
for addr in targets:
if addr in edges and addr not in diff_addrs:
find_roots_bfs(args, edges, graph, addr)
if args.dot_graph:
output_dot_file(args, graph, targets, args.heap_file + ".dot")
else:
output_tree_graph(graph, tree_graph_paths)
output_tree_unreachables(graph, tree_graph_unreachables)
|