File: memwalk.rake

package info (click to toggle)
puppet 5.5.10-4
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 21,116 kB
  • sloc: ruby: 250,669; sh: 1,620; xml: 218; makefile: 151; sql: 103
file content (195 lines) | stat: -rw-r--r-- 5,944 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# Walks the memory dumped into heap.json, and produces a graph of the memory dumped in diff.json
# If a single argument (a hex address to one object) is given, the graph is limited to this object and what references it
# The heap dumps should be in the format produced by Ruby ObjectSpace in Ruby version 2.1.0 or later.
#
# The command produces a .dot file that can be rendered with Graphviz dot into SVG. If a memwalk is performed for all
# objects in the diff.json, the output file name is memwalk.dot. If it is produced for a single address, the name of the
# output file is memwalk-<address>.dot
#
# The dot file can be rendered with something like: dot -Tsvg -omemwalk.svg memwalk.dot
#
desc "Process a diff.json of object ids, and a heap.json of a Ruby 2.1.0 ObjectSpace dump and produce a graph"
task :memwalk, [:id] do |t, args|
  puts "Memwalk"
  puts "Computing for #{args[:id] || 'all'}"
  # The optional task argument is a hex address; it is kept as an Integer (nil means "walk every diff entry")
  @single_id = args[:id] ? args[:id].to_i(16) : nil

  require 'json'
  # Set is used for the printed/edges/walked bookkeeping further down. It is only autoloaded
  # from Ruby 3.2 on, so it is required explicitly here.
  require 'set'

  # Keys and type names used in the JSON entries of the heap and diff dumps
  TYPE = "type".freeze
  ROOT = "root".freeze
  ROOT_UC = "ROOT".freeze
  ADDR = "address".freeze
  NODE = "NODE".freeze
  STRING = "STRING".freeze
  DATA = "DATA".freeze
  HASH = "HASH".freeze
  ARRAY = "ARRAY".freeze
  OBJECT = "OBJECT".freeze
  CLASS = "CLASS".freeze

  allocations = {}
  # Index from integer address to the parsed diff entry of each object to trace bindings for
  diff_index = {}
  puts "Reading data"
  # Read diff.json (one JSON document per line) and index every entry that is not a
  # STRING, DATA, HASH or ARRAY by its address.
  begin
    puts "Reading diff"
    lines = 0
    # File.foreach yields one line at a time instead of loading the whole file first
    File.foreach("diff.json") do |line|
      lines += 1
      # a blank (e.g. trailing) line is not a JSON document; ignore it instead of aborting
      next if line.strip.empty?
      diff = JSON.parse(line)
      case diff[ TYPE ]
      when STRING, DATA, HASH, ARRAY
        # skip strings, data, hashes and arrays
      else
        diff_index[ diff[ ADDR ].to_i(16) ] = diff
      end
    end
    puts "Read #{lines} number of diffs"
  rescue => e
    # report the offending line number together with a shortened version of the original message
    raise "ERROR READING DIFF at line #{lines} #{e.message[0, 200]}"
  end

  # Read heap.json (one JSON document per line) and index the entries of interest in `allocations`.
  # ROOT entries are keyed by their root name, all other kept entries by their integer address.
  begin
    puts "Reading heap"
    lines = 0
    # File.foreach yields one line at a time, so the heap dump is never held in memory as a whole
    File.foreach("heap.json") do |line|
      lines += 1
      # a blank (e.g. trailing) line is not a JSON document; ignore it instead of aborting
      next if line.strip.empty?
      allocation = JSON.parse(line)
      case allocation[ TYPE ]
      when ROOT_UC
        # Graph for single id must include roots, as it may be a root that holds on to the reference
        # a global variable, thread, etc.
        #
        allocations[ allocation[ ROOT ] ] = allocation if @single_id
      when NODE, STRING
        # skip the NODE objects - they represent the loaded ruby code
        # skip all strings - they are everywhere
      else
        allocations[ allocation[ ADDR ].to_i(16) ] = allocation
      end
    end
    puts "Read #{lines} number of entries"
  rescue => e
    # print a shortened message, then re-raise the original error unchanged
    puts "ERROR READING HEAP #{e.message[0, 200]}"
    raise
  end
  @heap = allocations

  puts "Building reference index"
  # Reverse index: maps the address of a referenced object to the list of heap keys of the
  # entries that reference it. Looking up an unknown address yields (and stores) an empty list.
  @references = Hash.new { |index, target| index[target] = [] }
  REFERENCES = "references".freeze
  allocations.each_pair do |holder, entry|
    targets = entry[ REFERENCES ]
    # entries without a list of references contribute nothing to the index
    next unless targets.is_a?(Array)
    targets.each { |target| @references[ target.to_i(16) ] << holder }
  end

  # Keys of the heap entries that already have a node in the output
  @printed = Set.new

  # Writes the dot node statement for one heap entry to @output, at most once per addr.
  #
  # addr  - key of the entry in @heap: an Integer address, or a String for entries stored under a name
  # entry - the parsed heap entry for addr
  #
  # Returns nil when a node for addr has already been written.
  def print_object(addr, entry)
    # only print each node once
    return unless @printed.add?(addr)

    # node_name renders Integer addresses as hex and passes Strings through, which is the text
    # wanted both in the node id and on the second line of the label.
    name = node_name(addr)
    @output.write("x#{name} [label=\"#{node_label(addr, entry)}\\n#{name}\"];\n")
  end

  # Produces the first line of the label of a node.
  #
  # addr  - key of the entry in @heap (not used for the label)
  # entry - the parsed heap entry
  #
  # Returns the class name for an OBJECT (looked up in @heap via the entry's "class" address),
  # "CLASS <name>" for a CLASS, and the entry type for everything else. An OBJECT whose class
  # entry is not in @heap, or whose class has no name, is labelled with its type instead of
  # failing on nil.
  def node_label(addr, entry)
    case entry[ TYPE ]
    when OBJECT
      class_addr = entry[ "class" ]
      class_entry = class_addr && @heap[ class_addr.to_i(16) ]
      (class_entry && class_entry[ "name" ]) || entry[ TYPE ]
    when CLASS
      "CLASS #{entry[ "name"]}"
    else
      entry[ TYPE ]
    end
  end

  # Text used to identify a node: a String is used as is, anything else is rendered
  # in hexadecimal via to_s(16).
  def node_name(addr)
    addr.is_a?(String) ? addr : addr.to_s(16)
  end

  # Writes one dot edge statement from the node of from_addr to the node of to_addr to @output.
  def print_edge(from_addr, to_addr)
    tail = node_name(from_addr)
    head = node_name(to_addr)
    @output.write("x#{tail}->x#{head};\n")
  end

  # Walks from every address in diff, then writes all edges collected during those walks.
  #
  # diff - Hash keyed by the addresses to start from (the values are not used)
  def closure_and_edges(diff)
    collected = Set.new
    visited = Set.new
    referenced = diff.count do |addr, _|
      holders = @references[addr]
      holders.is_a?(Array) && !holders.empty?
    end
    puts "Number of diffs referenced = #{referenced}"
    diff.each { |addr, _| walk(addr, collected, visited) }
    collected.each { |from_addr, to_addr| print_edge(from_addr, to_addr) }
  end

  # Starts a walk at addr: prints its node and follows everything that references it.
  # Does nothing when addr has no entry in @heap.
  #
  # addr   - key of the starting entry in @heap
  # edges  - Set collecting the edges found, passed on to walk_to_object
  # walked - Set passed on to walk_to_object
  def walk(addr, edges, walked)
    entry = @heap[ addr ]
    return if entry.nil?

    print_object(addr, entry)
    @references[ addr ].each { |referrer| walk_to_object(addr, referrer, edges, walked) }
  end

  # Follows the reference chain backwards from to_addr. When cursor is an entry of interest the
  # edge cursor -> to_addr is recorded and the walk continues from cursor; otherwise the search
  # continues through whatever references cursor, still looking for a referrer of to_addr.
  #
  # to_addr - key of the referenced entry (the head of the edge to record)
  # cursor  - key in @heap of the candidate referrer
  # edges   - Set of [from, to] pairs collected so far
  # walked  - Set of keys on the current walk, or nil; the recursive calls pass the result of
  #           walked.add?(r), which is nil when r is already in the set
  def walk_to_object(to_addr, cursor, edges, walked)
    # a nil walked means the caller's add? found the key already present - stop here
    return unless walked
    # if walked to an object, or everything if a single_id is the target
    # NOTE(review): && binds tighter than ||, so the parenthesized part reads as
    # (@single_id && type == ROOT_UC) || type == CLASS - a CLASS matches even without a single_id;
    # confirm that this is the intent
    if @heap[ cursor ][ TYPE ] == OBJECT || (@single_id && @heap[ cursor ][ TYPE ] == ROOT_UC || @heap[ cursor ][ TYPE ] == CLASS )
      # and the edge is unique
      if edges.add?( [ cursor, to_addr ] )
        # then we may not have visited the objects this object is being referred from
        print_object(cursor, @heap[ cursor ])
        # Do not follow what binds a class
        if @heap[ cursor ][ TYPE ] != CLASS
          @references[ cursor ].each do |r|
            walk_to_object(cursor, r, edges, walked.add?(r))
            # NOTE(review): r is removed again after the recursive call, also when add? returned nil
            # because r was put in the set earlier - presumably so that other paths may pass
            # through r; confirm
            walked.delete(r)
          end
        end
      end
    else
      # continue search until Object
      # (unlike the branch above, r stays in walked after the recursive call)
      @references[cursor].each do |r|
        walk_to_object(to_addr, r, edges, walked.add?(r))
      end
    end
  end

  # Walks from the single entry the_target, then writes all edges collected during that walk.
  def single_closure_and_edges(the_target)
    collected = Set.new
    visited = Set.new
    walk(the_target, collected, visited)
    collected.each { |from_addr, to_addr| print_edge(from_addr, to_addr) }
  end

  puts "creating graph"
  # memwalk-<address>.dot for a single target, memwalk.dot when walking every diff entry
  dot_file = @single_id ? "memwalk-#{@single_id.to_s(16)}.dot" : "memwalk.dot"
  # The block form closes the file even when the walk raises
  File.open(dot_file, "w") do |out|
    # print_object and print_edge write to @output
    @output = out
    out.write("digraph root {\n")
    if @single_id
      single_closure_and_edges(@single_id)
    else
      closure_and_edges(diff_index)
    end
    out.write("}\n")
  end
  puts "done"
end