File: profile_allocations.py

Package: chromium 139.0.7258.127-1
#!/usr/bin/env python3
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Parses allocation profiles from a trace and graphs the results.

This parses an allocation profile generated by PartitionAlloc's thread
cache. It only yields data for Chrome instances where the thread cache is
enabled and PA_THREAD_CACHE_ALLOC_STATS is defined, which is the case in
non-official builds.

To collect a profile:
- Build a non-official Chrome version (should be a release build for accurate
  reports)
- Collect a trace with the memory-infra category enabled (in chrome://tracing)
- Save it as json.gz, and load it here.
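
Example invocation (file names are illustrative):
  profile_allocations.py --trace=trace.json.gz --output-dir=/tmp/graphs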
"""

import argparse
import logging
import os

from matplotlib import pyplot as plt
import numpy as np

from parse_trace import LoadTrace, GetAllocatorDumps, ProcessNamesAndLabels


def _ParseTrace(trace: dict) -> dict:
  """Parses a trace, and returns thread cache stats.

  Args:
    trace: As returned by LoadTrace()

  Returns:
    {pid -> {'name': str, 'labels': str, 'data': np.array}}.
    Where the data array contains 'size' and 'count' columns.
  """
  dumps = GetAllocatorDumps(trace)
  pid_to_name, pid_to_labels = ProcessNamesAndLabels(trace)

  result = {}
  for dump in dumps:
    pid = dump['pid']
    allocators = dump['args']['dumps']['allocators']

    # The browser process also has global dumps; we do not care about those.
    if 'global' in allocators:
      continue

    result[pid] = {
        'name': pid_to_name[pid],
        'labels': pid_to_labels.get(pid, '')
    }
    size_counts = []
    for allocator in allocators:
      if ('malloc/partitions/allocator/thread_cache/buckets_alloc/' not in
          allocator):
        continue
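      # Dump names end with the bucket size, e.g.
      # '.../thread_cache/buckets_alloc/64'; the count attribute value is a
      # hex-encoded string.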
      size = int(allocator[allocator.rindex('/') + 1:])
      count = int(allocators[allocator]['attrs']['count']['value'], 16)
      size_counts.append((size, count))
    # Sort and build the array once per process, after the loop. np.int was
    # removed from NumPy; use np.int64 instead.
    size_counts.sort()
    result[pid]['data'] = np.array(size_counts,
                                   dtype=[('size', np.int64),
                                          ('count', np.int64)])

  return result


def _PlotProcess(all_data: dict, pid: int, output_prefix: str):
  """Represents the allocation size distribution.

  Args:
    all_data: As returned by _ParseTrace().
    pid: PID to plot the data for.
    output_prefix: Prefix of the output file.
  """
  data = all_data[pid]
  logging.info('Plotting data for PID %d', pid)

  # Allocations vs size.
  plt.figure(figsize=(16, 8))
  plt.title('Allocation count vs Size - %s - %s' %
            (data['name'], data['labels']))
  plt.xscale('log', base=2)
  plt.yscale('log', base=10)
  plt.stem(data['data']['size'], data['data']['count'])
  plt.xlabel('Size (log)')
  plt.ylabel('Allocations (log)')
  plt.savefig('%s_%d_count.png' % (output_prefix, pid), bbox_inches='tight')
  plt.close()

  # CDF.
  plt.figure(figsize=(16, 8))
  plt.title('CDF of allocation size - %s - %s' % (data['name'], data['labels']))
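  # Cumulative percentage of allocations at or below each bucket size.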
  cdf = np.cumsum(100. * data['data']['count']) / np.sum(data['data']['count'])

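  # Mark a few common allocation sizes with vertical lines up to their CDF
  # value (sizes missing from the data are skipped).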
  for value in [512, 1024, 2048, 4096, 8192]:
    index = np.where(data['data']['size'] == value)[0]
    if index.size == 0:
      continue
    cdf_value = cdf[index[0]]
    plt.axvline(x=value, ymin=0, ymax=cdf_value / 100., color='lightgrey')

  plt.step(data['data']['size'], cdf, color='black', where='post')
  plt.ylim(ymin=0, ymax=100)
  plt.xlim(xmin=10, xmax=1e6)
  plt.xscale('log', base=2)
  plt.xlabel('Size (log)')
  plt.ylabel('CDF (%)')
  plt.savefig('%s_%d_cdf.png' % (output_prefix, pid),
              bbox_inches='tight',
              dpi=300)
  plt.close()


def _CreateArgumentParser():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--trace',
      type=str,
      required=True,
      help='Path to a trace.json[.gz] with memory-infra enabled.')
  parser.add_argument('--output-dir',
                      type=str,
                      required=True,
                      help='Output directory for graphs.')
  return parser


def main():
  logging.basicConfig(level=logging.INFO)
  parser = _CreateArgumentParser()
  args = parser.parse_args()

  logging.info('Loading the trace')
  trace = LoadTrace(args.trace)

  logging.info('Parsing the trace')
  stats_per_process = _ParseTrace(trace)

  logging.info('Plotting the results')
  for pid in stats_per_process:
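    # _ParseTrace() only fills in 'data' for processes that reported thread
    # cache buckets; skip the others.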
    if 'data' in stats_per_process[pid]:
      _PlotProcess(stats_per_process, pid,
                   os.path.join(args.output_dir, 'result'))


if __name__ == '__main__':
  main()