File: profile_allocations.py

Package: chromium 139.0.7258.127-1
#!/usr/bin/env python3
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Parses allocation profiles from a trace and graphs the results.

This parses an allocation profile generated by PartitionAlloc's thread
cache. It only yields data for Chrome instances where the thread cache is
enabled and PA_THREAD_CACHE_ALLOC_STATS is defined, which is the case in
non-official builds.

To collect a profile:
- Build a non-official Chrome version (should be a release build for accurate
  reports)
- Collect a trace with the memory-infra category enabled (in chrome://tracing)
- Save it as json.gz, and load it here.
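
Example invocation (file names are illustrative):
  profile_allocations.py --trace=trace.json.gz --output-dir=/tmp/graphs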
"""

import argparse
import logging
import os

from matplotlib import pyplot as plt
import numpy as np

from parse_trace import LoadTrace, GetAllocatorDumps, ProcessNamesAndLabels


def _ParseTrace(trace: dict) -> dict:
  """Parses a trace, and returns thread cache stats.

  Args:
    trace: As returned by LoadTrace()

  Returns:
    {pid -> {'name': str, 'labels': str, 'data': np.array}}.
    Where the data array contains 'size' and 'count' columns.
  """
  dumps = GetAllocatorDumps(trace)
  pid_to_name, pid_to_labels = ProcessNamesAndLabels(trace)

  result = {}
  for dump in dumps:
    pid = dump['pid']
    allocators = dump['args']['dumps']['allocators']

    # The browser process also has global dumps; we do not care about those.
    if 'global' in allocators:
      continue

    result[pid] = {
        'name': pid_to_name[pid],
        'labels': pid_to_labels.get(pid, '')
    }
    size_counts = []
    for allocator in allocators:
      if ('malloc/partitions/allocator/thread_cache/buckets_alloc/' not in
          allocator):
        continue
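      # Dump names end with the bucket size, e.g.
      # '.../thread_cache/buckets_alloc/64'; the count attribute value is a
      # hex-encoded string.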
      size = int(allocator[allocator.rindex('/') + 1:])
      count = int(allocators[allocator]['attrs']['count']['value'], 16)
      size_counts.append((size, count))
    # Sort and build the array once per process, after the loop. np.int was
    # removed from NumPy; use np.int64 instead.
    size_counts.sort()
    result[pid]['data'] = np.array(size_counts,
                                   dtype=[('size', np.int64),
                                          ('count', np.int64)])

  return result


def _PlotProcess(all_data: dict, pid: int, output_prefix: str):
  """Represents the allocation size distribution.

  Args:
    all_data: As returned by _ParseTrace().
    pid: PID to plot the data for.
    output_prefix: Prefix of the output file.
  """
  data = all_data[pid]
  logging.info('Plotting data for PID %d', pid)

  # Allocations vs size.
  plt.figure(figsize=(16, 8))
  plt.title('Allocation count vs Size - %s - %s' %
            (data['name'], data['labels']))
  plt.xscale('log', base=2)
  plt.yscale('log', base=10)
  plt.stem(data['data']['size'], data['data']['count'])
  plt.xlabel('Size (log)')
  plt.ylabel('Allocations (log)')
  plt.savefig('%s_%d_count.png' % (output_prefix, pid), bbox_inches='tight')
  plt.close()

  # CDF.
  plt.figure(figsize=(16, 8))
  plt.title('CDF of allocation size - %s - %s' % (data['name'], data['labels']))
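  # Cumulative percentage of allocations at or below each bucket size.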
  cdf = np.cumsum(100. * data['data']['count']) / np.sum(data['data']['count'])

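  # Mark a few common allocation sizes with vertical lines up to their CDF
  # value (sizes missing from the data are skipped).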
  for value in [512, 1024, 2048, 4096, 8192]:
    index = np.where(data['data']['size'] == value)[0]
    if index.size == 0:
      continue
    cdf_value = cdf[index[0]]
    plt.axvline(x=value, ymin=0, ymax=cdf_value / 100., color='lightgrey')

  plt.step(data['data']['size'], cdf, color='black', where='post')
  plt.ylim(ymin=0, ymax=100)
  plt.xlim(xmin=10, xmax=1e6)
  plt.xscale('log', base=2)
  plt.xlabel('Size (log)')
  plt.ylabel('CDF (%)')
  plt.savefig('%s_%d_cdf.png' % (output_prefix, pid),
              bbox_inches='tight',
              dpi=300)
  plt.close()


def _CreateArgumentParser():
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--trace',
      type=str,
      required=True,
      help='Path to a trace.json[.gz] with memory-infra enabled.')
  parser.add_argument('--output-dir',
                      type=str,
                      required=True,
                      help='Output directory for graphs.')
  return parser


def main():
  logging.basicConfig(level=logging.INFO)
  parser = _CreateArgumentParser()
  args = parser.parse_args()

  logging.info('Loading the trace')
  trace = LoadTrace(args.trace)

  logging.info('Parsing the trace')
  stats_per_process = _ParseTrace(trace)

  logging.info('Plotting the results')
  for pid in stats_per_process:
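    # _ParseTrace() only fills in 'data' for processes that reported thread
    # cache buckets; skip the others.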
    if 'data' in stats_per_process[pid]:
      _PlotProcess(stats_per_process, pid,
                   os.path.join(args.output_dir, 'result'))


if __name__ == '__main__':
  main()