File: demangle.py

package info (click to toggle)
chromium 139.0.7258.127-2
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 6,122,156 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (141 lines) | stat: -rw-r--r-- 4,674 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Utilities for demangling C++ symbols."""

import collections
import itertools
import logging
import re
import subprocess

import path_util

# Matches a string made up solely of lower-case hex digits (possibly empty).
_LOWER_HEX_PATTERN = re.compile(r'^[0-9a-f]*$')
# Matches the trailing ' (.llvm.<digits>)' or ' (.<digits>.llvm.<digits>)' that
# ends a demangled LLVM promoted global name.
_PROMOTED_GLOBAL_NAME_DEMANGLED_PATTERN = re.compile(
    r' \((\.\d+)?\.llvm\.\d+\)$')
# Matches the trailing '.llvm.<digits>' or '.<digits>.llvm.<digits>' that ends
# an LLVM promoted global name which was not demangled.
_PROMOTED_GLOBAL_NAME_RAW_PATTERN = re.compile(r'(\.\d+)?\.llvm\.\d+$')

def StripLlvmPromotedGlobalNames(name):
  """Returns |name| with any LLVM promoted-global suffix removed.

  When LLVM promotes a global name it appends '.llvm.1234' or '.1.llvm.1234',
  where the final number is a hash. After a successful demangle that suffix
  shows up as ' (.llvm.1234)' or ' (.1.llvm.1234)'; otherwise it stays in its
  raw form. Either form is removed here so that it does not interfere with
  name comparison.
  """
  # Fast path: most names carry no '.llvm.' marker at all.
  if '.llvm.' not in name:
    return name
  # A trailing ')' selects the demangled (parenthesized) form of the suffix.
  if name.endswith(')'):
    suffix_pattern = _PROMOTED_GLOBAL_NAME_DEMANGLED_PATTERN
  else:
    suffix_pattern = _PROMOTED_GLOBAL_NAME_RAW_PATTERN
  return suffix_pattern.sub('', name)


def _CanDemangle(name):
  """Returns whether |name| begins with '_Z' or '.Lswitch.table._Z'."""
  return name.startswith(('_Z', '.Lswitch.table._Z'))


def _ExtractDemanglablePart(names):
  """Yields, for every entry of |names|, the portion that can be demangled."""
  for raw_name in names:
    # Drop whatever precedes the first '_Z' (e.g., '.Lswitch.table.').
    z_index = raw_name.find('_Z')
    part = raw_name[z_index:] if z_index > 0 else raw_name

    # A trailing '.cfi', and a trailing '$' followed by 32 lower-case hex
    # digits, both interfere with demangling by cxxfilt, so they are removed.
    if part.endswith('.cfi'):
      part = part[:-len('.cfi')]
    # Index at which a '$' + 32-hex-digit suffix would have to begin.
    hash_start = len(part) - 33
    if (hash_start > 0 and part[hash_start] == '$'
        and _LOWER_HEX_PATTERN.match(part[hash_start + 1:]) is not None):
      part = part[:hash_start]
    yield part


def _PostProcessDemangledSymbol(old_name, new_name):
  """Tidies up |new_name|, the demangler's output for raw name |old_name|.

  Removes any LLVM promoted-global suffix, and prefixes the result with
  'Switch table for ' when |old_name| began with '.Lswitch.table.'.
  """
  stripped = StripLlvmPromotedGlobalNames(new_name)
  if not old_name.startswith('.Lswitch.table.'):
    return stripped
  # Becomes ... [Switch table].
  return 'Switch table for ' + stripped


def _DemangleNames(names):
  """Demangles a list of names by piping them through cxxfilt.

  Returns a list holding one post-processed entry per line printed by cxxfilt,
  paired up in order with |names|.
  """
  # pylint: disable=unexpected-keyword-arg
  cxxfilt = subprocess.Popen([path_util.GetCppFiltPath()],
                             stdin=subprocess.PIPE,
                             stdout=subprocess.PIPE,
                             encoding='utf-8')
  # One demanglable name is written per input line.
  cxxfilt_input = '\n'.join(_ExtractDemanglablePart(names))
  cxxfilt_output, _ = cxxfilt.communicate(cxxfilt_input)
  assert cxxfilt.returncode == 0

  # Output lines are matched to the original names by position.
  demangled = []
  for mangled, line in zip(names, cxxfilt_output.splitlines()):
    demangled.append(_PostProcessDemangledSymbol(mangled, line))

  if logging.getLogger().isEnabledFor(logging.INFO):
    # Entries that still pass _CanDemangle() are counted as failures.
    num_failed = len([s for s in demangled if _CanDemangle(s)])
    if num_failed:
      logging.info('* Failed to demangle %d/%d items', num_failed,
                   len(demangled))
  return demangled


def DemangleRemainingSymbols(raw_symbols):
  """Demangles, in place, the |full_name| of every symbol that needs it."""
  pending = [sym for sym in raw_symbols if _CanDemangle(sym.full_name)]
  if not pending:
    return

  logging.info('Demangling %d symbols', len(pending))
  demangled = _DemangleNames([sym.full_name for sym in pending])
  # Results line up positionally with |pending|.
  for sym, new_name in zip(pending, demangled):
    sym.full_name = new_name


def DemangleSetsInDictsInPlace(key_to_names):
  """Replaces each value of |key_to_names| with a set of demangled names.

  |key_to_names| is a dict from key to sets (or lists) of names, some of which
  may be mangled. If no name needs demangling, the dict is left untouched and
  is itself returned; otherwise every value is overwritten with a set and None
  is returned.
  """
  mangled = [
      n for names in key_to_names.values() for n in names if _CanDemangle(n)
  ]
  if not mangled:
    return key_to_names

  logging.info('Demangling %d values', len(mangled))
  demangled_iter = iter(_DemangleNames(mangled))
  for key, names in key_to_names.items():
    # Names are visited in the same order as when |mangled| was collected, so
    # each mangled name takes the next demangled result.
    key_to_names[key] = set(
        next(demangled_iter) if _CanDemangle(n) else n for n in names)
  # Every demangled result must have been consumed.
  assert next(demangled_iter, None) is None

  return None


def DemangleKeysAndMergeLists(name_to_list):
  """Demangles keys of a dict of lists, and returns the result.

  Keys may demangle to a common name. When this happens, the corresponding
  lists are merged in arbitrary order.

  Args:
    name_to_list: Dict from (possibly mangled) name to list.

  Returns:
    |name_to_list| itself if none of its keys needs demangling. Otherwise a new
    collections.defaultdict(list) keyed by demangled name (keys that need no
    demangling are kept as is), whose values concatenate the lists of all
    original keys that ended up with that name.
  """
  keys = [key for key in name_to_list if _CanDemangle(key)]
  if not keys:
    return name_to_list

  logging.info('Demangling %d keys', len(keys))
  key_iter = iter(_DemangleNames(keys))
  ret = collections.defaultdict(list)
  for key, val in name_to_list.items():
    # Demangled keys arrive in the order in which |keys| was built, so one is
    # consumed for each key that needed demangling.
    ret[next(key_iter) if _CanDemangle(key) else key] += val
  # Every demangled key must have been consumed.
  assert next(key_iter, None) is None
  # Values are passed as logging arguments so that formatting is deferred to
  # the logging module and skipped when INFO is disabled.
  logging.info('* %d keys become %d keys', len(name_to_list), len(ret))
  return ret