File: dwarfdump.py

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (302 lines) | stat: -rwxr-xr-x 9,572 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
#!/usr/bin/env python3
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Runs dwarfdump on passed-in .so."""

import argparse
import bisect
import dataclasses
import logging
import os
import re
import subprocess
import typing

import path_util


# Flags appended after the input file path on every dwarfdump command line
# built in this file.
_DWARF_DUMP_FLAGS = ['--debug-info', '--recurse-depth=0']

# Matches an indented attribute line (leading whitespace followed by 'DW_').
# Group 2 captures the quoted value only when the attribute is DW_AT_name.
# Matching and group examples:
# '0x00001234: DW_TAG_compile_unit' -> None
# '  DW_AT_low_pc  (0x123)' -> ('DW_', None)
# '  DW_AT_name  ("foo")' -> ('DW_', 'foo')
_RE_DW_AT_NAME = re.compile(r'\s+(DW_)(?:AT_name\s+\("(.*?)"\))?')


class _DwoNameLookup:
  """Helper to look up name (source file) from .dwo files

  dwarfdump of an ELF file normally specifies source files in DW_AT_name fields.
  However, debug fission can move debug info from ELF files to .dwo files. In
  this case, dwarfdump would omit DW_AT_name of affected symbols, and use
  DW_AT_GNU_dwo_name to specify the path (relative to output dir) of the
  matching .dwo files, whose dwarfdump would then specify the matching source
  file in DW_AT_name.

  This class performs cached lookup from .dwo to name (source file).
  """

  def __init__(self, any_path):
    finder = path_util.OutputDirectoryFinder(
        any_path_within_output_directory=any_path)
    self._output_path = finder.Detect()  # May be None.
    self._dwarf_dump_path = path_util.GetDwarfdumpPath()
    self._cache = {}

  def _ReadName(self, dwo_path):
    """Runs dwarfdump on .dwo to extract name.

    If this is not possible then returns |dwo_path|.
    """
    if self._output_path is None:
      return dwo_path
    # Assumption: |dwo_path| is relative to output path.
    real_dwo_path = os.path.join(self._output_path, dwo_path)
    cmd = [self._dwarf_dump_path, real_dwo_path] + _DWARF_DUMP_FLAGS
    proc = subprocess.Popen(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.DEVNULL,
                            encoding='utf-8')
    name = None
    state = 0
    # Scan output line by line, exit and terminate as soon as possible.
    for line in iter(proc.stdout.readline, ''):
      if state == 0:  # Scan for DW_TAG_compile_unit.
        if 'DW_TAG_compile_unit' in line:
          state = 1
      elif state == 1:  # scan for DW_AT_name.
        m = _RE_DW_AT_NAME.match(line)
        if not m:  # Not even matching prefix '  DW_'.
          break
        name = m.groups()[1]
        if name is not None:  # Extracted names.
          break
        # Else matches '  DW_': Continue scanning.
    proc.kill()
    return dwo_path if name is None else name

  def Lookup(self, dwo_path):
    """Looks up name in .dwo, with caching."""
    if dwo_path in self._cache:
      name = self._cache[dwo_path]
    else:
      name = self._ReadName(dwo_path)
      self._cache[dwo_path] = name
    return name

  def LogStats(self):
    if self._cache:
      num_success = sum(1 for k, v in self._cache.items() if k != v)
      logging.info('Successful .dwo lookups: %d / %d', num_success,
                   len(self._cache))


@dataclasses.dataclass(order=True)
class _AddressRange:
  start: int
  stop: int


class _SourceMapper:
  def __init__(self, range_info_list):
    self._range_info_list = range_info_list
    self._largest_address = 0

    if self._range_info_list:
      self._largest_address = self._range_info_list[-1][0].stop

  def FindSourceForTextAddress(self, address):
    """Returns source file path matching passed-in symbol address.

    Only symbols in the .text section of the elf file are supported.
    """
    # Bisect against stop = self._largest_address + 1 to avoid bisecting against
    # the "source path" tuple component.
    bisect_index = bisect.bisect_right(
        self._range_info_list,
        (_AddressRange(address, self._largest_address + 1), '')) - 1
    if bisect_index >= 0:
      info = self._range_info_list[bisect_index]
      if info[0].start <= address < info[0].stop:
        return info[1]

    return ''

  def NumberOfPaths(self):
    return len(set(info[1] for info in self._range_info_list))

  @property
  def num_ranges(self):
    return len(self._range_info_list)


def CreateAddressSourceMapper(elf_path):
  """Builds an address-to-source mapper for |elf_path|.

  Runs dwarfdump on the ELF file (via _Parse) and wraps the parsed ranges in
  an object that can be queried for the source path of a given address.
  """
  parsed_ranges = _Parse(elf_path)
  return _SourceMapper(parsed_ranges)


def CreateAddressSourceMapperForTest(lines, dwo_name_lookup=None):
  """Builds a source mapper from already-captured dwarfdump output lines.

  Args:
    lines: Iterable of dwarfdump output lines.
    dwo_name_lookup: Optional object used to resolve .dwo paths to names.
  """
  parsed_ranges = _ParseDumpOutput(lines, dwo_name_lookup)
  return _SourceMapper(parsed_ranges)


def ParseDumpOutputForTest(lines, dwo_name_lookup=None):
  """Exposes _ParseDumpOutput: returns its result for the same arguments."""
  parsed_ranges = _ParseDumpOutput(lines, dwo_name_lookup)
  return parsed_ranges


def _Parse(elf_path):
  """Runs dwarfdump on |elf_path| and parses what it prints.

  dwarfdump's stderr is discarded. Raises subprocess.CalledProcessError if
  dwarfdump exits with a non-zero status.

  Returns:
    The result of _ParseDumpOutput for the captured output.
  """
  dwarfdump_cmd = [path_util.GetDwarfdumpPath(), elf_path]
  dwarfdump_cmd += _DWARF_DUMP_FLAGS
  logging.debug('Running: %s', ' '.join(dwarfdump_cmd))
  dump_text = subprocess.check_output(dwarfdump_cmd,
                                      stderr=subprocess.DEVNULL,
                                      encoding='utf-8')
  dump_lines = dump_text.splitlines()
  return _ParseDumpOutput(dump_lines, _DwoNameLookup(elf_path))


def _ParseDumpOutput(lines, dwo_name_lookup=None):
  """Parses passed-in dwarfdump stdout."""

  # List of (_AddressRange, source path) tuples.
  range_info_list = []

  line_it = iter(lines)
  line = next(line_it, None)
  while line is not None:
    if 'DW_TAG_compile_unit' not in line:
      line = next(line_it, None)
      continue

    line, address_ranges, source_path, dwo_path = _ParseCompileUnit(line_it)
    if (source_path or dwo_path) and address_ranges:
      for address_range in address_ranges:
        if dwo_path:
          source_path = (dwo_name_lookup.Lookup(dwo_path)
                         if dwo_name_lookup else dwo_path)
        range_info_list.append((address_range, source_path))

  if dwo_name_lookup:
    dwo_name_lookup.LogStats()

  return sorted(range_info_list)


def _ParseCompileUnit(line_it):
  """Parses a DW_TAG_compile_unit block.

  Consumes lines from |line_it| until the next line containing a DW_TAG entry
  or the end of input. Lines that contain no 'DW_' are skipped.

  Example:
  0x000026: DW_TAG_compile_unit
              DW_AT_low_pc  (0x02f)
              DW_AT_high_pc  (0x03f)
              DW_AT_name  ("foo.cc")
              DW_AT_GNU_dwo_name  ("foo.dwo")

  Args:
    line_it: Iterator over dwarfdump output lines, positioned just after the
        DW_TAG_compile_unit line.

  Returns:
    Tuple (line, range_addresses, source_path, dwo_path):
      line: The DW_TAG line that ended the block, or None at end of input.
      range_addresses: List of _AddressRange for the unit (possibly empty).
      source_path: DW_AT_name value, or None if absent.
      dwo_path: DW_AT_GNU_dwo_name / DW_AT_dwo_name value, or None if absent.
  """
  source_path = None
  dwo_path = None
  single_range = _AddressRange(0, 0)
  range_addresses = []

  while True:
    line = next(line_it, None)

    dw_index = 0 if line is None else line.find('DW_')
    if dw_index < 0:
      continue

    if line is None or line.startswith('DW_TAG', dw_index):
      if range_addresses:
        # If compile unit specifies both DW_AT_ranges and DW_AT_low_pc,
        # DW_AT_low_pc is base offset. Base offset is currently unsupported.
        assert single_range.start == 0
      elif single_range.start > 0:
        # No DW_AT_ranges: fall back to the low_pc/high_pc pair. A unit whose
        # low_pc is 0 (or missing) contributes no range.
        range_addresses.append(single_range)
      return (line, range_addresses, source_path, dwo_path)

    if line.startswith('DW_AT_low_pc', dw_index):
      single_range.start = int(_ExtractDwValue(line), 16)
      # Until a DW_AT_high_pc is seen, cover just the single start address.
      if single_range.stop == 0:
        single_range.stop = single_range.start + 1
    elif line.startswith('DW_AT_high_pc', dw_index):
      single_range.stop = int(_ExtractDwValue(line), 16)
    elif line.startswith('DW_AT_name', dw_index):
      source_path = _ExtractDwValue(line)
    elif line.startswith(('DW_AT_GNU_dwo_name', 'DW_AT_dwo_name'), dw_index):
      # DW_AT_GNU_dwo_name is the pre-standard GNU extension; DWARF v5 split
      # DWARF names the same attribute DW_AT_dwo_name.
      dwo_path = _ExtractDwValue(line)
    elif line.startswith('DW_AT_ranges', dw_index):
      range_addresses = _ParseRanges(line_it)


def _ParseRanges(line_it):
  """Parses DW_AT_ranges from dwarfdump stdout.

  Example:
  [0x1, 0x2)
  [0x5, 0x10))
  """
  range_addresses = []

  line = next(line_it, None)
  while line is not None:
    num_opening_brackets = line.count('(') + line.count('[')
    num_closing_brackets = line.count(')') + line.count(']')

    tokens = line.strip('([]) \t').split(',')
    if len(tokens) == 2:
      start_address = int(tokens[0], 16)
      end_address = int(tokens[1], 16)
      # Dwarf spec does not assign special meaning to empty ranges.
      if start_address != end_address:
        range_addresses.append(_AddressRange(start_address, end_address))

    if num_closing_brackets > num_opening_brackets:
      break
    line = next(line_it, None)

  return range_addresses


def _ExtractDwValue(line):
  """Extract DW_AT_ value from dwarfdump stdout.

  Examples:
  DW_AT_name  ("foo.cc")
  DW_AT_decl_line  (177)
  DW_AT_low_pc  (0x2)
  """
  lparen_index = line.rfind('(')
  if lparen_index < 0:
    return None
  rparen_index = line.find(')', lparen_index + 1)
  if rparen_index < 0:
    return None
  if (lparen_index < rparen_index - 2 and line[lparen_index + 1] == '"'
      and line[rparen_index - 1] == '"'):
    lparen_index += 1
    rparen_index -= 1
  return line[lparen_index + 1:rparen_index]


def main():
  """Command-line entry point.

  Exactly one of --dwarf-dump-output (a file holding dwarfdump output) or
  --elf-file (a file to run dwarfdump on) is required. Builds the source
  mapper and logs the number of source paths and ranges found.
  """
  arg_parser = argparse.ArgumentParser()
  input_group = arg_parser.add_mutually_exclusive_group(required=True)
  input_group.add_argument('--dwarf-dump-output', type=os.path.realpath)
  input_group.add_argument('--elf-file', type=os.path.realpath)
  args = arg_parser.parse_args()

  logging.basicConfig(level=logging.DEBUG,
                      format='%(levelname).1s %(relativeCreated)6d %(message)s')

  dump_path = args.dwarf_dump_output
  if not dump_path:
    assert args.elf_file
    source_mapper = CreateAddressSourceMapper(args.elf_file)
  else:
    dwo_name_lookup = _DwoNameLookup(dump_path)
    with open(dump_path, 'r') as f:
      dump_lines = f.read().splitlines()
      source_mapper = CreateAddressSourceMapperForTest(dump_lines,
                                                       dwo_name_lookup)

  logging.warning('Found %d source paths across %d ranges',
                  source_mapper.NumberOfPaths(), source_mapper.num_ranges)


# Run main() only when executed as a script, not when imported as a module.
if __name__ == '__main__':
  main()