1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
|
#!/usr/bin/env python3
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Runs dwarfdump on passed-in .so."""
import argparse
import bisect
import dataclasses
import logging
import os
import re
import subprocess
import typing
import path_util
# Flags appended after the input path on every dwarfdump invocation in this
# file. NOTE(review): presumably the tool is llvm-dwarfdump, where these limit
# output to .debug_info and suppress child entries -- confirm against the
# binary returned by path_util.GetDwarfdumpPath().
_DWARF_DUMP_FLAGS = ['--debug-info', '--recurse-depth=0']
# Used with .match(), so it only accepts lines that begin with whitespace
# followed by 'DW_'. Group 1 is always 'DW_'; group 2 is the quoted value when
# the attribute is DW_AT_name, and None for any other attribute.
# Matching and group examples:
# '0x00001234: DW_TAG_compile_unit' -> None
# '  DW_AT_low_pc (0x123)' -> ('DW_', None)
# '  DW_AT_name ("foo")' -> ('DW_', 'foo')
_RE_DW_AT_NAME = re.compile(r'\s+(DW_)(?:AT_name\s+\("(.*?)"\))?')
class _DwoNameLookup:
    """Helper to look up name (source file) from .dwo files.

    dwarfdump of an ELF file normally specifies source files in DW_AT_name
    fields. However, debug fission can move debug info from ELF files to .dwo
    files. In this case, dwarfdump would omit DW_AT_name of affected symbols,
    and use DW_AT_GNU_dwo_name to specify the path (relative to output dir) of
    the matching .dwo files, whose dwarfdump would then specify the matching
    source file in DW_AT_name.

    This class performs cached lookup from .dwo to name (source file).
    """

    def __init__(self, any_path):
        """Sets up the lookup.

        Args:
          any_path: Path handed to path_util.OutputDirectoryFinder as
              |any_path_within_output_directory| to detect the output
              directory.
        """
        finder = path_util.OutputDirectoryFinder(
            any_path_within_output_directory=any_path)
        self._output_path = finder.Detect()  # May be None.
        self._dwarf_dump_path = path_util.GetDwarfdumpPath()
        # Maps dwo_path -> resolved name (or dwo_path itself on failure).
        self._cache = {}

    def _ReadName(self, dwo_path):
        """Runs dwarfdump on .dwo to extract name.

        Args:
          dwo_path: Path of the .dwo file, assumed relative to the output
              directory.

        Returns:
          The DW_AT_name value of the first compile unit in the dump. If this
          is not possible (no output directory, no compile unit, or no name
          attribute) then returns |dwo_path|.
        """
        if self._output_path is None:
            return dwo_path

        # Assumption: |dwo_path| is relative to output path.
        real_dwo_path = os.path.join(self._output_path, dwo_path)
        cmd = [self._dwarf_dump_path, real_dwo_path] + _DWARF_DUMP_FLAGS
        name = None
        # The context manager closes the stdout pipe and waits for the child
        # on exit, so no zombie process or file descriptor is left behind.
        with subprocess.Popen(cmd,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.DEVNULL,
                              encoding='utf-8') as proc:
            try:
                # Scan output line by line, stop as soon as possible.
                lines = iter(proc.stdout)
                # Phase 1: skip ahead to the DW_TAG_compile_unit line.
                for line in lines:
                    if 'DW_TAG_compile_unit' in line:
                        break
                # Phase 2: walk the attribute lines that follow, looking for
                # DW_AT_name. Does nothing if phase 1 exhausted the output.
                for line in lines:
                    m = _RE_DW_AT_NAME.match(line)
                    if not m:  # Not even matching prefix ' DW_'.
                        break
                    name = m.group(2)
                    if name is not None:  # Extracted name.
                        break
                    # Else matches ' DW_': Continue scanning.
            finally:
                # Remaining output is not needed; kill first so that the wait
                # performed when leaving the `with` block returns promptly.
                proc.kill()
        return dwo_path if name is None else name

    def Lookup(self, dwo_path):
        """Looks up name in .dwo, with caching.

        Args:
          dwo_path: Path of the .dwo file, as given by DW_AT_GNU_dwo_name.

        Returns:
          The cached or freshly read name; |dwo_path| if it cannot be resolved.
        """
        try:
            return self._cache[dwo_path]
        except KeyError:
            name = self._ReadName(dwo_path)
            self._cache[dwo_path] = name
            return name

    def LogStats(self):
        """Logs how many cached lookups resolved to something other than the
        .dwo path itself. Logs nothing when no lookup was made."""
        if self._cache:
            # A failed lookup stores the key as its own value.
            num_success = sum(1 for k, v in self._cache.items() if k != v)
            logging.info('Successful .dwo lookups: %d / %d', num_success,
                         len(self._cache))
# order=True generates comparison methods that compare instances as
# (start, stop) tuples, which is what lets lists of ranges be sorted.
@dataclasses.dataclass(order=True)
class _AddressRange:
    """Address interval with mutable bounds.

    Lookups in this file treat it as half-open: an address matches when
    start <= address < stop.
    """
    # First address covered by the range.
    start: int
    # Address one past the last covered address.
    stop: int
class _SourceMapper:
    """Answers "which source file owns this address?" from sorted ranges."""

    def __init__(self, range_info_list):
        """Stores the ranges and builds the lookup index.

        Args:
          range_info_list: List of (_AddressRange, source path) tuples, sorted
              in ascending order (e.g. the result of sorted()).
        """
        self._range_info_list = range_info_list
        # Parallel list of start addresses. Bisecting plain ints avoids
        # building a sentinel key and never compares the source path
        # component, whatever the stop addresses happen to be.
        self._range_starts = [info[0].start for info in range_info_list]

    def FindSourceForTextAddress(self, address):
        """Returns source file path matching passed-in symbol address.

        Only symbols in the .text section of the elf file are supported.

        Args:
          address: Integer address to look up.

        Returns:
          Source path of the last range (in sort order) whose start is
          <= |address|, provided |address| is below that range's stop;
          otherwise ''.
        """
        index = bisect.bisect_right(self._range_starts, address) - 1
        if index >= 0:
            address_range, source_path = self._range_info_list[index]
            if address_range.start <= address < address_range.stop:
                return source_path
        return ''

    def NumberOfPaths(self):
        """Returns the number of distinct source paths across all ranges."""
        return len({info[1] for info in self._range_info_list})

    @property
    def num_ranges(self):
        """Number of (range, source path) entries held by this mapper."""
        return len(self._range_info_list)
def CreateAddressSourceMapper(elf_path):
    """Builds an address-to-source mapper by running dwarfdump on an ELF file.

    Args:
      elf_path: Path of the ELF file to pass to dwarfdump.

    Returns:
      A _SourceMapper wrapping the ranges parsed from the dump.
    """
    range_info_list = _Parse(elf_path)
    return _SourceMapper(range_info_list)
def CreateAddressSourceMapperForTest(lines, dwo_name_lookup=None):
    """Builds a mapper from already-captured dwarfdump output lines.

    Args:
      lines: Iterable of dwarfdump output lines.
      dwo_name_lookup: Optional object used to resolve .dwo paths to source
          paths; when omitted, .dwo paths are used as-is.

    Returns:
      A _SourceMapper wrapping the ranges parsed from |lines|.
    """
    range_info_list = _ParseDumpOutput(lines, dwo_name_lookup)
    return _SourceMapper(range_info_list)
def ParseDumpOutputForTest(lines, dwo_name_lookup=None):
    """Exposes the dump parser so that it can be exercised directly.

    Args:
      lines: Iterable of dwarfdump output lines.
      dwo_name_lookup: Optional object used to resolve .dwo paths.

    Returns:
      Sorted list of (_AddressRange, source path) tuples.
    """
    range_info_list = _ParseDumpOutput(lines, dwo_name_lookup)
    return range_info_list
def _Parse(elf_path):
    """Runs dwarfdump on |elf_path| and parses what it prints.

    Args:
      elf_path: Path of the ELF file to dump.

    Returns:
      Sorted list of (_AddressRange, source path) tuples.

    Raises:
      subprocess.CalledProcessError: If dwarfdump exits with non-zero status.
    """
    dwarfdump_cmd = [path_util.GetDwarfdumpPath(), elf_path]
    dwarfdump_cmd += _DWARF_DUMP_FLAGS
    logging.debug('Running: %s', ' '.join(dwarfdump_cmd))
    # stderr is discarded; only stdout carries the dump.
    dump_text = subprocess.check_output(dwarfdump_cmd,
                                        stderr=subprocess.DEVNULL,
                                        encoding='utf-8')
    dump_lines = dump_text.splitlines()
    name_lookup = _DwoNameLookup(elf_path)
    return _ParseDumpOutput(dump_lines, name_lookup)
def _ParseDumpOutput(lines, dwo_name_lookup=None):
    """Parses passed-in dwarfdump stdout.

    Args:
      lines: Iterable of dwarfdump output lines.
      dwo_name_lookup: Optional object with Lookup(dwo_path) and LogStats().
          When given, it resolves DW_AT_GNU_dwo_name paths to source paths;
          otherwise the .dwo path itself is recorded as the source path.

    Returns:
      Sorted list of (_AddressRange, source path) tuples, one per address
      range of every compile unit that has both ranges and a name.
    """
    # List of (_AddressRange, source path) tuples.
    range_info_list = []
    line_it = iter(lines)
    line = next(line_it, None)
    while line is not None:
        if 'DW_TAG_compile_unit' not in line:
            line = next(line_it, None)
            continue
        # |line| becomes the line that ended the block (next DW_TAG line, or
        # None at end of input), so it is re-examined by this loop.
        line, address_ranges, source_path, dwo_path = _ParseCompileUnit(
            line_it)
        if not address_ranges or not (source_path or dwo_path):
            continue
        if dwo_path:
            # A .dwo reference takes precedence over DW_AT_name. Resolved once
            # per compile unit rather than once per address range.
            source_path = (dwo_name_lookup.Lookup(dwo_path)
                           if dwo_name_lookup else dwo_path)
        range_info_list.extend(
            (address_range, source_path) for address_range in address_ranges)
    if dwo_name_lookup:
        dwo_name_lookup.LogStats()
    return sorted(range_info_list)
def _ParseCompileUnit(line_it):
    """Parses the attribute lines of one DW_TAG_compile_unit block.

    Consumes |line_it| until the next line holding a DW_TAG entry, or until
    the input runs out. Lines without 'DW_' are skipped.

    Example:
      0x000026: DW_TAG_compile_unit
        DW_AT_low_pc (0x02f)
        DW_AT_high_pc (0x03f)
        DW_AT_name ("foo.cc")
        DW_AT_GNU_dwo_name ("foo.dwo")

    Args:
      line_it: Iterator over dwarfdump lines, positioned just after the
          DW_TAG_compile_unit line.

    Returns:
      Tuple (line, range_addresses, source_path, dwo_path) where |line| is
      the DW_TAG line that ended the block (None at end of input),
      |range_addresses| is a list of _AddressRange, and the two paths are
      None when the matching attribute is absent.
    """
    name_value = None
    dwo_value = None
    pc_range = _AddressRange(0, 0)
    ranges = []
    for line in line_it:
        pos = line.find('DW_')
        if pos < 0:
            continue
        if line.startswith('DW_TAG', pos):
            # Start of the next entry: this block is finished.
            break
        if line.startswith('DW_AT_low_pc', pos):
            pc_range.start = int(_ExtractDwValue(line), 16)
            # Until a DW_AT_high_pc shows up, treat it as a 1-address range.
            if pc_range.stop == 0:
                pc_range.stop = pc_range.start + 1
        elif line.startswith('DW_AT_high_pc', pos):
            pc_range.stop = int(_ExtractDwValue(line), 16)
        elif line.startswith('DW_AT_name', pos):
            name_value = _ExtractDwValue(line)
        elif line.startswith('DW_AT_GNU_dwo_name', pos):
            dwo_value = _ExtractDwValue(line)
        elif line.startswith('DW_AT_ranges', pos):
            # The range list follows on the next lines of the same iterator.
            ranges = _ParseRanges(line_it)
    else:
        # Input exhausted without meeting another DW_TAG line.
        line = None

    if ranges:
        # If compile unit specifies both DW_AT_ranges and DW_AT_low_pc,
        # DW_AT_low_pc is base offset. Base offset is currently unsupported.
        assert pc_range.start == 0
    elif pc_range.start > 0:
        ranges.append(pc_range)
    return (line, ranges, name_value, dwo_value)
def _ParseRanges(line_it):
    """Parses the address list that follows a DW_AT_ranges line.

    Reads lines until one closes more brackets than it opens (the list's
    final line), or until the input is exhausted.

    Example:
      [0x1, 0x2)
      [0x5, 0x10))

    Args:
      line_it: Iterator over dwarfdump lines, positioned on the first range.

    Returns:
      List of _AddressRange, in input order, with empty ranges dropped.
    """
    parsed = []
    for line in line_it:
        opened = sum(line.count(ch) for ch in '([')
        closed = sum(line.count(ch) for ch in ')]')
        parts = line.strip('([]) \t').split(',')
        if len(parts) == 2:
            low = int(parts[0], 16)
            high = int(parts[1], 16)
            # Dwarf spec does not assign special meaning to empty ranges.
            if low != high:
                parsed.append(_AddressRange(low, high))
        if closed > opened:
            # Surplus closing bracket: that was the last entry of the list.
            break
    return parsed
def _ExtractDwValue(line):
    """Extract DW_AT_ value from dwarfdump stdout.

    The value is the text between the first '(' and the last ')' on the line,
    so values that themselves contain parentheses (e.g. a path such as
    "a/b (copy)/foo.cc") are returned whole. One pair of enclosing double
    quotes is stripped.

    Examples:
      DW_AT_name ("foo.cc")   -> 'foo.cc'
      DW_AT_decl_line (177)   -> '177'
      DW_AT_low_pc (0x2)      -> '0x2'

    Args:
      line: One line of dwarfdump output.

    Returns:
      The extracted value as a string, or None if the line has no
      parenthesized value.
    """
    lparen_index = line.find('(')
    if lparen_index < 0:
        return None
    rparen_index = line.rfind(')')
    if rparen_index < lparen_index:  # Also covers "no ')' at all" (-1).
        return None
    value = line[lparen_index + 1:rparen_index]
    if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
        return value[1:-1]
    return value
def main():
    """Command-line entry point.

    Takes exactly one of --dwarf-dump-output (a file holding saved dwarfdump
    output) or --elf-file (an ELF file to run dwarfdump on), builds the source
    mapper, and logs how many source paths and ranges were found.
    """
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--dwarf-dump-output', type=os.path.realpath)
    group.add_argument('--elf-file', type=os.path.realpath)
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG,
                        format='%(levelname).1s %(relativeCreated)6d %(message)s')

    if args.dwarf_dump_output:
        dwo_name_lookup = _DwoNameLookup(args.dwarf_dump_output)
        # Decode as UTF-8, the same encoding used when dwarfdump output is
        # read straight from the subprocess, instead of the locale default.
        with open(args.dwarf_dump_output, 'r', encoding='utf-8') as f:
            source_mapper = CreateAddressSourceMapperForTest(
                f.read().splitlines(), dwo_name_lookup)
    else:
        assert args.elf_file
        source_mapper = CreateAddressSourceMapper(args.elf_file)
    logging.warning('Found %d source paths across %d ranges',
                    source_mapper.NumberOfPaths(), source_mapper.num_ranges)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()
|