1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283
|
#!/usr/bin/env vpython3
# Copyright 2015 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Symbolizes a log file produced by cyprofile instrumentation.
Given a log file and the binary being profiled, creates an orderfile.
"""
import argparse
import logging
import multiprocessing
import os
import re
import sys
import tempfile
import cygprofile_utils
import process_profiles
import symbol_extractor
class _SymbolNotFoundException(Exception):
"""Exception used during internal processing."""
def __init__(self, value):
super().__init__(value)
self.value = value
def __str__(self):
return repr(self.value)
def GetObjectFilenames(obj_dir):
"""Returns all a list of .o files in a given directory tree."""
obj_files = []
# Scan _obj_dir recursively for .o files.
for (dirpath, _, filenames) in os.walk(obj_dir):
for file_name in filenames:
if file_name.endswith('.o'):
obj_files.append(os.path.join(dirpath, file_name))
return obj_files
class ObjectFileProcessor:
"""Processes symbols found in the object file tree.
This notably includes the section names of each symbol, as well as component
information that can be taken from the directory (not yet implemented).
"""
def __init__(self, obj_dir):
"""Initialize.
Args:
obj_dir (str) The root of the object directory.
"""
self._obj_dir = obj_dir
self._symbol_to_sections_map = None
def GetSymbolToSectionsMap(self):
"""Scans object files to find symbol section names.
Returns:
{symbol: linker section(s)}
"""
if self._symbol_to_sections_map is not None:
return self._symbol_to_sections_map
symbol_infos = self._GetAllSymbolInfos()
self._symbol_to_sections_map = {}
symbol_warnings = cygprofile_utils.WarningCollector(300)
for symbol_info in symbol_infos:
symbol = symbol_info.name
if symbol.startswith('.LTHUNK'):
continue
section = symbol_info.section
if ((symbol in self._symbol_to_sections_map) and
(symbol_info.section not in self._symbol_to_sections_map[symbol])):
self._symbol_to_sections_map[symbol].append(section)
if not self._SameCtorOrDtorNames(
symbol, self._symbol_to_sections_map[symbol][0].lstrip('.text.')):
symbol_warnings.Write('Symbol ' + symbol +
' unexpectedly in more than one section: ' +
', '.join(self._symbol_to_sections_map[symbol]))
elif not section.startswith('.text.'):
# Assembly functions have section ".text". These are all grouped
# together near the end of the orderfile via an explicit ".text" entry.
if section != '.text':
symbol_warnings.Write('Symbol ' + symbol +
' in incorrect section ' + section)
else:
# In most cases we expect just one item in this list, and maybe 4 or so
# in the worst case.
self._symbol_to_sections_map[symbol] = [section]
symbol_warnings.WriteEnd('bad sections')
return self._symbol_to_sections_map
@classmethod
def _SameCtorOrDtorNames(cls, symbol1, symbol2):
"""Returns True if two symbols refer to the same constructor or destructor.
The Itanium C++ ABI specifies dual constructor and destructor emmission
(section 5.1.4.3):
https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-special-ctor-dtor
To avoid fully parsing all mangled symbols, a heuristic is used with
c++filt.
Note: some compilers may name generated copies differently. If this becomes
an issue this heuristic will need to be updated.
"""
# Check if this is the understood case of constructor/destructor
# signatures. GCC emits up to three types of constructor/destructors:
# complete, base, and allocating. If they're all the same they'll
# get folded together.
return (re.search('(C[123]|D[012])E', symbol1) and
symbol_extractor.DemangleSymbol(symbol1) ==
symbol_extractor.DemangleSymbol(symbol2))
def _GetAllSymbolInfos(self):
obj_files = GetObjectFilenames(self._obj_dir)
pool = multiprocessing.Pool()
# Hopefully the object files are in the page cache at this point as
# typically the library has just been built before the object files are
# scanned. Hence IO should not be a problem, and there is no need for a
# concurrency limit on the pool.
symbol_infos_nested = pool.map(
symbol_extractor.SymbolInfosFromBinary, obj_files)
pool.close()
pool.join()
result = []
for symbol_infos in symbol_infos_nested:
result += symbol_infos
return result
class OffsetOrderfileGenerator:
"""Generates an orderfile from instrumented build offsets.
The object directory, SymbolOffsetProcessor and dump offsets must all be from
the same build.
"""
def __init__(self, symbol_offset_processor, obj_processor):
"""Initialize the generator.
Args:
symbol_offset_processor (process_profiles.SymbolOffsetProcessor).
obj_processor (ObjectFileProcessor).
"""
self._offset_to_symbols = symbol_offset_processor.OffsetToSymbolsMap()
self._obj_processor = obj_processor
def GetOrderedSections(self, offsets):
"""Get ordered list of sections corresponding to offsets.
Args:
offsets ([int]) dump offsets, from same build as parameters to __init__.
Returns:
[str] ordered list of sections suitable for use as an orderfile, or
None if failure.
"""
symbol_to_sections = self._obj_processor.GetSymbolToSectionsMap()
if not symbol_to_sections:
logging.error('No symbol section names found')
return None
unknown_symbol_warnings = cygprofile_utils.WarningCollector(300)
symbol_not_found_errors = cygprofile_utils.WarningCollector(
300, level=logging.ERROR)
seen_sections = set()
ordered_sections = []
success = True
for offset in offsets:
try:
symbol_infos = self._SymbolsAtOffset(offset)
for symbol_info in symbol_infos:
if symbol_info.name in symbol_to_sections:
sections = symbol_to_sections[symbol_info.name]
for section in sections:
if not section in seen_sections:
ordered_sections.append(section)
seen_sections.add(section)
else:
unknown_symbol_warnings.Write(
'No known section for symbol ' + symbol_info.name)
except _SymbolNotFoundException:
symbol_not_found_errors.Write(
'Did not find function in binary. offset: ' + hex(offset))
success = False
unknown_symbol_warnings.WriteEnd('no known section for symbol.')
symbol_not_found_errors.WriteEnd('symbol not found in the binary.')
if not success:
return None
return ordered_sections
def _SymbolsAtOffset(self, offset):
if offset in self._offset_to_symbols:
return self._offset_to_symbols[offset]
if offset % 2 and (offset - 1) in self._offset_to_symbols:
# On ARM, odd addresses are used to signal thumb instruction. They are
# generated by setting the LSB to 1 (see
# http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0471e/Babfjhia.html).
# TODO(lizeb): Make sure this hack doesn't propagate to other archs.
return self._offset_to_symbols[offset - 1]
raise _SymbolNotFoundException(offset)
def _WarnAboutDuplicates(offsets):
"""Warns about duplicate offsets.
Args:
offsets: list of offsets to check for duplicates
Returns:
True if there are no duplicates, False otherwise.
"""
seen_offsets = set()
ok = True
for offset in offsets:
if offset not in seen_offsets:
seen_offsets.add(offset)
else:
ok = False
logging.warning('Duplicate offset: ' + hex(offset))
return ok
def _ReadReachedOffsets(filename):
"""Reads and returns a list of reached offsets."""
with open(filename, 'r') as f:
offsets = [int(x.rstrip('\n')) for x in f.xreadlines()]
return offsets
def _CreateArgumentParser():
parser = argparse.ArgumentParser(
description='Creates an orderfile from profiles.')
parser.add_argument('--target-arch', help='Unused')
parser.add_argument('--reached-offsets', type=str, required=True,
help='Path to the reached offsets')
parser.add_argument('--native-library', type=str, required=True,
help='Path to the unstripped instrumented library')
parser.add_argument('--output', type=str, required=True,
help='Output filename')
return parser
def main():
parser = _CreateArgumentParser()
args = parser.parse_args()
offsets = _ReadReachedOffsets(args.reached_offsets)
assert offsets
_WarnAboutDuplicates(offsets)
processor = process_profiles.SymbolOffsetProcessor(args.native_library)
ordered_symbols = processor.GetOrderedSymbols(offsets)
if ordered_symbols is None:
return 1
success = False
temp_filename = None
output_file = None
try:
(fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(args.output))
output_file = os.fdopen(fd, 'w')
output_file.write('\n'.join(ordered_symbols))
output_file.close()
os.rename(temp_filename, args.output)
temp_filename = None
success = True
finally:
if output_file:
output_file.close()
if temp_filename:
os.remove(temp_filename)
return 0 if success else 1
if __name__ == '__main__':
logging.basicConfig(level=logging.INFO)
sys.exit(main())
|