1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872
|
# Copyright 2022 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions for creating native code symbols from ELF files."""
import calendar
import collections
import dataclasses
import datetime
import itertools
import logging
import os
import posixpath
import re
import subprocess
import sys
import tempfile
import ar
import archive_util
import demangle
import dwarfdump
import linker_map_parser
import models
import ninja_parser
import nm
import obj_analyzer
import parallel
import path_util
import readelf
import string_extract
import zip_util
# When ensuring matching section sizes between .elf and .map files, these
# sections should be ignored. When lld creates a combined library with
# partitions, some sections (like .text) exist in each partition, but the ones
# below are common. At library splitting time, llvm-objcopy pulls what's needed
# from these sections into the new libraries. Hence, the ELF sections will end
# up smaller than the combined .map file sections.
_SECTION_SIZE_BLOCKLIST = ['.symtab', '.shstrtab', '.strtab']

# A limit on the number of symbols an address can have, before these symbols
# are compacted into shared symbols. Increasing this value causes more data
# to be stored in .size files, but is also more expensive.
# Effect as of Oct 2017, with min_pss = max:
# 1: shared .text syms = 1772874 bytes, file size = 9.43MiB (645476 syms).
# 2: shared .text syms = 1065654 bytes, file size = 9.58MiB (669952 syms).
# 6: shared .text syms = 464058 bytes, file size = 10.11MiB (782693 syms).
# 10: shared .text syms = 365648 bytes, file size = 10.24MiB (813758 syms).
# 20: shared .text syms = 86202 bytes, file size = 10.38MiB (854548 syms).
# 40: shared .text syms = 48424 bytes, file size = 10.50MiB (890396 syms).
# 50: shared .text syms = 41860 bytes, file size = 10.54MiB (902304 syms).
# max: shared .text syms = 0 bytes, file size = 11.10MiB (1235449 syms).
_MAX_SAME_NAME_ALIAS_COUNT = 40  # 50kb is basically negligible.
# Holds computation state that is live only when an output directory exists.
@dataclasses.dataclass
class _OutputDirectoryContext:
  """Context passed to _ParseElfInfo() when |output_directory| is known."""
  elf_object_paths: list  # Non-None only when elf_path is.
  known_inputs: list  # Non-None only when elf_path is.
  output_directory: str
  # Paths of .a files known to be thin archives (may be None — see callers).
  thin_archives: list
@dataclasses.dataclass
class ElfInfo:
  """Cached facts about a single ELF file."""
  architecture: str  # Results of ArchFromElf().
  build_id: str  # Result of BuildIdFromElf().
  section_ranges: dict  # Results of SectionInfoFromElf().
  size: int  # Result of os.path.getsize().

  def OverheadSize(self):
    """Returns file bytes not accounted for by any non-.bss section."""
    non_bss_total = 0
    for section_name, (_, section_size) in self.section_ranges.items():
      # .bss takes no space in the file, so it does not count toward overhead.
      if section_name not in models.BSS_SECTIONS:
        non_bss_total += section_size
    ret = self.size - non_bss_total
    assert ret >= 0, 'Negative ELF overhead {}'.format(ret)
    return ret
def _CreateElfInfo(elf_path):
  """Queries readelf and the filesystem to build an ElfInfo for |elf_path|."""
  arch = readelf.ArchFromElf(elf_path)
  build_id = readelf.BuildIdFromElf(elf_path)
  section_ranges = readelf.SectionInfoFromElf(elf_path)
  return ElfInfo(architecture=arch,
                 build_id=build_id,
                 section_ranges=section_ranges,
                 size=os.path.getsize(elf_path))
def _AddSourcePathsUsingObjectPaths(ninja_source_mapper, raw_symbols):
logging.info('Looking up source paths from ninja files')
for symbol in raw_symbols:
# Native symbols and pak symbols use object paths.
object_path = symbol.object_path
if not object_path:
continue
# We don't have source info for prebuilt .a files.
if not os.path.isabs(object_path) and not object_path.startswith('..'):
symbol.source_path = ninja_source_mapper.FindSourceForPath(object_path)
assert ninja_source_mapper.unmatched_paths_count == 0, (
'One or more source file paths could not be found. Likely caused by '
'.ninja files being generated at a different time than the .map file.')
def _AddSourcePathsUsingAddress(dwarf_source_mapper, raw_symbols):
  """Fills in |source_path| for .text symbols via dwarf address lookups."""
  logging.debug('Looking up source paths from dwarfdump')
  num_queried = 0
  num_matched = 0
  text_symbols = (s for s in raw_symbols
                  if s.section_name == models.SECTION_TEXT)
  for symbol in text_symbols:
    num_queried += 1
    path = dwarf_source_mapper.FindSourceForTextAddress(symbol.address)
    if path:
      num_matched += 1
      symbol.source_path = path
  logging.info('dwarfdump found paths for %d of %d .text symbols.', num_matched,
               num_queried)
  # Majority of unmatched queries are for assembly source files (ex libav1d)
  # and v8 builtins.
  if num_queried > 0:
    unmatched_ratio = (num_queried - num_matched) / num_queried
    assert unmatched_ratio < 0.2, (
        'Percentage of failing |dwarf_source_mapper| queries ' +
        '({}%) >= 20% '.format(unmatched_ratio * 100) +
        'FindSourceForTextAddress() likely has a bug.')
def _ConnectNmAliases(raw_symbols):
"""Ensures |aliases| is set correctly for all symbols."""
prev_sym = raw_symbols[0]
for sym in raw_symbols[1:]:
# Don't merge bss symbols.
if sym.address > 0 and prev_sym.address == sym.address:
# Don't merge padding-only symbols (** symbol gaps).
if prev_sym.size > 0:
# Don't merge if already merged.
if prev_sym.aliases is None or prev_sym.aliases is not sym.aliases:
if prev_sym.aliases:
prev_sym.aliases.append(sym)
else:
prev_sym.aliases = [prev_sym, sym]
sym.aliases = prev_sym.aliases
prev_sym = sym
def _AssignNmAliasPathsAndCreatePathAliases(raw_symbols, object_paths_by_name):
  """Assigns |object_path| from nm data; creates aliases for multi-path names.

  Args:
    raw_symbols: Symbols to update (typically from the linker .map).
    object_paths_by_name: Dict of full_name -> list of object paths in which
        nm found the name.

  Returns:
    A new symbol list, with one extra alias symbol inserted for each
    additional object path of a shared name.
  """
  num_found_paths = 0
  num_unknown_names = 0
  num_path_mismatches = 0
  num_aliases_created = 0
  ret = []
  for symbol in raw_symbols:
    ret.append(symbol)
    full_name = symbol.full_name
    # '__typeid_' symbols appear in linker .map only, and not nm output.
    if full_name.startswith('__typeid_'):
      if object_paths_by_name.get(full_name):
        logging.warning('Found unexpected __typeid_ symbol in nm output: %s',
                        full_name)
      continue
    # Don't skip if symbol.IsBss(). This is needed for LLD-LTO to work, since
    # .bss object_path data are unavailable for linker_map_parser, and need to
    # be extracted here. For regular LLD flow, incorrect aliased symbols can
    # arise. But that's a lesser evil compared to having LLD-LTO .bss missing
    # object_path and source_path.
    # TODO(huangs): Fix aliased symbols for the LLD case.
    if (symbol.IsStringLiteral() or not full_name or full_name[0] in '*.'
        or  # e.g. ** merge symbols, .Lswitch.table
        full_name == 'startup'):
      continue
    object_paths = object_paths_by_name.get(full_name)
    if object_paths:
      num_found_paths += 1
    else:
      # Happens a lot with code that has LTO enabled (linker creates symbols).
      num_unknown_names += 1
      continue
    if symbol.object_path and symbol.object_path not in object_paths:
      if num_path_mismatches < 10:
        logging.warning('Symbol path reported by .map not found by nm.')
        logging.warning('sym=%r', symbol)
        logging.warning('paths=%r', object_paths)
      # Keep the .map path as a candidate; sorted so choice is deterministic.
      object_paths.append(symbol.object_path)
      object_paths.sort()
      num_path_mismatches += 1
    symbol.object_path = object_paths[0]
    if len(object_paths) > 1:
      # Create one symbol for each object_path.
      aliases = symbol.aliases or [symbol]
      symbol.aliases = aliases
      num_aliases_created += len(object_paths) - 1
      for object_path in object_paths[1:]:
        new_sym = models.Symbol(symbol.section_name,
                                symbol.size,
                                address=symbol.address,
                                full_name=full_name,
                                object_path=object_path,
                                aliases=aliases)
        aliases.append(new_sym)
        ret.append(new_sym)
  logging.debug(
      'Cross-referenced %d symbols with nm output. '
      'num_unknown_names=%d num_path_mismatches=%d '
      'num_aliases_created=%d', num_found_paths, num_unknown_names,
      num_path_mismatches, num_aliases_created)
  # Currently: num_unknown_names=1246 out of 591206 (0.2%).
  if num_unknown_names > min(20, len(raw_symbols) * 0.01):
    logging.warning(
        'Abnormal number of symbols not found in .o files (%d of %d)',
        num_unknown_names, len(raw_symbols))
  return ret
def _DiscoverMissedObjectPaths(raw_symbols, known_inputs):
# Missing object paths are caused by .a files added by -l flags, which are not
# listed as explicit inputs within .ninja rules.
missed_inputs = set()
for symbol in raw_symbols:
path = symbol.object_path
if path.endswith(')'):
# Convert foo/bar.a(baz.o) -> foo/bar.a
path = path[:path.rindex('(')]
if path and path not in known_inputs:
missed_inputs.add(path)
return missed_inputs
def _CreateMergeStringsReplacements(merge_string_syms,
                                    list_of_positions_by_object_path):
  """Creates replacement string-literal symbols for |merge_string_syms|.

  Args:
    merge_string_syms: '** merge strings' symbols to deconstruct.
    list_of_positions_by_object_path: Parallel to |merge_string_syms|; each
        entry maps object_path -> list of (offset, size) string positions.

  Returns:
    A list parallel to |merge_string_syms|, each entry being the (deduped,
    alias-connected) list of literal symbols for that merge section.
  """
  ret = []
  STRING_LITERAL_NAME = models.STRING_LITERAL_NAME
  assert len(merge_string_syms) == len(list_of_positions_by_object_path)
  tups = zip(merge_string_syms, list_of_positions_by_object_path)
  for merge_sym, positions_by_object_path in tups:
    merge_sym_address = merge_sym.address
    new_symbols = []
    ret.append(new_symbols)
    for object_path, positions in positions_by_object_path.items():
      for offset, size in positions:
        # Offsets are relative to the start of the merge section.
        address = merge_sym_address + offset
        symbol = models.Symbol(models.SECTION_RODATA,
                               size,
                               address=address,
                               full_name=STRING_LITERAL_NAME,
                               object_path=object_path)
        new_symbols.append(symbol)
  logging.debug('Created %d string literal symbols', sum(len(x) for x in ret))
  logging.debug('Sorting string literals')
  for symbols in ret:
    # For de-duping & alias creation, order by address & size.
    # For alias symbol ordering, sort by object_path.
    symbols.sort(key=lambda x: (x.address, -x.size, x.object_path))
  logging.debug('Deduping string literals')
  num_removed = 0
  size_removed = 0
  num_aliases = 0
  for i, symbols in enumerate(ret):
    if not symbols:
      continue
    prev_symbol = symbols[0]
    new_symbols = [prev_symbol]
    for symbol in symbols[1:]:
      # Negative |padding| means the symbol overlaps the previous one.
      padding = symbol.address - prev_symbol.end_address
      if (prev_symbol.address == symbol.address
          and prev_symbol.size == symbol.size):
        # String is an alias.
        num_aliases += 1
        aliases = prev_symbol.aliases
        if aliases:
          aliases.append(symbol)
          symbol.aliases = aliases
        else:
          aliases = [prev_symbol, symbol]
          prev_symbol.aliases = aliases
          symbol.aliases = aliases
      elif padding + symbol.size <= 0:
        # String is a substring of prior one.
        num_removed += 1
        size_removed += symbol.size
        continue
      elif padding < 0:
        # String overlaps previous one. Adjust to not overlap.
        symbol.address -= padding
        symbol.size += padding
      new_symbols.append(symbol)
      prev_symbol = symbol
    ret[i] = new_symbols
  logging.debug(
      'Removed %d overlapping string literals (%d bytes) & created %d aliases',
      num_removed, size_removed, num_aliases)
  return ret
def _AddOutlinedSymbolCountsFromNm(raw_symbols, names_by_address):
logging.debug('Update symbol names')
# linker_map_parser extracts '** outlined function' without knowing how many
# such symbols exist at each address. nm has this information, and stores the
# value as, e.g., '** outlined function * 5'. Copy the information over.
for s in raw_symbols:
if s.full_name.startswith('** outlined function'):
name_list = names_by_address.get(s.address)
if name_list:
for name in name_list:
if name.startswith('** outlined function'):
s.full_name = name
break
def _AddNmAliases(raw_symbols, names_by_address):
  """Adds symbols that were removed by identical code folding.

  Args:
    raw_symbols: Existing symbols, indexed positionally below.
    names_by_address: Dict of address -> list of names nm reported there.

  Returns:
    A new list where each symbol with multiple nm names is replaced by one
    symbol per name (the original symbol object is discarded).
  """
  # Step 1: Create list of (index_of_symbol, name_list).
  logging.debug('Creating alias list')
  replacements = []
  num_new_symbols = 0
  num_missing = 0
  missing_names = collections.defaultdict(list)
  for i, s in enumerate(raw_symbols):
    # Don't alias padding-only symbols (e.g. ** symbol gap)
    if s.size_without_padding == 0:
      continue
    # Also skip artificial symbols that won't appear in nm output.
    if s.full_name.startswith('** CFI jump table'):
      continue
    name_list = names_by_address.get(s.address)
    if name_list:
      if s.full_name not in name_list:
        num_missing += 1
        missing_names[s.full_name].append(s.address)
        # Sometimes happens for symbols from assembly files.
        if num_missing < 10:
          logging.debug('Name missing from aliases: %s %s (addr=%x)',
                        s.full_name, name_list, s.address)
        continue
      replacements.append((i, name_list))
      num_new_symbols += len(name_list) - 1
  # Diagnostic only: report where the missing names actually live.
  if missing_names and logging.getLogger().isEnabledFor(logging.INFO):
    for address, names in names_by_address.items():
      for name in names:
        if name in missing_names:
          logging.info('Missing name %s is at address %x instead of [%s]' %
                       (name, address, ','.join('%x' % a
                                                for a in missing_names[name])))
  is_small_file = len(raw_symbols) < 1000
  if not is_small_file and num_new_symbols / len(raw_symbols) < .05:
    logging.warning(
        'Number of aliases is oddly low (%.0f%%). It should '
        'usually be around 25%%.', num_new_symbols / len(raw_symbols) * 100)
  # Step 2: Create new symbols as siblings to each existing one.
  logging.debug('Creating %d new symbols from nm output', num_new_symbols)
  expected_num_symbols = len(raw_symbols) + num_new_symbols
  ret = []
  prev_src = 0
  for cur_src, name_list in replacements:
    ret += raw_symbols[prev_src:cur_src]
    prev_src = cur_src + 1
    sym = raw_symbols[cur_src]
    # Create symbols (|sym| gets recreated and discarded).
    new_syms = []
    for full_name in name_list:
      # Do not set |aliases| in order to avoid being pruned by
      # CompactLargeAliasesIntoSharedSymbols(), which assumes aliases differ
      # only by path. The field will be set afterwards by _ConnectNmAliases().
      new_syms.append(
          models.Symbol(sym.section_name,
                        sym.size,
                        address=sym.address,
                        full_name=full_name))
    ret += new_syms
  ret += raw_symbols[prev_src:]
  assert expected_num_symbols == len(ret)
  return ret
def _ResolveThinArchivePaths(raw_symbols, thin_archives):
  """Converts object_paths for thin archives to external .o paths."""
  for symbol in raw_symbols:
    obj_path = symbol.object_path
    # Only paths of the form "archive.a(member.o)" need rewriting.
    if not obj_path.endswith(')'):
      continue
    paren_idx = obj_path.rindex('(')
    archive_path = obj_path[:paren_idx]
    if archive_path in thin_archives:
      member = obj_path[paren_idx + 1:-1]
      symbol.object_path = ar.CreateThinObjectPath(archive_path, member)
def _DeduceObjectPathForSwitchTables(raw_symbols, object_paths_by_name):
# Example: foo (.67.rel)
# Example: bar (.67)
strip_num_suffix_regexp = re.compile(r'\s+\(\.\d+.*?\)$')
num_switch_tables = 0
num_unassigned = 0
num_deduced = 0
num_arbitrations = 0
for s in raw_symbols:
if s.full_name.startswith('Switch table for '):
num_switch_tables += 1
# Strip 'Switch table for ' prefix.
name = s.full_name[17:]
# Strip, e.g., ' (.123)' suffix.
name = re.sub(strip_num_suffix_regexp, '', name)
object_paths = object_paths_by_name.get(name, None)
if not s.object_path:
if object_paths is None:
num_unassigned += 1
else:
num_deduced += 1
# If ambiguity arises, arbitrate by taking the first.
s.object_path = object_paths[0]
if len(object_paths) > 1:
num_arbitrations += 1
else:
assert object_paths, 'Name was: ' + name
assert s.object_path in object_paths, s.object_path
if num_switch_tables > 0:
logging.info(
'Found %d switch tables: Deduced %d object paths with ' +
'%d arbitrations. %d remain unassigned.', num_switch_tables,
num_deduced, num_arbitrations, num_unassigned)
def _ParseElfInfo(native_spec, outdir_context=None):
  """Adds ELF section ranges and symbols.

  Args:
    native_spec: Instance of NativeSpec. At least one of |map_path| or
        |elf_path| must be set.
    outdir_context: Optional _OutputDirectoryContext. When present (along with
        |map_path|), enables bulk .o/.a analysis for object paths and string
        literals.

  Returns:
    Tuple of (section_ranges, raw_symbols, object_paths_by_name). Section
    ranges come from the ELF when available, since the .map can disagree.
  """
  assert native_spec.map_path or native_spec.elf_path, (
      'Need a linker map or an ELF file.')
  assert native_spec.map_path or not native_spec.track_string_literals, (
      'track_string_literals not yet implemented without map file')
  if native_spec.elf_path:
    elf_section_ranges = readelf.SectionInfoFromElf(native_spec.elf_path)

    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(native_spec.elf_path)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when multiple .o files contain the same symbol
    # (e.g. inline functions), and to update the object_path / source_path
    # fields accordingly.
    # Looking in object files is required because the .map file choses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if outdir_context and native_spec.map_path:
      bulk_analyzer = obj_analyzer.BulkObjectFileAnalyzer(
          outdir_context.output_directory,
          track_string_literals=native_spec.track_string_literals)
      bulk_analyzer.AnalyzePaths(outdir_context.elf_object_paths)

  if native_spec.map_path:
    logging.info('Parsing Linker Map')
    map_section_ranges, raw_symbols, linker_map_extras = (
        linker_map_parser.ParseFile(native_spec.map_path))
    if outdir_context and outdir_context.thin_archives:
      _ResolveThinArchivePaths(raw_symbols, outdir_context.thin_archives)
  else:
    logging.info('Collecting symbols from nm')
    raw_symbols = nm.CreateUniqueSymbols(native_spec.elf_path,
                                         elf_section_ranges)

  if native_spec.elf_path and native_spec.map_path:
    logging.debug('Validating section sizes')
    differing_elf_section_sizes = {}
    differing_map_section_sizes = {}
    for k, (_, elf_size) in elf_section_ranges.items():
      if k in _SECTION_SIZE_BLOCKLIST:
        continue
      # Fix: a section present in the ELF but absent from the .map previously
      # crashed with a TypeError (unpacking None). Default to (None, None) so
      # it is reported as a size mismatch below instead.
      _, map_size = map_section_ranges.get(k, (None, None))
      if map_size != elf_size:
        differing_map_section_sizes[k] = map_size
        differing_elf_section_sizes[k] = elf_size
    if differing_map_section_sizes:
      logging.error('ELF file and .map file do not agree on section sizes.')
      logging.error('readelf: %r', differing_elf_section_sizes)
      logging.error('.map file: %r', differing_map_section_sizes)
      sys.exit(1)

  if native_spec.elf_path and native_spec.map_path and outdir_context:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, outdir_context.known_inputs)
    missed_object_paths = ar.ExpandThinArchives(
        missed_object_paths, outdir_context.output_directory)[0]
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.SortPaths()

    if native_spec.track_string_literals:
      merge_string_syms = [
          s for s in raw_symbols if s.full_name == '** merge strings'
          or s.full_name == '** lld merge strings'
      ]
      # More likely for there to be a bug in supersize than an ELF to not have
      # a single string literal.
      assert merge_string_syms
      string_ranges = [(s.address, s.size) for s in merge_string_syms]
      bulk_analyzer.AnalyzeStringLiterals(native_spec.elf_path, string_ranges)

  # Map file for some reason doesn't demangle all names.
  # Demangle prints its own log statement.
  demangle.DemangleRemainingSymbols(raw_symbols)

  object_paths_by_name = {}
  if native_spec.elf_path:
    logging.info(
        'Adding symbols removed by identical code folding (as reported by nm)')
    # This normally does not block (it's finished by this time).
    names_by_address = elf_nm_result.get()
    if native_spec.map_path:
      # This rewrites outlined symbols from |map_path|, and can be skipped if
      # symbols already came from nm (e.g., for dwarf mode).
      _AddOutlinedSymbolCountsFromNm(raw_symbols, names_by_address)
    raw_symbols = _AddNmAliases(raw_symbols, names_by_address)

    if native_spec.map_path and outdir_context:
      object_paths_by_name = bulk_analyzer.GetSymbolNames()
      logging.debug(
          'Fetched path information for %d symbols from %d files',
          len(object_paths_by_name),
          len(outdir_context.elf_object_paths) + len(missed_object_paths))
      _DeduceObjectPathForSwitchTables(raw_symbols, object_paths_by_name)
      # For aliases, this provides path information where there wasn't any.
      logging.info('Creating aliases for symbols shared by multiple paths')
      raw_symbols = _AssignNmAliasPathsAndCreatePathAliases(
          raw_symbols, object_paths_by_name)

      if native_spec.track_string_literals:
        logging.info('Waiting for string literal extraction to complete.')
        list_of_positions_by_object_path = bulk_analyzer.GetStringPositions()
      bulk_analyzer.Close()

      if native_spec.track_string_literals:
        logging.info('Deconstructing ** merge strings into literals')
        replacements = _CreateMergeStringsReplacements(
            merge_string_syms, list_of_positions_by_object_path)
        for merge_sym, literal_syms in zip(merge_string_syms, replacements):
          # Don't replace if no literals were found.
          if literal_syms:
            # Re-find the symbols since aliases cause their indices to change.
            idx = raw_symbols.index(merge_sym)
            # This assignment is a bit slow (causes array to be shifted), but
            # is fast enough since len(merge_string_syms) < 10.
            raw_symbols[idx:idx + 1] = literal_syms

  if native_spec.map_path:
    linker_map_parser.DeduceObjectPathsFromThinMap(raw_symbols,
                                                   linker_map_extras)

  if native_spec.elf_path and native_spec.track_string_literals:
    sym_and_string_literals = string_extract.ReadStringLiterals(
        raw_symbols, native_spec.elf_path)
    for sym, data in sym_and_string_literals:
      sym.full_name = string_extract.GetNameOfStringLiteralBytes(data)

  # If we have an ELF file, use its ranges as the source of truth, since some
  # sections can differ from the .map.
  return (elf_section_ranges if native_spec.elf_path else map_section_ranges,
          raw_symbols, object_paths_by_name)
def _AddUnattributedSectionSymbols(raw_symbols, section_ranges, source_path):
  """Creates symbols for ELF sections not covered by existing symbols.

  Args:
    raw_symbols: Symbols ordered so those of one section are adjacent.
    section_ranges: Dict of section_name -> (address, size). Extended for
        models.SECTION_OTHER when unclassified sections are bundled into it.
    source_path: source_path to assign to every created symbol.

  Returns:
    Tuple of (ret, other_symbols): |ret| is |raw_symbols| with gap symbols
    and whole-section symbols merged in; |other_symbols| holds symbols
    bundled into ".other".
  """
  logging.info('Searching for symbol gaps...')
  new_syms_by_section = collections.defaultdict(list)
  seen_sections = set()
  for section_name, group in itertools.groupby(
      raw_symbols, lambda s: s.section_name):
    seen_sections.add(section_name)
    # Get last symbol in group.
    sym = None  # Needed for pylint.
    for sym in group:
      pass
    end_address = sym.end_address  # pylint: disable=undefined-loop-variable
    size_from_syms = end_address - section_ranges[section_name][0]
    overhead = section_ranges[section_name][1] - size_from_syms
    assert overhead >= 0, (
        'Last symbol (%s) ends %d bytes after section boundary (%x)' %
        (sym, -overhead, sum(section_ranges[section_name])))
    if overhead > 0 and section_name not in models.BSS_SECTIONS:
      new_syms_by_section[section_name].append(
          models.Symbol(section_name,
                        overhead,
                        address=end_address,
                        full_name='** {} (unattributed)'.format(section_name),
                        source_path=source_path))
      logging.info('Last symbol in %s does not reach end of section, gap=%d',
                   section_name, overhead)
  # Sections that should not bundle into ".other".
  unsummed_sections, summed_sections = models.ClassifySections(
      section_ranges.keys())
  ret = []
  other_symbols = []
  # Sort keys to ensure consistent order (> 1 sections may have address = 0).
  # Fix: previously used list(), which did not fulfill the sorting promised
  # above (it existed only to allow mutation during iteration; sorted() does
  # both).
  for section_name, (_, section_size) in sorted(section_ranges.items()):
    if section_name in seen_sections:
      continue
    # Handle sections that don't appear in |raw_symbols|.
    if (section_name not in unsummed_sections
        and section_name not in summed_sections):
      other_symbols.append(
          models.Symbol(models.SECTION_OTHER,
                        section_size,
                        full_name='** ELF Section: {}'.format(section_name),
                        source_path=source_path))
      archive_util.ExtendSectionRange(section_ranges, models.SECTION_OTHER,
                                      section_size)
    else:
      ret.append(
          models.Symbol(section_name,
                        section_size,
                        full_name='** ELF Section: {}'.format(section_name),
                        source_path=source_path))
  other_symbols.sort(key=lambda s: (s.address, s.full_name))
  # TODO(agrieve): It would probably simplify things to use a dict of
  # section_name->raw_symbols while creating symbols.
  # Merge |new_syms_by_section| into |raw_symbols| while maintaining ordering.
  for section_name, group in itertools.groupby(
      raw_symbols, lambda s: s.section_name):
    ret.extend(group)
    ret.extend(new_syms_by_section[section_name])
  return ret, other_symbols
def ParseNinjaFiles(output_directory, elf_paths_to_find_inputs_for=None):
  """Parses .ninja files and returns the resulting source mapper.

  Args:
    output_directory: Build output directory containing .ninja files.
    elf_paths_to_find_inputs_for: Optional list of linked binaries whose link
        commands (and thus linker inputs) must be found. When given, asserts
        that a link step was found for each.

  Returns:
    The source mapper from ninja_parser.Parse().
  """
  logging.info('Parsing ninja files')
  ninja_source_mapper = ninja_parser.Parse(output_directory,
                                           elf_paths_to_find_inputs_for)
  # Fix: |elf_paths_to_find_inputs_for| defaults to None, and this log line
  # runs unconditionally, so len() needs the "or ()" guard to avoid TypeError.
  logging.debug('Parsed %d .ninja files. Linker inputs=%d of %d',
                ninja_source_mapper.parsed_file_count,
                ninja_source_mapper.inputs_map_count,
                len(elf_paths_to_find_inputs_for or ()))
  if elf_paths_to_find_inputs_for:
    for path in elf_paths_to_find_inputs_for:
      assert ninja_source_mapper.GetInputsForBinary(path), (
          'Failed to find any link commands in ninja files for ' + path)
  return ninja_source_mapper
def _ElfInfoFromApk(apk_path, apk_so_path):
  """Extracts |apk_so_path| from |apk_path| and returns its ElfInfo."""
  with zip_util.UnzipToTemp(apk_path, apk_so_path) as extracted_path:
    return _CreateElfInfo(extracted_path)
def _CountRelocationsFromElf(elf_path):
  """Returns the total number of relocation entries reported by readelf -r."""
  cmd = [path_util.GetReadElfPath(), '-r', elf_path]
  output = subprocess.check_output(cmd).decode('ascii')
  counts = re.findall(
      r'Relocation section .* at offset .* contains (\d+) entries', output)
  return sum(int(c) for c in counts)
def _FindToolchainSubdirs(output_directory):
return [
n for n in os.listdir(output_directory)
if os.path.exists(os.path.join(output_directory, n, 'toolchain.ninja'))
]
def CreateMetadata(*, native_spec, elf_info, shorten_path):
  """Returns metadata for the given native_spec / elf_info."""
  logging.debug('Constructing native metadata')
  native_metadata = {models.METADATA_ELF_ALGORITHM: native_spec.algorithm}
  if elf_info:
    native_metadata[models.METADATA_ELF_ARCHITECTURE] = elf_info.architecture
    native_metadata[models.METADATA_ELF_BUILD_ID] = elf_info.build_id
  if native_spec.apk_so_path:
    native_metadata[models.METADATA_ELF_APK_PATH] = native_spec.apk_so_path
  if native_spec.elf_path:
    native_metadata[models.METADATA_ELF_FILENAME] = shorten_path(
        native_spec.elf_path)
    # Record the ELF's mtime as UTC epoch seconds.
    mtime_utc = datetime.datetime.fromtimestamp(
        os.path.getmtime(native_spec.elf_path), datetime.timezone.utc)
    native_metadata[models.METADATA_ELF_MTIME] = calendar.timegm(
        mtime_utc.timetuple())
  if native_spec.map_path:
    native_metadata[models.METADATA_MAP_FILENAME] = shorten_path(
        native_spec.map_path)
  return native_metadata
def CreateSymbols(*,
                  apk_spec,
                  native_spec,
                  output_directory=None,
                  ninja_source_mapper=None,
                  pak_id_map=None):
  """Creates native symbols for the given native_spec.

  Args:
    apk_spec: Instance of ApkSpec, or None.
    native_spec: Instance of NativeSpec.
    output_directory: Build output directory. If None, source_paths and symbol
      alias information will not be recorded.
    ninja_source_mapper: From ninja_parser.Parse()
    pak_id_map: Instance of PakIdMap.

  Returns:
    A tuple of (section_ranges, raw_symbols, elf_info, metrics_by_file), where
    metrics_by_file is a dict from file name to a dict of {metric_name: value}.
  """
  apk_elf_info_result = None
  if apk_spec and native_spec.apk_so_path:
    # Extraction takes around 1 second, so do it in parallel.
    apk_elf_info_result = parallel.ForkAndCall(
        _ElfInfoFromApk, (apk_spec.apk_path, native_spec.apk_so_path))

  raw_symbols = []
  dwarf_source_mapper = None
  section_ranges = {}
  ninja_elf_object_paths = None
  metrics_by_file = {}
  if ninja_source_mapper and native_spec.map_path:
    # Finds all objects passed to the linker and creates a map of .o -> .cc.
    elf_path = native_spec.combined_elf_path or native_spec.elf_path
    if elf_path:
      ninja_elf_object_paths = ninja_source_mapper.GetInputsForBinary(elf_path)
      assert ninja_elf_object_paths, 'Failed to find link step for ' + elf_path
  elif native_spec.elf_path:
    # No ninja mapper / no .map: fall back to debug info for source paths.
    logging.info('Parsing source path info via dwarfdump')
    dwarf_source_mapper = dwarfdump.CreateAddressSourceMapper(
        native_spec.elf_path)
    logging.info('Found %d source paths across %s ranges',
                 dwarf_source_mapper.NumberOfPaths(),
                 dwarf_source_mapper.num_ranges)

  # Start by finding elf_object_paths so that nm can run on them while the
  # linker .map is being parsed.
  if ninja_elf_object_paths:
    elf_object_paths, thin_archives = ar.ExpandThinArchives(
        ninja_elf_object_paths, output_directory)
    known_inputs = set(elf_object_paths)
    known_inputs.update(ninja_elf_object_paths)
  else:
    elf_object_paths = []
    known_inputs = None
    # When we don't know which elf file is used, just search all paths.
    # TODO(agrieve): Seems to be used only for tests. Remove?
    if ninja_source_mapper and native_spec.map_path:
      thin_archives = set(
          p for p in ninja_source_mapper.IterAllPaths() if p.endswith('.a')
          and ar.IsThinArchive(os.path.join(output_directory, p)))
    else:
      thin_archives = None

  if output_directory:
    toolchain_subdirs = _FindToolchainSubdirs(output_directory)
    outdir_context = _OutputDirectoryContext(elf_object_paths=elf_object_paths,
                                             known_inputs=known_inputs,
                                             output_directory=output_directory,
                                             thin_archives=thin_archives)
  else:
    toolchain_subdirs = None
    outdir_context = None

  object_paths_by_name = None
  if native_spec.elf_path or native_spec.map_path:
    section_ranges, raw_symbols, object_paths_by_name = _ParseElfInfo(
        native_spec, outdir_context=outdir_context)

  if pak_id_map and native_spec.map_path:
    # For trichrome, pak files are in different apks than native library,
    # so need to pass along pak_id_map separately and ensure
    # TrichromeLibrary appears first in .ssargs file.
    logging.debug('Extracting pak IDs from symbol names')
    pak_id_map.Update(object_paths_by_name, ninja_source_mapper)

  elf_info = None
  if apk_elf_info_result:
    logging.debug('Extracting section sizes from .so within .apk')
    elf_info = apk_elf_info_result.get()
    if native_spec.elf_path:
      # Sanity check: the .so inside the APK must match the unpacked ELF.
      expected_build_id = readelf.BuildIdFromElf(native_spec.elf_path)
      assert elf_info.build_id == expected_build_id, (
          'BuildID of {} != $APK/{}: {} != {}'.format(native_spec.elf_path,
                                                      native_spec.apk_so_path,
                                                      expected_build_id,
                                                      elf_info.build_id))
  elif native_spec.elf_path:
    # Strip ELF before capturing section information to avoid recording
    # debug sections.
    with tempfile.NamedTemporaryFile(
        suffix=os.path.basename(native_spec.elf_path)) as f:
      strip_path = path_util.GetStripPath()
      subprocess.run([
          strip_path, '--strip-debug', '--strip-unneeded', '-o', f.name,
          native_spec.elf_path
      ],
                     check=True)
      elf_info = _CreateElfInfo(f.name)

  if elf_info:
    # Prefer the (stripped / in-APK) ELF's view of section ranges.
    section_ranges = elf_info.section_ranges.copy()

  if native_spec.elf_path:
    key = posixpath.basename(native_spec.elf_path)
    # Per-section size metrics, keyed by "size/<section>".
    metrics_by_file[key] = {
        f'{models.METRICS_SIZE}/{k}': size
        for (k, (offset, size)) in section_ranges.items()
    }
    relocations_count = _CountRelocationsFromElf(native_spec.elf_path)
    metrics_by_file[key][
        f'{models.METRICS_COUNT}/{models.METRICS_COUNT_RELOCATIONS}'] = (
            relocations_count)

  source_path = ''
  if native_spec.apk_so_path:
    # Put section symbols under $NATIVE/libfoo.so (abi)/...
    source_path = '{}/{} ({})'.format(
        models.NATIVE_PREFIX_PATH, posixpath.basename(native_spec.apk_so_path),
        elf_info.architecture)

  raw_symbols, other_symbols = _AddUnattributedSectionSymbols(
      raw_symbols, section_ranges, source_path)

  if elf_info:
    # Account for file bytes not inside any non-.bss section.
    elf_overhead_size = elf_info.OverheadSize()
    elf_overhead_symbol = models.Symbol(models.SECTION_OTHER,
                                        elf_overhead_size,
                                        full_name='Overhead: ELF file',
                                        source_path=source_path)
    archive_util.ExtendSectionRange(section_ranges, models.SECTION_OTHER,
                                    elf_overhead_size)
    other_symbols.append(elf_overhead_symbol)

  # Always have .other come last.
  other_symbols.sort(key=lambda s: (s.IsOverhead(), s.full_name.startswith(
      '**'), s.address, s.full_name))

  if ninja_source_mapper:
    _AddSourcePathsUsingObjectPaths(ninja_source_mapper, raw_symbols)
  elif dwarf_source_mapper:
    _AddSourcePathsUsingAddress(dwarf_source_mapper, raw_symbols)

  raw_symbols.extend(other_symbols)

  # Path normalization must come before compacting aliases so that
  # ancestor paths do not mix generated and non-generated paths.
  archive_util.NormalizePaths(raw_symbols,
                              gen_dir_regex=native_spec.gen_dir_regex,
                              toolchain_subdirs=toolchain_subdirs)

  if native_spec.elf_path or native_spec.map_path:
    logging.info('Converting excessive aliases into shared-path symbols')
    archive_util.CompactLargeAliasesIntoSharedSymbols(
        raw_symbols, _MAX_SAME_NAME_ALIAS_COUNT)
    logging.debug('Connecting nm aliases')
    _ConnectNmAliases(raw_symbols)
  return section_ranges, raw_symbols, elf_info, metrics_by_file
|