File: native.py

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (872 lines) | stat: -rw-r--r-- 35,861 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
# Copyright 2022 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Functions for creating native code symbols from ELF files."""

import calendar
import collections
import dataclasses
import datetime
import itertools
import logging
import os
import posixpath
import re
import subprocess
import sys
import tempfile

import ar
import archive_util
import demangle
import dwarfdump
import linker_map_parser
import models
import ninja_parser
import nm
import obj_analyzer
import parallel
import path_util
import readelf
import string_extract
import zip_util

# When ensuring matching section sizes between .elf and .map files, these
# sections should be ignored. When lld creates a combined library with
# partitions, some sections (like .text) exist in each partition, but the ones
# below are common. At library splitting time, llvm-objcopy pulls what's needed
# from these sections into the new libraries. Hence, the ELF sections will end
# up smaller than the combined .map file sections.
_SECTION_SIZE_BLOCKLIST = ['.symtab', '.shstrtab', '.strtab']

# A limit on the number of symbols an address can have, before these symbols
# are compacted into shared symbols. Increasing this value causes more data
# to be stored in .size files, but is also more expensive.
# Effect as of Oct 2017, with min_pss = max:
# 1: shared .text syms = 1772874 bytes, file size = 9.43MiB (645476 syms).
# 2: shared .text syms = 1065654 bytes, file size = 9.58MiB (669952 syms).
# 6: shared .text syms = 464058 bytes, file size = 10.11MiB (782693 syms).
# 10: shared .text syms = 365648 bytes, file size = 10.24MiB (813758 syms).
# 20: shared .text syms = 86202 bytes, file size = 10.38MiB (854548 syms).
# 40: shared .text syms = 48424 bytes, file size = 10.50MiB (890396 syms).
# 50: shared .text syms = 41860 bytes, file size = 10.54MiB (902304 syms).
# max: shared .text syms = 0 bytes, file size = 11.10MiB (1235449 syms).
_MAX_SAME_NAME_ALIAS_COUNT = 40  # 50kb is basically negligible.


# Holds computation state that is live only when an output directory exists.
@dataclasses.dataclass
class _OutputDirectoryContext:
  elf_object_paths: list  # Object paths that feed into the ELF link.
                          # Non-None only when elf_path is.
  known_inputs: list  # All linker inputs. Non-None only when elf_path is.
  output_directory: str  # Path to the build output directory.
  thin_archives: list  # Paths of .a files that are thin archives.


@dataclasses.dataclass
class ElfInfo:
  """Summary facts about an ELF file, as extracted by readelf."""
  architecture: str  # Results of ArchFromElf().
  build_id: str  # Result of BuildIdFromElf().
  section_ranges: dict  # Results of SectionInfoFromElf().
  size: int  # Result of os.path.getsize().

  def OverheadSize(self):
    """Returns file bytes not accounted for by (non-bss) section sizes."""
    # .bss occupies no file bytes, so exclude it from the tally.
    non_bss_total = 0
    for name, (_, sec_size) in self.section_ranges.items():
      if name not in models.BSS_SECTIONS:
        non_bss_total += sec_size
    overhead = self.size - non_bss_total
    assert overhead >= 0, 'Negative ELF overhead {}'.format(overhead)
    return overhead


def _CreateElfInfo(elf_path):
  """Gathers an ElfInfo for |elf_path| via readelf and the filesystem."""
  arch = readelf.ArchFromElf(elf_path)
  build_id = readelf.BuildIdFromElf(elf_path)
  section_ranges = readelf.SectionInfoFromElf(elf_path)
  file_size = os.path.getsize(elf_path)
  return ElfInfo(architecture=arch,
                 build_id=build_id,
                 section_ranges=section_ranges,
                 size=file_size)


def _AddSourcePathsUsingObjectPaths(ninja_source_mapper, raw_symbols):
  logging.info('Looking up source paths from ninja files')
  for symbol in raw_symbols:
    # Native symbols and pak symbols use object paths.
    object_path = symbol.object_path
    if not object_path:
      continue

    # We don't have source info for prebuilt .a files.
    if not os.path.isabs(object_path) and not object_path.startswith('..'):
      symbol.source_path = ninja_source_mapper.FindSourceForPath(object_path)
  assert ninja_source_mapper.unmatched_paths_count == 0, (
      'One or more source file paths could not be found. Likely caused by '
      '.ninja files being generated at a different time than the .map file.')


def _AddSourcePathsUsingAddress(dwarf_source_mapper, raw_symbols):
  """Assigns |source_path| on .text symbols via dwarfdump address lookups."""
  logging.debug('Looking up source paths from dwarfdump')
  num_queried = 0
  num_matched = 0
  for sym in raw_symbols:
    # Only .text symbols can be resolved through DWARF address info here.
    if sym.section_name != models.SECTION_TEXT:
      continue
    num_queried += 1
    path = dwarf_source_mapper.FindSourceForTextAddress(sym.address)
    if path:
      num_matched += 1
      sym.source_path = path
  logging.info('dwarfdump found paths for %d of %d .text symbols.', num_matched,
               num_queried)
  # Majority of unmatched queries are for assembly source files (ex libav1d)
  # and v8 builtins.
  if num_queried > 0:
    failure_ratio = (num_queried - num_matched) / num_queried
    assert failure_ratio < 0.2, (
        'Percentage of failing |dwarf_source_mapper| queries ' +
        '({}%) >= 20% '.format(failure_ratio * 100) +
        'FindSourceForTextAddress() likely has a bug.')


def _ConnectNmAliases(raw_symbols):
  """Ensures |aliases| is set correctly for all symbols."""
  prev_sym = raw_symbols[0]
  for sym in raw_symbols[1:]:
    # Don't merge bss symbols.
    if sym.address > 0 and prev_sym.address == sym.address:
      # Don't merge padding-only symbols (** symbol gaps).
      if prev_sym.size > 0:
        # Don't merge if already merged.
        if prev_sym.aliases is None or prev_sym.aliases is not sym.aliases:
          if prev_sym.aliases:
            prev_sym.aliases.append(sym)
          else:
            prev_sym.aliases = [prev_sym, sym]
          sym.aliases = prev_sym.aliases
    prev_sym = sym


def _AssignNmAliasPathsAndCreatePathAliases(raw_symbols, object_paths_by_name):
  """Sets object_path from nm data; clones symbols found in multiple .o files.

  Args:
    raw_symbols: Symbols to process (a new list is returned).
    object_paths_by_name: Dict of full_name -> list of object paths, built
        from nm output over object files.

  Returns:
    New list of symbols: |raw_symbols| plus one alias symbol inserted for each
    additional object path a symbol was found in.
  """
  num_found_paths = 0
  num_unknown_names = 0
  num_path_mismatches = 0
  num_aliases_created = 0
  ret = []
  for symbol in raw_symbols:
    ret.append(symbol)
    full_name = symbol.full_name
    # '__typeid_' symbols appear in linker .map only, and not nm output.
    if full_name.startswith('__typeid_'):
      if object_paths_by_name.get(full_name):
        logging.warning('Found unexpected __typeid_ symbol in nm output: %s',
                        full_name)
      continue

    # Don't skip if symbol.IsBss(). This is needed for LLD-LTO to work, since
    # .bss object_path data are unavailable for linker_map_parser, and need to
    # be extracted here. For regular LLD flow, incorrect aliased symbols can
    # arise. But that's a lesser evil compared to having LLD-LTO .bss missing
    # object_path and source_path.
    # TODO(huangs): Fix aliased symbols for the LLD case.
    if (symbol.IsStringLiteral() or not full_name or full_name[0] in '*.'
        or  # e.g. ** merge symbols, .Lswitch.table
        full_name == 'startup'):
      continue

    object_paths = object_paths_by_name.get(full_name)
    if object_paths:
      num_found_paths += 1
    else:
      # Happens a lot with code that has LTO enabled (linker creates symbols).
      num_unknown_names += 1
      continue

    # .map and nm can disagree on a symbol's path; keep the .map path as an
    # additional candidate and log only the first few occurrences.
    if symbol.object_path and symbol.object_path not in object_paths:
      if num_path_mismatches < 10:
        logging.warning('Symbol path reported by .map not found by nm.')
        logging.warning('sym=%r', symbol)
        logging.warning('paths=%r', object_paths)
      object_paths.append(symbol.object_path)
      object_paths.sort()
      num_path_mismatches += 1

    # First path becomes the canonical one; the rest become aliases below.
    symbol.object_path = object_paths[0]

    if len(object_paths) > 1:
      # Create one symbol for each object_path.
      aliases = symbol.aliases or [symbol]
      symbol.aliases = aliases
      num_aliases_created += len(object_paths) - 1
      for object_path in object_paths[1:]:
        new_sym = models.Symbol(symbol.section_name,
                                symbol.size,
                                address=symbol.address,
                                full_name=full_name,
                                object_path=object_path,
                                aliases=aliases)
        aliases.append(new_sym)
        ret.append(new_sym)

  logging.debug(
      'Cross-referenced %d symbols with nm output. '
      'num_unknown_names=%d num_path_mismatches=%d '
      'num_aliases_created=%d', num_found_paths, num_unknown_names,
      num_path_mismatches, num_aliases_created)
  # Currently: num_unknown_names=1246 out of 591206 (0.2%).
  if num_unknown_names > min(20, len(raw_symbols) * 0.01):
    logging.warning(
        'Abnormal number of symbols not found in .o files (%d of %d)',
        num_unknown_names, len(raw_symbols))
  return ret


def _DiscoverMissedObjectPaths(raw_symbols, known_inputs):
  # Missing object paths are caused by .a files added by -l flags, which are not
  # listed as explicit inputs within .ninja rules.
  missed_inputs = set()
  for symbol in raw_symbols:
    path = symbol.object_path
    if path.endswith(')'):
      # Convert foo/bar.a(baz.o) -> foo/bar.a
      path = path[:path.rindex('(')]
    if path and path not in known_inputs:
      missed_inputs.add(path)
  return missed_inputs


def _CreateMergeStringsReplacements(merge_string_syms,
                                    list_of_positions_by_object_path):
  """Creates replacement symbols for |merge_string_syms|.

  Args:
    merge_string_syms: '** merge strings' symbols from the linker map.
    list_of_positions_by_object_path: Parallel list; for each merge symbol,
        a dict of object_path -> list of (offset, size) string positions.

  Returns:
    List parallel to |merge_string_syms|, where each entry is the list of
    string literal symbols that should replace that merge symbol.
  """
  ret = []
  STRING_LITERAL_NAME = models.STRING_LITERAL_NAME
  assert len(merge_string_syms) == len(list_of_positions_by_object_path)
  tups = zip(merge_string_syms, list_of_positions_by_object_path)
  for merge_sym, positions_by_object_path in tups:
    merge_sym_address = merge_sym.address
    new_symbols = []
    ret.append(new_symbols)
    for object_path, positions in positions_by_object_path.items():
      for offset, size in positions:
        # Offsets are relative to the start of the merge section.
        address = merge_sym_address + offset
        symbol = models.Symbol(models.SECTION_RODATA,
                               size,
                               address=address,
                               full_name=STRING_LITERAL_NAME,
                               object_path=object_path)
        new_symbols.append(symbol)

  logging.debug('Created %d string literal symbols', sum(len(x) for x in ret))
  logging.debug('Sorting string literals')
  for symbols in ret:
    # For de-duping & alias creation, order by address & size.
    # For alias symbol ordering, sort by object_path.
    symbols.sort(key=lambda x: (x.address, -x.size, x.object_path))

  logging.debug('Deduping string literals')
  num_removed = 0
  size_removed = 0
  num_aliases = 0
  for i, symbols in enumerate(ret):
    if not symbols:
      continue
    prev_symbol = symbols[0]
    new_symbols = [prev_symbol]
    for symbol in symbols[1:]:
      # |padding| is negative when |symbol| overlaps |prev_symbol|.
      padding = symbol.address - prev_symbol.end_address
      if (prev_symbol.address == symbol.address
          and prev_symbol.size == symbol.size):
        # String is an alias.
        num_aliases += 1
        aliases = prev_symbol.aliases
        if aliases:
          aliases.append(symbol)
          symbol.aliases = aliases
        else:
          aliases = [prev_symbol, symbol]
          prev_symbol.aliases = aliases
          symbol.aliases = aliases
      elif padding + symbol.size <= 0:
        # String is a substring of prior one.
        num_removed += 1
        size_removed += symbol.size
        continue
      elif padding < 0:
        # String overlaps previous one. Adjust to not overlap.
        symbol.address -= padding
        symbol.size += padding
      new_symbols.append(symbol)
      prev_symbol = symbol
    ret[i] = new_symbols

  logging.debug(
      'Removed %d overlapping string literals (%d bytes) & created %d aliases',
      num_removed, size_removed, num_aliases)
  return ret


def _AddOutlinedSymbolCountsFromNm(raw_symbols, names_by_address):
  logging.debug('Update symbol names')
  # linker_map_parser extracts '** outlined function' without knowing how many
  # such symbols exist at each address. nm has this information, and stores the
  # value as, e.g., '** outlined function * 5'. Copy the information over.
  for s in raw_symbols:
    if s.full_name.startswith('** outlined function'):
      name_list = names_by_address.get(s.address)
      if name_list:
        for name in name_list:
          if name.startswith('** outlined function'):
            s.full_name = name
            break


def _AddNmAliases(raw_symbols, names_by_address):
  """Adds symbols that were removed by identical code folding.

  Args:
    raw_symbols: List of symbols, in address order.
    names_by_address: Dict of address -> list of full_names at that address,
        as reported by nm.

  Returns:
    New symbol list where each folded symbol is replaced by one symbol per
    name that nm reports at its address.
  """
  # Step 1: Create list of (index_of_symbol, name_list).
  logging.debug('Creating alias list')
  replacements = []
  num_new_symbols = 0
  num_missing = 0
  missing_names = collections.defaultdict(list)
  for i, s in enumerate(raw_symbols):
    # Don't alias padding-only symbols (e.g. ** symbol gap)
    if s.size_without_padding == 0:
      continue
    # Also skip artificial symbols that won't appear in nm output.
    if s.full_name.startswith('** CFI jump table'):
      continue
    name_list = names_by_address.get(s.address)
    if name_list:
      if s.full_name not in name_list:
        num_missing += 1
        missing_names[s.full_name].append(s.address)
        # Sometimes happens for symbols from assembly files.
        if num_missing < 10:
          logging.debug('Name missing from aliases: %s %s (addr=%x)',
                        s.full_name, name_list, s.address)
        continue
      replacements.append((i, name_list))
      num_new_symbols += len(name_list) - 1

  # Log where each missing name actually lives, for debugging.
  if missing_names and logging.getLogger().isEnabledFor(logging.INFO):
    for address, names in names_by_address.items():
      for name in names:
        if name in missing_names:
          logging.info('Missing name %s is at address %x instead of [%s]' %
                       (name, address, ','.join('%x' % a
                                                for a in missing_names[name])))

  is_small_file = len(raw_symbols) < 1000
  if not is_small_file and num_new_symbols / len(raw_symbols) < .05:
    logging.warning(
        'Number of aliases is oddly low (%.0f%%). It should '
        'usually be around 25%%.', num_new_symbols / len(raw_symbols) * 100)

  # Step 2: Create new symbols as siblings to each existing one.
  logging.debug('Creating %d new symbols from nm output', num_new_symbols)
  expected_num_symbols = len(raw_symbols) + num_new_symbols
  ret = []
  prev_src = 0
  for cur_src, name_list in replacements:
    # Copy over symbols between replacement points unchanged.
    ret += raw_symbols[prev_src:cur_src]
    prev_src = cur_src + 1
    sym = raw_symbols[cur_src]
    # Create symbols (|sym| gets recreated and discarded).
    new_syms = []
    for full_name in name_list:
      # Do not set |aliases| in order to avoid being pruned by
      # CompactLargeAliasesIntoSharedSymbols(), which assumes aliases differ
      # only by path. The field will be set afterwards by _ConnectNmAliases().
      new_syms.append(
          models.Symbol(sym.section_name,
                        sym.size,
                        address=sym.address,
                        full_name=full_name))
    ret += new_syms
  ret += raw_symbols[prev_src:]
  assert expected_num_symbols == len(ret)
  return ret


def _ResolveThinArchivePaths(raw_symbols, thin_archives):
  """Converts object_paths for thin archives to external .o paths."""
  for sym in raw_symbols:
    path = sym.object_path
    # Paths of the form foo/bar.a(baz.o) denote archive members.
    if not path.endswith(')'):
      continue
    paren_idx = path.rindex('(')
    archive_path = path[:paren_idx]
    if archive_path in thin_archives:
      member = path[paren_idx + 1:-1]
      sym.object_path = ar.CreateThinObjectPath(archive_path, member)


def _DeduceObjectPathForSwitchTables(raw_symbols, object_paths_by_name):
  # Example: foo (.67.rel)
  # Example: bar (.67)
  strip_num_suffix_regexp = re.compile(r'\s+\(\.\d+.*?\)$')
  num_switch_tables = 0
  num_unassigned = 0
  num_deduced = 0
  num_arbitrations = 0
  for s in raw_symbols:
    if s.full_name.startswith('Switch table for '):
      num_switch_tables += 1
      # Strip 'Switch table for ' prefix.
      name = s.full_name[17:]
      # Strip, e.g., ' (.123)' suffix.
      name = re.sub(strip_num_suffix_regexp, '', name)
      object_paths = object_paths_by_name.get(name, None)
      if not s.object_path:
        if object_paths is None:
          num_unassigned += 1
        else:
          num_deduced += 1
          # If ambiguity arises, arbitrate by taking the first.
          s.object_path = object_paths[0]
          if len(object_paths) > 1:
            num_arbitrations += 1
      else:
        assert object_paths, 'Name was: ' + name
        assert s.object_path in object_paths, s.object_path
  if num_switch_tables > 0:
    logging.info(
        'Found %d switch tables: Deduced %d object paths with ' +
        '%d arbitrations. %d remain unassigned.', num_switch_tables,
        num_deduced, num_arbitrations, num_unassigned)


def _ParseElfInfo(native_spec, outdir_context=None):
  """Adds ELF section ranges and symbols.

  Args:
    native_spec: Spec with elf_path / map_path / track_string_literals. At
        least one of elf_path or map_path must be set.
    outdir_context: Optional _OutputDirectoryContext. When present together
        with a map file, object files are analyzed to refine object paths,
        aliases, and (optionally) string literals.

  Returns:
    Tuple of (section_ranges, raw_symbols, object_paths_by_name).
  """
  assert native_spec.map_path or native_spec.elf_path, (
      'Need a linker map or an ELF file.')
  assert native_spec.map_path or not native_spec.track_string_literals, (
      'track_string_literals not yet implemented without map file')
  if native_spec.elf_path:
    elf_section_ranges = readelf.SectionInfoFromElf(native_spec.elf_path)

    # Run nm on the elf file to retrieve the list of symbol names per-address.
    # This list is required because the .map file contains only a single name
    # for each address, yet multiple symbols are often coalesced when they are
    # identical. This coalescing happens mainly for small symbols and for C++
    # templates. Such symbols make up ~500kb of libchrome.so on Android.
    elf_nm_result = nm.CollectAliasesByAddressAsync(native_spec.elf_path)

    # Run nm on all .o/.a files to retrieve the symbol names within them.
    # The list is used to detect when multiple .o files contain the same symbol
    # (e.g. inline functions), and to update the object_path / source_path
    # fields accordingly.
    # Looking in object files is required because the .map file choses a
    # single path for these symbols.
    # Rather than record all paths for each symbol, set the paths to be the
    # common ancestor of all paths.
    if outdir_context and native_spec.map_path:
      bulk_analyzer = obj_analyzer.BulkObjectFileAnalyzer(
          outdir_context.output_directory,
          track_string_literals=native_spec.track_string_literals)
      bulk_analyzer.AnalyzePaths(outdir_context.elf_object_paths)

  if native_spec.map_path:
    logging.info('Parsing Linker Map')
    map_section_ranges, raw_symbols, linker_map_extras = (
        linker_map_parser.ParseFile(native_spec.map_path))

    if outdir_context and outdir_context.thin_archives:
      _ResolveThinArchivePaths(raw_symbols, outdir_context.thin_archives)
  else:
    # No map file: derive symbols directly from nm (e.g. dwarf mode).
    logging.info('Collecting symbols from nm')
    raw_symbols = nm.CreateUniqueSymbols(native_spec.elf_path,
                                         elf_section_ranges)

  if native_spec.elf_path and native_spec.map_path:
    logging.debug('Validating section sizes')
    differing_elf_section_sizes = {}
    differing_map_section_sizes = {}
    for k, (_, elf_size) in elf_section_ranges.items():
      if k in _SECTION_SIZE_BLOCKLIST:
        continue
      # NOTE(review): assumes every non-blocklisted ELF section also appears
      # in the .map; if |k| were absent, .get() would return None and fail to
      # unpack here — confirm.
      _, map_size = map_section_ranges.get(k)
      if map_size != elf_size:
        differing_map_section_sizes[k] = map_size
        differing_elf_section_sizes[k] = elf_size
    if differing_map_section_sizes:
      logging.error('ELF file and .map file do not agree on section sizes.')
      logging.error('readelf: %r', differing_elf_section_sizes)
      logging.error('.map file: %r', differing_map_section_sizes)
      sys.exit(1)

  if native_spec.elf_path and native_spec.map_path and outdir_context:
    missed_object_paths = _DiscoverMissedObjectPaths(
        raw_symbols, outdir_context.known_inputs)
    missed_object_paths = ar.ExpandThinArchives(
        missed_object_paths, outdir_context.output_directory)[0]
    bulk_analyzer.AnalyzePaths(missed_object_paths)
    bulk_analyzer.SortPaths()
    if native_spec.track_string_literals:
      merge_string_syms = [
          s for s in raw_symbols if s.full_name == '** merge strings'
          or s.full_name == '** lld merge strings'
      ]
      # More likely for there to be a bug in supersize than an ELF to not have a
      # single string literal.
      assert merge_string_syms
      string_ranges = [(s.address, s.size) for s in merge_string_syms]
      bulk_analyzer.AnalyzeStringLiterals(native_spec.elf_path, string_ranges)

  # Map file for some reason doesn't demangle all names.
  # Demangle prints its own log statement.
  demangle.DemangleRemainingSymbols(raw_symbols)

  object_paths_by_name = {}
  if native_spec.elf_path:
    logging.info(
        'Adding symbols removed by identical code folding (as reported by nm)')
    # This normally does not block (it's finished by this time).
    names_by_address = elf_nm_result.get()
    if native_spec.map_path:
      # This rewrites outlined symbols from |map_path|, and can be skipped if
      # symbols already came from nm (e.g., for dwarf mode).
      _AddOutlinedSymbolCountsFromNm(raw_symbols, names_by_address)

    raw_symbols = _AddNmAliases(raw_symbols, names_by_address)

    if native_spec.map_path and outdir_context:
      object_paths_by_name = bulk_analyzer.GetSymbolNames()
      logging.debug(
          'Fetched path information for %d symbols from %d files',
          len(object_paths_by_name),
          len(outdir_context.elf_object_paths) + len(missed_object_paths))
      _DeduceObjectPathForSwitchTables(raw_symbols, object_paths_by_name)
      # For aliases, this provides path information where there wasn't any.
      logging.info('Creating aliases for symbols shared by multiple paths')
      raw_symbols = _AssignNmAliasPathsAndCreatePathAliases(
          raw_symbols, object_paths_by_name)

      if native_spec.track_string_literals:
        logging.info('Waiting for string literal extraction to complete.')
        list_of_positions_by_object_path = bulk_analyzer.GetStringPositions()
      bulk_analyzer.Close()

      if native_spec.track_string_literals:
        logging.info('Deconstructing ** merge strings into literals')
        replacements = _CreateMergeStringsReplacements(
            merge_string_syms, list_of_positions_by_object_path)
        for merge_sym, literal_syms in zip(merge_string_syms, replacements):
          # Don't replace if no literals were found.
          if literal_syms:
            # Re-find the symbols since aliases cause their indices to change.
            idx = raw_symbols.index(merge_sym)
            # This assignment is a bit slow (causes array to be shifted), but
            # is fast enough since len(merge_string_syms) < 10.
            raw_symbols[idx:idx + 1] = literal_syms

  if native_spec.map_path:
    linker_map_parser.DeduceObjectPathsFromThinMap(raw_symbols,
                                                   linker_map_extras)

  if native_spec.elf_path and native_spec.track_string_literals:
    sym_and_string_literals = string_extract.ReadStringLiterals(
        raw_symbols, native_spec.elf_path)
    for sym, data in sym_and_string_literals:
      sym.full_name = string_extract.GetNameOfStringLiteralBytes(data)

  # If we have an ELF file, use its ranges as the source of truth, since some
  # sections can differ from the .map.
  return (elf_section_ranges if native_spec.elf_path else map_section_ranges,
          raw_symbols, object_paths_by_name)


def _AddUnattributedSectionSymbols(raw_symbols, section_ranges, source_path):
  """Creates catch-all symbols for ELF bytes not covered by existing symbols.

  Args:
    raw_symbols: Symbols, grouped contiguously by section_name (required for
        the itertools.groupby() passes below).
    section_ranges: Dict of section_name -> (address, size).
    source_path: source_path to assign to all created symbols.

  Returns:
    Tuple of (symbol list with gap / whole-section symbols merged in,
    other_symbols list destined for the ".other" section).
  """
  # Create symbols for ELF sections not covered by existing symbols.
  logging.info('Searching for symbol gaps...')
  new_syms_by_section = collections.defaultdict(list)
  seen_sections = set()

  for section_name, group in itertools.groupby(
      raw_symbols, lambda s: s.section_name):
    seen_sections.add(section_name)
    # Get the last symbol in the group.
    sym = None  # Needed for pylint.
    for sym in group:
      pass
    end_address = sym.end_address  # pylint: disable=undefined-loop-variable
    size_from_syms = end_address - section_ranges[section_name][0]
    overhead = section_ranges[section_name][1] - size_from_syms
    assert overhead >= 0, (
        'Last symbol (%s) ends %d bytes after section boundary (%x)' %
        (sym, -overhead, sum(section_ranges[section_name])))
    if overhead > 0 and section_name not in models.BSS_SECTIONS:
      new_syms_by_section[section_name].append(
          models.Symbol(section_name,
                        overhead,
                        address=end_address,
                        full_name='** {} (unattributed)'.format(section_name),
                        source_path=source_path))
      logging.info('Last symbol in %s does not reach end of section, gap=%d',
                   section_name, overhead)

  # Sections that should not bundle into ".other".
  unsummed_sections, summed_sections = models.ClassifySections(
      section_ranges.keys())
  ret = []
  other_symbols = []
  # Iterate a snapshot of items (dict order); |other_symbols| is sorted below
  # to ensure consistent order (> 1 sections may have address = 0).
  for section_name, (_, section_size) in list(section_ranges.items()):
    if section_name in seen_sections:
      continue
    # Handle sections that don't appear in |raw_symbols|.
    if (section_name not in unsummed_sections
        and section_name not in summed_sections):
      other_symbols.append(
          models.Symbol(models.SECTION_OTHER,
                        section_size,
                        full_name='** ELF Section: {}'.format(section_name),
                        source_path=source_path))
      archive_util.ExtendSectionRange(section_ranges, models.SECTION_OTHER,
                                      section_size)
    else:
      ret.append(
          models.Symbol(section_name,
                        section_size,
                        full_name='** ELF Section: {}'.format(section_name),
                        source_path=source_path))
  other_symbols.sort(key=lambda s: (s.address, s.full_name))

  # TODO(agrieve): It would probably simplify things to use a dict of
  #     section_name->raw_symbols while creating symbols.
  # Merge |new_syms_by_section| into |raw_symbols| while maintaining ordering.
  for section_name, group in itertools.groupby(
      raw_symbols, lambda s: s.section_name):
    ret.extend(group)
    ret.extend(new_syms_by_section[section_name])
  return ret, other_symbols


def ParseNinjaFiles(output_directory, elf_paths_to_find_inputs_for=None):
  """Parses .ninja files to create a source path mapper.

  Args:
    output_directory: Build output directory containing .ninja files.
    elf_paths_to_find_inputs_for: Optional list of linked binaries whose link
        commands (and thus input files) must be found.

  Returns:
    A source mapper from ninja_parser.Parse().
  """
  logging.info('Parsing ninja files')
  ninja_source_mapper = ninja_parser.Parse(output_directory,
                                           elf_paths_to_find_inputs_for)
  # logging args are evaluated eagerly, so guard len() against the None
  # default (previously raised TypeError when the parameter was omitted).
  logging.debug('Parsed %d .ninja files. Linker inputs=%d of %d',
                ninja_source_mapper.parsed_file_count,
                ninja_source_mapper.inputs_map_count,
                len(elf_paths_to_find_inputs_for or ()))
  if elf_paths_to_find_inputs_for:
    for path in elf_paths_to_find_inputs_for:
      assert ninja_source_mapper.GetInputsForBinary(path), (
          'Failed to find any link commands in ninja files for ' + path)

  return ninja_source_mapper


def _ElfInfoFromApk(apk_path, apk_so_path):
  """Extracts |apk_so_path| from the apk and returns its ElfInfo."""
  with zip_util.UnzipToTemp(apk_path, apk_so_path) as extracted_so_path:
    return _CreateElfInfo(extracted_so_path)


def _CountRelocationsFromElf(elf_path):
  """Returns the total relocation entry count reported by readelf -r."""
  cmd = [path_util.GetReadElfPath(), '-r', elf_path]
  output = subprocess.check_output(cmd).decode('ascii')
  entry_counts = re.findall(
      r'Relocation section .* at offset .* contains (\d+) entries', output)
  return sum(int(count) for count in entry_counts)


def _FindToolchainSubdirs(output_directory):
  return [
      n for n in os.listdir(output_directory)
      if os.path.exists(os.path.join(output_directory, n, 'toolchain.ninja'))
  ]


def CreateMetadata(*, native_spec, elf_info, shorten_path):
  """Returns metadata for the given native_spec / elf_info."""
  logging.debug('Constructing native metadata')
  metadata = {models.METADATA_ELF_ALGORITHM: native_spec.algorithm}

  if elf_info:
    metadata[models.METADATA_ELF_ARCHITECTURE] = elf_info.architecture
    metadata[models.METADATA_ELF_BUILD_ID] = elf_info.build_id

  if native_spec.apk_so_path:
    metadata[models.METADATA_ELF_APK_PATH] = native_spec.apk_so_path

  if native_spec.elf_path:
    metadata[models.METADATA_ELF_FILENAME] = shorten_path(native_spec.elf_path)
    # Record the ELF's mtime as a UTC unix timestamp.
    mtime_utc = datetime.datetime.fromtimestamp(
        os.path.getmtime(native_spec.elf_path), datetime.timezone.utc)
    metadata[models.METADATA_ELF_MTIME] = calendar.timegm(mtime_utc.timetuple())

  if native_spec.map_path:
    metadata[models.METADATA_MAP_FILENAME] = shorten_path(native_spec.map_path)
  return metadata


def CreateSymbols(*,
                  apk_spec,
                  native_spec,
                  output_directory=None,
                  ninja_source_mapper=None,
                  pak_id_map=None):
  """Creates native symbols for the given native_spec.

  Args:
    apk_spec: Instance of ApkSpec, or None.
    native_spec: Instance of NativeSpec.
    output_directory: Build output directory. If None, source_paths and symbol
        alias information will not be recorded.
    ninja_source_mapper: From ninja_parser.Parse()
    pak_id_map: Instance of PakIdMap.

  Returns:
    A tuple of (section_ranges, raw_symbols, elf_info, metrics_by_file), where
    metrics_by_file is a dict from file name to a dict of {metric_name: value}.
  """
  apk_elf_info_result = None
  if apk_spec and native_spec.apk_so_path:
    # Extraction takes around 1 second, so do it in parallel.
    apk_elf_info_result = parallel.ForkAndCall(
        _ElfInfoFromApk, (apk_spec.apk_path, native_spec.apk_so_path))

  raw_symbols = []
  dwarf_source_mapper = None
  section_ranges = {}
  ninja_elf_object_paths = None
  metrics_by_file = {}
  # Source path info comes from either ninja files (when a linker .map is
  # available) or from dwarfdump on the ELF directly.
  if ninja_source_mapper and native_spec.map_path:
    # Finds all objects passed to the linker and creates a map of .o -> .cc.
    elf_path = native_spec.combined_elf_path or native_spec.elf_path
    if elf_path:
      ninja_elf_object_paths = ninja_source_mapper.GetInputsForBinary(elf_path)
      assert ninja_elf_object_paths, 'Failed to find link step for ' + elf_path
  elif native_spec.elf_path:
    logging.info('Parsing source path info via dwarfdump')
    dwarf_source_mapper = dwarfdump.CreateAddressSourceMapper(
        native_spec.elf_path)
    logging.info('Found %d source paths across %s ranges',
                 dwarf_source_mapper.NumberOfPaths(),
                 dwarf_source_mapper.num_ranges)

  # Start by finding elf_object_paths so that nm can run on them while the
  # linker .map is being parsed.
  if ninja_elf_object_paths:
    elf_object_paths, thin_archives = ar.ExpandThinArchives(
        ninja_elf_object_paths, output_directory)
    known_inputs = set(elf_object_paths)
    known_inputs.update(ninja_elf_object_paths)
  else:
    elf_object_paths = []
    known_inputs = None
    # When we don't know which elf file is used, just search all paths.
    # TODO(agrieve): Seems to be used only for tests. Remove?
    if ninja_source_mapper and native_spec.map_path:
      thin_archives = set(
          p for p in ninja_source_mapper.IterAllPaths() if p.endswith('.a')
          and ar.IsThinArchive(os.path.join(output_directory, p)))
    else:
      thin_archives = None

  if output_directory:
    toolchain_subdirs = _FindToolchainSubdirs(output_directory)
    outdir_context = _OutputDirectoryContext(elf_object_paths=elf_object_paths,
                                             known_inputs=known_inputs,
                                             output_directory=output_directory,
                                             thin_archives=thin_archives)
  else:
    toolchain_subdirs = None
    outdir_context = None

  object_paths_by_name = None
  if native_spec.elf_path or native_spec.map_path:
    section_ranges, raw_symbols, object_paths_by_name = _ParseElfInfo(
        native_spec, outdir_context=outdir_context)
    if pak_id_map and native_spec.map_path:
      # For trichrome, pak files are in different apks than native library,
      # so need to pass along pak_id_map separately and ensure
      # TrichromeLibrary appears first in .ssargs file.
      logging.debug('Extracting pak IDs from symbol names')
      pak_id_map.Update(object_paths_by_name, ninja_source_mapper)

  elf_info = None
  if apk_elf_info_result:
    logging.debug('Extracting section sizes from .so within .apk')
    # Blocks until the parallel extraction forked above completes.
    elf_info = apk_elf_info_result.get()
    if native_spec.elf_path:
      # Sanity check: the unstripped ELF must match the one packaged in
      # the APK (same build).
      expected_build_id = readelf.BuildIdFromElf(native_spec.elf_path)
      assert elf_info.build_id == expected_build_id, (
          'BuildID of {} != $APK/{}: {} != {}'.format(native_spec.elf_path,
                                                      native_spec.apk_so_path,
                                                      expected_build_id,
                                                      elf_info.build_id))
  elif native_spec.elf_path:
    # Strip ELF before capturing section information to avoid recording
    # debug sections.
    with tempfile.NamedTemporaryFile(
        suffix=os.path.basename(native_spec.elf_path)) as f:
      strip_path = path_util.GetStripPath()
      subprocess.run([
          strip_path, '--strip-debug', '--strip-unneeded', '-o', f.name,
          native_spec.elf_path
      ],
                     check=True)
      elf_info = _CreateElfInfo(f.name)

  if elf_info:
    # Prefer section sizes measured from the (stripped / packaged) ELF over
    # those derived from the linker map.
    section_ranges = elf_info.section_ranges.copy()
    if native_spec.elf_path:
      key = posixpath.basename(native_spec.elf_path)
      # Record per-section size metrics plus a relocation count metric,
      # keyed by the ELF's basename.
      metrics_by_file[key] = {
          f'{models.METRICS_SIZE}/{k}': size
          for (k, (offset, size)) in section_ranges.items()
      }
      relocations_count = _CountRelocationsFromElf(native_spec.elf_path)
      metrics_by_file[key][
          f'{models.METRICS_COUNT}/{models.METRICS_COUNT_RELOCATIONS}'] = (
              relocations_count)

  source_path = ''
  if native_spec.apk_so_path:
    # Put section symbols under $NATIVE/libfoo.so (abi)/...
    # NOTE(review): elf_info is assumed non-None here since apk_so_path
    # implies apk_elf_info_result was set (when apk_spec exists).
    source_path = '{}/{} ({})'.format(
        models.NATIVE_PREFIX_PATH, posixpath.basename(native_spec.apk_so_path),
        elf_info.architecture)

  raw_symbols, other_symbols = _AddUnattributedSectionSymbols(
      raw_symbols, section_ranges, source_path)

  if elf_info:
    # Account for ELF bytes not covered by any section (headers, padding).
    elf_overhead_size = elf_info.OverheadSize()
    elf_overhead_symbol = models.Symbol(models.SECTION_OTHER,
                                        elf_overhead_size,
                                        full_name='Overhead: ELF file',
                                        source_path=source_path)
    archive_util.ExtendSectionRange(section_ranges, models.SECTION_OTHER,
                                    elf_overhead_size)
    other_symbols.append(elf_overhead_symbol)

  # Always have .other come last.
  other_symbols.sort(key=lambda s: (s.IsOverhead(), s.full_name.startswith(
      '**'), s.address, s.full_name))

  # Attach source paths before |other_symbols| are appended so they are not
  # affected by the source mappers.
  if ninja_source_mapper:
    _AddSourcePathsUsingObjectPaths(ninja_source_mapper, raw_symbols)
  elif dwarf_source_mapper:
    _AddSourcePathsUsingAddress(dwarf_source_mapper, raw_symbols)

  raw_symbols.extend(other_symbols)

  # Path normalization must come before compacting aliases so that
  # ancestor paths do not mix generated and non-generated paths.
  archive_util.NormalizePaths(raw_symbols,
                              gen_dir_regex=native_spec.gen_dir_regex,
                              toolchain_subdirs=toolchain_subdirs)

  if native_spec.elf_path or native_spec.map_path:
    logging.info('Converting excessive aliases into shared-path symbols')
    archive_util.CompactLargeAliasesIntoSharedSymbols(
        raw_symbols, _MAX_SAME_NAME_ALIAS_COUNT)

    logging.debug('Connecting nm aliases')
    _ConnectNmAliases(raw_symbols)

  return section_ranges, raw_symbols, elf_info, metrics_by_file