File: archive.py

package info (click to toggle)
chromium 139.0.7258.127-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 6,122,068 kB
  • sloc: cpp: 35,100,771; ansic: 7,163,530; javascript: 4,103,002; python: 1,436,920; asm: 946,517; xml: 746,709; pascal: 187,653; perl: 88,691; sh: 88,436; objc: 79,953; sql: 51,488; cs: 44,583; fortran: 24,137; makefile: 22,147; tcl: 15,277; php: 13,980; yacc: 8,984; ruby: 7,485; awk: 3,720; lisp: 3,096; lex: 1,327; ada: 727; jsp: 228; sed: 36
file content (1230 lines) | stat: -rw-r--r-- 48,869 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Main Python API for analyzing binary size."""

import argparse
import collections
import dataclasses
import functools
import logging
import os
import posixpath
import re
import shlex
import subprocess
import time
import zipfile

import apk
import apkanalyzer
import archive_util
import data_quality
import describe
import dex_deobfuscate
import dir_metadata
import file_format
import function_signature
import json_config_parser
import models
import native
import pakfile
import parallel
import path_util
import readelf
import zip_util


@dataclasses.dataclass
class NativeSpec:
  """Inputs and options for analyzing a single native library.

  At least one of apk_so_path, map_path, elf_path must be non-None.
  """
  # Location of the .so file inside the .apk. Set only when there is an
  # apk_spec.
  apk_so_path: str = None
  # Linker map file path, when one is available.
  map_path: str = None
  # Unstripped ELF file path, when one is available.
  elf_path: str = None
  # Unstripped ELF file path from before it was split into partitions, when
  # one is available.
  combined_elf_path: str = None
  # Whether each string literal should get its own symbol.
  track_string_literals: bool = True
  # Component that is applied to every symbol.
  component: str = None
  # Regular expression that matches generated files.
  gen_dir_regex: str = None
  # Prefix that is applied to the source_path of every symbol.
  source_path_prefix: str = None

  @property
  def algorithm(self):
    """Names the symbol source: a linker map wins over an ELF file."""
    if self.map_path:
      return 'linker_map'
    return 'dwarf' if self.elf_path else 'sections'


@dataclasses.dataclass
class PakSpec:
  """Locations of the .pak inputs for one container."""
  # One of pak_paths or apk_pak_paths must be non-None.
  # Pak paths handed to pakfile.CreatePakSymbolsFromFiles() (no .apk case).
  pak_paths: list = None
  # Pak paths handed, together with the .apk path, to
  # pakfile.CreatePakSymbolsFromApk().
  apk_pak_paths: list = None
  # Path to the pak info file (see --pak-info-file). Forwarded to both of the
  # pakfile functions above.
  pak_info_path: str = None


@dataclasses.dataclass
class ApkSpec:
  """Inputs and options for analyzing a single .apk file."""
  # Path to the .apk file. Never None.
  # This is a temp file when .apks is being analyzed.
  apk_path: str
  # Path to .minimal.apks (when analyzing bundles).
  minimal_apks_path: str = None
  # Proguard mapping path.
  mapping_path: str = None
  # Path to the .pathmap.txt file for the apk. Used to deobfuscate res/ files.
  resources_pathmap_path: str = None
  # Name of the apk split when .apks is being analyzed.
  split_name: str = None
  # Path such as: out/Release/size-info/BaseName
  size_info_prefix: str = None
  # Whether to break down classes.dex.
  analyze_dex: bool = True
  # Whether to create symbols for each string literal.
  track_string_literals: bool = True
  # Dict of apk_path -> source_path, provided by json config.
  path_defaults: dict = None
  # Component to use for symbols when not specified by DIR_METADATA, provided by
  # json config.
  default_component: str = ''
  # Paths to not create .other symbols for.
  # Uses default_factory so that instances do not share one mutable set.
  ignore_apk_paths: set = dataclasses.field(default_factory=set)


@dataclasses.dataclass
class ContainerSpec:
  """Bundles everything needed to create the symbols of one container."""
  # Name given to the models.Container that is created for this spec.
  container_name: str
  # The three specs below are tested for truthiness by their consumers, so any
  # of them may be None when that kind of input is absent.
  apk_spec: ApkSpec
  pak_spec: PakSpec
  native_spec: NativeSpec
  # Forwarded to dir_metadata.PopulateComponents() when assigning components.
  source_directory: str
  # Metadata paths are stored relative to this directory. May be falsy, in
  # which case only basenames are stored.
  output_directory: str


def _NormalizeNames(raw_symbols):
  """Rewrites the name fields of every symbol into a more useful form.

  For each symbol this:
    - Fills in |name| and |template_name| based on |full_name|.
    - Drops return types (for functions).
    - Turns prefixes such as "vtable for" into bracketed suffixes.
  """
  anon_ns = '(anonymous namespace)::'
  seen_prefixes = set()
  for sym in raw_symbols:
    cur_name = sym.full_name

    # See comment in _CalculatePadding() about when this can happen. Names of
    # non-native sections are not parsed.
    if sym.IsPak():
      # cur_name: "about_ui_resources.grdp: IDR_ABOUT_UI_CREDITS_HTML".
      resource_name = cur_name[cur_name.rindex(' ') + 1:]
      sym.template_name = sym.name = resource_name
    elif cur_name.startswith('**') or sym.IsOverhead() or sym.IsOther():
      sym.template_name = sym.name = cur_name
    elif sym.IsStringLiteral():  # Covers both native and DEX strings.
      sym.full_name = sym.template_name = sym.name = cur_name
    elif sym.IsDex():
      sym.full_name, sym.template_name, sym.name = (
          function_signature.ParseJava(cur_name))
    elif sym.IsNative():
      # Cut the name at the first [clone] suffix (several may be present) and
      # record that via a flag. Example suffixes:
      #     [clone .part.322]  # GCC
      #     [clone .isra.322]  # GCC
      #     [clone .constprop.1064]  # GCC
      #     [clone .11064]  # clang
      # http://unix.stackexchange.com/questions/223013/function-symbol-gets-part-suffix-after-compilation
      before_clone, clone_marker, _ = cur_name.partition(' [clone ')
      if clone_marker:
        cur_name = before_clone
        sym.flags |= models.FLAG_CLONE

      # Clones for C symbols.
      if sym.section == 't':
        stem, dot, digits = cur_name.rpartition('.')
        # Some generated symbols end with .123 without being clones.
        # Find these via:
        # size_info.symbols.WhereInSection('t').WhereIsGroup().SortedByCount()
        if dot and digits.isdigit() and stem not in ('__tcf_0', 'startup'):
          sym.flags |= models.FLAG_CLONE
          # Remove .part / .isra / .constprop.
          base, inner_dot, _ = stem.rpartition('.')
          cur_name = base if inner_dot else stem

      # Move a leading qualifier to the end, in brackets. E.g.:
      #     vtable for FOO
      #     virtual thunk to FOO
      for separator in (' for ', ' to '):
        idx = cur_name.find(separator, 0, 30)
        if idx != -1:
          # The recorded prefix keeps the separator minus its trailing space.
          seen_prefixes.add(cur_name[:idx + len(separator) - 1])
          cur_name = '{} [{}]'.format(cur_name[idx + len(separator):],
                                      cur_name[:idx])

      # Strip out return type, and split out name, template_name.
      # Function parsing is applied to non-text symbols as well
      # (e.g. function statics).
      sym.full_name, sym.template_name, sym.name = (
          function_signature.Parse(cur_name))

      # Anonymous namespaces are removed since they just harm clustering.
      sym.template_name = sym.template_name.replace(anon_ns, '')
      sym.full_name = sym.full_name.replace(anon_ns, '')
      stripped_name = sym.name.replace(anon_ns, '')
      if stripped_name != sym.name:
        sym.flags |= models.FLAG_ANONYMOUS
        sym.name = stripped_name

    # Done for all symbols: allows comparing names with "is" (and should help
    # with RAM).
    function_signature.InternSameNames(sym)

  logging.debug('Found name prefixes of: %r', seen_prefixes)


def LoadAndPostProcessSizeInfo(path, file_obj=None):
  """Loads the SizeInfo stored at |path| and normalizes its symbol names.

  Args:
    path: Path handed to file_format.LoadSizeInfo().
    file_obj: Optional object forwarded to file_format.LoadSizeInfo().

  Returns:
    The loaded SizeInfo.
  """
  logging.debug('Loading results from: %s', path)
  loaded_info = file_format.LoadSizeInfo(path, file_obj=file_obj)

  logging.info('Normalizing symbol names')
  symbols = loaded_info.raw_symbols
  _NormalizeNames(symbols)

  logging.info('Loaded %d symbols', len(symbols))
  return loaded_info


def LoadAndPostProcessDeltaSizeInfo(path, file_obj=None):
  """Loads the two SizeInfos stored at |path| and normalizes their names.

  Args:
    path: Path handed to file_format.LoadDeltaSizeInfo().
    file_obj: Optional object forwarded to file_format.LoadDeltaSizeInfo().

  Returns:
    A (before_size_info, after_size_info) tuple.
  """
  logging.debug('Loading results from: %s', path)
  # Only the first two of the four loaded values are used here.
  before, after, _, _ = file_format.LoadDeltaSizeInfo(path, file_obj=file_obj)

  logging.info('Normalizing symbol names')
  for size_info in (before, after):
    _NormalizeNames(size_info.raw_symbols)

  symbol_counts = (len(before.raw_symbols), len(after.raw_symbols))
  logging.info('Loaded %d + %d symbols', *symbol_counts)
  return before, after


def CreateBuildConfig(output_directory, source_directory, url=None, title=None):
  """Builds the dict that is used for SizeInfo.build_info.

  Args:
    output_directory: Build directory containing args.gn. When falsy, the GN
        args and output directory entries are omitted.
    source_directory: Directory that the git revision is detected in, and that
        the stored output directory is made relative to.
    url: Optional value for the "url" entry.
    title: Optional value for the "title" entry.

  Returns:
    The build config dict.
  """
  logging.debug('Constructing build_config')
  config = {}

  if output_directory:
    args_gn_path = os.path.join(output_directory, 'args.gn')
    config[models.BUILD_CONFIG_GN_ARGS] = _ParseGnArgs(args_gn_path)
    config[models.BUILD_CONFIG_OUT_DIRECTORY] = os.path.relpath(
        output_directory, start=source_directory)

  revision = _DetectGitRevision(source_directory)
  if revision:
    config[models.BUILD_CONFIG_GIT_REVISION] = revision

  # Entries that are recorded only when explicitly provided.
  optional_entries = (
      (models.BUILD_CONFIG_URL, url),
      (models.BUILD_CONFIG_TITLE, title),
  )
  for key, value in optional_entries:
    if value is not None:
      config[key] = value

  return config


def _CreateMetadata(container_spec, elf_info):
  """Collects the apk and native metadata entries of a container.

  Args:
    container_spec: ContainerSpec whose apk_spec, native_spec and
        output_directory are consulted.
    elf_info: Forwarded to native.CreateMetadata().

  Returns:
    Dict holding the combined metadata.
  """
  logging.debug('Constructing metadata')
  apk_spec = container_spec.apk_spec
  native_spec = container_spec.native_spec
  output_directory = container_spec.output_directory

  # Paths are stored relative to the output directory to make them hermetic.
  # Without an output directory, only basenames are stored.
  if output_directory:

    def shorten_path(path):
      return os.path.relpath(path, output_directory)
  else:
    shorten_path = os.path.basename

  metadata = {}
  if apk_spec:
    apk_metadata = apk.CreateMetadata(apk_spec=apk_spec,
                                      include_file_details=not native_spec,
                                      shorten_path=shorten_path)
    assert metadata.keys().isdisjoint(apk_metadata.keys())
    metadata.update(apk_metadata)

  if native_spec:
    native_metadata = native.CreateMetadata(native_spec=native_spec,
                                            elf_info=elf_info,
                                            shorten_path=shorten_path)
    assert metadata.keys().isdisjoint(native_metadata.keys())
    metadata.update(native_metadata)

  logging.debug('Constructing metadata (done)')
  return metadata


def _CreatePakSymbols(*, pak_spec, pak_id_map, apk_spec, output_directory):
  """Creates pak symbols, either from an .apk or from files on disk.

  Args:
    pak_spec: PakSpec naming the pak inputs.
    pak_id_map: Forwarded to the pakfile symbol creation function.
    apk_spec: ApkSpec; when truthy, the paks are read from its .apk.
    output_directory: Forwarded to pakfile.CreatePakSymbolsFromFiles().

  Returns:
    A (section_ranges, raw_symbols) tuple.
  """
  logging.debug('Creating Pak symbols')
  # The pakfile functions below may add entries to this dict.
  section_ranges = {}

  if not apk_spec:
    symbols = pakfile.CreatePakSymbolsFromFiles(section_ranges,
                                                pak_spec.pak_paths,
                                                pak_spec.pak_info_path,
                                                output_directory, pak_id_map)
    return section_ranges, symbols

  assert apk_spec.size_info_prefix
  symbols = pakfile.CreatePakSymbolsFromApk(section_ranges, apk_spec.apk_path,
                                            pak_spec.apk_pak_paths,
                                            pak_spec.pak_info_path, pak_id_map)
  return section_ranges, symbols


def _CreateContainerSymbols(container_spec, apk_file_manager,
                            apk_analyzer_results, ninja_source_mapper,
                            pak_id_map, component_overrides,
                            dex_deobfuscator_cache):
  """Creates all symbols of one container and binds them to a Container.

  Symbols are gathered, in this order, from: native code (when there is a
  native_spec) or else DEX (when apk_spec.analyze_dex is set), then pak files
  (when there is a pak_spec), then ARSC and the remaining .apk entries (only
  when there is an apk_spec but no native_spec).

  Args:
    container_spec: ContainerSpec describing what to analyze.
    apk_file_manager: Object whose InfoList(apk_path) returns entries that have
        |filename| and |file_size|.
    apk_analyzer_results: Indexed by container name; the entry is forwarded to
        apkanalyzer.CreateDexSymbols().
    ninja_source_mapper: Forwarded to native.CreateSymbols().
    pak_id_map: Forwarded to native.CreateSymbols() and _CreatePakSymbols().
    component_overrides: Forwarded to dir_metadata.PopulateComponents().
    dex_deobfuscator_cache: Object whose GetForMappingFile(mapping_path) result
        is forwarded to apkanalyzer.CreateDexSymbols().

  Returns:
    List of the created symbols, each with |container| set to the
    models.Container that is created for this spec.
  """
  container_name = container_spec.container_name
  apk_spec = container_spec.apk_spec
  pak_spec = container_spec.pak_spec
  native_spec = container_spec.native_spec
  output_directory = container_spec.output_directory
  source_directory = container_spec.source_directory

  logging.info('Starting on container: %s', container_spec)

  # Accumulators that add_syms() and the code below fill in.
  raw_symbols = []
  section_sizes = {}
  metrics_by_file = {}
  default_component = apk_spec.default_component if apk_spec else ''

  def add_syms(section_ranges,
               new_raw_symbols,
               source_path_prefix=None,
               component=None,
               paths_already_normalized=False):
    """Merges one batch of symbols and section sizes into the accumulators."""
    # Only the size of each (address, size) range is recorded.
    new_section_sizes = {
        k: size
        for k, (address, size) in section_ranges.items()
    }
    # SECTION_OTHER may be reported by several batches, so its sizes are summed
    # rather than treated as a collision.
    if models.SECTION_OTHER in new_section_sizes:
      section_sizes[models.SECTION_OTHER] = section_sizes.get(
          models.SECTION_OTHER, 0) + new_section_sizes[models.SECTION_OTHER]
      del new_section_sizes[models.SECTION_OTHER]

    assert not (set(section_sizes) & set(new_section_sizes)), (
        'Section collision: {}\n\n {}'.format(section_sizes, new_section_sizes))
    section_sizes.update(new_section_sizes)

    # E.g.: native.CreateSymbols() already calls NormalizePaths().
    if not paths_already_normalized:
      archive_util.NormalizePaths(new_raw_symbols)

    if source_path_prefix:
      # Prefix the source_path for all symbols that have a source_path assigned,
      # and that don't have it set to $APK or $GOOGLE3.
      for s in new_raw_symbols:
        if s.source_path and s.source_path[0] != '$':
          s.source_path = source_path_prefix + s.source_path

    # An explicit component (even an empty string) overrides the lookup done by
    # dir_metadata.
    if component is not None:
      for s in new_raw_symbols:
        s.component = component
    else:
      dir_metadata.PopulateComponents(new_raw_symbols,
                                      source_directory,
                                      component_overrides,
                                      default_component=default_component)
    raw_symbols.extend(new_raw_symbols)

  # Stays None unless native symbols are created.
  elf_info = None
  if native_spec:
    section_ranges, native_symbols, elf_info, native_metrics_by_file = (
        native.CreateSymbols(apk_spec=apk_spec,
                             native_spec=native_spec,
                             output_directory=output_directory,
                             ninja_source_mapper=ninja_source_mapper,
                             pak_id_map=pak_id_map))
    add_syms(section_ranges,
             native_symbols,
             source_path_prefix=native_spec.source_path_prefix,
             component=native_spec.component,
             paths_already_normalized=True)
    metrics_by_file.update(native_metrics_by_file)
  elif apk_spec and apk_spec.analyze_dex:
    # DEX is analyzed only for containers without a native_spec.
    logging.info('Analyzing DEX')
    apk_infolist = apk_file_manager.InfoList(apk_spec.apk_path)
    dex_total_size = sum(i.file_size for i in apk_infolist
                         if i.filename.endswith('.dex'))
    if dex_total_size > 0:
      mapping_path = apk_spec.mapping_path  # May be None.
      class_deobfuscation_map = (
          dex_deobfuscator_cache.GetForMappingFile(mapping_path))
      section_ranges, dex_symbols, dex_metrics_by_file = (
          apkanalyzer.CreateDexSymbols(apk_spec.apk_path,
                                       apk_analyzer_results[container_name],
                                       dex_total_size, class_deobfuscation_map,
                                       apk_spec.size_info_prefix,
                                       apk_spec.track_string_literals))
      add_syms(section_ranges, dex_symbols)
      metrics_by_file.update(dex_metrics_by_file)

  if pak_spec:
    section_ranges, pak_symbols = _CreatePakSymbols(
        pak_spec=pak_spec,
        pak_id_map=pak_id_map,
        apk_spec=apk_spec,
        output_directory=output_directory)
    add_syms(section_ranges, pak_symbols)
  # Stays empty unless apk.CreateApkOtherSymbols() is called below.
  apk_metadata = {}

  # This function can get called multiple times for the same APK file, to
  # process .so files that are treated as containers. The |not native_spec|
  # condition below skips these cases to prevent redundant symbol creation.
  if not native_spec and apk_spec:
    logging.info('Analyzing ARSC')
    arsc_section_ranges, arsc_symbols, arsc_metrics_by_file = (
        apk.CreateArscSymbols(apk_spec))
    add_syms(arsc_section_ranges, arsc_symbols)
    metrics_by_file.update(arsc_metrics_by_file)

    other_section_ranges, other_symbols, apk_metadata, apk_metrics_by_file = (
        apk.CreateApkOtherSymbols(apk_spec))
    add_syms(other_section_ranges, other_symbols)
    metrics_by_file.update(apk_metrics_by_file)

  metadata = _CreateMetadata(container_spec, elf_info)
  assert not (metadata.keys() & apk_metadata.keys())
  metadata.update(apk_metadata)
  container = models.Container(name=container_name,
                               metadata=metadata,
                               section_sizes=section_sizes,
                               metrics_by_file=metrics_by_file)
  # Every symbol created above belongs to the same container.
  for symbol in raw_symbols:
    symbol.container = container

  return raw_symbols


def _DetectGitRevision(directory):
  """Asks git for the SHA1 hash of the revision checked out in |directory|.

  Args:
    directory: Path to the directory that "git rev-parse" is run against.

  Returns:
    The SHA1 hash as a string, or None if an error occurred.
  """
  command = ['git', '-C', directory, 'rev-parse', 'HEAD']
  try:
    raw_output = subprocess.check_output(command)
    revision = raw_output.decode('ascii').rstrip()
  except Exception:
    # Best effort: any failure just means the revision is not recorded.
    logging.warning('Failed to detect git revision for file metadata.')
    return None
  return revision


def _ParseGnArgs(args_path):
  """Returns a sorted list of normalized "key=value" strings.

  Parsing is line-based and approximate: everything after a '#' is dropped,
  and each remaining line is split at its first '='. When a key is assigned
  more than once, the last assignment wins.

  Args:
    args_path: Path to the args.gn file.

  Returns:
    List of "key=value" strings, sorted by key.
  """
  args = {}
  with open(args_path) as f:
    for line in f:
      # Strips #s even if within string literal. Not a problem in practice.
      key, separator, value = line.split('#')[0].partition('=')
      # Skip lines without an assignment, as well as comparisons ("a == b").
      # Splitting at the first '=' only (rather than at every '=') keeps args
      # whose value contains '=', such as: extra_cflags = "-DFOO=1".
      if not separator or value.startswith('='):
        continue
      args[key.strip()] = value.strip()
  return ['%s=%s' % x for x in sorted(args.items())]


def _AddContainerArguments(parser, is_top_args=False):
  """Adds arguments applicable to a single container.

  Args:
    parser: argparse.ArgumentParser to register the arguments on.
    is_top_args: Whether |parser| is the one for the top-level command line.
        When True, --ssargs-file, --container-filter, --json-config,
        --no-output-directory and --check-data-quality are registered as well.
  """

  # Main file argument: Exactly one should be specified (perhaps via -f).
  # _IdentifyInputFile() should be kept updated.
  group = parser.add_argument_group(title='Main Input')
  group = group.add_mutually_exclusive_group(required=True)
  group.add_argument('-f',
                     metavar='FILE',
                     help='Auto-identify input file type.')
  group.add_argument('--apk-file',
                     help='.apk file to measure. Other flags can generally be '
                     'derived when this is used.')
  group.add_argument('--minimal-apks-file',
                     help='.minimal.apks file to measure. Other flags can '
                     'generally be derived when this is used.')
  group.add_argument('--elf-file', help='Path to input ELF file.')
  # Help strings are %-formatted by argparse, so braces need no escaping.
  group.add_argument('--map-file',
                     help='Path to input .map(.gz) file. Defaults to '
                     '${elf_file}.map(.gz)?. If given without '
                     '--elf-file, no size metadata will be recorded.')
  group.add_argument('--pak-file',
                     action='append',
                     default=[],
                     dest='pak_files',
                     help='Paths to pak files.')
  if is_top_args:
    group.add_argument('--ssargs-file',
                       help='Path to SuperSize multi-container arguments file.')

  group = parser.add_argument_group(title='What to Analyze')
  group.add_argument('--java-only',
                     action='store_true',
                     help='Run on only Java symbols')
  group.add_argument('--native-only',
                     action='store_true',
                     help='Run on only native symbols')
  group.add_argument('--no-java',
                     action='store_true',
                     help='Do not run on Java symbols')
  group.add_argument('--no-native',
                     action='store_true',
                     help='Do not run on native symbols')
  if is_top_args:
    group.add_argument('--container-filter',
                       help='Regular expression for which containers to create')

  group = parser.add_argument_group(title='Analysis Options for Native Code')
  group.add_argument('--no-map-file',
                     dest='ignore_linker_map',
                     action='store_true',
                     help='Use debug information to capture symbol sizes '
                     'instead of linker map file.')
  # Used by tests to override path to APK-discovered files.
  group.add_argument('--aux-elf-file', help=argparse.SUPPRESS)
  group.add_argument(
      '--aux-map-file',
      help='Path to linker map to use when --elf-file is provided')

  group = parser.add_argument_group(title='APK options')
  group.add_argument('--mapping-file',
                     help='Proguard .mapping file for deobfuscation.')
  group.add_argument('--resources-pathmap-file',
                     help='.pathmap.txt file that contains a mapping from '
                     'original resource paths to shortened resource paths.')
  group.add_argument('--abi-filter',
                     dest='abi_filters',
                     action='append',
                     help='For apks with multiple ABIs, break down native '
                     'libraries for this ABI. Defaults to 64-bit when both '
                     '32 and 64 bit are present.')

  group = parser.add_argument_group(title='Analysis Options for Pak Files')
  group.add_argument('--pak-info-file',
                     help='This file should contain all ids found in the pak '
                     'files that have been passed in. If not specified, '
                     '${pak_file}.info is assumed.')

  group = parser.add_argument_group(title='Analysis Options (shared)')
  group.add_argument('--source-directory',
                     help='Custom path to the root source directory.')
  group.add_argument('--output-directory',
                     help='Path to the root build directory.')
  group.add_argument('--symbols-dir',
                     default='lib.unstripped',
                     help='Relative path containing unstripped .so files '
                     '(for symbols) w.r.t. the output directory.')
  group.add_argument('--no-string-literals',
                     action='store_true',
                     help=('Do not create symbols for string literals '
                           '(applies to DEX and Native).'))
  if is_top_args:
    group.add_argument('--json-config', help='Path to a supersize.json.')
    group.add_argument('--no-output-directory',
                       action='store_true',
                       help='Do not auto-detect --output-directory.')
    group.add_argument('--check-data-quality',
                       action='store_true',
                       help='Perform sanity checks to ensure there is no '
                       'missing data.')


def AddArguments(parser):
  """Registers the output path, build_config flags and container arguments.

  Args:
    parser: argparse.ArgumentParser to register the arguments on.
  """
  build_config_flags = (
      ('--title', 'Value for the "title" build_config entry.'),
      ('--url', 'Value for the "url" build_config entry.'),
  )
  parser.add_argument('size_file', help='Path to output .size file.')
  for flag, help_text in build_config_flags:
    parser.add_argument(flag, help=help_text)
  _AddContainerArguments(parser, is_top_args=True)


def _IdentifyInputFile(args, on_config_error):
  """Deduces the main input file type from |args.f|, and updates |args|.

  Only the filename is examined, so the file does not need to exist. The
  matching field of |args| is assigned (overwriting any previous value), and
  |args.f| is reset to None.

  A filename whose basename contains no '.' is treated like --elf-file.

  Args:
    args: Parsed arguments namespace. Modified in-place.
    on_config_error: Called with a message when the file type cannot be
        identified, or when no main input was given at all.

  Returns:
    The primary input file.
  """
  input_path = args.f
  if input_path:
    has_extension = '.' in os.path.basename(input_path)
    if input_path.endswith('.minimal.apks'):
      args.minimal_apks_file = input_path
    elif input_path.endswith('.apk'):
      args.apk_file = input_path
    elif input_path.endswith('.so') or not has_extension:
      args.elf_file = input_path
    elif input_path.endswith(('.map', '.map.gz')):
      args.map_file = input_path
    elif input_path.endswith('.pak'):
      args.pak_files.append(input_path)
    elif input_path.endswith('.ssargs'):
      # Fails if trying to nest them, which should never happen.
      args.ssargs_file = input_path
    else:
      on_config_error('Cannot identify file ' + input_path)
    args.f = None

  # Listed in priority order; the first one that is set is the primary input.
  # Not every namespace has an |ssargs_file| field, so it is looked up softly.
  candidates = [
      args.apk_file,
      args.elf_file,
      args.minimal_apks_file,
      vars(args).get('ssargs_file'),
      args.map_file,
  ] + (args.pak_files or [])
  provided = list(filter(None, candidates))
  if not provided:
    on_config_error(
        'Must pass at least one of --apk-file, --minimal-apks-file, '
        '--elf-file, --map-file, --pak-file, --ssargs-file')
  return provided[0]


def ParseSsargs(lines):
  """Parses the contents of an .ssargs file.

  An .ssargs file is a text file that lists multiple containers to be used as
  input to SuperSize-archive. Once '#'-based comments, leading / trailing
  whitespace and empty lines have been removed, every line describes one
  container:
  * Positional argument: |name| for the container.
  * Main input file, given via -f, --apk-file, --elf-file, etc.:
    * May be an absolute path.
    * May be a relative path, in which case supplying the base directory is up
      to the caller.
    * The -f switch must not specify another .ssargs file.
  * For supported switches: See _AddContainerArguments().

  Args:
    lines: An iterator containing lines of .ssargs data.
  Returns:
    A list of arguments, one for each container.
  Raises:
    ValueError: Parse error, including input line number.
  """

  def raise_parse_error(message):
    raise ValueError(message)

  line_parser = argparse.ArgumentParser(add_help=False)
  # Have argparse report problems by raising rather than via its own error().
  line_parser.error = raise_parse_error
  line_parser.add_argument('name')
  _AddContainerArguments(line_parser)

  parsed_containers = []
  try:
    for lineno, line in enumerate(lines, 1):
      tokens = shlex.split(line, comments=True)
      if tokens:  # Nothing remains for blank and comment-only lines.
        parsed_containers.append(line_parser.parse_args(tokens))
  except ValueError as e:
    # Prefix the message with the offending line number.
    e.args = ('Line %d: %s' % (lineno, e.args[0]), )
    raise
  return parsed_containers


def _MakeNativeSpec(json_config, **kwargs):
  """Creates a NativeSpec and fills in the fields derived from |json_config|.

  Args:
    json_config: Config object that is queried, by file basename, for the
        component, generated-dir regex and source path prefix.
    **kwargs: Forwarded to the NativeSpec constructor.

  Returns:
    The populated NativeSpec.
  """
  spec = NativeSpec(**kwargs)

  # The ELF path takes precedence over the map path for the config lookups.
  primary_path = spec.elf_path or spec.map_path
  if primary_path:
    basename = os.path.basename(primary_path)
    spec.component = json_config.ComponentForNativeFile(basename)
    spec.gen_dir_regex = json_config.GenDirRegexForNativeFile(basename)
    spec.source_path_prefix = json_config.SourcePathPrefixForNativeFile(
        basename)

  if not spec.map_path:
    # TODO(crbug.com/40757867): Implement string literal tracking without map
    #     files. nm emits some string literal symbols, but most are missing.
    spec.track_string_literals = False

  return spec


def _ElfIsMainPartition(elf_path):
  """Returns whether the ELF at |elf_path| has a SECTION_PART_END section."""
  return models.SECTION_PART_END in readelf.SectionInfoFromElf(elf_path)


def _DeduceMapPath(elf_path):
  """Returns the path of the linker map that belongs to |elf_path|.

  For a main partition library, the map is the one of the "__combined" ELF
  (e.g. libfoo.so -> libfoo__combined.so.map). Otherwise it is
  "<elf_path>.map". When that file does not exist, the same path with a
  ".gz" suffix is tried.

  Args:
    elf_path: Path to the ELF file.

  Returns:
    Path to the existing .map / .map.gz file, or None if neither exists.
  """
  map_path = elf_path + '.map'
  if _ElfIsMainPartition(elf_path) and elf_path.endswith('.so'):
    # Rewrite only the trailing extension. Using str.replace() here would also
    # rewrite any other ".so" within the path (e.g. a "/.sources/" directory).
    map_path = elf_path[:-len('.so')] + '__combined.so.map'

  if not os.path.exists(map_path):
    map_path += '.gz'
    if not os.path.exists(map_path):
      map_path = None

  if map_path:
    logging.debug('Detected map_path=%s', map_path)
  return map_path


def _CreateNativeSpecs(*, tentative_output_dir, symbols_dir, apk_infolist,
                       elf_path, map_path, abi_filters, auto_abi_filters,
                       track_string_literals, ignore_linker_map, json_config,
                       on_config_error):
  """Creates NativeSpecs for a lone ELF / map file, or for each .so in an APK.

  Args:
    tentative_output_dir: Directory that, joined with |symbols_dir|, is
        searched for an ELF matching each APK library when no ELF path is
        given. May be None.
    symbols_dir: Sub-directory of |tentative_output_dir| holding ELF files.
    apk_infolist: Entries of the APK (objects with .filename and .file_size),
        or a falsy value when no APK is being analyzed.
    elf_path: Explicit ELF path. With an APK, it is consumed by the first
        library that gets broken down (libraries are sorted so that the
        largest ABI-matching one comes first).
    map_path: Explicit linker map path; consumed the same way as |elf_path|.
        Must end with .map or .map.gz.
    abi_filters: List of substrings; APK libraries whose path contains none of
        them are not broken down. Falsy means no filtering.
    auto_abi_filters: If True, |abi_filters| is replaced by the ABI directory
        name of the first library that gets broken down.
    track_string_literals: Forwarded to every NativeSpec.
    ignore_linker_map: If True, linker maps are neither used nor deduced.
    json_config: Forwarded to _MakeNativeSpec().
    on_config_error: Callable invoked with a message on invalid arguments.

  Returns:
    A tuple of (abi_filters, list of NativeSpec). The list is empty when there
    is no APK and neither |elf_path| nor |map_path| is set.
  """
  if ignore_linker_map:
    map_path = None
  elif (map_path and not map_path.endswith('.map')
        and not map_path.endswith('.map.gz')):
    on_config_error('Expected --map-file to end with .map or .map.gz')
  elif elf_path and not map_path:
    map_path = _DeduceMapPath(elf_path)

  ret = []
  # if --elf-path or --map-path (rather than --aux-elf-path, --aux-map-path):
  if not apk_infolist:
    if map_path or elf_path:
      combined_elf_path = None
      # A combined map may be given without any ELF; there is then no ELF path
      # to derive the combined ELF path from.
      if elf_path and map_path and '__combined.so' in map_path:
        combined_elf_path = elf_path[:-3] + '__combined.so'

      ret.append(
          _MakeNativeSpec(json_config,
                          apk_so_path=None,
                          map_path=map_path,
                          elf_path=elf_path,
                          combined_elf_path=combined_elf_path,
                          track_string_literals=track_string_literals))
    return abi_filters, ret

  lib_infos = [
      f for f in apk_infolist if f.filename.endswith('.so') and f.file_size > 0
  ]

  # Sort so elf_path/map_path applies largest non-filtered library.
  matches_abi = lambda n: not abi_filters or any(f in n for f in abi_filters)
  lib_infos.sort(key=lambda x: (not matches_abi(x.filename), -x.file_size))

  for lib_info in lib_infos:
    apk_so_path = lib_info.filename
    cur_elf_path = None
    cur_map_path = None
    if not matches_abi(apk_so_path):
      logging.debug('Not breaking down %s: secondary ABI', apk_so_path)
    elif apk_so_path.endswith('_partition.so'):
      # TODO(agrieve): Support symbol breakdowns for partitions (they exist in
      #     the __combined .map file. Debug information (nm output) is shared
      #     with base partition.
      logging.debug('Not breaking down %s: partitioned library', apk_so_path)
    else:
      if elf_path:
        # Consume --aux-elf-file for the largest matching binary.
        cur_elf_path = elf_path
        elf_path = None
      elif tentative_output_dir:
        # TODO(crbug.com/40229168): Remove handling the legacy library prefix
        # 'crazy.' when there is no longer interest in size comparisons for
        # these pre-N APKs.
        cur_elf_path = os.path.join(
            tentative_output_dir, symbols_dir,
            posixpath.basename(apk_so_path.replace('crazy.', '')))
        if os.path.exists(cur_elf_path):
          logging.debug('Detected elf_path=%s', cur_elf_path)
        else:
          # TODO(agrieve): Not able to find libcrashpad_handler_trampoline.so.
          logging.debug('Not breaking down %s because file does not exist: %s',
                        apk_so_path, cur_elf_path)
          cur_elf_path = None

      if map_path:
        # Consume --aux-map-file for first non-skipped elf.
        cur_map_path = map_path
        map_path = None
      elif cur_elf_path and not ignore_linker_map:
        cur_map_path = _DeduceMapPath(cur_elf_path)

      if auto_abi_filters:
        abi_filters = [posixpath.basename(posixpath.dirname(apk_so_path))]
        logging.info('Detected --abi-filter %s', abi_filters[0])
        auto_abi_filters = False

    combined_elf_path = None
    # The map may have been supplied explicitly while no ELF was found for
    # this library, so only derive the combined path when an ELF is known.
    if (cur_elf_path and cur_map_path
        and '__combined.so' in cur_map_path):
      combined_elf_path = cur_elf_path[:-3] + '__combined.so'

    ret.append(
        _MakeNativeSpec(json_config,
                        apk_so_path=apk_so_path,
                        map_path=cur_map_path,
                        elf_path=cur_elf_path,
                        combined_elf_path=combined_elf_path,
                        track_string_literals=track_string_literals))

  return abi_filters, ret


# Cache to prevent excess log messages.
@functools.lru_cache
def _DeduceMappingPath(mapping_path, apk_prefix):
  """Returns |mapping_path|, or a detected "<apk_prefix>.mapping" file.

  Args:
    mapping_path: Explicit mapping file path; returned as-is when truthy.
    apk_prefix: Prefix to which '.mapping' is appended to form the candidate
        path. No detection is attempted when falsy.

  Returns:
    |mapping_path| if it is set or no candidate file exists (a warning is
    logged in the latter case); otherwise the detected candidate path.
  """
  # Nothing to deduce when a path was given or there is no prefix to go on.
  if mapping_path or not apk_prefix:
    return mapping_path

  candidate = apk_prefix + '.mapping'
  if not os.path.exists(candidate):
    logging.warning('Could not find proguard mapping file at %s', candidate)
    return mapping_path

  logging.debug('Detected --mapping-file=%s', candidate)
  return candidate


# Cache to prevent excess log messages.
@functools.lru_cache
def _DeducePathmapPath(resources_pathmap_path, apk_prefix):
  """Returns |resources_pathmap_path|, or a detected "<apk_prefix>.pathmap.txt".

  Args:
    resources_pathmap_path: Explicit pathmap file; returned as-is when truthy.
    apk_prefix: Prefix to which '.pathmap.txt' is appended to form the
        candidate path. No detection is attempted when falsy.

  Returns:
    |resources_pathmap_path| if it is set or no candidate file exists;
    otherwise the detected candidate path.
  """
  if resources_pathmap_path or not apk_prefix:
    return resources_pathmap_path

  candidate = apk_prefix + '.pathmap.txt'
  # This could be pointing to a stale pathmap file if path shortening was
  # previously enabled but is disabled for the current build. However, since
  # current apk/aab will have unshortened paths, looking those paths up in
  # the stale pathmap which is keyed by shortened paths would not find any
  # mapping and thus should not cause any issues.
  if os.path.exists(candidate):
    logging.debug('Detected --resources-pathmap-file=%s', candidate)
    return candidate

  # Path shortening is optional, so do not warn for missing file.
  return resources_pathmap_path


def _ReadMultipleArgsFromStream(lines, base_dir, err_prefix, on_config_error):
  """Parses .ssargs |lines| and joins file arguments onto |base_dir|.

  Args:
    lines: Iterable of lines handed to ParseSsargs().
    base_dir: Directory that string-valued file arguments are joined to.
    err_prefix: Text prepended to parse error messages.
    on_config_error: Callable invoked with the message when parsing fails.

  Returns:
    The list returned by ParseSsargs(), with file arguments rewritten in place.
  """
  try:
    parsed = ParseSsargs(lines)
  except ValueError as e:
    on_config_error('%s: %s' % (err_prefix, e.args[0]))

  for sub_args in parsed:
    fields = vars(sub_args)
    # Snapshot the items because values are reassigned during the walk.
    for key, value in list(fields.items()):
      is_file_arg = key.endswith('_file') or key == 'f'
      if is_file_arg and isinstance(value, str):
        # Translate file arguments to be relative to |base_dir|.
        fields[key] = os.path.join(base_dir, value)
  return parsed


def _ReadMultipleArgsFromFile(ssargs_file, on_config_error):
  """Reads |ssargs_file| and parses it via _ReadMultipleArgsFromStream().

  Args:
    ssargs_file: Path of the .ssargs file to read.
    on_config_error: Callable forwarded for reporting parse errors.

  Returns:
    Whatever _ReadMultipleArgsFromStream() returns for the file's lines.
  """
  with open(ssargs_file, 'r') as fh:
    ssargs_lines = fh.readlines()

  # File arguments are resolved against the directory containing the .ssargs
  # file, to ensure consistent behavior wherever SuperSize-archive runs.
  ssargs_dir = os.path.dirname(os.path.abspath(ssargs_file))
  return _ReadMultipleArgsFromStream(ssargs_lines, ssargs_dir,
                                     'In file ' + ssargs_file,
                                     on_config_error)


# Both |top_args| and |sub_args| may be modified.
def _CreateContainerSpecs(apk_file_manager,
                          top_args,
                          sub_args,
                          json_config,
                          base_container_name,
                          on_config_error,
                          split_name=None):
  """Creates the ContainerSpecs for one input (or one split of a bundle).

  Args:
    apk_file_manager: Provides SplitPath() and InfoList() for APK access.
    top_args: Top-level arguments. |abi_filters| is overwritten when the ABI
        is auto-detected.
    sub_args: Per-container arguments. |source_directory| and
        |output_directory| are defaulted from |top_args|.
    json_config: Config object queried for components and APK path defaults.
    base_container_name: Name of the container; a split suffix is appended
        when |split_name| is set.
    on_config_error: Callable invoked with a message on invalid configuration.
    split_name: Name of the split within |sub_args.minimal_apks_file|, or None
        when not processing a split.

  Returns:
    A list of ContainerSpec. The first entry is the base container. When an
    APK is being analyzed, one extra entry follows per NativeSpec; otherwise
    the (at most one) NativeSpec is attached to the base container.
  """
  sub_args.source_directory = (sub_args.source_directory
                               or top_args.source_directory)
  sub_args.output_directory = (sub_args.output_directory
                               or top_args.output_directory)
  analyze_native = not (sub_args.java_only or sub_args.no_native
                        or top_args.java_only or top_args.no_native)
  analyze_dex = not (sub_args.native_only or sub_args.no_java
                     or top_args.native_only or top_args.no_java)
  only_java_or_native = (sub_args.native_only or top_args.native_only
                         or sub_args.java_only or top_args.java_only)
  analyze_pak = not only_java_or_native and bool(sub_args.output_directory)
  analyze_res = not only_java_or_native and bool(sub_args.output_directory)

  if split_name:
    apk_path = apk_file_manager.SplitPath(sub_args.minimal_apks_file,
                                          split_name)
    base_container_name = f'{base_container_name}/{split_name}.apk'
    # Make on-demand a part of the name so that:
    # * It's obvious from the name which DFMs are on-demand.
    # * Diffs that change an on-demand status show as adds/removes.
    if _IsOnDemand(apk_path):
      base_container_name += '?'
  else:
    apk_path = sub_args.apk_file

  apk_prefix = sub_args.minimal_apks_file or sub_args.apk_file
  if apk_prefix:
    # Allow either .minimal.apks or just .apks.
    apk_prefix = apk_prefix.replace('.minimal.apks', '.aab')
    apk_prefix = apk_prefix.replace('.apks', '.aab')

  mapping_path = None
  if analyze_dex:
    mapping_path = _DeduceMappingPath(sub_args.mapping_file, apk_prefix)
  resources_pathmap_path = _DeducePathmapPath(sub_args.resources_pathmap_file,
                                              apk_prefix)
  apk_spec = None
  if apk_prefix:
    apk_spec = ApkSpec(apk_path=apk_path,
                       minimal_apks_path=sub_args.minimal_apks_file,
                       mapping_path=mapping_path,
                       resources_pathmap_path=resources_pathmap_path,
                       split_name=split_name)
    size_info_prefix = os.path.join(top_args.output_directory, 'size-info',
                                    os.path.basename(apk_prefix))
    if analyze_res:
      apk_spec.size_info_prefix = size_info_prefix
      res_info_path = apk_spec.size_info_prefix + '.res.info'
      if not os.path.exists(res_info_path):
        on_config_error('File not found: ' + res_info_path)
    apk_spec.analyze_dex = analyze_dex
    apk_spec.track_string_literals = not (top_args.no_string_literals
                                          or sub_args.no_string_literals)
    apk_spec.default_component = json_config.DefaultComponentForSplit(
        split_name)
    apk_spec.path_defaults = json_config.ApkPathDefaults()

  pak_spec = None
  apk_pak_paths = None
  apk_infolist = None
  if apk_spec:
    apk_infolist = apk_file_manager.InfoList(apk_path)
    apk_pak_paths = [
        f.filename for f in apk_infolist
        if archive_util.RemoveAssetSuffix(f.filename).endswith('.pak')
    ]

  if analyze_pak and (apk_pak_paths or sub_args.pak_files):
    if apk_spec:
      pak_info_path = size_info_prefix + '.pak.info'
    else:
      pak_info_path = sub_args.pak_info_file
    if pak_info_path and not os.path.exists(pak_info_path):
      # Report the path that was actually probed.
      on_config_error(f'File not found: {pak_info_path}. '
                      'Ensure is_official_build=true, or use --native-only')

    pak_spec = PakSpec(pak_paths=sub_args.pak_files,
                       pak_info_path=pak_info_path,
                       apk_pak_paths=apk_pak_paths)

  if analyze_native:
    # Allow top-level --abi-filter to override values set in .ssargs.
    abi_filters = top_args.abi_filters or sub_args.abi_filters
    aux_elf_file = sub_args.aux_elf_file
    aux_map_file = sub_args.aux_map_file
    # The aux files are only used for the base split (or when there are no
    # splits at all).
    if split_name not in (None, 'base'):
      aux_elf_file = None
      aux_map_file = None

    auto_abi_filters = not abi_filters and split_name == 'base'
    abi_filters, native_specs = _CreateNativeSpecs(
        tentative_output_dir=top_args.output_directory,
        symbols_dir=sub_args.symbols_dir,
        apk_infolist=apk_infolist,
        elf_path=sub_args.elf_file or aux_elf_file,
        map_path=sub_args.map_file or aux_map_file,
        abi_filters=abi_filters,
        auto_abi_filters=auto_abi_filters,
        track_string_literals=not (top_args.no_string_literals
                                   or sub_args.no_string_literals),
        ignore_linker_map=(top_args.ignore_linker_map
                           or sub_args.ignore_linker_map),
        json_config=json_config,
        on_config_error=on_config_error)

    # For app bundles, use a consistent ABI for all splits.
    if auto_abi_filters:
      top_args.abi_filters = abi_filters
  else:
    native_specs = []

  ret = [
      ContainerSpec(container_name=base_container_name,
                    apk_spec=apk_spec,
                    pak_spec=pak_spec,
                    native_spec=None,
                    source_directory=sub_args.source_directory,
                    output_directory=sub_args.output_directory)
  ]
  if apk_spec is None:
    # Special case for when pointed at a single ELF, use just one container.
    assert len(native_specs) <= 1
    ret[0].native_spec = native_specs[0] if native_specs else None
  else:
    # Files that get their own breakdown are excluded from the APK's generic
    # file listing.
    apk_spec.ignore_apk_paths.update(s.apk_so_path for s in native_specs)
    if pak_spec and pak_spec.apk_pak_paths:
      apk_spec.ignore_apk_paths.update(pak_spec.apk_pak_paths)
    if apk_spec.analyze_dex:
      apk_spec.ignore_apk_paths.update(i.filename for i in apk_infolist
                                       if i.filename.endswith('.dex'))
    apk_spec.ignore_apk_paths.add(apk.RESOURCES_ARSC_FILE)

    for native_spec in native_specs:
      so_name = posixpath.basename(native_spec.apk_so_path)
      abi = posixpath.basename(posixpath.dirname(native_spec.apk_so_path))
      container_name = f'{base_container_name}/{so_name} ({abi})'
      # Use same apk_spec so that all containers for the apk_spec can be found.
      ret.append(
          ContainerSpec(container_name=container_name,
                        apk_spec=apk_spec,
                        pak_spec=None,
                        native_spec=native_spec,
                        source_directory=sub_args.source_directory,
                        output_directory=sub_args.output_directory))
  return ret


def _IsOnDemand(apk_path):
  """Returns whether the manifest marks |apk_path| as an on-demand feature.

  Args:
    apk_path: Path of the APK whose AndroidManifest.xml is dumped with aapt2.

  Returns:
    True only when the manifest dump has isFeatureSplit=true and also either
    an onDemand=true attribute or the text 'on-demand'.
  """
  # Dump the manifest as an XML tree and inspect its attributes.
  aapt2_cmd = [
      path_util.GetAapt2Path(), 'dump', 'xmltree', '--file',
      'AndroidManifest.xml', apk_path
  ]
  manifest_dump = subprocess.check_output(aapt2_cmd).decode('ascii')

  def attr_is_true(namespace, name):
    # A: http://schemas.android.com/apk/res/android:isFeatureSplit(0x...)=true
    # A: http://schemas.android.com/apk/distribution:onDemand=true
    pattern = f'A: (?:.*?/{namespace}:)?{name}' + r'(?:\(.*?\))?=(\w+)'
    match = re.search(pattern, manifest_dump)
    return match and match.group(1) == 'true'

  is_feature_split = attr_is_true('android', 'isFeatureSplit')
  # Can use <dist:on-demand>, or <module dist:onDemand="true">.
  on_demand = (attr_is_true('distribution', 'onDemand')
               or 'on-demand' in manifest_dump)
  return bool(on_demand and is_feature_split)


def _CreateAllContainerSpecs(apk_file_manager, top_args, json_config,
                             on_config_error):
  """Creates ContainerSpecs for every container described by |top_args|.

  Finalizes |top_args.output_directory| and |top_args.source_directory| in
  place, expands an .ssargs file into per-container arguments when given, and
  creates specs for each container (one set per split for .minimal.apks).

  Args:
    apk_file_manager: Provides ExtractSplits() and APK access for the specs.
    top_args: Top-level arguments; modified in place.
    json_config: Config object forwarded to _CreateContainerSpecs().
    on_config_error: Callable invoked with a message on invalid configuration.

  Returns:
    A list of ContainerSpec with unique container names.

  Raises:
    Exception: If an input file does not exist.
  """
  main_file = _IdentifyInputFile(top_args, on_config_error)
  if top_args.no_output_directory:
    top_args.output_directory = None
  else:
    output_directory_finder = path_util.OutputDirectoryFinder(
        value=top_args.output_directory,
        any_path_within_output_directory=main_file)
    top_args.output_directory = output_directory_finder.Finalized()

  if not top_args.source_directory:
    top_args.source_directory = path_util.GetSrcRootFromOutputDirectory(
        top_args.output_directory)
    assert top_args.source_directory

  if top_args.ssargs_file:
    sub_args_list = _ReadMultipleArgsFromFile(top_args.ssargs_file,
                                              on_config_error)
  else:
    sub_args_list = [top_args]

  # Do a quick first pass to ensure inputs have been built.
  for sub_args in sub_args_list:
    main_file = _IdentifyInputFile(sub_args, on_config_error)
    if not os.path.exists(main_file):
      raise Exception('Input does not exist: ' + main_file)

  # Each element in |sub_args_list| specifies a container.
  ret = []
  for sub_args in sub_args_list:
    main_file = _IdentifyInputFile(sub_args, on_config_error)
    # Only arguments parsed from .ssargs carry an explicit |name|.
    if hasattr(sub_args, 'name'):
      container_name = sub_args.name
    else:
      container_name = os.path.basename(main_file)
    if set(container_name) & set('<>?'):
      # No argparse parser is in scope here; report through the callback.
      on_config_error('Container name cannot have characters in "<>?"')

    if sub_args.minimal_apks_file:
      split_names = apk_file_manager.ExtractSplits(sub_args.minimal_apks_file)
      for split_name in split_names:
        ret += _CreateContainerSpecs(apk_file_manager,
                                     top_args,
                                     sub_args,
                                     json_config,
                                     container_name,
                                     on_config_error,
                                     split_name=split_name)
    else:
      ret += _CreateContainerSpecs(apk_file_manager, top_args, sub_args,
                                   json_config, container_name, on_config_error)
  all_names = [c.container_name for c in ret]
  assert len(set(all_names)) == len(all_names), \
      'Found duplicate container names: ' + '\n'.join(sorted(all_names))

  return ret


def _FilterContainerSpecs(container_specs, container_re=None):
  """Returns the specs whose container name matches |container_re|.

  Args:
    container_specs: Iterable of objects with a |container_name| attribute.
    container_re: Optional compiled regex, applied with search(). When falsy,
        every spec is kept.

  Returns:
    A new list with the specs that were kept, in their original order.

  Raises:
    ValueError: If a container name occurs more than once (checked for all
        specs, including ones that are filtered out).
  """
  names_so_far = set()
  kept = []
  for spec in container_specs:
    name = spec.container_name
    if name in names_so_far:
      raise ValueError('Duplicate container name: {}'.format(name))
    names_so_far.add(name)

    if not container_re or container_re.search(name):
      kept.append(spec)
    else:
      logging.info('Skipping filtered container %s', name)
  return kept


def CreateSizeInfo(container_specs, build_config, json_config,
                   apk_file_manager):
  """Collects symbols for all |container_specs| into a models.SizeInfo.

  Args:
    container_specs: List of ContainerSpec. Sorted in place.
    build_config: Passed through to models.SizeInfo.
    json_config: Supplies ComponentOverrides() for symbol creation.
    apk_file_manager: Forwarded to _CreateContainerSymbols().

  Returns:
    A models.SizeInfo holding one container per spec and all of their symbols.
  """

  def sort_key(spec):
    # Native containers come first to ensure pak_id_map is populated before
    # any pak_spec is encountered.
    native_spec = spec.native_spec
    if not native_spec:
      rank = 3
    # Do the most complicated container first, since its most likely to fail.
    elif native_spec.algorithm == 'linker_map':
      rank = 0
    elif native_spec.algorithm == 'dwarf':
      rank = 1
    else:
      rank = 2
    return (rank, spec.container_name)

  container_specs.sort(key=sort_key)

  # Running ApkAnalyzer concurrently saves ~30 seconds for Monochrome.apks.
  apk_analyzer_results = {}
  dex_containers = []
  for spec in container_specs:
    if not spec.native_spec and spec.apk_spec and spec.apk_spec.analyze_dex:
      dex_containers.append(spec)
  if dex_containers:
    logging.info('Kicking of ApkAnalyzer for %d .apk files',
                 len(dex_containers))
    for spec in dex_containers:
      pending = apkanalyzer.RunApkAnalyzerAsync(spec.apk_spec.apk_path,
                                                spec.apk_spec.mapping_path)
      apk_analyzer_results[spec.container_name] = pending

  # Ninja files are parsed only for native containers that have both an output
  # directory and a linker map.
  ninja_source_mapper = None
  ninja_containers = [
      spec for spec in container_specs if spec.native_spec
      and spec.output_directory and spec.native_spec.map_path
  ]
  if ninja_containers:
    elf_paths_to_find_inputs_for = []
    for spec in ninja_containers:
      elf_path = (spec.native_spec.combined_elf_path
                  or spec.native_spec.elf_path)
      if elf_path:
        elf_paths_to_find_inputs_for.append(elf_path)
    ninja_source_mapper = native.ParseNinjaFiles(
        ninja_containers[0].output_directory, elf_paths_to_find_inputs_for)

  pak_id_map = pakfile.PakIdMap()
  dex_deobfuscator_cache = dex_deobfuscate.CachedDexDeobfuscators()
  raw_symbols_list = []
  for spec in container_specs:
    raw_symbols = _CreateContainerSymbols(spec, apk_file_manager,
                                          apk_analyzer_results,
                                          ninja_source_mapper, pak_id_map,
                                          json_config.ComponentOverrides(),
                                          dex_deobfuscator_cache)
    assert raw_symbols, f'{spec.container_name} had no symbols.'
    raw_symbols_list.append(raw_symbols)

  # Normalize names before sorting.
  logging.info('Normalizing symbol names')
  for raw_symbols in raw_symbols_list:
    _NormalizeNames(raw_symbols)

  # Sorting must happen after normalization.
  logging.info('Sorting symbols')
  for raw_symbols in raw_symbols_list:
    file_format.SortSymbols(raw_symbols)

  logging.debug('Accumulating symbols')
  # Containers should always have at least one symbol.
  container_list = []
  all_raw_symbols = []
  for raw_symbols in raw_symbols_list:
    container_list.append(raw_symbols[0].container)
    all_raw_symbols.extend(raw_symbols)

  file_format.CalculatePadding(all_raw_symbols)

  return models.SizeInfo(build_config, container_list, all_raw_symbols)


def Run(top_args, on_config_error):
  """Runs the archive command: builds a SizeInfo and saves it to a .size file.

  Args:
    top_args: Parsed top-level arguments.
    on_config_error: Callable invoked with a message on invalid configuration.

  Raises:
    data_quality.QualityCheckError: If --check-data-quality is set and the
        command took longer than 10 minutes.
  """
  path_util.CheckLlvmToolsAvailable()

  size_path = top_args.size_file
  if not size_path.endswith('.size'):
    on_config_error('size_file must end with .size')
  # The start time is only needed for the data quality duration check.
  if top_args.check_data_quality:
    start_time = time.time()

  container_re = None
  if top_args.container_filter:
    try:
      container_re = re.compile(top_args.container_filter)
    except Exception as e:
      on_config_error(f'Bad --container-filter input: {e}')

  json_config_path = top_args.json_config
  if not json_config_path:
    json_config_path = path_util.GetDefaultJsonConfigPath()
    logging.info('Using --json-config=%s', json_config_path)
  json_config = json_config_parser.Parse(json_config_path, on_config_error)

  with zip_util.ApkFileManager() as apk_file_manager:
    all_specs = _CreateAllContainerSpecs(apk_file_manager, top_args,
                                         json_config, on_config_error)
    container_specs = _FilterContainerSpecs(all_specs, container_re)

    build_config = CreateBuildConfig(top_args.output_directory,
                                     top_args.source_directory,
                                     url=top_args.url,
                                     title=top_args.title)
    size_info = CreateSizeInfo(container_specs, build_config, json_config,
                               apk_file_manager)

  if logging.getLogger().isEnabledFor(logging.DEBUG):
    for line in data_quality.DescribeSizeInfoCoverage(size_info):
      logging.debug(line)
  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  for container in size_info.containers:
    metadata_lines = describe.DescribeDict(container.metadata)
    logging.info('Recording metadata: \n  %s', '\n  '.join(metadata_lines))

  logging.info('Saving result to %s', size_path)
  file_format.SaveSizeInfo(size_info, size_path)
  size_in_mb = os.path.getsize(size_path) / 1024.0 / 1024.0
  logging.info('Done. File size is %.2fMiB.', size_in_mb)

  if not top_args.check_data_quality:
    return

  logging.info('Checking data quality')
  data_quality.CheckDataQuality(size_info, not top_args.no_string_literals)
  duration = (time.time() - start_time) / 60
  if duration > 10:
    raise data_quality.QualityCheckError(
        'Command should not take longer than 10 minutes.'
        ' Took {:.1f} minutes.'.format(duration))