1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230
|
# Copyright 2017 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Main Python API for analyzing binary size."""
import argparse
import collections
import dataclasses
import functools
import logging
import os
import posixpath
import re
import shlex
import subprocess
import time
import zipfile
import apk
import apkanalyzer
import archive_util
import data_quality
import describe
import dex_deobfuscate
import dir_metadata
import file_format
import function_signature
import json_config_parser
import models
import native
import pakfile
import parallel
import path_util
import readelf
import zip_util
@dataclasses.dataclass
class NativeSpec:
  # Describes the native-code inputs of a container.
  # At least one of apk_so_path, map_path, elf_path must be non-None.

  # Location of the .so file inside the .apk. Non-None only when apk_spec is.
  apk_so_path: str = None
  # Linker map file, when one is available.
  map_path: str = None
  # Unstripped ELF file, when one is available.
  elf_path: str = None
  # Unstripped ELF file from before it was split into partitions, when one is
  # available.
  combined_elf_path: str = None
  # When true, a symbol is created for each string literal.
  track_string_literals: bool = True
  # Component assigned to all symbols.
  component: str = None
  # Regular expression that matches generated files.
  gen_dir_regex: str = None
  # Prefix applied to the source_path of all symbols.
  source_path_prefix: str = None

  @property
  def algorithm(self):
    """Returns 'linker_map', 'dwarf' or 'sections' based on the set paths."""
    # A linker map takes precedence over an ELF file; with neither, only
    # section-level information is available.
    if not self.map_path:
      return 'dwarf' if self.elf_path else 'sections'
    return 'linker_map'
@dataclasses.dataclass
class PakSpec:
  # Describes the .pak inputs of a container.
  # One of pak_paths or apk_pak_paths must be non-None.

  # Pak paths handed to pakfile.CreatePakSymbolsFromFiles() (used when there is
  # no apk_spec).
  pak_paths: list = None
  # Pak paths handed to pakfile.CreatePakSymbolsFromApk() together with the
  # .apk path (used when there is an apk_spec).
  apk_pak_paths: list = None
  # Value of --pak-info-file; handed to both pakfile helpers.
  # NOTE(review): presumably may be None when the flag is omitted - confirm.
  pak_info_path: str = None
@dataclasses.dataclass
class ApkSpec:
  # Describes one .apk (or one split of an .apks file) to analyze.
  # |apk_path| is the only field without a default.

  # Path to the .apk file. Never None.
  # This is a temp file when .apks is being analyzed.
  apk_path: str
  # Path to .minimal.apks (when analyzing bundles).
  minimal_apks_path: str = None
  # Proguard mapping path.
  mapping_path: str = None
  # Path to the .pathmap.txt file for the apk. Used to deobfuscate res/ files.
  resources_pathmap_path: str = None
  # Name of the apk split when .apks is being analyzed.
  split_name: str = None
  # Path such as: out/Release/size-info/BaseName
  size_info_prefix: str = None
  # Whether to break down classes.dex.
  analyze_dex: bool = True
  # Whether to create symbols for each string literal.
  track_string_literals: bool = True
  # Dict of apk_path -> source_path, provided by json config.
  path_defaults: dict = None
  # Component to use for symbols when not specified by DIR_METADATA, provided by
  # json config.
  default_component: str = ''
  # Paths to not create .other symbols for.
  # A default_factory is used so that instances do not share one set.
  ignore_apk_paths: set = dataclasses.field(default_factory=set)
@dataclasses.dataclass
class ContainerSpec:
  # Bundles everything _CreateContainerSymbols() needs for one container.
  # All fields are required by the constructor; apk_spec, pak_spec and
  # native_spec are each tested for truthiness by the code that consumes them,
  # so they may be None.

  # Name given to the resulting models.Container.
  container_name: str
  # APK inputs (falsy when the container has none).
  apk_spec: ApkSpec
  # Pak inputs (falsy when the container has none).
  pak_spec: PakSpec
  # Native inputs (falsy when the container has none).
  native_spec: NativeSpec
  # Root source directory; handed to dir_metadata.PopulateComponents().
  source_directory: str
  # Root build directory; metadata paths are made relative to it when set.
  output_directory: str
def _NormalizeNames(raw_symbols):
  """Ensures that all names are formatted in a useful way.

  This includes:
    - Deriving |name| and |template_name| from |full_name|.
    - Stripping of return types (for functions).
    - Moving "vtable for" and the like to be suffixes rather than prefixes.

  Symbols are modified in place (|full_name|, |template_name|, |name| and
  |flags|); nothing is returned.

  Args:
    raw_symbols: Iterable of symbols to normalize.
  """
  # Prefixes such as "vtable for" that were moved to suffixes; collected only
  # for the debug log at the end.
  found_prefixes = set()
  for symbol in raw_symbols:
    full_name = symbol.full_name

    # See comment in _CalculatePadding() about when this can happen. Don't
    # process names for non-native sections.
    if symbol.IsPak():
      # full_name: "about_ui_resources.grdp: IDR_ABOUT_UI_CREDITS_HTML".
      # Keep only the text after the last space. rindex() raises ValueError
      # if there is no space.
      space_idx = full_name.rindex(' ')
      name = full_name[space_idx + 1:]
      symbol.template_name = name
      symbol.name = name
    elif (full_name.startswith('**') or symbol.IsOverhead()
          or symbol.IsOther()):
      # These names are used verbatim.
      symbol.template_name = full_name
      symbol.name = full_name
    elif symbol.IsStringLiteral():  # Handles native and DEX strings.
      symbol.full_name = full_name
      symbol.template_name = full_name
      symbol.name = full_name
    elif symbol.IsDex():
      symbol.full_name, symbol.template_name, symbol.name = (
          function_signature.ParseJava(full_name))
    elif symbol.IsNative():
      # Remove [clone] suffix, and set flag accordingly.
      # Search from left-to-right, as multiple [clone]s can exist.
      # Example name suffixes:
      #     [clone .part.322]  # GCC
      #     [clone .isra.322]  # GCC
      #     [clone .constprop.1064]  # GCC
      #     [clone .11064]  # clang
      # http://unix.stackexchange.com/questions/223013/function-symbol-gets-part-suffix-after-compilation
      idx = full_name.find(' [clone ')
      if idx != -1:
        full_name = full_name[:idx]
        symbol.flags |= models.FLAG_CLONE

      # Clones for C symbols.
      if symbol.section == 't':
        idx = full_name.rfind('.')
        if idx != -1 and full_name[idx + 1:].isdigit():
          new_name = full_name[:idx]
          # Generated symbols that end with .123 but are not clones.
          # Find these via:
          # size_info.symbols.WhereInSection('t').WhereIsGroup().SortedByCount()
          if new_name not in ('__tcf_0', 'startup'):
            full_name = new_name
            symbol.flags |= models.FLAG_CLONE
            # Remove .part / .isra / .constprop.
            idx = full_name.rfind('.', 0, idx)
            if idx != -1:
              full_name = full_name[:idx]

      # E.g.: vtable for FOO
      # Only the first 30 characters are searched for the separator.
      idx = full_name.find(' for ', 0, 30)
      if idx != -1:
        found_prefixes.add(full_name[:idx + 4])
        full_name = '{} [{}]'.format(full_name[idx + 5:], full_name[:idx])

      # E.g.: virtual thunk to FOO
      idx = full_name.find(' to ', 0, 30)
      if idx != -1:
        found_prefixes.add(full_name[:idx + 3])
        full_name = '{} [{}]'.format(full_name[idx + 4:], full_name[:idx])

      # Strip out return type, and split out name, template_name.
      # Function parsing also applies to non-text symbols.
      # E.g. Function statics.
      symbol.full_name, symbol.template_name, symbol.name = (
          function_signature.Parse(full_name))

      # Remove anonymous namespaces (they just harm clustering).
      symbol.template_name = symbol.template_name.replace(
          '(anonymous namespace)::', '')
      symbol.full_name = symbol.full_name.replace(
          '(anonymous namespace)::', '')
      non_anonymous_name = symbol.name.replace('(anonymous namespace)::', '')
      # The flag is set only when |name| itself contained the namespace.
      if symbol.name != non_anonymous_name:
        symbol.flags |= models.FLAG_ANONYMOUS
        symbol.name = non_anonymous_name

    # Allow using "is" to compare names (and should help with RAM). This applies
    # to all symbols.
    function_signature.InternSameNames(symbol)

  logging.debug('Found name prefixes of: %r', found_prefixes)
def LoadAndPostProcessSizeInfo(path, file_obj=None):
  """Loads a SizeInfo from |path| and normalizes its symbol names.

  Args:
    path: Path handed to file_format.LoadSizeInfo().
    file_obj: Optional object forwarded to file_format.LoadSizeInfo().

  Returns:
    The loaded SizeInfo, with names normalized in place.
  """
  logging.debug('Loading results from: %s', path)
  loaded = file_format.LoadSizeInfo(path, file_obj=file_obj)

  logging.info('Normalizing symbol names')
  _NormalizeNames(loaded.raw_symbols)

  symbol_count = len(loaded.raw_symbols)
  logging.info('Loaded %d symbols', symbol_count)
  return loaded
def LoadAndPostProcessDeltaSizeInfo(path, file_obj=None):
  """Loads the before/after SizeInfos from |path| and normalizes their names.

  Args:
    path: Path handed to file_format.LoadDeltaSizeInfo().
    file_obj: Optional object forwarded to file_format.LoadDeltaSizeInfo().

  Returns:
    A (before_size_info, after_size_info) tuple.
  """
  logging.debug('Loading results from: %s', path)
  # The loader returns four values; only the first two are used here.
  before, after, _unused_a, _unused_b = file_format.LoadDeltaSizeInfo(
      path, file_obj=file_obj)

  logging.info('Normalizing symbol names')
  for size_info in (before, after):
    _NormalizeNames(size_info.raw_symbols)

  before_count = len(before.raw_symbols)
  after_count = len(after.raw_symbols)
  logging.info('Loaded %d + %d symbols', before_count, after_count)
  return before, after
def CreateBuildConfig(output_directory, source_directory, url=None, title=None):
  """Builds the build_config dict.

  Args:
    output_directory: Build directory containing args.gn. When falsy, no GN
        args and no output directory are recorded.
    source_directory: Directory used to detect the git revision and as the
        base for the recorded (relative) output directory.
    url: Optional value for the "url" entry.
    title: Optional value for the "title" entry.

  Returns:
    A dict keyed by models.BUILD_CONFIG_* constants.
  """
  logging.debug('Constructing build_config')
  config = {}

  if output_directory:
    args_gn_path = os.path.join(output_directory, 'args.gn')
    config[models.BUILD_CONFIG_GN_ARGS] = _ParseGnArgs(args_gn_path)
    config[models.BUILD_CONFIG_OUT_DIRECTORY] = os.path.relpath(
        output_directory, start=source_directory)

  revision = _DetectGitRevision(source_directory)
  if revision:
    config[models.BUILD_CONFIG_GIT_REVISION] = revision

  # Entries that are recorded only when explicitly provided.
  optional_entries = (
      (models.BUILD_CONFIG_URL, url),
      (models.BUILD_CONFIG_TITLE, title),
  )
  for key, value in optional_entries:
    if value is not None:
      config[key] = value

  return config
def _CreateMetadata(container_spec, elf_info):
logging.debug('Constructing metadata')
metadata = {}
apk_spec = container_spec.apk_spec
native_spec = container_spec.native_spec
output_directory = container_spec.output_directory
# Ensure all paths are relative to output directory to make them hermetic.
if output_directory:
shorten_path = lambda path: os.path.relpath(path, output_directory)
else:
# If output directory is unavailable, just store basenames.
shorten_path = os.path.basename
if apk_spec:
apk_metadata = apk.CreateMetadata(apk_spec=apk_spec,
include_file_details=not native_spec,
shorten_path=shorten_path)
assert not (metadata.keys() & apk_metadata.keys())
metadata.update(apk_metadata)
if native_spec:
native_metadata = native.CreateMetadata(native_spec=native_spec,
elf_info=elf_info,
shorten_path=shorten_path)
assert not (metadata.keys() & native_metadata.keys())
metadata.update(native_metadata)
logging.debug('Constructing metadata (done)')
return metadata
def _CreatePakSymbols(*, pak_spec, pak_id_map, apk_spec, output_directory):
  """Creates symbols for pak files, read either from an APK or from disk.

  Args:
    pak_spec: Supplies the pak paths and the pak info path.
    pak_id_map: Forwarded to the pakfile helpers.
    apk_spec: When truthy, paks are read from |apk_spec.apk_path|.
    output_directory: Forwarded to pakfile.CreatePakSymbolsFromFiles().

  Returns:
    A (section_ranges, raw_symbols) tuple.
  """
  logging.debug('Creating Pak symbols')
  # The pakfile helpers can modify |ranges|.
  ranges = {}

  if not apk_spec:
    symbols = pakfile.CreatePakSymbolsFromFiles(ranges, pak_spec.pak_paths,
                                                pak_spec.pak_info_path,
                                                output_directory, pak_id_map)
    return ranges, symbols

  assert apk_spec.size_info_prefix
  symbols = pakfile.CreatePakSymbolsFromApk(ranges, apk_spec.apk_path,
                                            pak_spec.apk_pak_paths,
                                            pak_spec.pak_info_path, pak_id_map)
  return ranges, symbols
def _CreateContainerSymbols(container_spec, apk_file_manager,
                            apk_analyzer_results, ninja_source_mapper,
                            pak_id_map, component_overrides,
                            dex_deobfuscator_cache):
  """Creates all symbols for one container and attaches a Container to them.

  Args:
    container_spec: ContainerSpec describing the inputs.
    apk_file_manager: Provides InfoList(apk_path) for the APK.
    apk_analyzer_results: Indexed by container name; the entry is handed to
        apkanalyzer.CreateDexSymbols().
    ninja_source_mapper: Forwarded to native.CreateSymbols().
    pak_id_map: Forwarded to native.CreateSymbols() and _CreatePakSymbols().
    component_overrides: Forwarded to dir_metadata.PopulateComponents().
    dex_deobfuscator_cache: Provides GetForMappingFile(mapping_path).

  Returns:
    The list of created symbols, each with |container| set to the newly
    created models.Container.
  """
  container_name = container_spec.container_name
  apk_spec = container_spec.apk_spec
  pak_spec = container_spec.pak_spec
  native_spec = container_spec.native_spec
  output_directory = container_spec.output_directory
  source_directory = container_spec.source_directory

  logging.info('Starting on container: %s', container_spec)

  # Accumulators filled in by add_syms() and the branches below.
  raw_symbols = []
  section_sizes = {}
  metrics_by_file = {}
  default_component = apk_spec.default_component if apk_spec else ''

  def add_syms(section_ranges,
               new_raw_symbols,
               source_path_prefix=None,
               component=None,
               paths_already_normalized=False):
    # Merges one batch of symbols and its section sizes into the accumulators.
    # Only the size half of each (address, size) range is kept.
    new_section_sizes = {
        k: size
        for k, (address, size) in section_ranges.items()
    }
    # SECTION_OTHER is the one section allowed to appear in several batches;
    # its sizes are summed instead of colliding.
    if models.SECTION_OTHER in new_section_sizes:
      section_sizes[models.SECTION_OTHER] = section_sizes.get(
          models.SECTION_OTHER, 0) + new_section_sizes[models.SECTION_OTHER]
      del new_section_sizes[models.SECTION_OTHER]

    assert not (set(section_sizes) & set(new_section_sizes)), (
        'Section collision: {}\n\n {}'.format(section_sizes, new_section_sizes))
    section_sizes.update(new_section_sizes)

    # E.g.: native.CreateSymbols() already calls NormalizePaths().
    if not paths_already_normalized:
      archive_util.NormalizePaths(new_raw_symbols)

    if source_path_prefix:
      # Prefix the source_path for all symbols that have a source_path assigned,
      # and that don't have it set to $APK or $GOOGLE3.
      for s in new_raw_symbols:
        if s.source_path and s.source_path[0] != '$':
          s.source_path = source_path_prefix + s.source_path

    # An explicit component (even an empty string) overrides the lookup done
    # by dir_metadata.
    if component is not None:
      for s in new_raw_symbols:
        s.component = component
    else:
      dir_metadata.PopulateComponents(new_raw_symbols,
                                      source_directory,
                                      component_overrides,
                                      default_component=default_component)
    raw_symbols.extend(new_raw_symbols)

  elf_info = None
  # Native and DEX analysis are mutually exclusive for a given container.
  if native_spec:
    section_ranges, native_symbols, elf_info, native_metrics_by_file = (
        native.CreateSymbols(apk_spec=apk_spec,
                             native_spec=native_spec,
                             output_directory=output_directory,
                             ninja_source_mapper=ninja_source_mapper,
                             pak_id_map=pak_id_map))
    add_syms(section_ranges,
             native_symbols,
             source_path_prefix=native_spec.source_path_prefix,
             component=native_spec.component,
             paths_already_normalized=True)
    metrics_by_file.update(native_metrics_by_file)
  elif apk_spec and apk_spec.analyze_dex:
    logging.info('Analyzing DEX')
    apk_infolist = apk_file_manager.InfoList(apk_spec.apk_path)
    dex_total_size = sum(i.file_size for i in apk_infolist
                         if i.filename.endswith('.dex'))
    # Nothing to do when the APK has no (non-empty) .dex files.
    if dex_total_size > 0:
      mapping_path = apk_spec.mapping_path  # May be None.
      class_deobfuscation_map = (
          dex_deobfuscator_cache.GetForMappingFile(mapping_path))
      section_ranges, dex_symbols, dex_metrics_by_file = (
          apkanalyzer.CreateDexSymbols(apk_spec.apk_path,
                                       apk_analyzer_results[container_name],
                                       dex_total_size, class_deobfuscation_map,
                                       apk_spec.size_info_prefix,
                                       apk_spec.track_string_literals))
      add_syms(section_ranges, dex_symbols)
      metrics_by_file.update(dex_metrics_by_file)

  if pak_spec:
    section_ranges, pak_symbols = _CreatePakSymbols(
        pak_spec=pak_spec,
        pak_id_map=pak_id_map,
        apk_spec=apk_spec,
        output_directory=output_directory)
    add_syms(section_ranges, pak_symbols)

  apk_metadata = {}
  # This function can get called multiple times for the same APK file, to
  # process .so files that are treated as containers. The |not native_spec|
  # condition below skips these cases to prevent redundant symbol creation.
  if not native_spec and apk_spec:
    logging.info('Analyzing ARSC')
    arsc_section_ranges, arsc_symbols, arsc_metrics_by_file = (
        apk.CreateArscSymbols(apk_spec))
    add_syms(arsc_section_ranges, arsc_symbols)
    metrics_by_file.update(arsc_metrics_by_file)

    other_section_ranges, other_symbols, apk_metadata, apk_metrics_by_file = (
        apk.CreateApkOtherSymbols(apk_spec))
    add_syms(other_section_ranges, other_symbols)
    metrics_by_file.update(apk_metrics_by_file)

  metadata = _CreateMetadata(container_spec, elf_info)
  # Metadata from CreateApkOtherSymbols() must not shadow existing keys.
  assert not (metadata.keys() & apk_metadata.keys())
  metadata.update(apk_metadata)
  container = models.Container(name=container_name,
                               metadata=metadata,
                               section_sizes=section_sizes,
                               metrics_by_file=metrics_by_file)
  # Every symbol created above belongs to this one container.
  for symbol in raw_symbols:
    symbol.container = container

  return raw_symbols
def _DetectGitRevision(directory):
"""Runs git rev-parse to get the SHA1 hash of the current revision.
Args:
directory: Path to directory where rev-parse command will be run.
Returns:
A string with the SHA1 hash, or None if an error occured.
"""
try:
git_rev = subprocess.check_output(
['git', '-C', directory, 'rev-parse', 'HEAD']).decode('ascii')
return git_rev.rstrip()
except Exception:
logging.warning('Failed to detect git revision for file metadata.')
return None
def _ParseGnArgs(args_path):
"""Returns a list of normalized "key=value" strings."""
args = {}
with open(args_path) as f:
for l in f:
# Strips #s even if within string literal. Not a problem in practice.
parts = l.split('#')[0].split('=')
if len(parts) != 2:
continue
args[parts[0].strip()] = parts[1].strip()
return ["%s=%s" % x for x in sorted(args.items())]
def _AddContainerArguments(parser, is_top_args=False):
"""Add arguments applicable to a single container."""
# Main file argument: Exactly one should be specified (perhaps via -f).
# _IdentifyInputFile() should be kept updated.
group = parser.add_argument_group(title='Main Input')
group = group.add_mutually_exclusive_group(required=True)
group.add_argument('-f',
metavar='FILE',
help='Auto-identify input file type.')
group.add_argument('--apk-file',
help='.apk file to measure. Other flags can generally be '
'derived when this is used.')
group.add_argument('--minimal-apks-file',
help='.minimal.apks file to measure. Other flags can '
'generally be derived when this is used.')
group.add_argument('--elf-file', help='Path to input ELF file.')
group.add_argument('--map-file',
help='Path to input .map(.gz) file. Defaults to '
'{{elf_file}}.map(.gz)?. If given without '
'--elf-file, no size metadata will be recorded.')
group.add_argument('--pak-file',
action='append',
default=[],
dest='pak_files',
help='Paths to pak files.')
if is_top_args:
group.add_argument('--ssargs-file',
help='Path to SuperSize multi-container arguments file.')
group = parser.add_argument_group(title='What to Analyze')
group.add_argument('--java-only',
action='store_true',
help='Run on only Java symbols')
group.add_argument('--native-only',
action='store_true',
help='Run on only native symbols')
group.add_argument('--no-java',
action='store_true',
help='Do not run on Java symbols')
group.add_argument('--no-native',
action='store_true',
help='Do not run on native symbols')
if is_top_args:
group.add_argument('--container-filter',
help='Regular expression for which containers to create')
group = parser.add_argument_group(title='Analysis Options for Native Code')
group.add_argument('--no-map-file',
dest='ignore_linker_map',
action='store_true',
help='Use debug information to capture symbol sizes '
'instead of linker map file.')
# Used by tests to override path to APK-discovered files.
group.add_argument('--aux-elf-file', help=argparse.SUPPRESS)
group.add_argument(
'--aux-map-file',
help='Path to linker map to use when --elf-file is provided')
group = parser.add_argument_group(title='APK options')
group.add_argument('--mapping-file',
help='Proguard .mapping file for deobfuscation.')
group.add_argument('--resources-pathmap-file',
help='.pathmap.txt file that contains a maping from '
'original resource paths to shortened resource paths.')
group.add_argument('--abi-filter',
dest='abi_filters',
action='append',
help='For apks with multiple ABIs, break down native '
'libraries for this ABI. Defaults to 64-bit when both '
'32 and 64 bit are present.')
group = parser.add_argument_group(title='Analysis Options for Pak Files')
group.add_argument('--pak-info-file',
help='This file should contain all ids found in the pak '
'files that have been passed in. If not specified, '
'${pak_file}.info is assumed.')
group = parser.add_argument_group(title='Analysis Options (shared)')
group.add_argument('--source-directory',
help='Custom path to the root source directory.')
group.add_argument('--output-directory',
help='Path to the root build directory.')
group.add_argument('--symbols-dir',
default='lib.unstripped',
help='Relative path containing unstripped .so files '
'(for symbols) w.r.t. the output directory.')
group.add_argument('--no-string-literals',
action='store_true',
help=('Do not create symbols for string literals '
'(applies to DEX and Native).'))
if is_top_args:
group.add_argument('--json-config', help='Path to a supersize.json.')
group.add_argument('--no-output-directory',
action='store_true',
help='Do not auto-detect --output-directory.')
group.add_argument('--check-data-quality',
action='store_true',
help='Perform sanity checks to ensure there is no '
'missing data.')
def AddArguments(parser):
  """Adds the top-level arguments, then the container arguments.

  Args:
    parser: argparse parser that receives the arguments.
  """
  top_level_arguments = (
      ('size_file', 'Path to output .size file.'),
      ('--title', 'Value for the "title" build_config entry.'),
      ('--url', 'Value for the "url" build_config entry.'),
  )
  for name, help_text in top_level_arguments:
    parser.add_argument(name, help=help_text)
  _AddContainerArguments(parser, is_top_args=True)
def _IdentifyInputFile(args, on_config_error):
"""Identifies main input file type from |args.f|, and updates |args|.
Identification is performed on filename alone, i.e., the file need not exist.
The result is written to a field in |args|. If the field exists then it
simply gets overwritten.
If '.' is missing from |args.f| then --elf-file is assumed.
Returns:
The primary input file.
"""
if args.f:
if args.f.endswith('.minimal.apks'):
args.minimal_apks_file = args.f
elif args.f.endswith('.apk'):
args.apk_file = args.f
elif args.f.endswith('.so') or '.' not in os.path.basename(args.f):
args.elf_file = args.f
elif args.f.endswith('.map') or args.f.endswith('.map.gz'):
args.map_file = args.f
elif args.f.endswith('.pak'):
args.pak_files.append(args.f)
elif args.f.endswith('.ssargs'):
# Fails if trying to nest them, which should never happen.
args.ssargs_file = args.f
else:
on_config_error('Cannot identify file ' + args.f)
args.f = None
ret = [
args.apk_file, args.elf_file, args.minimal_apks_file,
args.__dict__.get('ssargs_file'), args.map_file
] + (args.pak_files or [])
ret = [v for v in ret if v]
if not ret:
on_config_error(
'Must pass at least one of --apk-file, --minimal-apks-file, '
'--elf-file, --map-file, --pak-file, --ssargs-file')
return ret[0]
def ParseSsargs(lines):
  """Parses .ssargs data.

  An .ssargs file is a text file to specify multiple containers as input to
  SuperSize-archive. After '#'-based comments, start / end whitespaces, and
  empty lines are stripped, each line specifies a distinct container. Format:
  * Positional argument: |name| for the container.
  * Main input file specified by -f, --apk-file, --elf-file, etc.:
    * Can be an absolute path.
    * Can be a relative path. In this case, it's up to the caller to supply the
      base directory.
    * -f switch must not specify another .ssargs file.
  * For supported switches: See _AddContainerArguments().

  Args:
    lines: An iterator containing lines of .ssargs data.

  Returns:
    A list of arguments, one for each container.

  Raises:
    ValueError: Parse error, including input line number.
  """

  def raise_value_error(msg):
    raise ValueError(msg)

  line_parser = argparse.ArgumentParser(add_help=False)
  # Report argparse failures as ValueError instead of exiting the process.
  line_parser.error = raise_value_error
  line_parser.add_argument('name')
  _AddContainerArguments(line_parser)

  parsed = []
  try:
    for lineno, line in enumerate(lines, 1):
      toks = shlex.split(line, comments=True)
      # Lines that are empty after stripping comments yield no tokens.
      if toks:
        parsed.append(line_parser.parse_args(toks))
  except ValueError as e:
    e.args = ('Line %d: %s' % (lineno, e.args[0]), )
    raise e
  return parsed
def _MakeNativeSpec(json_config, **kwargs):
  """Creates a NativeSpec and fills in the fields derived from |json_config|.

  Args:
    json_config: Provides ComponentForNativeFile(), GenDirRegexForNativeFile()
        and SourcePathPrefixForNativeFile(), each keyed by file basename.
    **kwargs: Forwarded to the NativeSpec constructor.

  Returns:
    The new NativeSpec.
  """
  spec = NativeSpec(**kwargs)

  # The ELF file, when present, names the library; otherwise the map file.
  primary_path = spec.elf_path or spec.map_path
  if primary_path:
    basename = os.path.basename(primary_path)
    spec.component = json_config.ComponentForNativeFile(basename)
    spec.gen_dir_regex = json_config.GenDirRegexForNativeFile(basename)
    spec.source_path_prefix = json_config.SourcePathPrefixForNativeFile(
        basename)

  if not spec.map_path:
    # TODO(crbug.com/40757867): Implement string literal tracking without map
    #     files. nm emits some string literal symbols, but most are missing.
    spec.track_string_literals = False

  return spec
def _ElfIsMainPartition(elf_path):
  """Returns whether the ELF at |elf_path| has a SECTION_PART_END section."""
  section_names = readelf.SectionInfoFromElf(elf_path).keys()
  return models.SECTION_PART_END in section_names
def _DeduceMapPath(elf_path):
  """Finds the linker map file that sits next to |elf_path|.

  Args:
    elf_path: Path to the ELF file.

  Returns:
    Path of the existing .map file, else of the existing .map.gz file, else
    None.
  """
  if _ElfIsMainPartition(elf_path):
    # The main partition's map is the one of the combined (pre-split) library.
    base_path = elf_path.replace('.so', '__combined.so') + '.map'
  else:
    base_path = elf_path + '.map'

  for candidate in (base_path, base_path + '.gz'):
    if os.path.exists(candidate):
      logging.debug('Detected map_path=%s', candidate)
      return candidate
  return None
def _CreateNativeSpecs(*, tentative_output_dir, symbols_dir, apk_infolist,
elf_path, map_path, abi_filters, auto_abi_filters,
track_string_literals, ignore_linker_map, json_config,
on_config_error):
if ignore_linker_map:
map_path = None
elif (map_path and not map_path.endswith('.map')
and not map_path.endswith('.map.gz')):
on_config_error('Expected --map-file to end with .map or .map.gz')
elif elf_path and not map_path:
map_path = _DeduceMapPath(elf_path)
ret = []
# if --elf-path or --map-path (rather than --aux-elf-path, --aux-map-path):
if not apk_infolist:
if map_path or elf_path:
combined_elf_path = None
if map_path and '__combined.so' in map_path:
combined_elf_path = elf_path[:-3] + '__combined.so'
ret.append(
_MakeNativeSpec(json_config,
apk_so_path=None,
map_path=map_path,
elf_path=elf_path,
combined_elf_path=combined_elf_path,
track_string_literals=track_string_literals))
return abi_filters, ret
lib_infos = [
f for f in apk_infolist if f.filename.endswith('.so') and f.file_size > 0
]
# Sort so elf_path/map_path applies largest non-filtered library.
matches_abi = lambda n: not abi_filters or any(f in n for f in abi_filters)
lib_infos.sort(key=lambda x: (not matches_abi(x.filename), -x.file_size))
for lib_info in lib_infos:
apk_so_path = lib_info.filename
cur_elf_path = None
cur_map_path = None
if not matches_abi(apk_so_path):
logging.debug('Not breaking down %s: secondary ABI', apk_so_path)
elif apk_so_path.endswith('_partition.so'):
# TODO(agrieve): Support symbol breakdowns for partitions (they exist in
# the __combined .map file. Debug information (nm output) is shared
# with base partition.
logging.debug('Not breaking down %s: partitioned library', apk_so_path)
else:
if elf_path:
# Consume --aux-elf-file for the largest matching binary.
cur_elf_path = elf_path
elf_path = None
elif tentative_output_dir:
# TODO(crbug.com/40229168): Remove handling the legacy library prefix
# 'crazy.' when there is no longer interest in size comparisons for
# these pre-N APKs.
cur_elf_path = os.path.join(
tentative_output_dir, symbols_dir,
posixpath.basename(apk_so_path.replace('crazy.', '')))
if os.path.exists(cur_elf_path):
logging.debug('Detected elf_path=%s', cur_elf_path)
else:
# TODO(agrieve): Not able to find libcrashpad_handler_trampoline.so.
logging.debug('Not breaking down %s because file does not exist: %s',
apk_so_path, cur_elf_path)
cur_elf_path = None
if map_path:
# Consume --aux-map-file for first non-skipped elf.
cur_map_path = map_path
map_path = None
elif cur_elf_path and not ignore_linker_map:
cur_map_path = _DeduceMapPath(cur_elf_path)
if auto_abi_filters:
abi_filters = [posixpath.basename(posixpath.dirname(apk_so_path))]
logging.info('Detected --abi-filter %s', abi_filters[0])
auto_abi_filters = False
combined_elf_path = None
if cur_map_path and '__combined.so' in cur_map_path:
combined_elf_path = cur_elf_path[:-3] + '__combined.so'
ret.append(
_MakeNativeSpec(json_config,
apk_so_path=apk_so_path,
map_path=cur_map_path,
elf_path=cur_elf_path,
combined_elf_path=combined_elf_path,
track_string_literals=track_string_literals))
return abi_filters, ret
# Cache to prevent excess log messages.
@functools.lru_cache
def _DeduceMappingPath(mapping_path, apk_prefix):
if apk_prefix:
if not mapping_path:
possible_mapping_path = apk_prefix + '.mapping'
if os.path.exists(possible_mapping_path):
mapping_path = possible_mapping_path
logging.debug('Detected --mapping-file=%s', mapping_path)
else:
logging.warning('Could not find proguard mapping file at %s',
possible_mapping_path)
return mapping_path
# Cache to prevent excess log messages.
@functools.lru_cache
def _DeducePathmapPath(resources_pathmap_path, apk_prefix):
if apk_prefix:
if not resources_pathmap_path:
possible_pathmap_path = apk_prefix + '.pathmap.txt'
# This could be pointing to a stale pathmap file if path shortening was
# previously enabled but is disabled for the current build. However, since
# current apk/aab will have unshortened paths, looking those paths up in
# the stale pathmap which is keyed by shortened paths would not find any
# mapping and thus should not cause any issues.
if os.path.exists(possible_pathmap_path):
resources_pathmap_path = possible_pathmap_path
logging.debug('Detected --resources-pathmap-file=%s',
resources_pathmap_path)
# Path shortening is optional, so do not warn for missing file.
return resources_pathmap_path
def _ReadMultipleArgsFromStream(lines, base_dir, err_prefix, on_config_error):
  """Parses .ssargs lines and resolves their file arguments.

  Args:
    lines: Lines of .ssargs data.
    base_dir: Directory that file arguments are joined onto.
    err_prefix: Text prepended to parse error messages.
    on_config_error: Called with a message when parsing fails.

  Returns:
    A list of parsed arguments, one per container.
  """
  try:
    parsed = ParseSsargs(lines)
  except ValueError as e:
    on_config_error('%s: %s' % (err_prefix, e.args[0]))

  for sub_args in parsed:
    fields = vars(sub_args)
    for key, value in fields.items():
      # Translate file arguments to be relative to |base_dir|.
      # NOTE(review): only string values are translated, so list-valued
      # arguments such as |pak_files| are left as-is - confirm intended.
      is_file_arg = key == 'f' or key.endswith('_file')
      if is_file_arg and isinstance(value, str):
        fields[key] = os.path.join(base_dir, value)
  return parsed
def _ReadMultipleArgsFromFile(ssargs_file, on_config_error):
  """Reads an .ssargs file and parses it into per-container arguments.

  Args:
    ssargs_file: Path of the .ssargs file to read.
    on_config_error: Callback forwarded to _ReadMultipleArgsFromStream().

  Returns:
    Whatever _ReadMultipleArgsFromStream() returns for the file's lines.
  """
  with open(ssargs_file, 'r') as ssargs_fh:
    ssargs_lines = ssargs_fh.readlines()
  # File arguments are resolved against the directory that holds the .ssargs
  # file, so behavior is the same wherever SuperSize-archive is run from.
  ssargs_dir = os.path.dirname(os.path.abspath(ssargs_file))
  return _ReadMultipleArgsFromStream(ssargs_lines, ssargs_dir,
                                     'In file ' + ssargs_file, on_config_error)
# Both |top_args| and |sub_args| may be modified.
def _CreateContainerSpecs(apk_file_manager,
                          top_args,
                          sub_args,
                          json_config,
                          base_container_name,
                          on_config_error,
                          split_name=None):
  """Builds the ContainerSpec objects for one input (or one bundle split).

  Args:
    apk_file_manager: Provides SplitPath() and InfoList() for apk access.
    top_args: Top-level arguments. |abi_filters| is overwritten when ABI
        filters are auto-detected for the "base" split.
    sub_args: Arguments for this container. |source_directory| and
        |output_directory| are filled in from |top_args| when unset.
    json_config: Provides DefaultComponentForSplit() and ApkPathDefaults();
        also forwarded to _CreateNativeSpecs().
    base_container_name: Name of the container. Extended with the split name
        (and a trailing "?" for on-demand splits) when |split_name| is set.
    on_config_error: Callback invoked with a message for missing input files.
    split_name: Name of the bundle split to analyze, or None.

  Returns:
    A list of ContainerSpec. The first entry holds the apk / pak specs (or
    the single native spec when there is no apk). When there is an apk, one
    additional entry follows for each native spec.
  """
  sub_args.source_directory = (sub_args.source_directory
                               or top_args.source_directory)
  sub_args.output_directory = (sub_args.output_directory
                               or top_args.output_directory)
  analyze_native = not (sub_args.java_only or sub_args.no_native
                        or top_args.java_only or top_args.no_native)
  analyze_dex = not (sub_args.native_only or sub_args.no_java
                     or top_args.native_only or top_args.no_java)
  only_java_or_native = (sub_args.native_only or top_args.native_only
                         or sub_args.java_only or top_args.java_only)
  # Both .pak and resource analysis read files under size-info/, so they
  # need an output directory.
  analyze_pak = not only_java_or_native and bool(sub_args.output_directory)
  analyze_res = not only_java_or_native and bool(sub_args.output_directory)

  if split_name:
    apk_path = apk_file_manager.SplitPath(sub_args.minimal_apks_file,
                                          split_name)
    base_container_name = f'{base_container_name}/{split_name}.apk'
    # Make on-demand a part of the name so that:
    # * It's obvious from the name which DFMs are on-demand.
    # * Diffs that change an on-demand status show as adds/removes.
    if _IsOnDemand(apk_path):
      base_container_name += '?'
  else:
    apk_path = sub_args.apk_file

  apk_prefix = sub_args.minimal_apks_file or sub_args.apk_file
  if apk_prefix:
    # Allow either .minimal.apks or just .apks.
    apk_prefix = apk_prefix.replace('.minimal.apks', '.aab')
    apk_prefix = apk_prefix.replace('.apks', '.aab')

  mapping_path = None
  if analyze_dex:
    mapping_path = _DeduceMappingPath(sub_args.mapping_file, apk_prefix)
  resources_pathmap_path = _DeducePathmapPath(sub_args.resources_pathmap_file,
                                              apk_prefix)

  apk_spec = None
  size_info_prefix = None
  if apk_prefix:
    apk_spec = ApkSpec(apk_path=apk_path,
                       minimal_apks_path=sub_args.minimal_apks_file,
                       mapping_path=mapping_path,
                       resources_pathmap_path=resources_pathmap_path,
                       split_name=split_name)
    # |top_args.output_directory| is None under --no-output-directory, and
    # os.path.join() rejects None. The prefix is only consumed when
    # |sub_args.output_directory| is set (see analyze_res / analyze_pak), so
    # it is always available where it is needed.
    size_info_dir = top_args.output_directory or sub_args.output_directory
    if size_info_dir:
      size_info_prefix = os.path.join(size_info_dir, 'size-info',
                                      os.path.basename(apk_prefix))
    if analyze_res:
      apk_spec.size_info_prefix = size_info_prefix
      res_info_path = apk_spec.size_info_prefix + '.res.info'
      if not os.path.exists(res_info_path):
        on_config_error('File not found: ' + res_info_path)
    apk_spec.analyze_dex = analyze_dex
    apk_spec.track_string_literals = not (top_args.no_string_literals
                                          or sub_args.no_string_literals)
    apk_spec.default_component = json_config.DefaultComponentForSplit(
        split_name)
    apk_spec.path_defaults = json_config.ApkPathDefaults()

  pak_spec = None
  apk_pak_paths = None
  apk_infolist = None
  if apk_spec:
    apk_infolist = apk_file_manager.InfoList(apk_path)
    apk_pak_paths = [
        f.filename for f in apk_infolist
        if archive_util.RemoveAssetSuffix(f.filename).endswith('.pak')
    ]
  if analyze_pak and (apk_pak_paths or sub_args.pak_files):
    if apk_spec:
      pak_info_path = size_info_prefix + '.pak.info'
    else:
      pak_info_path = sub_args.pak_info_file
    if pak_info_path and not os.path.exists(pak_info_path):
      # Report the path that was actually checked.
      on_config_error(f'File not found: {pak_info_path}. '
                      'Ensure is_official_build=true, or use --native-only')
    pak_spec = PakSpec(pak_paths=sub_args.pak_files,
                       pak_info_path=pak_info_path,
                       apk_pak_paths=apk_pak_paths)

  if analyze_native:
    # Allow top-level --abi-filter to override values set in .ssargs.
    abi_filters = top_args.abi_filters or sub_args.abi_filters
    aux_elf_file = sub_args.aux_elf_file
    aux_map_file = sub_args.aux_map_file
    # Aux files are only used for non-bundle inputs and the "base" split.
    if split_name not in (None, 'base'):
      aux_elf_file = None
      aux_map_file = None
    auto_abi_filters = not abi_filters and split_name == 'base'
    abi_filters, native_specs = _CreateNativeSpecs(
        tentative_output_dir=top_args.output_directory,
        symbols_dir=sub_args.symbols_dir,
        apk_infolist=apk_infolist,
        elf_path=sub_args.elf_file or aux_elf_file,
        map_path=sub_args.map_file or aux_map_file,
        abi_filters=abi_filters,
        auto_abi_filters=auto_abi_filters,
        track_string_literals=not (top_args.no_string_literals
                                   or sub_args.no_string_literals),
        ignore_linker_map=(top_args.ignore_linker_map
                           or sub_args.ignore_linker_map),
        json_config=json_config,
        on_config_error=on_config_error)
    # For app bundles, use a consistent ABI for all splits.
    if auto_abi_filters:
      top_args.abi_filters = abi_filters
  else:
    native_specs = []

  ret = [
      ContainerSpec(container_name=base_container_name,
                    apk_spec=apk_spec,
                    pak_spec=pak_spec,
                    native_spec=None,
                    source_directory=sub_args.source_directory,
                    output_directory=sub_args.output_directory)
  ]
  if apk_spec is None:
    # Special case for when pointed at a single ELF, use just one container.
    assert len(native_specs) <= 1
    ret[0].native_spec = native_specs[0] if native_specs else None
  else:
    # Files that get their own analysis are excluded from the apk's generic
    # file listing.
    apk_spec.ignore_apk_paths.update(s.apk_so_path for s in native_specs)
    if pak_spec and pak_spec.apk_pak_paths:
      apk_spec.ignore_apk_paths.update(pak_spec.apk_pak_paths)
    if apk_spec.analyze_dex:
      apk_spec.ignore_apk_paths.update(i.filename for i in apk_infolist
                                       if i.filename.endswith('.dex'))
    apk_spec.ignore_apk_paths.add(apk.RESOURCES_ARSC_FILE)

    for native_spec in native_specs:
      so_name = posixpath.basename(native_spec.apk_so_path)
      abi = posixpath.basename(posixpath.dirname(native_spec.apk_so_path))
      container_name = f'{base_container_name}/{so_name} ({abi})'
      # Use same apk_spec so that all containers for the apk_spec can be found.
      ret.append(
          ContainerSpec(container_name=container_name,
                        apk_spec=apk_spec,
                        pak_spec=None,
                        native_spec=native_spec,
                        source_directory=sub_args.source_directory,
                        output_directory=sub_args.output_directory))
  return ret
def _IsOnDemand(apk_path):
  """Returns whether the apk's manifest marks it as an on-demand feature split.

  Runs "aapt2 dump xmltree" on AndroidManifest.xml and inspects the text
  output. The result is True only when isFeatureSplit is "true" and on-demand
  delivery is declared.

  Args:
    apk_path: Path of the apk passed to aapt2.

  Returns:
    A bool.
  """
  aapt2_cmd = [
      path_util.GetAapt2Path(), 'dump', 'xmltree', '--file',
      'AndroidManifest.xml', apk_path
  ]
  manifest_dump = subprocess.check_output(aapt2_cmd).decode('ascii')

  def attr_is_true(namespace, name):
    # Attribute lines look like:
    # A: http://schemas.android.com/apk/res/android:isFeatureSplit(0x...)=true
    # A: http://schemas.android.com/apk/distribution:onDemand=true
    pattern = f'A: (?:.*?/{namespace}:)?{name}' + r'(?:\(.*?\))?=(\w+)'
    match = re.search(pattern, manifest_dump)
    return match and match.group(1) == 'true'

  if not attr_is_true('android', 'isFeatureSplit'):
    return False
  # Can use <dist:on-demand>, or <module dist:onDemand="true">.
  return bool(
      attr_is_true('distribution', 'onDemand') or 'on-demand' in manifest_dump)
def _CreateAllContainerSpecs(apk_file_manager, top_args, json_config,
                             on_config_error):
  """Creates ContainerSpec objects for every input named by |top_args|.

  Finalizes |top_args.output_directory| and |top_args.source_directory|,
  expands an .ssargs file into per-container arguments when one is given, and
  expands each .minimal.apks input into one set of specs per split.

  Args:
    apk_file_manager: Provides ExtractSplits(); forwarded to
        _CreateContainerSpecs().
    top_args: Top-level arguments. |output_directory| and |source_directory|
        are updated in place.
    json_config: Forwarded to _CreateContainerSpecs().
    on_config_error: Callback invoked with a message for configuration
        errors.

  Returns:
    A list of ContainerSpec with unique container names.

  Raises:
    Exception: If an input file does not exist.
  """
  main_file = _IdentifyInputFile(top_args, on_config_error)
  if top_args.no_output_directory:
    top_args.output_directory = None
  else:
    output_directory_finder = path_util.OutputDirectoryFinder(
        value=top_args.output_directory,
        any_path_within_output_directory=main_file)
    top_args.output_directory = output_directory_finder.Finalized()

  if not top_args.source_directory:
    top_args.source_directory = path_util.GetSrcRootFromOutputDirectory(
        top_args.output_directory)
    assert top_args.source_directory

  if top_args.ssargs_file:
    sub_args_list = _ReadMultipleArgsFromFile(top_args.ssargs_file,
                                              on_config_error)
  else:
    sub_args_list = [top_args]

  # Do a quick first pass to ensure inputs have been built.
  for sub_args in sub_args_list:
    main_file = _IdentifyInputFile(sub_args, on_config_error)
    if not os.path.exists(main_file):
      raise Exception('Input does not exist: ' + main_file)

  # Each element in |sub_args_list| specifies a container.
  ret = []
  for sub_args in sub_args_list:
    main_file = _IdentifyInputFile(sub_args, on_config_error)
    if hasattr(sub_args, 'name'):
      container_name = sub_args.name
    else:
      container_name = os.path.basename(main_file)
    if set(container_name) & set('<>?'):
      # Report through the supplied callback; there is no |parser| in this
      # scope.
      on_config_error('Container name cannot have characters in "<>?"')

    if sub_args.minimal_apks_file:
      split_names = apk_file_manager.ExtractSplits(sub_args.minimal_apks_file)
      for split_name in split_names:
        ret += _CreateContainerSpecs(apk_file_manager,
                                     top_args,
                                     sub_args,
                                     json_config,
                                     container_name,
                                     on_config_error,
                                     split_name=split_name)
    else:
      ret += _CreateContainerSpecs(apk_file_manager, top_args, sub_args,
                                   json_config, container_name,
                                   on_config_error)

  all_names = [c.container_name for c in ret]
  assert len(set(all_names)) == len(all_names), \
      'Found duplicate container names: ' + '\n'.join(sorted(all_names))
  return ret
def _FilterContainerSpecs(container_specs, container_re=None):
  """Returns the container specs whose names match |container_re|.

  Args:
    container_specs: Iterable of objects with a |container_name| attribute.
    container_re: Optional compiled regular expression. When falsy, every
        spec is kept; otherwise only specs whose name it search()-matches.

  Returns:
    A new list of the kept specs, in their original order.

  Raises:
    ValueError: If two specs share a container name, whether or not the
        filter would have kept them.
  """
  kept = []
  names_so_far = set()
  for spec in container_specs:
    name = spec.container_name
    if name in names_so_far:
      raise ValueError('Duplicate container name: {}'.format(name))
    names_so_far.add(name)

    if not container_re or container_re.search(name):
      kept.append(spec)
    else:
      logging.info('Skipping filtered container %s', name)
  return kept
def CreateSizeInfo(container_specs, build_config, json_config,
                   apk_file_manager):
  """Collects symbols for all containers and packages them as a SizeInfo.

  Args:
    container_specs: List of ContainerSpec. Sorted in place.
    build_config: Passed through to models.SizeInfo.
    json_config: Provides ComponentOverrides().
    apk_file_manager: Forwarded to _CreateContainerSymbols().

  Returns:
    A models.SizeInfo holding one container per spec and all of their symbols.
  """
  # Lower numbers sort first. Native containers precede the rest so that
  # pak_id_map is populated before any pak_spec is encountered. Among native
  # containers, the most complicated algorithm goes first since it is the
  # most likely to fail.
  algorithm_priority = {'linker_map': 0, 'dwarf': 1}
  other_native_priority = 2
  non_native_priority = 3

  def sort_key(container_spec):
    native_spec = container_spec.native_spec
    if native_spec:
      priority = algorithm_priority.get(native_spec.algorithm,
                                        other_native_priority)
    else:
      priority = non_native_priority
    return (priority, container_spec.container_name)

  container_specs.sort(key=sort_key)

  # Running ApkAnalyzer concurrently saves ~30 seconds for Monochrome.apks.
  dex_specs = [
      spec for spec in container_specs
      if not spec.native_spec and spec.apk_spec and spec.apk_spec.analyze_dex
  ]
  if dex_specs:
    logging.info('Kicking of ApkAnalyzer for %d .apk files', len(dex_specs))
  apk_analyzer_results = {
      spec.container_name:
      apkanalyzer.RunApkAnalyzerAsync(spec.apk_spec.apk_path,
                                      spec.apk_spec.mapping_path)
      for spec in dex_specs
  }

  # Ninja files are parsed only for native containers that have both an
  # output directory and a linker map.
  ninja_specs = [
      spec for spec in container_specs if spec.native_spec
      and spec.output_directory and spec.native_spec.map_path
  ]
  ninja_source_mapper = None
  if ninja_specs:
    elf_paths = []
    for spec in ninja_specs:
      elf_path = (spec.native_spec.combined_elf_path
                  or spec.native_spec.elf_path)
      if elf_path:
        elf_paths.append(elf_path)
    ninja_source_mapper = native.ParseNinjaFiles(
        ninja_specs[0].output_directory, elf_paths)

  pak_id_map = pakfile.PakIdMap()
  dex_deobfuscator_cache = dex_deobfuscate.CachedDexDeobfuscators()
  symbols_per_container = []
  for container_spec in container_specs:
    raw_symbols = _CreateContainerSymbols(container_spec, apk_file_manager,
                                          apk_analyzer_results,
                                          ninja_source_mapper, pak_id_map,
                                          json_config.ComponentOverrides(),
                                          dex_deobfuscator_cache)
    assert raw_symbols, f'{container_spec.container_name} had no symbols.'
    symbols_per_container.append(raw_symbols)

  # Names must be normalized before sorting.
  logging.info('Normalizing symbol names')
  for container_symbols in symbols_per_container:
    _NormalizeNames(container_symbols)

  # Sorting must happen after normalization.
  logging.info('Sorting symbols')
  for container_symbols in symbols_per_container:
    file_format.SortSymbols(container_symbols)

  logging.debug('Accumulating symbols')
  container_list = []
  all_raw_symbols = []
  for container_symbols in symbols_per_container:
    # Every container has at least one symbol (asserted above), so the first
    # symbol can be used to reach its container.
    container_list.append(container_symbols[0].container)
    all_raw_symbols.extend(container_symbols)

  file_format.CalculatePadding(all_raw_symbols)
  return models.SizeInfo(build_config, container_list, all_raw_symbols)
def Run(top_args, on_config_error):
  """Creates a .size file from the inputs described by |top_args|.

  Args:
    top_args: Parsed top-level arguments. Modified by
        _CreateAllContainerSpecs().
    on_config_error: Callback invoked with a message for invalid arguments.

  Raises:
    data_quality.QualityCheckError: If --check-data-quality is set and the
        command took longer than 10 minutes.
  """
  path_util.CheckLlvmToolsAvailable()

  size_path = top_args.size_file
  if not size_path.endswith('.size'):
    on_config_error('size_file must end with .size')

  # The clock is only needed for the duration check at the end.
  if top_args.check_data_quality:
    start_time = time.time()

  container_re = None
  filter_pattern = top_args.container_filter
  if filter_pattern:
    try:
      container_re = re.compile(filter_pattern)
    except Exception as e:
      on_config_error(f'Bad --container-filter input: {e}')

  json_config_path = top_args.json_config
  if not json_config_path:
    json_config_path = path_util.GetDefaultJsonConfigPath()
    logging.info('Using --json-config=%s', json_config_path)
  json_config = json_config_parser.Parse(json_config_path, on_config_error)

  with zip_util.ApkFileManager() as apk_file_manager:
    all_specs = _CreateAllContainerSpecs(apk_file_manager, top_args,
                                         json_config, on_config_error)
    container_specs = _FilterContainerSpecs(all_specs, container_re)
    build_config = CreateBuildConfig(top_args.output_directory,
                                     top_args.source_directory,
                                     url=top_args.url,
                                     title=top_args.title)
    size_info = CreateSizeInfo(container_specs, build_config, json_config,
                               apk_file_manager)

  # The coverage description is only computed when it would be logged.
  if logging.getLogger().isEnabledFor(logging.DEBUG):
    for line in data_quality.DescribeSizeInfoCoverage(size_info):
      logging.debug(line)

  logging.info('Recorded info for %d symbols', len(size_info.raw_symbols))
  for container in size_info.containers:
    metadata_lines = describe.DescribeDict(container.metadata)
    logging.info('Recording metadata: \n %s', '\n '.join(metadata_lines))

  logging.info('Saving result to %s', size_path)
  file_format.SaveSizeInfo(size_info, size_path)
  size_in_mb = os.path.getsize(size_path) / 1024.0 / 1024.0
  logging.info('Done. File size is %.2fMiB.', size_in_mb)

  if not top_args.check_data_quality:
    return

  logging.info('Checking data quality')
  data_quality.CheckDataQuality(size_info, not top_args.no_string_literals)
  duration = (time.time() - start_time) / 60
  if duration > 10:
    message = ('Command should not take longer than 10 minutes.'
               ' Took {:.1f} minutes.').format(duration)
    raise data_quality.QualityCheckError(message)
|