1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
|
# Copyright 2022 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Functions for creating APK symbols."""
import logging
import os
import posixpath
import re
import zipfile
import archive_util
import arsc_parser
import file_format
import models
import zip_util
# Zip entry name of the ARSC resources file that CreateArscSymbols() looks for
# inside the APK.
RESOURCES_ARSC_FILE = 'resources.arsc'
class _ResourcePathDeobfuscator:
  """Translates obfuscated APK resource paths back to their original paths.

  The translation table is read once, at construction time, from a resources
  pathmap file whose meaningful lines have the form "<original> -> <renamed>".
  """

  def __init__(self, pathmap_path):
    """Builds the lookup table from |pathmap_path| (which may be None)."""
    self._pathmap = self._LoadResourcesPathmap(pathmap_path)

  def _LoadResourcesPathmap(self, pathmap_path):
    """Parses the pathmap of obfuscated resource paths.

    Blank lines and lines starting with '--' are ignored.

    Args:
      pathmap_path: Path to the pathmap file, or None.

    Returns:
      A dict mapping each renamed (obfuscated) path to its original path. The
      dict is empty when |pathmap_path| is None.
    """
    renamed_to_original = {}
    if pathmap_path is None:
      return renamed_to_original
    with open(pathmap_path, 'r') as pathmap_file:
      for raw_line in pathmap_file:
        entry = raw_line.strip()
        if entry == '' or entry.startswith('--'):
          continue
        original, renamed = entry.split(' -> ')
        renamed_to_original[renamed] = original
    return renamed_to_original

  def MaybeRemapPath(self, path):
    """Returns the original path for |path|, or |path| itself if unmapped."""
    direct_match = self._pathmap.get(path)
    if direct_match:
      return direct_match

    # If processing a .minimal.apks, we are actually just processing the base
    # split, so retry the lookup with the 'base/' prefix added.
    base_prefix = 'base/'
    base_match = self._pathmap.get(f'{base_prefix}{path}')
    if base_match:
      # Drop the leading 'base/' from the result: it is not needed because we
      # are looking directly inside the base apk.
      return base_match[len(base_prefix):]
    return path
class _ResourceSourceMapper:
  """Maps resource paths inside an APK to the source paths they came from.

  The mapping is loaded from the "<size_info_prefix>.res.info" file, a
  tab-separated list of "<path under res/>\t<source path>" lines.
  """

  def __init__(self, size_info_prefix, path_defaults):
    """Loads the resource info mapping.

    Args:
      size_info_prefix: Path prefix to which '.res.info' is appended to locate
          the resource info file. When falsy, no mapping is loaded and every
          lookup returns ''.
      path_defaults: Optional dict of APK path -> source path entries. These
          are merged over the loaded entries, but only when a .res.info file is
          loaded.
    """
    self._path_defaults = path_defaults or {}
    if size_info_prefix:
      self._res_info = self._LoadResInfo(size_info_prefix)
    else:
      self._res_info = {}
    self._pattern_dollar_underscore = re.compile(r'\$+(.*?)(?:__\d)+')
    self._pattern_version_suffix = re.compile(r'-v\d+/')

  @staticmethod
  def _ParseResInfoFile(res_info_path):
    """Parses a .res.info file into a dict of destination -> source.

    Blank lines are skipped so that a stray or trailing empty line does not
    abort the whole parse.

    Raises:
      ValueError: If a non-blank line does not consist of exactly two
          tab-separated fields.
    """
    res_info = {}
    with open(res_info_path, 'r') as info_file:
      for line in info_file:
        line = line.rstrip()
        if not line:
          continue
        dest, source = line.split('\t')
        res_info[dest] = source
    return res_info

  def _LoadResInfo(self, size_info_prefix):
    """Returns the APK path -> source path dict for |size_info_prefix|."""
    apk_res_info_path = size_info_prefix + '.res.info'
    res_info_without_root = self._ParseResInfoFile(apk_res_info_path)
    # We package resources in the res/ folder only in the apk. Keys are
    # compared against zip entry names, which always use '/', so join with
    # posixpath rather than the host's os.path.
    res_info = {
        posixpath.join('res', dest): source
        for dest, source in res_info_without_root.items()
    }
    res_info.update(self._path_defaults)
    return res_info

  def FindSourceForPath(self, path):
    """Returns the source path recorded for APK entry |path|, or ''."""
    # Sometimes android adds $ in front and __# before extension.
    path = self._pattern_dollar_underscore.sub(r'\1', path)
    path = archive_util.RemoveAssetSuffix(path)
    ret = self._res_info.get(path)
    if ret:
      return ret
    # Android build tools may append extra -v flags for the root dir.
    path = self._pattern_version_suffix.sub('/', path)
    return self._res_info.get(path) or ''
def CreateArscSymbols(apk_spec):
  """Creates symbols for the chunks of the APK's resources.arsc file.

  Every leaf chunk of the parsed ARSC file yields one symbol, plus a separate
  "(placeholders)" symbol when the chunk reports placeholder bytes. Bytes of
  the file not covered by any leaf chunk are reported as "Overhead: ARSC".

  Args:
    apk_spec: Object whose |apk_path| names the APK to read.

  Returns:
    A tuple of (section_ranges, raw_symbols, metrics_by_file).
  """
  raw_symbols = []
  metrics_by_file = {}
  with zipfile.ZipFile(apk_spec.apk_path) as src_zip:
    arsc_infos = []
    for info in src_zip.infolist():
      if info.filename == RESOURCES_ARSC_FILE:
        arsc_infos.append(info)

    if arsc_infos:
      assert len(arsc_infos) == 1
      arsc_info = arsc_infos[0]
      filename = arsc_info.filename
      arsc_data = src_zip.read(arsc_info)
      arsc_file = arsc_parser.ArscFile(arsc_data)
      source_path = posixpath.join(models.APK_PREFIX_PATH, filename)
      metrics = {}
      # Start from the full file size and subtract each leaf chunk; whatever
      # remains is not attributed to any leaf.
      overhead = len(arsc_data)
      for inner_path, chunk in arsc_file.VisitPreOrder():
        if chunk.children:
          # Only leaf chunks become symbols.
          continue
        name = chunk.symbol_name()
        if inner_path:
          sym_source_path = f'{source_path}/{inner_path}'
        else:
          sym_source_path = source_path
        raw_symbols.append(
            models.Symbol(models.SECTION_ARSC,
                          chunk.size - chunk.placeholder,
                          source_path=sym_source_path,
                          full_name=name))
        if chunk.placeholder:
          raw_symbols.append(
              models.Symbol(models.SECTION_ARSC,
                            chunk.placeholder,
                            source_path=sym_source_path,
                            full_name=f'{name} (placeholders)'))
        if isinstance(chunk, arsc_parser.ArscResTableTypeSpec):
          count_key = f'{models.METRICS_COUNT}/{chunk.type_str}'
          metrics[count_key] = chunk.entry_count
        overhead -= chunk.size

      if overhead > 0:
        overhead_sym = models.Symbol(models.SECTION_ARSC,
                                     overhead,
                                     source_path=source_path,
                                     full_name='Overhead: ARSC')
        raw_symbols.append(overhead_sym)
      metrics_by_file[filename] = metrics

  total_size = sum(s.size for s in raw_symbols)
  section_ranges = {}
  archive_util.ExtendSectionRange(section_ranges, models.SECTION_ARSC,
                                  total_size)
  return section_ranges, raw_symbols, metrics_by_file
def CreateMetadata(apk_spec, include_file_details, shorten_path):
  """Returns a metadata dict for the given apk_spec.

  Args:
    apk_spec: Object providing |mapping_path|, |minimal_apks_path|,
        |split_name| and |apk_path|.
    include_file_details: When falsy, an empty dict is returned.
    shorten_path: Callable applied to each file path before it is stored.

  Returns:
    A dict with the ProGuard mapping filename (if |mapping_path| is set) and
    the APK filename. For a .minimal.apks input the split name is recorded
    too, and the APK filename is that of the .minimal.apks file.
  """
  logging.debug('Constructing APK metadata')
  if not include_file_details:
    return {}

  apk_metadata = {}
  mapping_path = apk_spec.mapping_path
  if mapping_path:
    apk_metadata[models.METADATA_PROGUARD_MAPPING_FILENAME] = shorten_path(
        mapping_path)

  minimal_apks_path = apk_spec.minimal_apks_path
  if not minimal_apks_path:
    apk_metadata[models.METADATA_APK_FILENAME] = shorten_path(apk_spec.apk_path)
    return apk_metadata

  apk_metadata[models.METADATA_APK_FILENAME] = shorten_path(minimal_apks_path)
  apk_metadata[models.METADATA_APK_SPLIT_NAME] = apk_spec.split_name
  return apk_metadata
def CreateApkOtherSymbols(apk_spec):
  """Creates symbols for resources / assets within the apk.

  Each zip entry not listed in |apk_spec.ignore_apk_paths| becomes one symbol
  sized by its compressed size. One extra "Overhead: APK file" symbol accounts
  for the zip structure itself.

  Args:
    apk_spec: Object providing |apk_path|, |size_info_prefix|,
        |path_defaults|, |resources_pathmap_path| and |ignore_apk_paths|.

  Returns:
    A tuple of (section_ranges, raw_symbols, apk_metadata, apk_metrics_by_file).
  """
  logging.info('Creating symbols for other APK entries')
  res_source_mapper = _ResourceSourceMapper(apk_spec.size_info_prefix,
                                            apk_spec.path_defaults)
  resource_deobfuscator = _ResourcePathDeobfuscator(
      apk_spec.resources_pathmap_path)
  apk_path = apk_spec.apk_path
  raw_symbols = []
  compressed_total = 0
  zipalign_total = 0
  with zipfile.ZipFile(apk_path) as apk_zip:
    signing_block_size = zip_util.MeasureApkSignatureBlock(apk_zip)
    for entry in apk_zip.infolist():
      compressed_total += entry.compress_size
      # Account for zipalign overhead that exists in local file header.
      zipalign_total += zip_util.ReadZipInfoExtraFieldLength(apk_zip, entry)
      # Account for zipalign overhead that exists in central directory header.
      # Happens when python aligns entries in apkbuilder.py, but does not
      # exist when using Android's zipalign. E.g. for bundle .apks files.
      zipalign_total += len(entry.extra)

      # Skip files that we explicitly analyze: .so, .dex, .pak, and .arsc.
      # Their bytes still count towards the totals accumulated above.
      if entry.filename in apk_spec.ignore_apk_paths:
        continue

      resource_filename = resource_deobfuscator.MaybeRemapPath(entry.filename)
      source_path = (res_source_mapper.FindSourceForPath(resource_filename)
                     or posixpath.join(models.APK_PREFIX_PATH,
                                       resource_filename))
      entry_symbol = models.Symbol(
          models.SECTION_OTHER,
          entry.compress_size,
          source_path=source_path,
          full_name=resource_filename)  # Full name must disambiguate
      raw_symbols.append(entry_symbol)

  # Store zipalign overhead and signing block size as metadata rather than an
  # "Overhead:" symbol because they fluctuate in size, and would be a source of
  # noise in symbol diffs if included as symbols (http://crbug.com/1130754).
  # Might be even better if we had an option in Tiger Viewer to ignore certain
  # symbols, but taking this as a short-cut for now.
  apk_metadata = {
      models.METADATA_ZIPALIGN_OVERHEAD: zipalign_total,
      models.METADATA_SIGNING_BLOCK_SIZE: signing_block_size,
  }

  apk_file_size = os.path.getsize(apk_path)
  size_metric_key = f'{models.METRICS_SIZE}/{models.METRICS_SIZE_APK_FILE}'
  apk_metrics_by_file = {
      posixpath.basename(apk_path): {
          size_metric_key: apk_file_size,
      },
  }

  # Overhead includes:
  #  * Size of all local zip headers (minus zipalign padding).
  #  * Size of central directory & end of central directory.
  overhead_size = (apk_file_size - compressed_total - zipalign_total -
                   signing_block_size)
  assert overhead_size >= 0, 'Apk overhead must be non-negative'
  raw_symbols.append(
      models.Symbol(models.SECTION_OTHER,
                    overhead_size,
                    full_name='Overhead: APK file'))

  section_ranges = {}
  archive_util.ExtendSectionRange(section_ranges, models.SECTION_OTHER,
                                  sum(s.size for s in raw_symbols))
  file_format.SortSymbols(raw_symbols)
  return section_ranges, raw_symbols, apk_metadata, apk_metrics_by_file
|