1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419
|
# Copyright 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import logging
import os
import re
import struct
import zipfile
# The default zipfile python module cannot open APKs properly, but this
# fixes it. Note that simply importing this file is sufficient to
# ensure that zip works correctly for all other modules. See:
# http://bugs.python.org/issue14315
# https://hg.python.org/cpython/rev/6dd5e9556a60#l2.8
def _PatchZipFile():
  """Make zipfile tolerate undecodable 'extra' fields in archive entries.

  Replaces the private zipfile.ZipInfo._decodeExtra method with a wrapper
  that ignores struct.error raised while decoding an entry's extra field,
  instead of letting it abort the read of the whole archive.

  The patch is process-wide: it affects every user of the zipfile module,
  not only the classes in this file.
  """
  # pylint: disable=protected-access
  original_decode_extra = zipfile.ZipInfo._decodeExtra

  def decodeExtra(self, *args, **kwargs):
    # Forward every argument untouched. _decodeExtra is a private method
    # whose signature is not guaranteed to be stable across CPython releases;
    # a wrapper that only accepts |self| raises TypeError as soon as zipfile
    # calls it with anything more.
    try:
      return original_decode_extra(self, *args, **kwargs)
    except struct.error:
      # Deliberate best effort: an extra field that cannot be decoded is
      # skipped rather than treated as a fatal error.
      return None

  zipfile.ZipInfo._decodeExtra = decodeExtra


_PatchZipFile()
class ApkZipInfo(object):
  """Models a single file entry from an ApkReader.

  This is very similar to the zipfile.ZipInfo class. It provides a few
  properties describing the entry:

    - filename       (same as ZipInfo.filename)
    - file_size      (same as ZipInfo.file_size)
    - compress_size  (same as ZipInfo.compress_size)
    - file_offset    (note: not provided by ZipInfo)

  And a few useful methods: IsCompressed() and IsElfFile().

  Entries can be created by using ApkReader() methods.
  """

  # Magic number found in the first four bytes of every ELF file. This must
  # be a bytes literal: archive content is read as bytes, and on Python 3 a
  # bytes value never compares equal to a str.
  _ELF_MAGIC = b'\x7fELF'

  def __init__(self, zip_file, zip_info):
    """Construct instance. Do not call this directly. Use ApkReader methods.

    Args:
      zip_file: The open zipfile.ZipFile instance that contains the entry.
      zip_info: The zipfile.ZipInfo instance describing the entry.
    """
    self._file = zip_file
    self._info = zip_info
    self._file_offset = None  # Computed lazily, see |file_offset|.

  @property
  def filename(self):
    """Entry's file path within APK."""
    return self._info.filename

  @property
  def file_size(self):
    """Entry's extracted file size in bytes."""
    return self._info.file_size

  @property
  def compress_size(self):
    """Entry's compressed file size in bytes."""
    return self._info.compress_size

  @property
  def file_offset(self):
    """Entry's starting file offset in the APK.

    The value is read from the archive's local file header on first access
    (which moves the position of the underlying file object) and is cached
    afterwards.
    """
    if self._file_offset is None:
      self._file_offset = self._ZipFileOffsetFromLocalHeader(
          self._file.fp, self._info.header_offset)
    return self._file_offset

  def __repr__(self):
    """Convert to string for debugging."""
    return 'ApkZipInfo["%s",size=0x%x,compressed=0x%x,offset=0x%x]' % (
        self.filename, self.file_size, self.compress_size, self.file_offset)

  def IsCompressed(self):
    """Returns True iff the entry is compressed."""
    return self._info.compress_type != zipfile.ZIP_STORED

  def IsElfFile(self):
    """Returns True iff the entry's content starts with the ELF magic number."""
    with self._file.open(self._info, 'r') as f:
      return f.read(len(self._ELF_MAGIC)) == self._ELF_MAGIC

  @staticmethod
  def _ZipFileOffsetFromLocalHeader(fd, local_header_offset):
    """Return a file's start offset from its zip archive local header.

    Args:
      fd: Input file object, seekable and opened in binary mode.
      local_header_offset: Local header offset (from its ZipInfo entry).
    Returns:
      file start offset.
    Raises:
      struct.error: if fewer than four bytes could be read at the position
        of the two length fields (e.g. truncated archive).
    """
    FILE_NAME_LEN_OFFSET = 26  # Position of the file name length field.
    FILE_NAME_OFFSET = 30      # Size of the fixed part of the local header.

    fd.seek(local_header_offset + FILE_NAME_LEN_OFFSET)
    # The file name length and the extra field length are two consecutive
    # 16-bit fields. ZIP stores integers in little-endian order, so use an
    # explicit '<' instead of the host's native byte order.
    file_name_len, extra_field_len = struct.unpack('<HH', fd.read(4))

    # The entry's data starts right after the variable-sized name and extra
    # field that follow the fixed part of the header.
    return (local_header_offset + FILE_NAME_OFFSET +
            file_name_len + extra_field_len)
class ApkReader(object):
"""A convenience class used to read the content of APK files.
Its design is very similar to the one from zipfile.ZipFile, except
that its returns ApkZipInfo entries which provide a |file_offset|
property that can be used to know where a given file is located inside
the archive.
It is also easy to mock for unit-testing (see MockApkReader in
apk_utils_unittest.py) without creating any files on disk.
Usage is the following:
- Create an instance using a with statement (for proper unit-testing).
- Call ListEntries() to list all entries in the archive. This returns
a list of ApkZipInfo entries.
- Or call FindEntry() corresponding to a given path within the archive.
For example:
with ApkReader(input_apk_path) as reader:
info = reader.FindEntry('lib/armeabi-v7a/libfoo.so')
if info.IsCompressed() or not info.IsElfFile():
raise Exception('Invalid library path")
The ApkZipInfo can be used to inspect the entry's metadata, or read its
content with the ReadAll() method. See its documentation for all details.
"""
def __init__(self, apk_path):
"""Initialize instance."""
self._zip_file = zipfile.ZipFile(apk_path, 'r')
self._path = apk_path
def __enter__(self):
"""Python context manager entry."""
return self
def __exit__(self, *kwargs):
"""Python context manager exit."""
self.Close()
@property
def path(self):
"""The corresponding input APK path."""
return self._path
def Close(self):
"""Close the reader (and underlying ZipFile instance)."""
self._zip_file.close()
def ListEntries(self):
"""Return a list of ApkZipInfo entries for this APK."""
result = []
for info in self._zip_file.infolist():
result.append(ApkZipInfo(self._zip_file, info))
return result
def FindEntry(self, file_path):
"""Return an ApkZipInfo instance for a given archive file path.
Args:
file_path: zip file path.
Return:
A new ApkZipInfo entry on success.
Raises:
KeyError on failure (entry not found).
"""
info = self._zip_file.getinfo(file_path)
return ApkZipInfo(self._zip_file, info)
class ApkNativeLibraries(object):
  """The set of uncompressed native shared libraries stored in an APK.

  Build an instance from an ApkReader, then call FindLibraryByOffset() to
  find which library, if any, contains a given file offset of the APK.

  IsEmpty(), GetLibraries() and GetDumpList() can be used to inspect the
  state of the instance.
  """

  def __init__(self, apk_reader):
    """Collect the native libraries listed by |apk_reader|.

    Args:
      apk_reader: An ApkReader instance corresponding to the input APK.
    """
    # List of (lib_path, start_offset, end_offset) tuples, kept in the order
    # the entries are listed by the reader. |end_offset| is exclusive.
    self._native_libs = []
    for zip_entry in apk_reader.ListEntries():
      # Skip two kinds of entries:
      #  - Zero-sized files. Chromium uses such 'placeholder' libraries to
      #    work around bugs in older Android system releases; they are never
      #    loaded, cannot appear in stack traces, and would only produce
      #    confusing results here.
      #  - Compressed files, since only uncompressed libraries can appear
      #    in stack traces.
      if zip_entry.file_size == 0 or zip_entry.IsCompressed():
        continue

      # For now, only keep files located under lib/ whose name ends with
      # .so. NOTE: A 'lib' file name prefix is intentionally not required,
      # otherwise the 'crazy.libXXX.so' libraries used by Chromium would be
      # missed.
      name = zip_entry.filename
      if not (name.startswith('lib/') and name.endswith('.so')):
        continue

      start = zip_entry.file_offset
      self._native_libs.append((name, start, start + zip_entry.file_size))

  def IsEmpty(self):
    """Return true iff no library was collected."""
    return not self._native_libs

  def GetLibraries(self):
    """Return the sorted list of the in-APK paths of all libraries."""
    return sorted(path for path, _, _ in self._native_libs)

  def GetDumpList(self):
    """Retrieve full library map.

    Returns:
      A list of (lib_path, file_offset, file_size) tuples, sorted
      in increasing |file_offset| values.
    """
    dump = [(path, start, end - start)
            for path, start, end in self._native_libs]
    dump.sort(key=lambda item: item[1])
    return dump

  def FindLibraryByOffset(self, file_offset):
    """Find the native library at a given file offset.

    Args:
      file_offset: File offset within the original APK.
    Returns:
      A (lib_path, lib_offset) tuple on success, or (None, 0) on failure.
      |lib_path| is the library's path as stored in the APK and
      |lib_offset| is |file_offset| relative to the start of the library.
    """
    for lib_path, start_offset, end_offset in self._native_libs:
      if start_offset <= file_offset < end_offset:
        return (lib_path, file_offset - start_offset)
    return (None, 0)
class ApkLibraryPathTranslator(object):
"""Translates APK file paths + byte offsets into library path + offset.
The purpose of this class is to translate a native shared library path
that points to an APK into a new device-specific path that points to a
native shared library, as if it was installed there. E.g.:
('/data/data/com.example.app-1/base.apk', 0x123be00)
would be translated into:
('/data/data/com.example.app-1/base.apk!lib/libfoo.so', 0x3be00)
If the original APK (installed as base.apk) contains an uncompressed shared
library under lib/armeabi-v7a/libfoo.so at offset 0x120000.
Note that the virtual device path after the ! doesn't necessarily match
the path inside the .apk. This doesn't really matter for the rest of
the symbolization functions since only the file's base name can be used
to find the corresponding file on the host.
Usage is the following:
1/ Create new instance.
2/ Call AddHostApk() one or several times to add the host path
of an APK, its package name, and device-installed named.
3/ Call TranslatePath() to translate a (path, offset) tuple corresponding
to an on-device APK, into the corresponding virtual device library
path and offset.
"""
# Depending on the version of the system, a non-system APK might be installed
# on a path that looks like the following:
#
# * /data/..../<package_name>-<number>.apk, where <number> is used to
# distinguish several versions of the APK during package updates.
#
# * /data/..../<package_name>-<suffix>/base.apk, where <suffix> is a
# string of random ASCII characters following the dash after the
# package name. This serves as a way to distinguish the installation
# paths during package update, and randomize its final location
# (to prevent apps from hard-coding the paths to other apps).
#
# Note that the 'base.apk' name comes from the system.
#
# * /data/.../<package_name>-<suffix>/<split_name>.apk, where <suffix>
# is the same as above, and <split_name> is the name of am app bundle
# split APK.
#
# System APKs are installed on paths that look like /system/app/Foo.apk
# but this class ignores them intentionally.
# Compiler regular expression for the first format above.
_RE_APK_PATH_1 = re.compile(
r'/data/.*/(?P<package_name>[A-Za-z0-9_.]+)-(?P<version>[0-9]+)\.apk')
# Compiled regular expression for the second and third formats above.
_RE_APK_PATH_2 = re.compile(
r'/data/.*/(?P<package_name>[A-Za-z0-9_.]+)-(?P<suffix>[^/]+)/' +
r'(?P<apk_name>.+\.apk)')
def __init__(self):
"""Initialize instance. Call AddHostApk() to add host apk file paths."""
self._path_map = {} # Maps (package_name, apk_name) to host-side APK path.
self._libs_map = {} # Maps APK host path to ApkNativeLibrariesMap instance.
def AddHostApk(self, package_name, native_libs, device_apk_name=None):
"""Add a file path to the host APK search list.
Args:
package_name: Corresponding apk package name.
native_libs: ApkNativeLibraries instance for the corresponding APK.
device_apk_name: Optional expected name of the installed APK on the
device. This is only useful when symbolizing app bundle that run on
Android L+. I.e. it will be ignored in other cases.
"""
if native_libs.IsEmpty():
logging.debug('Ignoring host APK without any uncompressed native ' +
'libraries: %s', device_apk_name)
return
# If the APK name is not provided, use the default of 'base.apk'. This
# will be ignored if we find <package_name>-<number>.apk file paths
# in the input, but will work properly for Android L+, as long as we're
# not using Android app bundles.
device_apk_name = device_apk_name or 'base.apk'
key = "%s/%s" % (package_name, device_apk_name)
if key in self._libs_map:
raise KeyError('There is already an APK associated with (%s)' % key)
self._libs_map[key] = native_libs
@staticmethod
def _MatchApkDeviceInstallPath(apk_path):
"""Check whether a given path matches an installed APK device file path.
Args:
apk_path: Device-specific file path.
Returns:
On success, a (package_name, apk_name) tuple. On failure, (None. None).
"""
m = ApkLibraryPathTranslator._RE_APK_PATH_1.match(apk_path)
if m:
return (m.group('package_name'), 'base.apk')
m = ApkLibraryPathTranslator._RE_APK_PATH_2.match(apk_path)
if m:
return (m.group('package_name'), m.group('apk_name'))
return (None, None)
def TranslatePath(self, apk_path, apk_offset):
"""Translate a potential apk file path + offset into library path + offset.
Args:
apk_path: Library or apk file path on the device (e.g.
'/data/data/com.example.app-XSAHKSJH/base.apk').
apk_offset: Byte offset within the library or apk.
Returns:
a new (lib_path, lib_offset) tuple. If |apk_path| points to an APK,
then this function searches inside the corresponding host-side APKs
(added with AddHostApk() above) for the corresponding uncompressed
native shared library at |apk_offset|, if found, this returns a new
device-specific path corresponding to a virtual installation of said
library with an adjusted offset.
Otherwise, just return the original (apk_path, apk_offset) values.
"""
if not apk_path.endswith('.apk'):
return (apk_path, apk_offset)
apk_package, apk_name = self._MatchApkDeviceInstallPath(apk_path)
if not apk_package:
return (apk_path, apk_offset)
key = '%s/%s' % (apk_package, apk_name)
native_libs = self._libs_map.get(key)
if not native_libs:
logging.debug('Unknown %s package', key)
return (apk_path, apk_offset)
lib_name, new_offset = native_libs.FindLibraryByOffset(apk_offset)
if not lib_name:
logging.debug('Invalid offset in %s.apk package: %d', key, apk_offset)
return (apk_path, apk_offset)
lib_name = os.path.basename(lib_name)
# Some libraries are stored with a crazy. prefix inside the APK, this
# is done to prevent the PackageManager from extracting the libraries
# at installation time when running on pre Android M systems, where the
# system linker cannot load libraries directly from APKs.
crazy_prefix = 'crazy.'
if lib_name.startswith(crazy_prefix):
lib_name = lib_name[len(crazy_prefix):]
# Put this in a fictional lib sub-directory for good measure.
new_path = '%s!lib/%s' % (apk_path, lib_name)
return (new_path, new_offset)
|