1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
|
# Copyright 2016 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import os
import re
import shutil
import sys
import tempfile
from xml.etree import ElementTree
from collections import namedtuple
from typing import Dict
DIR_SOURCE_ROOT = os.environ.get(
'CHECKOUT_SOURCE_ROOT',
os.path.abspath(
os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, os.pardir,
os.pardir)))
DEVIL_PATH = os.path.join(DIR_SOURCE_ROOT, 'third_party', 'catapult', 'devil')
if DEVIL_PATH not in sys.path:
sys.path.append(DEVIL_PATH)
from devil.utils import cmd_helper
PYLIB_PATH = os.path.join(os.path.dirname(__file__), '..')
sys.path.append(PYLIB_PATH)
import constants # pylint: disable=import-error
GYP_UTIL_PATH = os.path.join(os.path.dirname(__file__), '..', '..', 'gyp',
'util')
sys.path.append(GYP_UTIL_PATH)
import build_utils # pylint: disable=import-error
DEXDUMP_PATH = os.path.join(constants.ANDROID_SDK_TOOLS, 'dexdump')
# Annotations dict format:
# {
# 'empty-annotation-class-name': None,
# 'annotation-class-name': {
# 'fieldA': 'primitive-value',
# 'fieldB': [ 'array-item-1', 'array-item-2', ... ],
# 'fieldC': { # CURRENTLY UNSUPPORTED.
# /* Object value */
# 'field': 'primitive-value',
# 'field': [ 'array-item-1', 'array-item-2', ... ],
# 'field': { /* Object value */ }
# }
# }
# }
# Annotation data gathered for one class: `classAnnotations` is the
# annotations dict of the class itself, `methodsAnnotations` maps a method
# name to the annotations dict of that method.
Annotations = namedtuple('Annotations',
                         ('classAnnotations', 'methodsAnnotations'))

# Matches one "key=value" item of a space-separated sequence of them. The
# value may contain spaces: it extends lazily up to the next " key=" or to
# the end of the string.
_ANNOTATION_VALUE_MATCHER = re.compile(r'\w+=.*?(?:$|(?= \w+=))')
def Dump(apk_path):
  """Dumps class and method information from an APK via dexdump.

  The dex files selected by build_utils.ExtractAll with the '*classes*.dex'
  pattern are extracted into a temporary directory, each one is run through
  dexdump, and the resulting XML (plus the annotation text mixed into it) is
  parsed. The temporary directory is always removed before returning.

  Args:
    apk_path: An absolute path to an APK file to dump.

  Returns:
    A list with one entry per extracted dex file. Each entry is a dict in
    the following format:
      {
        <package_name>: {
          'classes': {
            <class_name>: {
              'methods': [<method_1>, <method_2>],
              'superclass': <string>,
              'is_abstract': <boolean>,
              'annotations': <Annotations>
            }
          }
        }
      }
  """
  # Characters that are not allowed in an XML document. Compiled once here
  # rather than once per dex file.
  bad_xml_chars = re.compile(
      u'[\x00-\x08\x0b-\x0c\x0e-\x1f\x7f-\x84\x86-\x9f' +
      u'\ud800-\udfff\ufdd0-\ufddf\ufffe-\uffff]')

  # Created before entering the try block: if mkdtemp() itself fails there is
  # nothing to clean up, and the finally clause must not hide that error
  # behind an unbound-name error.
  dexfile_dir = tempfile.mkdtemp()
  try:
    parsed_dex_files = []
    for dex_file in build_utils.ExtractAll(apk_path,
                                           dexfile_dir,
                                           pattern='*classes*.dex'):
      output_xml = cmd_helper.GetCmdOutput(
          [DEXDUMP_PATH, '-a', '-j', '-l', 'xml', dex_file])
      # Dexdump doesn't escape its XML output very well, so forbidden
      # characters are replaced with U+FFFD before parsing.
      clean_xml = bad_xml_chars.sub(u'\ufffd', output_xml)
      # Constructors are referenced as "<init>" in the annotations text,
      # which ElementTree would read as an opening tag that is never closed.
      clean_xml = clean_xml.replace('<init>', 'constructor')
      annotations = _ParseAnnotations(clean_xml)
      # etree is handed a byte string so that it can figure out the encoding
      # itself from the XML declaration.
      root = ElementTree.fromstring(clean_xml.encode('utf-8'))
      parsed_dex_files.append(_ParseRootNode(root, annotations))
    return parsed_dex_files
  finally:
    shutil.rmtree(dexfile_dir)
def _ParseAnnotationValues(values_str):
  """Turns the "key=value key2=value2" tail of an annotation into a dict.

  Args:
    values_str: The text that follows the annotation descriptor, e.g.
      'key1={ A B } key2=4104 key3=null'. May be empty or None.

  Returns:
    None when values_str is empty. Otherwise a dict with one entry per
    "key=value" item: a value written as '{ a b c }' becomes the list of its
    whitespace-separated items, any other value is kept as the raw string.
  """
  if not values_str:
    return None

  def _Decode(raw):
    # TODO: support for dicts if ever needed.
    is_array = raw.startswith('{ ') and raw.endswith(' }')
    return raw[2:-2].split() if is_array else raw

  parsed = {}
  for assignment in _ANNOTATION_VALUE_MATCHER.findall(values_str):
    # Every match contains a '=' (the pattern requires it), so partition
    # always splits at the first one.
    name, _, raw = assignment.partition('=')
    parsed[name] = _Decode(raw)
  return parsed
def _ParseAnnotations(dexRaw: str) -> Dict[int, Annotations]:
  """Parses the annotation text of a dexdump dump, keyed by class index.

  Annotations are written to the dex dump as human readable blocks of text,
  one block per class, each terminated by an empty line. The blocks are not
  nested inside the XML elements of the classes they describe, so all of
  them are collected here and associated with the classes afterwards by
  index.

  Example:
    Class #12 annotations:
    Annotations on class
      VISIBILITY_RUNTIME Ldalvik/annotation/EnclosingClass; value=...
    Annotations on method #512 'example'
      VISIBILITY_SYSTEM Ldalvik/annotation/Signature; value=...
      VISIBILITY_RUNTIME Landroidx/test/filters/SmallTest;
      VISIBILITY_RUNTIME Lorg/chromium/base/test/util/Feature; value={ Cronet }
      VISIBILITY_RUNTIME LFoo; key1={ A B } key2=4104 key3=null

  Only VISIBILITY_RUNTIME annotations are recorded, and only those on the
  class itself or on one of its methods. Annotations on anything else
  (including the parameters of a method) are ignored, as are lines that
  cannot be parsed.

  Args:
    dexRaw: The text output of dexdump.

  Returns:
    A dict mapping the index N of every "Class #N annotations:" header to an
    Annotations tuple. In that tuple, classAnnotations maps the simple name
    of each annotation on the class to its parsed values (None when the
    annotation has no values), and methodsAnnotations maps a method name to
    a dict of the same shape. Methods sharing a name share one entry; the
    last block seen for that name wins.
  """
  # Header of a class block; the group is the class index.
  # Eg: Class #12 annotations: -> 12
  class_header_matcher = re.compile(r'^Class #([0-9]+).*annotations:$')
  # Simple class name and value text of an annotation line. The name is
  # everything after the last slash of the descriptor (or after the leading
  # "L" when the class has no package) up to the semicolon. ';' is excluded
  # from the name so that a semicolon inside the values cannot extend it.
  # Eg: Ldalvik/annotation/Signature; value=... -> (Signature, value=...)
  annotation_matcher = re.compile(r'(?:/|\bL)([^/;\s]+); ?(.*)')

  annotations = {}
  # Annotations tuple of the class block being read; None between blocks.
  class_annotations = None
  # Dict that receives the next annotation lines; None while the lines being
  # read belong to something that is not recorded.
  target = None

  for line in dexRaw.splitlines():
    # A header always starts a new class block, even if the previous block
    # was not terminated by an empty line.
    header = class_header_matcher.match(line)
    if header:
      class_annotations = Annotations(classAnnotations={},
                                      methodsAnnotations={})
      annotations[int(header.group(1))] = class_annotations
      target = None
      continue

    if class_annotations is None:
      # Not inside a class block: keep looking for the next header.
      continue

    stripped = line.strip()
    if not stripped:
      # An empty line marks the end of the class block.
      class_annotations = None
      target = None
    elif line.startswith('Annotations on class'):
      target = class_annotations.classAnnotations
    elif line.startswith('Annotations on method'):
      # Eg: Annotations on method #512 'example' -> example
      target = None
      _, opening_quote, tail = line.partition("'")
      method, _, suffix = tail.partition("'")
      # "Annotations on method #512 'example' parameters" introduces the
      # annotations of the method's parameters. Those must not replace the
      # annotations recorded for the method itself.
      if opening_quote and suffix.strip() != 'parameters':
        target = {}
        class_annotations.methodsAnnotations[method] = target
    elif line.startswith('Annotations on'):
      # Any other kind of annotated element is ignored.
      target = None
    elif target is not None and stripped.startswith('VISIBILITY_RUNTIME'):
      # Only runtime annotations are kept, as those are the ones that affect
      # whether a test should be run (which is what this is used for).
      match = annotation_matcher.search(line)
      if match:
        annotation_name, values_str = match.groups()
        target[annotation_name] = _ParseAnnotationValues(values_str)

  return annotations
def _ParseRootNode(root, annotations: Dict[int, Annotations]):
  """Builds the per-package result dict from the root of dexdump's XML.

  Only a subset of the information in the dexdump output is used. The
  document is expected to look like this:
    <api>
      <package name="foo.bar">
        <class name="Class" extends="foo.bar.SuperClass">
          <field name="Field">
          </field>
          <constructor name="Method">
            <parameter name="Param" type="int">
            </parameter>
          </constructor>
          <method name="Method">
            <parameter name="Param" type="int">
            </parameter>
          </method>
        </class>
      </package>
    </api>

  Args:
    root: The root element of the parsed XML.
    annotations: Annotations keyed by class index.

  Returns:
    A dict mapping each package name to the dict produced for it by
    _ParsePackageNode. When a package name occurs more than once, the
    classes of the later occurrences are merged into the first one.
  """
  packages = {}
  # Annotations are looked up by the position of the class in the dump, so a
  # running class counter is threaded through all the packages.
  next_class_index = 0
  for node in root:
    if node.tag != 'package':
      continue
    name = node.attrib['name']
    parsed, next_class_index = _ParsePackageNode(node, next_class_index,
                                                 annotations)
    existing = packages.get(name)
    if existing is None:
      packages[name] = parsed
    else:
      existing['classes'].update(parsed['classes'])
  return packages
def _ParsePackageNode(package_node, classCount: int,
                      annotations: Dict[int, Annotations]):
  """Parses one <package> node of the dexdump xml output.

  Args:
    package_node: The <package> element.
    classCount: Index of the first class of this package, i.e. the number
      of classes parsed before it.
    annotations: Annotations keyed by class index.

  Returns:
    A tuple (package, classCount). `package` has the format:
      {
        'classes': {
          <class_1>: {
            'methods': [<method_1>, <method_2>],
            'superclass': <string>,
            'is_abstract': <boolean>,
            'annotations': <Annotations>
          },
          <class_2>: {
            ...
          },
        }
      }
    and `classCount` is the incoming count plus the number of <class>
    children of this package.
  """
  class_nodes = [node for node in package_node if node.tag == 'class']
  parsed_classes = {}
  # Classes are numbered consecutively, continuing from the incoming count.
  for index, node in enumerate(class_nodes, start=classCount):
    parsed = _ParseClassNode(node, index, annotations)
    parsed_classes[node.attrib['name']] = parsed
  return {'classes': parsed_classes}, classCount + len(class_nodes)
def _ParseClassNode(class_node, classIndex: int,
                    annotations: Dict[int, Annotations]):
  """Parses one <class> node of the dexdump xml output.

  Args:
    class_node: The <class> element.
    classIndex: Position of the class in the dump, used as the key into
      `annotations`.
    annotations: Annotations keyed by class index.

  Returns:
    A dict in the format:
      {
        'methods': [<method_1>, <method_2>],
        'superclass': <string>,
        'is_abstract': <boolean>,
        'annotations': <Annotations>
      }
    'methods' lists the names of the public <method> children only, and
    'annotations' is an empty Annotations tuple when none were recorded for
    classIndex.
  """
  public_methods = [
      node.attrib['name'] for node in class_node
      if node.tag == 'method' and node.attrib['visibility'] == 'public'
  ]
  attributes = class_node.attrib
  no_annotations = Annotations(classAnnotations={}, methodsAnnotations={})
  return dict(
      methods=public_methods,
      superclass=attributes['extends'],
      is_abstract=attributes.get('abstract') == 'true',
      annotations=annotations.get(classIndex, no_annotations),
  )
|