# This is a very simple DEX parser that extracts the bytecode of each method.
# Output format will be:
# <class name> <method name> <bytecode as hex string>
import sys
from binascii import hexlify
from struct import pack, unpack
sys.path.append('.')
from androguard.core.dex import DalvikPacker, readsleb128, readuleb128
def read_null_terminated(f):
    """Read bytes from *f* up to the next NUL byte.

    The stream is consumed one byte at a time so that, on return, its
    position is exactly one past the terminating NUL.

    Args:
        f: a binary file-like object whose ``read(1)`` returns ``bytes``.

    Returns:
        A ``bytearray`` with the bytes read, excluding the terminating NUL.

    Raises:
        EOFError: if the stream ends before a NUL byte is found.
    """
    collected = bytearray()
    while True:
        byte = f.read(1)
        if not byte:
            # read() returns b"" at end of stream; ord(b"") would fail with
            # an unrelated TypeError, so report the real problem instead.
            raise EOFError(
                "unexpected end of file while reading a NUL-terminated string"
            )
        if byte == b"\x00":
            return collected
        collected += byte
class MockClassManager:
    """Minimal stand-in object handed to the LEB128 readers.

    The only thing it offers is a ``packer`` attribute; this file passes an
    instance (``cm``) as the first argument of ``readuleb128`` and
    ``readsleb128``.
    """

    @property
    def packer(self):
        """Return a freshly built ``DalvikPacker`` on every access."""
        # NOTE(review): 0x12345678 equals the ENDIAN_CONSTANT of the DEX
        # header (little-endian files) -- confirm that this is how
        # DalvikPacker interprets its argument.
        endian_tag = 0x12345678
        return DalvikPacker(endian_tag)


# Shared module-level instance used by every LEB128 read in this file.
cm = MockClassManager()
class read_dex:
    """Parse a DEX file and collect the raw ``code_item`` of every method.

    All work happens in the constructor. Afterwards the instance exposes:

    * ``string_ids_size`` -- number of entries in the string table.
    * ``str_raw`` -- dict: string index -> ``bytearray`` with the raw
      (undecoded) string data, without the terminating NUL.
    * ``types`` -- dict: type index -> index of its descriptor string.
    * ``method_ids`` -- dict: method index ->
      ``[class descriptor, method name]`` (both ``str``).
    * ``methods`` -- dict: method index -> hex dump (``bytes``, as returned
      by ``binascii.hexlify``) of the complete ``code_item``, including
      its try items and catch handlers. Methods whose ``code_off`` is 0
      have no entry.
    """

    def __init__(self, fname):
        """Read and parse the DEX file at path *fname*.

        Raises:
            OSError: if the file cannot be opened.
            struct.error: if a fixed-size structure is truncated.
            UnicodeDecodeError: if a string is neither valid UTF-8 nor
                valid Modified UTF-8.
        """
        with open(fname, "rb") as f:
            # header_item: 8s magic, I checksum, 20s signature, 20 uints.
            header = unpack("<8sI20s20I", f.read(112))
            # Fields 9..20 are the (size, offset) pairs of the id tables;
            # the proto and field tables are not needed here.
            (
                self.string_ids_size,
                string_ids_off,
                type_ids_size,
                type_ids_off,
                _proto_ids_size,
                _proto_ids_off,
                _field_ids_size,
                _field_ids_off,
                method_ids_size,
                method_ids_off,
                class_defs_size,
                class_defs_off,
            ) = header[9:21]

            # Collect [method_idx, code_off] for every method of every class.
            methods = []
            for i in range(class_defs_size):
                # class_def_item: eight uints, class_data_off is the 7th.
                f.seek(class_defs_off + i * 8 * 4)
                class_data_off = unpack("<8I", f.read(8 * 4))[6]
                if class_data_off == 0:
                    # No class_data_item, hence no methods to collect.
                    continue
                methods.extend(self._read_class_methods(f, class_data_off))

            # Read the string section.
            strings = {}
            self.str_raw = {}
            for i in range(self.string_ids_size):
                f.seek(string_ids_off + i * 4)
                (string_data_off,) = unpack("<I", f.read(4))
                f.seek(string_data_off)
                # utf16_size prefix: skipped, the data is NUL-terminated.
                readuleb128(cm, f)
                raw = read_null_terminated(f)
                self.str_raw[i] = raw
                strings[i] = self._decode_mutf8(raw)

            # Read the type section.
            self.types = {}
            for i in range(type_ids_size):
                f.seek(type_ids_off + i * 4)
                (descriptor_idx,) = unpack("<I", f.read(4))
                self.types[i] = descriptor_idx

            # Read the method_id section and resolve the names.
            self.method_ids = {}
            for i in range(method_ids_size):
                f.seek(method_ids_off + i * 8)
                class_idx, _proto_idx, name_idx = unpack("<HHI", f.read(8))
                self.method_ids[i] = [
                    strings[self.types[class_idx]],
                    strings[name_idx],
                ]

            # Dump the code_item of every method that has one.
            dumped = {}
            for method_idx, code_off in methods:
                if code_off == 0:
                    continue
                dumped[method_idx] = self._read_code_item(f, code_off)
        self.methods = dumped

    @staticmethod
    def _decode_mutf8(raw):
        """Decode DEX string data (Modified UTF-8) into a ``str``.

        Data that is already valid UTF-8 is decoded as such. Otherwise the
        two MUTF-8 peculiarities are handled: NUL encoded as ``C0 80`` and
        supplementary characters encoded as two 3-byte surrogates.
        """
        data = bytes(raw)
        try:
            return data.decode("utf-8")
        except UnicodeDecodeError:
            pass
        # 0xC0 is never a continuation byte, so this replace cannot split
        # another multi-byte sequence.
        text = data.replace(b"\xc0\x80", b"\x00").decode(
            "utf-8", "surrogatepass"
        )
        # A UTF-16 round trip joins surrogate pairs into single code
        # points; lone surrogates pass through unchanged.
        return text.encode("utf-16-le", "surrogatepass").decode(
            "utf-16-le", "surrogatepass"
        )

    @staticmethod
    def _read_encoded_methods(f, count):
        """Read *count* encoded_method entries at the current position.

        Returns a list of ``[method_idx, code_off]``. Indices are stored as
        differences, so they are accumulated starting from 0.
        """
        found = []
        method_idx = 0
        for _ in range(count):
            method_idx += readuleb128(cm, f)  # method_idx_diff
            readuleb128(cm, f)  # access_flags (not needed)
            code_off = readuleb128(cm, f)
            found.append([method_idx, code_off])
        return found

    @staticmethod
    def _read_class_methods(f, class_data_off):
        """Parse the class_data_item at *class_data_off*.

        Returns ``[method_idx, code_off]`` for its direct methods followed
        by its virtual methods.
        """
        f.seek(class_data_off)
        static_fields_size = readuleb128(cm, f)
        instance_fields_size = readuleb128(cm, f)
        direct_methods_size = readuleb128(cm, f)
        virtual_methods_size = readuleb128(cm, f)
        # The fields are not needed, but must be consumed to reach the
        # methods: each one is a (field_idx_diff, access_flags) pair.
        for _ in range(static_fields_size + instance_fields_size):
            readuleb128(cm, f)
            readuleb128(cm, f)
        # The two lists follow each other in the file and each restarts
        # its index accumulation, so read them strictly in this order.
        direct = read_dex._read_encoded_methods(f, direct_methods_size)
        virtual = read_dex._read_encoded_methods(f, virtual_methods_size)
        return direct + virtual

    @staticmethod
    def _read_code_item(f, code_off):
        """Return the hex dump of the complete code_item at *code_off*.

        The item is walked field by field only to find out how long it is;
        then the whole byte range is read again and hexlified.
        """
        f.seek(code_off)
        (
            _registers_size,
            _ins_size,
            _outs_size,
            tries_size,
            _debug_info_off,
            insns_size,
        ) = unpack("<4HII", f.read(4 * 2 + 2 * 4))
        # The unpack() calls below discard their result on purpose: they
        # skip the data and still fail loudly on a truncated file.
        unpack("<{}H".format(insns_size), f.read(2 * insns_size))
        if tries_size > 0:
            if insns_size % 2 == 1:
                # Two padding bytes follow an odd number of instructions.
                unpack("<H", f.read(2))
            # try_item[tries_size]
            unpack("<" + "IHH" * tries_size, f.read(8 * tries_size))
            # encoded_catch_handler_list
            handler_count = readuleb128(cm, f)
            for _handler in range(handler_count):
                # encoded_catch_handler: abs(size) typed pairs, plus a
                # catch-all address when size is not positive.
                handler_size = readsleb128(cm, f)
                for _pair in range(abs(handler_size)):
                    # encoded_type_addr_pair
                    readuleb128(cm, f)
                    readuleb128(cm, f)
                if handler_size <= 0:
                    readuleb128(cm, f)  # catch_all_addr
        length = f.tell() - code_off
        f.seek(code_off)
        return hexlify(f.read(length))
if __name__ == "__main__":
    # Parse the DEX file named by the first command-line argument.
    parsed = read_dex(sys.argv[1])
    # Walk every (method index, hex-encoded code_item) pair. Printing is
    # currently disabled, so the loop body is intentionally empty.
    for midx, buff in parsed.methods.items():
        pass
        # print(midx, buff)
|