File: dalvik_bytecode.py

package info (click to toggle)
chromium 145.0.7632.109-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 5,974,804 kB
  • sloc: cpp: 36,197,696; ansic: 7,602,761; javascript: 3,563,590; python: 1,649,324; xml: 838,427; asm: 717,087; pascal: 185,708; sh: 88,786; perl: 88,718; objc: 79,984; sql: 59,811; cs: 42,452; fortran: 24,101; makefile: 21,022; tcl: 15,277; php: 14,022; yacc: 9,066; ruby: 7,553; awk: 3,720; lisp: 3,233; lex: 1,328; ada: 727; jsp: 228; sed: 36
file content (337 lines) | stat: -rw-r--r-- 8,606 bytes parent folder | download | duplicates (12)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
# Copyright 2022 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Utilities for parsing Dalvik bytecode."""

import collections
import struct

# Dalvik bytecode specs, copied from the first two columns of the table in:
#   https://source.android.com/docs/core/runtime/dalvik-bytecode#instructions
# with minor modifications (comments truncated to short parenthesized remarks).
_DALVIK_BYTECODE_SPECS = """00 10x  nop
01 12x  move vA, vB
02 22x  move/from16 vAA, vBBBB
03 32x  move/16 vAAAA, vBBBB
04 12x  move-wide vA, vB
05 22x  move-wide/from16 vAA, vBBBB
06 32x  move-wide/16 vAAAA, vBBBB
07 12x  move-object vA, vB
08 22x  move-object/from16 vAA, vBBBB
09 32x  move-object/16 vAAAA, vBBBB
0a 11x  move-result vAA
0b 11x  move-result-wide vAA
0c 11x  move-result-object vAA
0d 11x  move-exception vAA
0e 10x  return-void
0f 11x  return vAA
10 11x  return-wide vAA
11 11x  return-object vAA
12 11n  const/4 vA, #+B
13 21s  const/16 vAA, #+BBBB
14 31i  const vAA, #+BBBBBBBB
15 21h  const/high16 vAA, #+BBBB0000
16 21s  const-wide/16 vAA, #+BBBB
17 31i  const-wide/32 vAA, #+BBBBBBBB
18 51l  const-wide vAA, #+BBBBBBBBBBBBBBBB
19 21h  const-wide/high16 vAA, #+BBBB000000000000
1a 21c  const-string vAA, string@BBBB
1b 31c  const-string/jumbo vAA, string@BBBBBBBB
1c 21c  const-class vAA, type@BBBB
1d 11x  monitor-enter vAA
1e 11x  monitor-exit vAA
1f 21c  check-cast vAA, type@BBBB
20 22c  instance-of vA, vB, type@CCCC
21 12x  array-length vA, vB
22 21c  new-instance vAA, type@BBBB
23 22c  new-array vA, vB, type@CCCC
24 35c  filled-new-array {vC, vD, vE, vF, vG}, type@BBBB
25 3rc  filled-new-array/range {vCCCC .. vNNNN}, type@BBBB
26 31t  fill-array-data vAA, +BBBBBBBB (with supplemental data...)
27 11x  throw vAA
28 10t  goto +AA
29 20t  goto/16 +AAAA
2a 30t  goto/32 +AAAAAAAA
2b 31t  packed-switch vAA, +BBBBBBBB (with supplemental data...)
2c 31t  sparse-switch vAA, +BBBBBBBB (with supplemental data...)
2d..31 23x  cmpkind vAA, vBB, vCC
2d: cmpl-float (lt bias)
2e: cmpg-float (gt bias)
2f: cmpl-double (lt bias)
30: cmpg-double (gt bias)
31: cmp-long
32..37 22t  if-test vA, vB, +CCCC
32: if-eq
33: if-ne
34: if-lt
35: if-ge
36: if-gt
37: if-le
38..3d 21t  if-testz vAA, +BBBB
38: if-eqz
39: if-nez
3a: if-ltz
3b: if-gez
3c: if-gtz
3d: if-lez
3e..43 10x  (unused)
44..51 23x  arrayop vAA, vBB, vCC
44: aget
45: aget-wide
46: aget-object
47: aget-boolean
48: aget-byte
49: aget-char
4a: aget-short
4b: aput
4c: aput-wide
4d: aput-object
4e: aput-boolean
4f: aput-byte
50: aput-char
51: aput-short
52..5f 22c  iinstanceop vA, vB, field@CCCC
52: iget
53: iget-wide
54: iget-object
55: iget-boolean
56: iget-byte
57: iget-char
58: iget-short
59: iput
5a: iput-wide
5b: iput-object
5c: iput-boolean
5d: iput-byte
5e: iput-char
5f: iput-short
60..6d 21c  sstaticop vAA, field@BBBB
60: sget
61: sget-wide
62: sget-object
63: sget-boolean
64: sget-byte
65: sget-char
66: sget-short
67: sput
68: sput-wide
69: sput-object
6a: sput-boolean
6b: sput-byte
6c: sput-char
6d: sput-short
6e..72 35c  invoke-kind {vC, vD, vE, vF, vG}, meth@BBBB
6e: invoke-virtual
6f: invoke-super
70: invoke-direct
71: invoke-static
72: invoke-interface
73 10x  (unused)
74..78 3rc  invoke-kind/range {vCCCC .. vNNNN}, meth@BBBB
74: invoke-virtual/range
75: invoke-super/range
76: invoke-direct/range
77: invoke-static/range
78: invoke-interface/range
79..7a 10x  (unused)
7b..8f 12x  unop vA, vB
7b: neg-int
7c: not-int
7d: neg-long
7e: not-long
7f: neg-float
80: neg-double
81: int-to-long
82: int-to-float
83: int-to-double
84: long-to-int
85: long-to-float
86: long-to-double
87: float-to-int
88: float-to-long
89: float-to-double
8a: double-to-int
8b: double-to-long
8c: double-to-float
8d: int-to-byte
8e: int-to-char
8f: int-to-short
90..af 23x  binop vAA, vBB, vCC
90: add-int
91: sub-int
92: mul-int
93: div-int
94: rem-int
95: and-int
96: or-int
97: xor-int
98: shl-int
99: shr-int
9a: ushr-int
9b: add-long
9c: sub-long
9d: mul-long
9e: div-long
9f: rem-long
a0: and-long
a1: or-long
a2: xor-long
a3: shl-long
a4: shr-long
a5: ushr-long
a6: add-float
a7: sub-float
a8: mul-float
a9: div-float
aa: rem-float
ab: add-double
ac: sub-double
ad: mul-double
ae: div-double
af: rem-double
b0..cf 12x  binop/2addr vA, vB
b0: add-int/2addr
b1: sub-int/2addr
b2: mul-int/2addr
b3: div-int/2addr
b4: rem-int/2addr
b5: and-int/2addr
b6: or-int/2addr
b7: xor-int/2addr
b8: shl-int/2addr
b9: shr-int/2addr
ba: ushr-int/2addr
bb: add-long/2addr
bc: sub-long/2addr
bd: mul-long/2addr
be: div-long/2addr
bf: rem-long/2addr
c0: and-long/2addr
c1: or-long/2addr
c2: xor-long/2addr
c3: shl-long/2addr
c4: shr-long/2addr
c5: ushr-long/2addr
c6: add-float/2addr
c7: sub-float/2addr
c8: mul-float/2addr
c9: div-float/2addr
ca: rem-float/2addr
cb: add-double/2addr
cc: sub-double/2addr
cd: mul-double/2addr
ce: div-double/2addr
cf: rem-double/2addr
d0..d7 22s  binop/lit16 vA, vB, #+CCCC
d0: add-int/lit16
d1: rsub-int (reverse subtract)
d2: mul-int/lit16
d3: div-int/lit16
d4: rem-int/lit16
d5: and-int/lit16
d6: or-int/lit16
d7: xor-int/lit16
d8..e2 22b  binop/lit8 vAA, vBB, #+CC
d8: add-int/lit8
d9: rsub-int/lit8
da: mul-int/lit8
db: div-int/lit8
dc: rem-int/lit8
dd: and-int/lit8
de: or-int/lit8
df: xor-int/lit8
e0: shl-int/lit8
e1: shr-int/lit8
e2: ushr-int/lit8
e3..f9 10x  (unused)
fa 45cc invoke-polymorphic {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
fb 4rcc invoke-polymorphic/range {vCCCC .. vNNNN}, meth@BBBB, proto@HHHH
fc 35c  invoke-custom {vC, vD, vE, vF, vG}, call_site@BBBB
fd 3rc  invoke-custom/range {vCCCC .. vNNNN}, call_site@BBBB
fe 21c  const-method-handle vAA, method_handle@BBBB
ff 21c  const-method-type vAA, proto@BBBB
"""

# Record describing a single opcode:
#   op:     Opcode value (0..255).
#   size:   Instruction size in bytes, e.g. 4 for format '21h'.
#   format: Instruction format id, e.g. '21h'.
#   name:   Mnemonic, e.g. 'const/high16'; None for unused opcodes.
#   params: Operand syntax, e.g. 'vAA, #+BBBB0000'; None for unused opcodes.
DalvikByteCode = collections.namedtuple(
    'DalvikByteCode', ['op', 'size', 'format', 'name', 'params'])


def _ParseByteCodeSpecs():
  """Parses _DALVIK_BYTECODE_SPECS into a list of DalvikByteCode.

  Three kinds of spec lines are recognized:
    * Standalone op: '15 21h  const/high16 vAA, #+BBBB0000'.
    * Op range:      'b0..cf 12x  binop/2addr vA, vB'.
    * Range member:  'b0: add-int/2addr' (must follow its op range line).
  A trailing ' (...)' remark, e.g. ' (lt bias)' or ' (unused)', is dropped
  before a line is classified.

  Returns:
    A list of 256 DalvikByteCode tuples, indexed by opcode. |size| is the
    leading digit of the format, doubled (e.g. '21h' -> 4). Unused opcodes have
    |name| and |params| set to None; ops without operands have |params| == ''.
  """
  format_map = [None] * 256
  name_map = [None] * 256
  params_map = [None] * 256
  (op_lo, op_hi) = (None, None)
  for line in _DALVIK_BYTECODE_SPECS.splitlines():
    # Drop trailing remarks; this also turns '(unused)' entries into lines that
    # carry only the opcode(s) and the format.
    comment_pos = line.find(' (')
    if comment_pos >= 0:
      line = line[:comment_pos]
    assert len(line) >= 5, line
    if line[2] == ':':
      # Inside op range, e.g.: 'b0: add-int/2addr'.
      # ['b0', 'add-int/2addr'].
      toks = line.split(': ')
      assert len(toks) == 2, line
      op = int(toks[0], 16)
      # A member line is only meaningful while a range is open; check that
      # explicitly instead of failing on a None comparison.
      assert op_lo is not None and op_lo <= op <= op_hi, line
      name_map[op] = toks[1]  # 'add-int/2addr'.
      if op == op_hi:
        op_lo = op_hi = None
    elif line[2:4] == '..':
      # Define op range, e.g.: 'b0..cf 12x  binop/2addr vA, vB'.
      # ['b0..cf', '12x', 'binop/2addr', 'vA, vB'].
      toks = line.split(maxsplit=3)
      # (0xb0, 0xcf).
      (op_lo, op_hi) = (int(t, 16) for t in toks[0].split('..'))
      for op in range(op_lo, op_hi + 1):
        format_map[op] = toks[1]  # '12x'.
      if len(toks) > 2:  # If not unused.
        # Mirror the standalone case: a named range without operands gets ''
        # rather than raising IndexError.
        params = toks[3] if len(toks) >= 4 else ''  # 'vA, vB'.
        for op in range(op_lo, op_hi + 1):
          params_map[op] = params
    else:
      # Standalone op, e.g.: '15 21h  const/high16 vAA, #+BBBB0000'.
      # ['15', '21h', 'const/high16', 'vAA, #+BBBB0000'].
      toks = line.split(maxsplit=3)
      op = int(toks[0], 16)
      format_map[op] = toks[1]  # '21h'.
      if len(toks) > 2:  # If not unused.
        name_map[op] = toks[2]  # 'const/high16'.
        params_map[op] = toks[3] if len(toks) >= 4 else ''  # 'vAA, #+BBBB0000'.

  ret = []
  for op in range(256):
    fmt = format_map[op]
    # Every opcode must be covered by the spec table.
    assert fmt is not None, 'No spec for opcode 0x%02x' % op
    size = int(fmt[0]) * 2  # '21h' -> 4.
    ret.append(DalvikByteCode(op, size, fmt, name_map[op], params_map[op]))
  return ret


# List of 256 DalvikByteCode entries, indexed by opcode value. Built once when
# this module is imported.
DALVIK_INSTRUCTIONS = _ParseByteCodeSpecs()


def Split(insns):
  """Yields the bytes of each Dalvik instruction found in `insns`, in order.

  Only instruction boundaries are computed; filtering and disassembly of the
  emitted bytes are left to the caller. Supplemental data referenced by 31t
  instructions {fill-array-data, packed-switch, sparse-switch} is assumed to
  sit at the end of `insns`; it is detected and not emitted.

  Args:
    insns: Even-length bytearray data containing valid Dalvik code.

  Yields:
    A slice of `insns` holding one instruction (opcode plus operands).
  """
  code_end = len(insns)
  assert code_end % 2 == 0
  cursor = 0
  while cursor < code_end:
    spec = DALVIK_INSTRUCTIONS[insns[cursor]]
    next_cursor = cursor + spec.size
    raw = insns[cursor:next_cursor]
    if spec.format == '31t':
      # The operand at byte 2 is the offset (in 2-byte units, relative to this
      # instruction) of the supplemental data. Code cannot extend past the
      # earliest such location.
      (rel_units, ) = struct.unpack_from('<L', raw, 2)
      data_start = cursor + rel_units * 2
      if data_start < code_end:
        code_end = data_start
    yield raw
    cursor = next_cursor
  # Everything from |code_end| onwards is supplemental data: not emitted.