File: binxml.go

package info (click to toggle)
golang-github-avast-apkparser 0.0~git20190516.3b8c5ef-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, sid
  • size: 244 kB
  • sloc: xml: 101; sh: 6; makefile: 2
file content (359 lines) | stat: -rw-r--r-- 10,276 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
package apkparser

import (
	"bytes"
	"encoding/binary"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"io/ioutil"
	"strconv"
	"strings"
	"unsafe"
)

type binxmlParseInfo struct {
	strings     stringTable
	resourceIds []uint32

	encoder ManifestEncoder
	res     *ResourceTable
}

// Some samples have manifest in plaintext, this is an error.
// 2c882a2376034ed401be082a42a21f0ac837689e7d3ab6be0afb82f44ca0b859
var ErrPlainTextManifest = errors.New("xml is in plaintext, binary form expected")

// Deprecated: just calls ParseXML
func ParseManifest(r io.Reader, enc ManifestEncoder, resources *ResourceTable) error {
	return ParseXml(r, enc, resources)
}

// Parse the binary Xml format. The resources are optional and can be nil.
func ParseXml(r io.Reader, enc ManifestEncoder, resources *ResourceTable) error {
	x := binxmlParseInfo{
		encoder: enc,
		res:     resources,
	}

	id, headerLen, totalLen, err := parseChunkHeader(r)
	if err != nil {
		return err
	}

	if (id & 0xFF) == '<' {
		buf := bytes.NewBuffer(make([]byte, 0, 8))
		binary.Write(buf, binary.LittleEndian, &id)
		binary.Write(buf, binary.LittleEndian, &headerLen)
		binary.Write(buf, binary.LittleEndian, &totalLen)

		if s := buf.String(); strings.HasPrefix(s, "<?xml ") || strings.HasPrefix(s, "<manif") {
			return ErrPlainTextManifest
		}
	}

	// Android doesn't care.
	/*if id != chunkAxmlFile {
	    return fmt.Errorf("Invalid top chunk id: 0x%08x", id)
	}*/

	defer x.encoder.Flush()

	totalLen -= chunkHeaderSize

	var len uint32
	var lastId uint16
	for i := uint32(0); i < totalLen; i += len {
		id, _, len, err = parseChunkHeader(r)
		if err != nil {
			return fmt.Errorf("Error parsing header at 0x%08x of 0x%08x %08x: %s", i, totalLen, lastId, err.Error())
		}

		lastId = id

		lm := &io.LimitedReader{R: r, N: int64(len) - 2*4}

		switch id {
		case chunkStringTable:
			x.strings, err = parseStringTable(lm)
		case chunkResourceIds:
			err = x.parseResourceIds(lm)
		default:
			if (id & chunkMaskXml) == 0 {
				err = fmt.Errorf("Unknown chunk id 0x%x", id)
				break
			}

			// skip line number and unknown 0xFFFFFFFF
			if _, err = io.CopyN(ioutil.Discard, lm, 2*4); err != nil {
				break
			}

			switch id {
			case chunkXmlNsStart:
				err = x.parseNsStart(lm)
			case chunkXmlNsEnd:
				err = x.parseNsEnd(lm)
			case chunkXmlTagStart:
				err = x.parseTagStart(lm)
			case chunkXmlTagEnd:
				err = x.parseTagEnd(lm)
			case chunkXmlText:
				err = x.parseText(lm)
			default:
				err = fmt.Errorf("Unknown chunk id 0x%x", id)
			}
		}

		if err == ErrEndParsing {
			break
		} else if err != nil {
			return fmt.Errorf("Chunk: 0x%08x: %s", id, err.Error())
		} else if lm.N != 0 {
			return fmt.Errorf("Chunk: 0x%08x: was not fully read", id)
		}
	}

	return x.encoder.Flush()
}

func (x *binxmlParseInfo) parseResourceIds(r *io.LimitedReader) error {
	if (r.N % 4) != 0 {
		return fmt.Errorf("Invalid chunk size!")
	}

	count := uint32(r.N / 4)
	var id uint32
	for i := uint32(0); i < count; i++ {
		if err := binary.Read(r, binary.LittleEndian, &id); err != nil {
			return err
		}
		x.resourceIds = append(x.resourceIds, id)
	}
	return nil
}

func (x *binxmlParseInfo) parseNsStart(r *io.LimitedReader) error {
	var err error
	ns := &xml.Name{}

	var idx uint32
	if err = binary.Read(r, binary.LittleEndian, &idx); err != nil {
		return err
	}

	if ns.Local, err = x.strings.get(idx); err != nil {
		return err
	}

	if err = binary.Read(r, binary.LittleEndian, &idx); err != nil {
		return err
	}

	if ns.Space, err = x.strings.get(idx); err != nil {
		return err
	}

	// TODO: what to do with this?
	_ = ns
	return nil
}

func (x *binxmlParseInfo) parseNsEnd(r *io.LimitedReader) error {
	if _, err := io.CopyN(ioutil.Discard, r, 2*4); err != nil {
		return fmt.Errorf("error skipping: %s", err.Error())
	}

	// TODO: what to do with this?
	return nil
}

func (x *binxmlParseInfo) parseTagStart(r *io.LimitedReader) error {
	var namespaceIdx, nameIdx, attrCnt, classAttrIdx uint32

	if err := binary.Read(r, binary.LittleEndian, &namespaceIdx); err != nil {
		return fmt.Errorf("error reading namespace idx: %s", err.Error())
	}

	if err := binary.Read(r, binary.LittleEndian, &nameIdx); err != nil {
		return fmt.Errorf("error reading name idx: %s", err.Error())
	}

	if _, err := io.CopyN(ioutil.Discard, r, 4); err != nil {
		return fmt.Errorf("error skipping flag: %s", err.Error())
	}

	if err := binary.Read(r, binary.LittleEndian, &attrCnt); err != nil {
		return fmt.Errorf("error reading attrCnt: %s", err.Error())
	}

	if err := binary.Read(r, binary.LittleEndian, &classAttrIdx); err != nil {
		return fmt.Errorf("error reading classAttr: %s", err.Error())
	}

	idAttributeIdx := (attrCnt >> 16) - 1
	attrCnt = (attrCnt & 0xFFFF)

	styleAttrIdx := (classAttrIdx >> 16) - 1
	classAttrIdx = (classAttrIdx & 0xFFFF)

	_ = styleAttrIdx
	_ = idAttributeIdx

	namespace, err := x.strings.get(namespaceIdx)
	if err != nil {
		return fmt.Errorf("error decoding namespace: %s", err.Error())
	}

	name, err := x.strings.get(nameIdx)
	if err != nil {
		return fmt.Errorf("error decoding name: %s", err.Error())
	}

	tok := xml.StartElement{
		Name: xml.Name{Local: name, Space: namespace},
	}

	var attrData [attrValuesCount]uint32
	for i := uint32(0); i < attrCnt; i++ {
		if err := binary.Read(r, binary.LittleEndian, &attrData); err != nil {
			return fmt.Errorf("error reading attrData: %s", err.Error())
		}

		// Android actually reads attributes purely by their IDs (see frameworks/base/core/res/res/values/attrs_manifest.xml
		// and its generated R class, that's where the indexes come from, namely the AndroidManifestActivity array)
		// but good guy android actually puts the strings into the string table on the same indexes anyway, most of the time.
		// This is for the samples that don't have it, mostly due to obfuscators/minimizers.
		// The ID can't change, because it would break current APKs.
		// Sample: 98d2e837b8f3ac41e74b86b2d532972955e5352197a893206ecd9650f678ae31
		//
		// The exception to this rule is the "package" attribute in the root manifest tag. That one MUST NOT use
		// resource ids, instead, it needs to use the string table. The meta attrs 'platformBuildVersion*'
		// are the same, except Android never parses them so it's just for manual analysis.
		// Sample: a3ee88cf1492237a1be846df824f9de30a6f779973fe3c41c7d7ed0be644ba37
		//
		// In general, android doesn't care about namespaces, but if a resource ID is used, it has to have been
		// in the android: namespace, so we fix that up.

		// frameworks/base/core/jni/android_util_AssetManager.cpp android_content_AssetManager_retrieveAttributes
		// frameworks/base/core/java/android/content/pm/PackageParser.java parsePackageSplitNames
		var attrName string
		if attrData[attrIdxName] < uint32(len(x.resourceIds)) {
			attrName = getAttributteName(x.resourceIds[attrData[attrIdxName]])
		}

		var attrNameFromStrings string
		if attrName == "" || name == "manifest" {
			attrNameFromStrings, err = x.strings.get(attrData[attrIdxName])
			if err != nil {
				if attrName == "" {
					return fmt.Errorf("error decoding attrNameIdx: %s", err.Error())
				}
			} else if attrName != "" && attrNameFromStrings != "package" && !strings.HasPrefix(attrNameFromStrings, "platformBuildVersion") {
				attrNameFromStrings = ""
			}
		}

		attrNameSpace, err := x.strings.get(attrData[attrIdxNamespace])
		if err != nil {
			return fmt.Errorf("error decoding attrNamespaceIdx: %s", err.Error())
		}

		if attrNameFromStrings != "" {
			attrName = attrNameFromStrings
		} else if attrNameSpace == "" {
			attrNameSpace = "http://schemas.android.com/apk/res/android"
		}

		attr := xml.Attr{
			Name: xml.Name{Local: attrName, Space: attrNameSpace},
		}

		switch attrData[attrIdxType] >> 24 {
		case AttrTypeString:
			attr.Value, err = x.strings.get(attrData[attrIdxString])
			if err != nil {
				return fmt.Errorf("error decoding attrStringIdx: %s", err.Error())
			}
		case AttrTypeIntBool:
			attr.Value = strconv.FormatBool(attrData[attrIdxData] != 0)
		case AttrTypeIntHex:
			attr.Value = fmt.Sprintf("0x%x", attrData[attrIdxData])
		case AttrTypeFloat:
			val := (*float32)(unsafe.Pointer(&attrData[attrIdxData]))
			attr.Value = fmt.Sprintf("%g", *val)
		case AttrTypeReference:
			isValidString := false
			if x.res != nil {
				cfg := ConfigFirst
				if attr.Name.Local == "icon" {
					cfg = ConfigPngIcon
				}

				e, err := x.res.GetResourceEntryEx(attrData[attrIdxData], cfg)
				if err == nil {
					for i := 0; e.value.dataType == AttrTypeReference && i < 5; i++ {
						lower, err := x.res.GetResourceEntryEx(e.value.data, cfg)
						if err != nil {
							break
						}
						e = lower
					}
					attr.Value, err = e.value.String()
					isValidString = err == nil
				}
			}

			if !isValidString && attr.Value == "" {
				attr.Value = fmt.Sprintf("@%x", attrData[attrIdxData])
			}
		default:
			attr.Value = strconv.FormatInt(int64(int32(attrData[attrIdxData])), 10)
		}
		tok.Attr = append(tok.Attr, attr)
	}

	return x.encoder.EncodeToken(tok)
}

func (x *binxmlParseInfo) parseTagEnd(r *io.LimitedReader) error {
	var namespaceIdx, nameIdx uint32
	if err := binary.Read(r, binary.LittleEndian, &namespaceIdx); err != nil {
		return fmt.Errorf("error reading namespace idx: %s", err.Error())
	}

	if err := binary.Read(r, binary.LittleEndian, &nameIdx); err != nil {
		return fmt.Errorf("error reading name idx: %s", err.Error())
	}

	namespace, err := x.strings.get(namespaceIdx)
	if err != nil {
		return fmt.Errorf("error decoding namespace: %s", err.Error())
	}

	name, err := x.strings.get(nameIdx)
	if err != nil {
		return fmt.Errorf("error decoding name: %s", err.Error())
	}

	return x.encoder.EncodeToken(xml.EndElement{Name: xml.Name{Local: name, Space: namespace}})
}

func (x *binxmlParseInfo) parseText(r *io.LimitedReader) error {
	var idx uint32
	if err := binary.Read(r, binary.LittleEndian, &idx); err != nil {
		return fmt.Errorf("error reading idx: %s", err.Error())
	}

	text, err := x.strings.get(idx)
	if err != nil {
		return fmt.Errorf("error decoding idx: %s", err.Error())
	}

	if _, err := io.CopyN(ioutil.Discard, r, 2*4); err != nil {
		return fmt.Errorf("error skipping: %s", err.Error())
	}

	return x.encoder.EncodeToken(xml.CharData(text))
}