File: droidutil.py

package info (click to toggle)
droidlysis 3.2.1-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, sid
  • size: 340 kB
  • sloc: python: 1,816; makefile: 3
file content (302 lines) | stat: -rw-r--r-- 10,365 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
import os
import errno
import re
import shutil
import magic
import xml.dom.minidom
import hashlib
from collections import defaultdict

"""Those are my own utilities for sample analysis"""

def mkdir_if_necessary(path):
    """Creates the directory (and any missing parent) if it does not exist yet.
    If it exists, does not do anything.
    If path is None (not filled), does not do anything.

    Raises OSError if the directory cannot be created, in particular when
    path already exists but is not a directory."""
    if path is None:
        return

    try:
        os.makedirs(path)
    except OSError as exc:
        # An already existing directory is the only error we tolerate.
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise

def on_rm_tree_error(fn, path, exc_info):
    """
    Error handler for ``shutil.rmtree``.

    rmtree fails in particular if the file to delete is read-only.
    To remove it, we grant all permissions on the path and then retry the
    operation which failed.

    fn is the function which raised. Directory removal is reported as
    os.rmdir; file removal is reported as os.remove or os.unlink, so both
    are accepted here.
    path is the path which was given to fn.
    exc_info is not used.

    Errors reported for any other function are left alone (nothing is retried).
    If the retry fails too, its exception is propagated.

    Usage : ``shutil.rmtree(path, onerror=on_rm_tree_error)``
    """
    if fn is os.rmdir:
        retry = os.rmdir
    elif fn is os.remove or fn is os.unlink:
        retry = os.remove
    else:
        return

    # The mode must be octal: decimal 777 is 0o1411, which does not grant rwx to everyone.
    os.chmod(path, 0o777)
    retry(path)

def move_dir(src,dst):
    """Move the content of the src directory to dst, then delete src.
    Works even if dst already exists.

    The entries of src are handed to the 'mv' command as separate arguments
    (no shell is involved), so paths containing spaces or shell
    metacharacters are handled correctly. Hidden entries (names starting
    with a dot) are moved too.

    This is best effort: the exit status of mv is not checked, and src is
    removed afterwards in any case, like it always was.

    Raises AssertionError if src is not an existing directory."""
    # Local import: subprocess is only needed here.
    import subprocess

    assert os.path.isdir(src), "src must be an existing directory"

    entries = [os.path.join(src, name) for name in sorted(os.listdir(src))]
    if entries:
        # '--' ends option parsing, in case a path starts with a dash
        subprocess.call(["mv", "--"] + entries + [dst])
    shutil.rmtree(src, onerror=on_rm_tree_error)

def sanitize_filename(filename):
    """Sanitizes a filename so that we can create the output analysis directory without any problem.
    We need to consider we might have filenames with Russian or Chinese characters:
    those characters are simply dropped, so the result may be shorter than
    the input, or even empty.

    filename is only the 'basename' not an absolute path

    Returns the sanitized name."""
    # we remove any character which is not an ASCII letter, a digit, _ or .
    # (raw string: '\.' in a normal string literal is an invalid escape sequence)
    return re.sub(r'[^a-zA-Z0-9_.]', '', filename)

def listAll(dirName):
    """Returns the list of all files found in dirName and, recursively, in
    its sub-directories.

    Each returned path is dirName joined with the relative location of the
    file. Any entry which is not a regular file is handled as a directory
    and listed in turn."""
    collected = []
    for entry in os.listdir(dirName):
        full_path = os.path.join(dirName, entry)
        if not os.path.isfile(full_path):
            # not a regular file: go down into it
            collected += listAll(full_path)
            continue
        collected.append(full_path)
    return collected



def count_filedirs(dirname):
    """Recursively counts the directories and the files located below dirname.
    dirname itself is not counted, and it must be readable.

    Returns a tuple:
    (nb of directories, nb of files)

    Roughly the same as counting the output of: find ./smali -type d -print
    or -type f
    """
    assert os.access(dirname, os.R_OK), "Can't access directory: "+dirname

    subdirs = []
    nb_files = 0
    for name in os.listdir(dirname):
        full_path = os.path.join(dirname, name)
        if os.path.isdir(full_path):
            subdirs.append(full_path)
        elif os.path.isfile(full_path):
            nb_files += 1

    nb_dirs = len(subdirs)
    for subdir in subdirs:
        try:
            sub_dirs, sub_files = count_filedirs(subdir)
        except RuntimeError:
            # the directories are nested too deeply: ignore this sub-tree
            sub_dirs, sub_files = 0, 0
        nb_dirs += sub_dirs
        nb_files += sub_files

    return nb_dirs, nb_files

def sha256sum(input_file_name):
    """Returns the SHA256 digest (hexadecimal string) of a file read in binary mode.
    Returns '' and prints a message if the file cannot be read."""
    digest = hashlib.sha256()
    try:
        with open(input_file_name, "rb") as stream:
            # read 1 MB at a time until read() returns no more data
            for block in iter(lambda: stream.read(1048576), b''):
                digest.update(block)
    except IOError:
        print ('sha256sum: cannot open file: %s' % (input_file_name))
        return ''
    return digest.hexdigest()

def sha1sum(input_file_name):
    """Returns the SHA1 digest (hexadecimal string) of a file read in binary mode.
    Returns '' and prints a message if the file cannot be read."""
    digest = hashlib.sha1()
    try:
        with open(input_file_name, "rb") as stream:
            # read 1 MB at a time until read() returns no more data
            for block in iter(lambda: stream.read(1048576), b''):
                digest.update(block)
    except IOError:
        print ('sha1sum: cannot open file: %s' % (input_file_name))
        return ''
    return digest.hexdigest()

# -------------------------- File Constants -------------------------
# Anything other than the file types below. We do not support this file type.
UNKNOWN = 0

# An APK. A ZIP cannot be told apart from an APK until we have looked inside the ZIP.
APK = 1

# A Dalvik Executable file. We do not check that the file is valid/accepted by the verifier.
DEX = 2

# An ARM ELF executable.
ARM = 3

# A Java .class file.
CLASS = 4

# A Zip file. This can actually also be a JAR or an APK until we have thoroughly checked.
ZIP = 5

# A RARed file.
RAR = 6

# We can probably add some more later: TAR, TGZ, BZ2...

def str_filetype(filetype):
    """Returns the name ("APK", "DEX", "ARM"...) of the given droidutil filetype.
    Any value which is not one of the known filetypes yields "UNKNOWN"."""
    known_types = (
        (APK, "APK"),
        (DEX, "DEX"),
        (ARM, "ARM"),
        (CLASS, "CLASS"),
        (ZIP, "ZIP"),
        (RAR, "RAR"),
    )
    for code, label in known_types:
        if filetype == code:
            return label
    return "UNKNOWN"
    

def get_filetype(filename):
    """Returns an enumerate for the filetype corresponding to the given absolute filename.
    The type is guessed from the description returned by magic.from_file():
    the first known signature found in that description decides the result.
    This function does not unzip the file.
    It will return one of these:
    droidutil.ZIP
    droidutil.RAR
    droidutil.ARM
    droidutil.CLASS
    droidutil.DEX
    droidutil.UNKNOWN
    """
    description = magic.from_file(filename)
    if description is None:
        # this happens if magic is unable to find file type
        return UNKNOWN

    # Single table for both the search pattern and the lookup, so that the
    # two cannot drift apart. Java archives are reported as ZIP.
    signatures = (
        ('Zip archive data', ZIP),
        ('zip', ZIP),
        ('RAR archive data', RAR),
        ('executable, ARM', ARM),
        ('Java class', CLASS),
        ('Dalvik dex', DEX),
        ('Java archive', ZIP),
    )
    pattern = '|'.join(re.escape(text) for text, _ in signatures)
    match = re.search(pattern, description)
    if match is None:
        return UNKNOWN
    return dict(signatures)[match.group(0)]


def get_elements(xmldoc, tag_name, attribute):
    """Returns a list with one entry per tag_name element of xmldoc:
    the repr() of the value of its attribute."""
    return [repr(node.getAttribute(attribute))
            for node in xmldoc.getElementsByTagName(tag_name)]

def get_element(xmldoc, tag_name, attribute):
    """Returns the first non-empty value of attribute among the tag_name
    elements of xmldoc, or None if there is no such value."""
    values = (node.getAttribute(attribute)
              for node in xmldoc.getElementsByTagName(tag_name))
    return next((value for value in values if len(value) > 0), None)

"""Very simple exception to raise when we found something. For instance to break a loop."""
class Found(Exception):
    """Raised to signal that something was found, for instance to break out of a loop."""

class matchresult:
    """Information about one match of a keyword"""

    def __init__(self, thefile, theline, thelineno):
        """Records where the keyword was found:
        the file name, the content of the line and the line number"""
        self.file, self.line, self.lineno = thefile, theline, thelineno

    def __repr__(self):
        fields = (self.file, self.lineno, self.line)
        return 'file=%s lineno=%d line=%s' % fields

    def __str__(self):
        # only the last 70 characters of long file names are displayed
        shown = self.file if len(self.file) <= 70 else '...'+self.file[-70:]
        return 'file=%50s no=%4d line=%30s' % (shown, self.lineno, self.line)

def recursive_search(search_regexp, directory, exception_list=None, verbose=False):
    """Recursively search in a directory except in some files or subdirectories.

    search_regexp is applied to the lines of each file, which are read in
    binary mode: it must therefore be a bytes pattern.
    directory is the directory where the search starts.
    exception_list is a list of regexps. A file or directory whose path
    (directory joined with the entry name) matches one of them is skipped.
    By default, nothing is skipped.
    verbose prints what is searched and each match.

    Returns a dictionary of list of matches. The key is the matched text,
    decoded as UTF-8 (invalid bytes are replaced):
    match[ keyword ] = [ <'filename', 'matching line content', 'lineno'>,
                         <'filename', 'matching line content', 'lineno'>,
                         <'filename', 'matching line content', 'lineno'>, ]

    We can only have one match per line. Otherwise, this won't work we should be using re.findall
    """
    if exception_list is None:
        exception_list = []
    matches = defaultdict(list)

    if verbose:
        print("Searching in " + directory + " for " + search_regexp.decode('utf-8'))
        print("Exceptions: %s" % (str(exception_list)))

    for entry in os.listdir(directory):
        current_entry = os.path.join(directory, entry)
        is_file = os.path.isfile(current_entry)
        if not is_file and not os.path.isdir(current_entry):
            # neither a regular file nor a directory: nothing to search
            continue

        if any(re.search(exception, current_entry) is not None for exception in exception_list):
            # skip this file or directory
            continue

        if is_file:
            # the with statement guarantees the file is closed, even on error
            with open(current_entry, 'rb') as stream:
                for lineno, line in enumerate(stream, 1):
                    match = re.search(search_regexp, line)
                    if match is None:
                        continue
                    # match.group(0) only provides one match per line. If we
                    # need more, re.search is not appropriate and should be
                    # replaced by re.findall
                    keyword = match.group(0).decode('utf-8', errors='replace')
                    if verbose:
                        print("Match: File: " +entry+ " Keyword: " + keyword + " Line: " + line.decode('utf-8', errors='replace'))
                    matches[ keyword ].append(matchresult(current_entry, line, lineno))
            continue

        # this directory is not in the exception list, we must search it recursively
        try:
            hismatches = recursive_search(search_regexp, current_entry, exception_list, verbose)
        except RuntimeError:
            # we get this when there are too many recursive dirs
            continue
        # merge in those results
        for key, found in hismatches.items():
            matches[ key ].extend(found)

    return matches