File: hash.py

package info (click to toggle)
python-pyelftools 0.32-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 68,964 kB
  • sloc: python: 15,903; ansic: 298; asm: 86; makefile: 24; cpp: 18; sh: 4
file content (186 lines) | stat: -rw-r--r-- 7,490 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
#-------------------------------------------------------------------------------
# elftools: elf/hash.py
#
# ELF hash table sections
#
# Andreas Ziegler (andreas.ziegler@fau.de)
# This code is in the public domain
#-------------------------------------------------------------------------------

import struct

from ..common.utils import struct_parse
from .sections import Section


class ELFHashTable(object):
    """ Representation of an ELF hash table to find symbols in the
        symbol table - useful for super-stripped binaries without section
        headers where only the start of the symbol table is known from the
        dynamic segment. The layout and contents are nicely described at
        https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/.

        The symboltable argument needs to implement a get_symbol() method -
        in a regular ELF file, this will be the linked symbol table section
        as indicated by the sh_link attribute. For super-stripped binaries,
        one should use the DynamicSegment object as the symboltable as it
        supports symbol lookup without access to a symbol table section.
    """

    def __init__(self, elffile, start_offset, symboltable):
        self.elffile = elffile
        self._symboltable = symboltable
        self.params = struct_parse(self.elffile.structs.Elf_Hash,
                                   self.elffile.stream,
                                   start_offset)

    def get_number_of_symbols(self):
        """ Get the number of symbols from the hash table parameters.
        """
        return self.params['nchains']

    def get_symbol(self, name):
        """ Look up a symbol from this hash table with the given name.
        """
        if self.params['nbuckets'] == 0:
            return None
        hval = self.elf_hash(name) % self.params['nbuckets']
        symndx = self.params['buckets'][hval]
        while symndx != 0:
            sym = self._symboltable.get_symbol(symndx)
            if sym.name == name:
                return sym
            symndx = self.params['chains'][symndx]
        return None

    @staticmethod
    def elf_hash(name):
        """ Compute the hash value for a given symbol name.
        """
        if not isinstance(name, bytes):
            name = name.encode('utf-8')
        h = 0
        x = 0
        for c in bytearray(name):
            h = (h << 4) + c
            x = h & 0xF0000000
            if x != 0:
                h ^= (x >> 24)
            h &= ~x
        return h


class ELFHashSection(Section, ELFHashTable):
    """ Section representation of an ELF hash table. In regular ELF files, this
        allows us to use the common functions defined on Section objects when
        dealing with the hash table.
    """
    def __init__(self, header, name, elffile, symboltable):
        Section.__init__(self, header, name, elffile)
        ELFHashTable.__init__(self, elffile, self['sh_offset'], symboltable)


class GNUHashTable(object):
    """ Representation of a GNU hash table to find symbols in the
        symbol table - useful for super-stripped binaries without section
        headers where only the start of the symbol table is known from the
        dynamic segment. The layout and contents are nicely described at
        https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/.

        The symboltable argument needs to implement a get_symbol() method -
        in a regular ELF file, this will be the linked symbol table section
        as indicated by the sh_link attribute. For super-stripped binaries,
        one should use the DynamicSegment object as the symboltable as it
        supports symbol lookup without access to a symbol table section.
    """
    def __init__(self, elffile, start_offset, symboltable):
        self.elffile = elffile
        self._symboltable = symboltable
        self.params = struct_parse(self.elffile.structs.Gnu_Hash,
                                   self.elffile.stream,
                                   start_offset)
        # Element sizes in the hash table
        self._wordsize = self.elffile.structs.Elf_word('').sizeof()
        self._xwordsize = self.elffile.structs.Elf_xword('').sizeof()
        self._chain_pos = start_offset + 4 * self._wordsize + \
            self.params['bloom_size'] * self._xwordsize + \
            self.params['nbuckets'] * self._wordsize

    def get_number_of_symbols(self):
        """ Get the number of symbols in the hash table by finding the bucket
            with the highest symbol index and walking to the end of its chain.
        """
        # Find highest index in buckets array
        max_idx = max(self.params['buckets'])
        if max_idx < self.params['symoffset']:
            return self.params['symoffset']

        # Position the stream at the start of the corresponding chain
        max_chain_pos = self._chain_pos + \
            (max_idx - self.params['symoffset']) * self._wordsize
        self.elffile.stream.seek(max_chain_pos)
        hash_format = '<I' if self.elffile.little_endian else '>I'

        # Walk the chain to its end (lowest bit is set)
        while True:
            cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0]
            if cur_hash & 1:
                return max_idx + 1

            max_idx += 1

    def _matches_bloom(self, H1):
        """ Helper function to check if the given hash could be in the hash
            table by testing it against the bloom filter.
        """
        arch_bits = self.elffile.elfclass
        H2 = H1 >> self.params['bloom_shift']
        word_idx = int(H1 / arch_bits) % self.params['bloom_size']
        BITMASK = (1 << (H1 % arch_bits)) | (1 << (H2 % arch_bits))
        return (self.params['bloom'][word_idx] & BITMASK) == BITMASK

    def get_symbol(self, name):
        """ Look up a symbol from this hash table with the given name.
        """
        namehash = self.gnu_hash(name)
        if not self._matches_bloom(namehash):
            return None

        symidx = self.params['buckets'][namehash % self.params['nbuckets']]
        if symidx < self.params['symoffset']:
            return None

        self.elffile.stream.seek(self._chain_pos + (symidx - self.params['symoffset']) * self._wordsize)
        hash_format = '<I' if self.elffile.little_endian else '>I'
        while True:
            cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0]
            if cur_hash | 1 == namehash | 1:
                symbol = self._symboltable.get_symbol(symidx)
                if name == symbol.name:
                    return symbol

            if cur_hash & 1:
                break
            symidx += 1
        return None

    @staticmethod
    def gnu_hash(key):
        """ Compute the GNU-style hash value for a given symbol name.
        """
        if not isinstance(key, bytes):
            key = key.encode('utf-8')
        h = 5381
        for c in bytearray(key):
            h = h * 33 + c
        return h & 0xFFFFFFFF


class GNUHashSection(Section, GNUHashTable):
    """ Section representation of a GNU hash table. In regular ELF files, this
        allows us to use the common functions defined on Section objects when
        dealing with the hash table.
    """
    def __init__(self, header, name, elffile, symboltable):
        Section.__init__(self, header, name, elffile)
        GNUHashTable.__init__(self, elffile, self['sh_offset'], symboltable)