1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
|
#-------------------------------------------------------------------------------
# elftools: elf/hash.py
#
# ELF hash table sections
#
# Andreas Ziegler (andreas.ziegler@fau.de)
# This code is in the public domain
#-------------------------------------------------------------------------------
import struct
from ..common.utils import struct_parse
from .sections import Section
class ELFHashTable(object):
""" Representation of an ELF hash table to find symbols in the
symbol table - useful for super-stripped binaries without section
headers where only the start of the symbol table is known from the
dynamic segment. The layout and contents are nicely described at
https://flapenguin.me/2017/04/24/elf-lookup-dt-hash/.
The symboltable argument needs to implement a get_symbol() method -
in a regular ELF file, this will be the linked symbol table section
as indicated by the sh_link attribute. For super-stripped binaries,
one should use the DynamicSegment object as the symboltable as it
supports symbol lookup without access to a symbol table section.
"""
def __init__(self, elffile, start_offset, symboltable):
self.elffile = elffile
self._symboltable = symboltable
self.params = struct_parse(self.elffile.structs.Elf_Hash,
self.elffile.stream,
start_offset)
def get_number_of_symbols(self):
""" Get the number of symbols from the hash table parameters.
"""
return self.params['nchains']
def get_symbol(self, name):
""" Look up a symbol from this hash table with the given name.
"""
if self.params['nbuckets'] == 0:
return None
hval = self.elf_hash(name) % self.params['nbuckets']
symndx = self.params['buckets'][hval]
while symndx != 0:
sym = self._symboltable.get_symbol(symndx)
if sym.name == name:
return sym
symndx = self.params['chains'][symndx]
return None
@staticmethod
def elf_hash(name):
""" Compute the hash value for a given symbol name.
"""
if not isinstance(name, bytes):
name = name.encode('utf-8')
h = 0
x = 0
for c in bytearray(name):
h = (h << 4) + c
x = h & 0xF0000000
if x != 0:
h ^= (x >> 24)
h &= ~x
return h
class ELFHashSection(Section, ELFHashTable):
""" Section representation of an ELF hash table. In regular ELF files, this
allows us to use the common functions defined on Section objects when
dealing with the hash table.
"""
def __init__(self, header, name, elffile, symboltable):
Section.__init__(self, header, name, elffile)
ELFHashTable.__init__(self, elffile, self['sh_offset'], symboltable)
class GNUHashTable(object):
""" Representation of a GNU hash table to find symbols in the
symbol table - useful for super-stripped binaries without section
headers where only the start of the symbol table is known from the
dynamic segment. The layout and contents are nicely described at
https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/.
The symboltable argument needs to implement a get_symbol() method -
in a regular ELF file, this will be the linked symbol table section
as indicated by the sh_link attribute. For super-stripped binaries,
one should use the DynamicSegment object as the symboltable as it
supports symbol lookup without access to a symbol table section.
"""
def __init__(self, elffile, start_offset, symboltable):
self.elffile = elffile
self._symboltable = symboltable
self.params = struct_parse(self.elffile.structs.Gnu_Hash,
self.elffile.stream,
start_offset)
# Element sizes in the hash table
self._wordsize = self.elffile.structs.Elf_word('').sizeof()
self._xwordsize = self.elffile.structs.Elf_xword('').sizeof()
self._chain_pos = start_offset + 4 * self._wordsize + \
self.params['bloom_size'] * self._xwordsize + \
self.params['nbuckets'] * self._wordsize
def get_number_of_symbols(self):
""" Get the number of symbols in the hash table by finding the bucket
with the highest symbol index and walking to the end of its chain.
"""
# Find highest index in buckets array
max_idx = max(self.params['buckets'])
if max_idx < self.params['symoffset']:
return self.params['symoffset']
# Position the stream at the start of the corresponding chain
max_chain_pos = self._chain_pos + \
(max_idx - self.params['symoffset']) * self._wordsize
self.elffile.stream.seek(max_chain_pos)
hash_format = '<I' if self.elffile.little_endian else '>I'
# Walk the chain to its end (lowest bit is set)
while True:
cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0]
if cur_hash & 1:
return max_idx + 1
max_idx += 1
def _matches_bloom(self, H1):
""" Helper function to check if the given hash could be in the hash
table by testing it against the bloom filter.
"""
arch_bits = self.elffile.elfclass
H2 = H1 >> self.params['bloom_shift']
word_idx = int(H1 / arch_bits) % self.params['bloom_size']
BITMASK = (1 << (H1 % arch_bits)) | (1 << (H2 % arch_bits))
return (self.params['bloom'][word_idx] & BITMASK) == BITMASK
def get_symbol(self, name):
""" Look up a symbol from this hash table with the given name.
"""
namehash = self.gnu_hash(name)
if not self._matches_bloom(namehash):
return None
symidx = self.params['buckets'][namehash % self.params['nbuckets']]
if symidx < self.params['symoffset']:
return None
self.elffile.stream.seek(self._chain_pos + (symidx - self.params['symoffset']) * self._wordsize)
hash_format = '<I' if self.elffile.little_endian else '>I'
while True:
cur_hash = struct.unpack(hash_format, self.elffile.stream.read(self._wordsize))[0]
if cur_hash | 1 == namehash | 1:
symbol = self._symboltable.get_symbol(symidx)
if name == symbol.name:
return symbol
if cur_hash & 1:
break
symidx += 1
return None
@staticmethod
def gnu_hash(key):
""" Compute the GNU-style hash value for a given symbol name.
"""
if not isinstance(key, bytes):
key = key.encode('utf-8')
h = 5381
for c in bytearray(key):
h = h * 33 + c
return h & 0xFFFFFFFF
class GNUHashSection(Section, GNUHashTable):
""" Section representation of a GNU hash table. In regular ELF files, this
allows us to use the common functions defined on Section objects when
dealing with the hash table.
"""
def __init__(self, header, name, elffile, symboltable):
Section.__init__(self, header, name, elffile)
GNUHashTable.__init__(self, elffile, self['sh_offset'], symboltable)
|