1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
|
#-------------------------------------------------------------------------------
# elftools: dwarf/namelut.py
#
# DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames)
#
# Vijay Ramasami (rvijayc@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import os
import collections
from collections import OrderedDict
from collections.abc import Mapping
from ..common.utils import struct_parse
from bisect import bisect_right
import math
from ..construct import CString, Struct, If
# Value stored for every symbol in a NameLUT: the offset of the CU that the
# symbol belongs to, and the offset of the symbol's DIE.
NameLUTEntry = collections.namedtuple(
    typename='NameLUTEntry',
    field_names='cu_ofs die_ofs',
)
class NameLUT(Mapping):
    """
    A "Name LUT" holds any of the tables specified by .debug_pubtypes or
    .debug_pubnames sections. This is basically a dictionary where the key is
    the symbol name (either a public variable, function or a type), and the
    value is the tuple (cu_offset, die_offset) corresponding to the variable.
    The die_offset is an absolute offset (meaning, it can be used to search the
    CU by iterating until a match is obtained).

    An ordered dictionary is used to preserve the CU order (i.e., items are
    stored on a per-CU basis, as they were originally in the .debug_* section).

    Note that indexing with a name that is not present returns None instead
    of raising KeyError; use ``name in lut`` to test for membership.

    Usage:

    The NameLUT walks and talks like a dictionary and hence it can be used as
    such. Some examples below:

        # get the pubnames (a NameLUT from DWARF info).
        pubnames = dwarf_info.get_pubnames()

        # lookup a variable.
        entry1 = pubnames["var_name1"]
        entry2 = pubnames.get("var_name2", default=<default_var>)
        print(entry2.cu_ofs)
        ...

        # iterate over items.
        for (name, entry) in pubnames.items():
            # do stuff with name, entry.cu_ofs, entry.die_ofs

        # iterate over items on a per-CU basis.
        import itertools
        for cu_ofs, item_list in itertools.groupby(pubnames.items(),
                key = lambda x: x[1].cu_ofs):
            # items are now grouped by cu_ofs.
            # item_list is an iterator yielding (name, NameLUTEntry) pairs
            # belonging to cu_ofs.
            # We can parse the CU at cu_offset and use the parsed CU results
            # to parse the pubname DIEs in the CU listed by item_list.
            for item in item_list:
                # work with item which is part of the CU with cu_ofs.
    """

    def __init__(self, stream, size, structs):
        """
        stream:
            Seekable stream positioned over the section contents; parsing
            starts at stream offset 0.
        size:
            Size of the section in bytes; parsing stops once this offset is
            reached.
        structs:
            Object providing Dwarf_nameLUT_header, Dwarf_offset and
            initial_length_field_size() used to decode the section.
        """
        self._stream = stream
        self._size = size
        self._structs = structs
        # entries are lazily loaded on demand.
        self._entries = None
        # CU headers (for readelf).
        self._cu_headers = None

    def _load_entries(self):
        """
        Parse the section on first use and return the entries dictionary.
        Subsequent calls return the cached dictionary.
        """
        if self._entries is None:
            self._entries, self._cu_headers = self._get_entries()
        return self._entries

    def get_entries(self):
        """
        Returns the parsed NameLUT entries. The returned object is a dictionary
        with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as
        the value.

        This is useful when dealing with very large ELF files with millions of
        entries. The returned entries can be pickled to a file and restored by
        calling set_entries on subsequent loads.
        """
        return self._load_entries()

    def set_entries(self, entries, cu_headers):
        """
        Set the NameLUT entries from an external source. The input is a
        dictionary with the symbol name as the key and NameLUTEntry(cu_ofs,
        die_ofs) as the value, plus the list of CU headers that
        get_cu_headers() should return.

        This option is useful when dealing with very large ELF files with
        millions of entries. The entries can be parsed once and pickled to a
        file and can be restored via this function on subsequent loads.
        """
        self._entries = entries
        self._cu_headers = cu_headers

    def __len__(self):
        """
        Returns the number of entries in the NameLUT.
        """
        return len(self._load_entries())

    def __getitem__(self, name):
        """
        Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds
        to the given symbol name, or None if the name is not present.
        """
        # Deliberately lenient (no KeyError) to keep existing callers working.
        return self._load_entries().get(name)

    def __contains__(self, name):
        """
        Returns True if the given symbol name is present in the NameLUT.
        """
        # Mapping's default __contains__ relies on __getitem__ raising
        # KeyError, which never happens here, so it must be overridden.
        return name in self._load_entries()

    def __iter__(self):
        """
        Returns an iterator over the symbol names in the NameLUT.
        """
        return iter(self._load_entries())

    def items(self):
        """
        Returns the NameLUT dictionary items.
        """
        return self._load_entries().items()

    def get(self, name, default=None):
        """
        Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name or
        `default` (None unless specified) if the symbol does not exist in the
        corresponding section.
        """
        return self._load_entries().get(name, default)

    def get_cu_headers(self):
        """
        Returns all CU headers. Mainly required for readelf.
        """
        if self._cu_headers is None:
            self._entries, self._cu_headers = self._get_entries()
        return self._cu_headers

    def _get_entries(self):
        """
        Parse the (name, cu_ofs, die_ofs) information from this section and
        return it as a tuple (entries, cu_headers), where entries is an
        ordered dictionary keyed by symbol name and cu_headers is the list of
        per-CU headers in section order.

        A name that occurs more than once keeps only the last entry parsed.
        """
        self._stream.seek(0)
        entries = OrderedDict()
        cu_headers = []
        offset = 0

        # According to 6.1.1. of DWARFv4, each set of names is terminated by
        # an offset field containing zero (and no following string). Because
        # of sequential parsing, every next entry may be that terminator.
        # So, field "name" is conditional.
        entry_struct = Struct("Dwarf_offset_name_pair",
                self._structs.Dwarf_offset('die_ofs'),
                If(lambda ctx: ctx['die_ofs'], CString('name')))

        # each run of this loop will fetch one CU worth of entries.
        while offset < self._size:

            # read the header for this CU.
            namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header,
                    self._stream, offset)
            cu_headers.append(namelut_hdr)

            # compute the next offset: unit_length does not include the
            # initial length field itself, so add its size as well.
            offset = (offset + namelut_hdr.unit_length +
                     self._structs.initial_length_field_size())

            # before inner loop, latch data that will be used in the inner
            # loop to avoid attribute access and other computation.
            hdr_cu_ofs = namelut_hdr.debug_info_offset

            # read entries until the zero die_ofs terminator is found.
            while True:
                entry = struct_parse(entry_struct, self._stream)

                # if it is zero, this is the terminating record.
                if entry.die_ofs == 0:
                    break

                # add this entry to the look-up dictionary; the stored
                # die_ofs is made absolute by adding the CU offset.
                entries[entry.name.decode('utf-8')] = NameLUTEntry(
                        cu_ofs = hdr_cu_ofs,
                        die_ofs = hdr_cu_ofs + entry.die_ofs)

        # return the entries parsed so far.
        return (entries, cu_headers)
|