File: namelut.py

package info (click to toggle)
python-pyelftools 0.32-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 68,964 kB
  • sloc: python: 15,903; ansic: 298; asm: 86; makefile: 24; cpp: 18; sh: 4
file content (198 lines) | stat: -rwxr-xr-x 7,320 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#-------------------------------------------------------------------------------
# elftools: dwarf/namelut.py
#
# DWARF pubtypes/pubnames section decoding (.debug_pubtypes, .debug_pubnames)
#
# Vijay Ramasami (rvijayc@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
import os
import collections
from collections import OrderedDict
from collections.abc import Mapping
from ..common.utils import struct_parse
from bisect import bisect_right
import math
from ..construct import CString, Struct, If

# Value record stored for every name in a NameLUT: the offset of the CU the
# name belongs to (cu_ofs) and the offset of the DIE describing it (die_ofs).
NameLUTEntry = collections.namedtuple(
    typename='NameLUTEntry',
    field_names='cu_ofs die_ofs')

class NameLUT(Mapping):
    """
    A "Name LUT" holds any of the tables specified by .debug_pubtypes or
    .debug_pubnames sections. This is basically a dictionary where the key is
    the symbol name (either a public variable, function or a type), and the
    value is the tuple (cu_offset, die_offset) corresponding to the variable.
    The die_offset is an absolute offset (meaning, it can be used to search the
    CU by iterating until a match is obtained).

    An ordered dictionary is used to preserve the CU order (i.e., items are
    stored on a per-CU basis, as they were originally in the .debug_* section).

    The section is parsed lazily: nothing is read from the stream until the
    first lookup, iteration, or explicit get_entries()/get_cu_headers() call.

    Usage:

    The NameLUT walks and talks like a dictionary and hence it can be used as
    such. Some examples below:

    # get the pubnames (a NameLUT from DWARF info).
    pubnames = dwarf_info.get_pubnames()

    # lookup a variable.
    entry1 = pubnames["var_name1"]
    entry2 = pubnames.get("var_name2", default=<default_var>)
    print(entry2.cu_ofs)
    ...

    # test whether a name is present.
    if "var_name3" in pubnames:
      ...

    # iterate over items.
    for (name, entry) in pubnames.items():
      # do stuff with name, entry.cu_ofs, entry.die_ofs

    # iterate over items on a per-CU basis.
    import itertools
    for cu_ofs, item_list in itertools.groupby(pubnames.items(),
        key = lambda x: x[1].cu_ofs):
      # items are now grouped by cu_ofs.
      # item_list is an iterator yielding (name, NameLUTEntry) pairs
      # belonging to cu_ofs.
      # We can parse the CU at cu_offset and use the parsed CU results
      # to parse the pubname DIEs in the CU listed by item_list.
      for item in item_list:
        # work with item which is part of the CU with cu_ofs.

    """

    def __init__(self, stream, size, structs):
        """
        stream:
            Seekable stream with the section contents; parsing starts at
            offset 0 of this stream.

        size:
            Size of the section in bytes; parsing stops once this offset is
            reached.

        structs:
            Object providing Dwarf_offset, Dwarf_nameLUT_header and
            initial_length_field_size(), used to decode the section.
        """
        self._stream = stream
        self._size = size
        self._structs = structs
        # entries are lazily loaded on demand.
        self._entries = None
        # CU headers (for readelf).
        self._cu_headers = None

    def _loaded_entries(self):
        """
        Returns the entries dictionary, parsing the section on first use.
        """
        if self._entries is None:
            self._entries, self._cu_headers = self._get_entries()
        return self._entries

    def get_entries(self):
        """
        Returns the parsed NameLUT entries. The returned object is a dictionary
        with the symbol name as the key and NameLUTEntry(cu_ofs, die_ofs) as
        the value.

        This is useful when dealing with very large ELF files with millions of
        entries. The returned entries can be pickled to a file and restored by
        calling set_entries on subsequent loads.
        """
        return self._loaded_entries()

    def set_entries(self, entries, cu_headers):
        """
        Set the NameLUT entries from an external source. The input is a
        dictionary with the symbol name as the key and NameLUTEntry(cu_ofs,
        die_ofs) as the value, plus the list of CU headers that
        get_cu_headers() should return.

        This option is useful when dealing with very large ELF files with
        millions of entries. The entries can be parsed once and pickled to a
        file and can be restored via this function on subsequent loads.
        """
        self._entries = entries
        self._cu_headers = cu_headers

    def __len__(self):
        """
        Returns the number of entries in the NameLUT.
        """
        return len(self._loaded_entries())

    def __contains__(self, name):
        """
        Returns True if the given symbol name has an entry in the NameLUT.

        This must be defined explicitly: the __contains__ inherited from
        Mapping relies on __getitem__ raising KeyError, which it never does
        here, so every name would otherwise be reported as present.
        """
        return name in self._loaded_entries()

    def __getitem__(self, name):
        """
        Returns a namedtuple - NameLUTEntry(cu_ofs, die_ofs) - that corresponds
        to the given symbol name.

        Unlike a plain dictionary, None is returned (no KeyError is raised)
        when the name is absent; this behaviour is kept for existing callers.
        Use "name in lut" to test for presence.
        """
        return self._loaded_entries().get(name)

    def __iter__(self):
        """
        Returns an iterator to the NameLUT dictionary.
        """
        return iter(self._loaded_entries())

    def items(self):
        """
        Returns the NameLUT dictionary items.
        """
        return self._loaded_entries().items()

    def get(self, name, default=None):
        """
        Returns NameLUTEntry(cu_ofs, die_ofs) for the provided symbol name or
        the given default (None unless specified) if the symbol does not exist
        in the corresponding section.
        """
        return self._loaded_entries().get(name, default)

    def get_cu_headers(self):
        """
        Returns all CU headers. Mainly required for readelf.
        """
        # Keyed on the headers (not the entries) so that headers are still
        # parsed if entries were supplied without them.
        if self._cu_headers is None:
            self._entries, self._cu_headers = self._get_entries()

        return self._cu_headers

    def _get_entries(self):
        """
        Parse the (name, cu_ofs, die_ofs) information from this section and
        return it as a tuple (entries, cu_headers): an ordered dictionary
        mapping each name to its NameLUTEntry, and the list of per-CU headers
        in section order.

        If the same name is parsed more than once, the last parsed entry is
        the one kept in the dictionary.
        """

        self._stream.seek(0)
        entries = OrderedDict()
        cu_headers = []
        offset = 0
        # According to 6.1.1. of DWARFv4, each set of names is terminated by
        # an offset field containing zero (and no following string). Because
        # of sequential parsing, every next entry may be that terminator.
        # So, field "name" is conditional.
        entry_struct = Struct("Dwarf_offset_name_pair",
                self._structs.Dwarf_offset('die_ofs'),
                If(lambda ctx: ctx['die_ofs'], CString('name')))

        # each run of this loop will fetch one CU worth of entries.
        while offset < self._size:

            # read the header for this CU.
            namelut_hdr = struct_parse(self._structs.Dwarf_nameLUT_header,
                    self._stream, offset)
            cu_headers.append(namelut_hdr)
            # compute the offset of the next CU's header: unit_length does
            # not include the initial length field itself.
            offset = (offset + namelut_hdr.unit_length +
                     self._structs.initial_length_field_size())

            # before inner loop, latch data that will be used in the inner
            # loop to avoid attribute access and other computation.
            hdr_cu_ofs = namelut_hdr.debug_info_offset

            # keep reading entries until the terminating record (the one
            # whose die_ofs is zero) is reached.
            while True:
                entry = struct_parse(entry_struct, self._stream)

                # if it is zero, this is the terminating record.
                if entry.die_ofs == 0:
                    break
                # add this entry to the look-up dictionary. The parsed
                # die_ofs is relative to the CU, so add the CU offset to
                # make it absolute.
                entries[entry.name.decode('utf-8')] = NameLUTEntry(
                        cu_ofs = hdr_cu_ofs,
                        die_ofs = hdr_cu_ofs + entry.die_ofs)

        # return the entries parsed so far.
        return (entries, cu_headers)