File: typeunit.py

package info (click to toggle)
python-pyelftools 0.32-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 68,964 kB
  • sloc: python: 15,903; ansic: 298; asm: 86; makefile: 24; cpp: 18; sh: 4
file content (259 lines) | stat: -rw-r--r-- 10,179 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#-------------------------------------------------------------------------------
# elftools: dwarf/typeunit.py
#
# DWARF type unit
#
# Dinkar Khandalekar (contact@dinkar.dev)
# This code is in the public domain
#-------------------------------------------------------------------------------
from bisect import bisect_right
from .die import DIE
from ..common.utils import dwarf_assert


class TypeUnit(object):
    """ A DWARF type unit (TU).

        A type unit contains type definition entries that can be referenced
        from debugging information entries in other compilation units and
        type units. Each type unit must be uniquely identified by a 64-bit
        signature. (DWARFv4 section 3.1.3)

        Type units are stored in the .debug_types section. This section was
        introduced by the DWARFv4 standard (and removed in the DWARFv5
        standard; the underlying type units were relocated to the
        .debug_info section - DWARFv5 section 1.4)

        Serves as a container and context to DIEs that describe type
        definitions referenced from compilation units and other type units.

        TU header entries can be accessed as dict keys from this object, i.e.
           tu = TypeUnit(...)
           tu['version']  # version field of the TU header

        To get the top-level DIE describing the type unit, call the
        get_top_DIE method.
    """
    def __init__(self, header, dwarfinfo, structs, tu_offset, tu_die_offset):
        """ header:
                TU header for this type unit

            dwarfinfo:
                The DWARFInfo context object which created this one

            structs:
                A DWARFStructs instance suitable for this type unit

            tu_offset:
                Offset in the stream to the beginning of this TU (its header)

            tu_die_offset:
                Offset in the stream of the top DIE of this TU
        """
        self.dwarfinfo = dwarfinfo
        self.header = header
        self.structs = structs
        self.tu_offset = tu_offset
        self.tu_die_offset = tu_die_offset

        # The abbreviation table for this TU. Filled lazily when DIEs are
        # requested.
        self._abbrev_table = None

        # A list of DIEs belonging to this TU.
        # This list is lazily constructed as DIEs are iterated over.
        self._dielist = []
        # A list of file offsets, corresponding (by index) to the DIEs
        # in `self._dielist`. This list exists separately from
        # `self._dielist` to make it binary searchable, enabling the
        # DIE population strategy used in `iter_DIE_children`.
        # Like `self._dielist`, this list is lazily constructed
        # as DIEs are iterated over.
        self._diemap = []

    @property
    def cu_offset(self):
        """ Simulates the cu_offset attribute required by the DIE by
            returning the tu_offset instead.
        """
        return self.tu_offset

    @property
    def cu_die_offset(self):
        """ Simulates the cu_die_offset attribute required by the DIE by
            returning the tu_die_offset instead.
        """
        return self.tu_die_offset

    def dwarf_format(self):
        """ Get the DWARF format (32 or 64) for this TU
        """
        return self.structs.dwarf_format

    def get_abbrev_table(self):
        """ Get the abbreviation table (AbbrevTable object) for this TU.

            The table is looked up through the DWARFInfo context on first
            use, keyed by the 'debug_abbrev_offset' header field, and then
            cached on this object.
        """
        if self._abbrev_table is None:
            self._abbrev_table = self.dwarfinfo.get_abbrev_table(
                self['debug_abbrev_offset'])
        return self._abbrev_table

    def get_top_DIE(self):
        """ Get the top DIE (which is DW_TAG_type_unit entry) of this TU.

            The DIE is parsed from the .debug_types stream on the first
            call and served from the cache afterwards.
        """
        # Note that a top DIE always has minimal offset and is therefore
        # at the beginning of our lists, so no bisect is required.
        if self._diemap:
            return self._dielist[0]

        top = DIE(
                cu=self,
                stream=self.dwarfinfo.debug_types_sec.stream,
                offset=self.tu_die_offset)

        # The top DIE must be registered in the cache *before* its indirect
        # attributes are translated, so keep this ordering.
        self._dielist.insert(0, top)
        self._diemap.insert(0, self.tu_die_offset)

        # Can't translate indirect attributes until the top DIE has been
        # parsed to the end
        top._translate_indirect_attributes()

        return top

    def has_top_DIE(self):
        """ Returns whether the top DIE in this TU has already been parsed
            and cached. No parsing on demand!
        """
        return bool(self._diemap)

    @property
    def size(self):
        """ Size of this TU: the 'unit_length' header field plus the size of
            the initial length field itself (as reported by the structs).
        """
        return self['unit_length'] + self.structs.initial_length_field_size()

    def iter_DIEs(self):
        """ Iterate over all the DIEs in the TU, in order of their appearance.
            Note that null DIEs will also be returned.
        """
        return self._iter_DIE_subtree(self.get_top_DIE())

    def iter_DIE_children(self, die):
        """ Given a DIE, yields either its children, without null DIE list
            terminator, or nothing, if that DIE has no children.

            Every visited child (including the terminator) gets `die` set as
            its parent. The null DIE terminator is saved in `die._terminator`
            when iteration ended.

            Raises NotImplementedError if a child carries a DW_AT_sibling
            attribute in a form that is not handled here.
        """
        if not die.has_children:
            return

        # `cur_offset` tracks the stream offset of the next DIE to yield
        # as we iterate over our children,
        cur_offset = die.offset + die.size

        while True:
            child = self._get_cached_DIE(cur_offset)

            child.set_parent(die)

            if child.is_null():
                die._terminator = child
                return

            yield child

            if not child.has_children:
                # A leaf: the next sibling starts right after it.
                cur_offset += child.size
            elif "DW_AT_sibling" in child.attributes:
                # The producer told us where the next sibling is, so the
                # child's subtree does not need to be parsed.
                sibling = child.attributes["DW_AT_sibling"]
                if sibling.form in ('DW_FORM_ref1', 'DW_FORM_ref2',
                                    'DW_FORM_ref4', 'DW_FORM_ref8',
                                    'DW_FORM_ref', 'DW_FORM_ref_udata'):
                    # Unit-relative reference: rebase on the TU start.
                    cur_offset = sibling.value + self.tu_offset
                elif sibling.form == 'DW_FORM_ref_addr':
                    # Already an offset into the section.
                    cur_offset = sibling.value
                else:
                    raise NotImplementedError('sibling in form %s' % sibling.form)
            else:
                # If no DW_AT_sibling attribute is provided by the producer
                # then the whole child subtree must be parsed to find its next
                # sibling. There is one zero byte representing null DIE
                # terminating children list. It is used to locate child subtree
                # bounds.

                # If children are not parsed yet, draining this generator
                # recursively sets the `_terminator` attribute of `child`.
                if child._terminator is None:
                    for _ in self.iter_DIE_children(child):
                        pass

                cur_offset = child._terminator.offset + child._terminator.size

    def get_DIE_from_refaddr(self, refaddr):
        """ Obtain a DIE contained in this TU from a reference.

            refaddr:
                The offset into the stream this TU was read from (the
                .debug_types section), which must be contained in this TU
                or a DWARFError will be raised.

            When using a reference class attribute with a form that is
            relative to the unit, add this unit's offset (tu_offset, also
            available as cu_offset) to the attribute value before calling
            this function.
        """
        # All DIEs are after the TU header and within the unit. The message
        # text deliberately keeps the historical "CU" wording.
        dwarf_assert(
            self.cu_die_offset <= refaddr < self.cu_offset + self.size,
            'refaddr %s not in DIE range of CU %s' % (refaddr, self.cu_offset))

        return self._get_cached_DIE(refaddr)

    #------ PRIVATE ------#

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _iter_DIE_subtree(self, die):
        """ Given a DIE, this yields it with its subtree including null DIEs
            (child list terminators).
        """
        # If the die is an imported unit, replace it with what it refers to if
        # we can
        if die.tag == 'DW_TAG_imported_unit' and self.dwarfinfo.supplementary_dwarfinfo:
            die = die.get_DIE_from_attribute('DW_AT_import')
        yield die
        if die.has_children:
            for c in die.iter_children():
                # Recurse through the unit owning `die`, which may differ
                # from `self` after the imported-unit substitution above.
                for d in die.cu._iter_DIE_subtree(c):
                    yield d
            yield die._terminator

    def _get_cached_DIE(self, offset):
        """ Given a DIE offset, look it up in the cache.  If not present,
            parse the DIE and insert it into the cache.

            offset:
                The offset of the DIE in the debug_types section to retrieve.

            The stream reference is copied from the top DIE.  The top die will
            also be parsed and cached if needed.

            A DWARFError is raised if `offset` lies before the top DIE of
            this TU, since no DIE of this unit can start there.

            See also get_DIE_from_refaddr(self, refaddr).
        """
        # The top die must be in the cache if any DIE is in the cache.
        # The stream is the same for all DIEs in this TU, so populate
        # the top DIE and obtain a reference to its stream.
        top_die_stream = self.get_top_DIE().stream

        # `offset` is the offset in the stream of the DIE we want to return.
        # The map is maintained as a parallel array to the list.  We call
        # bisect each time to ensure new DIEs are inserted in the correct
        # order within both `self._dielist` and `self._diemap`.
        i = bisect_right(self._diemap, offset)

        # `self._diemap` cannot be empty because the top DIE was inserted by
        # the call to .get_top_DIE(), and the top DIE has the minimal offset.
        # An insert point of 0 therefore means `offset` precedes the top DIE.
        # Reject it: otherwise `self._diemap[i - 1]` would wrap around to the
        # last entry and the new DIE would displace the top DIE at index 0.
        if i == 0:
            dwarf_assert(
                False,
                'offset %s precedes the top DIE of TU %s' % (
                    offset, self.tu_offset))

        if offset == self._diemap[i - 1]:
            die = self._dielist[i - 1]
        else:
            die = DIE(cu=self, stream=top_die_stream, offset=offset)
            self._dielist.insert(i, die)
            self._diemap.insert(i, offset)

        return die