File: Parser.hpp

package info (click to toggle)
lief 0.9.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, buster
  • size: 16,036 kB
  • sloc: cpp: 76,013; python: 6,167; ansic: 3,355; pascal: 404; sh: 98; makefile: 32
file content (247 lines) | stat: -rw-r--r-- 8,965 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
/* Copyright 2017 R. Thomas
 * Copyright 2017 Quarkslab
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef LIEF_ELF_PARSER_H_
#define LIEF_ELF_PARSER_H_

#include <vector>
#include <string>
#include <memory>
#include <fstream>
#include <iterator>
#include <iostream>
#include <algorithm>
#include <stdexcept>
#include <functional>

#include "LIEF/visibility.h"
#include "LIEF/exception.hpp"
#include "LIEF/BinaryStream/VectorStream.hpp"

#include "LIEF/Abstract/Parser.hpp"

#include "LIEF/ELF/Binary.hpp"
#include "LIEF/ELF/Structures.hpp"
#include "LIEF/ELF/DynamicEntryArray.hpp"
#include "LIEF/ELF/DynamicEntryRpath.hpp"
#include "LIEF/ELF/DynamicEntryRunPath.hpp"
#include "LIEF/ELF/GnuHash.hpp"

namespace LIEF {

// Forward declaration only: OAT::Parser must be declared before ELF::Parser
// can name it in its `friend class OAT::Parser;` declaration.
namespace OAT {
class Parser;
}
namespace ELF {


//! @brief Class which parses an ELF file and transforms it into an ELF::Binary
//!
//! Client code cannot construct, destroy or copy a Parser directly: the
//! constructors and the destructor are private and the copy operations are
//! deleted. The static parse() overloads are the public entry points.
//! OAT::Parser is declared as a friend and therefore also has access to the
//! private interface.
class LIEF_API Parser : public LIEF::Parser {
  friend class OAT::Parser;
  public:

    // Compile-time numeric limits.
    // NOTE(review): judging by their names these are upper bounds used as
    // sanity checks while parsing (number of symbols, sections, segments, ...)
    // -- their actual use is in the implementation, not visible in this header.
    // NB_MAX_BUCKETS is defined as NB_MAX_SYMBOLS and MAX_SEGMENT_SIZE as
    // MAX_SECTION_SIZE, so each pair always has the same value.
    // The `_MB` literal suffix is declared elsewhere (presumably megabytes --
    // TODO confirm).
    static constexpr uint32_t NB_MAX_SYMBOLS         = 1000000;
    static constexpr uint32_t DELTA_NB_SYMBOLS       = 3000;
    static constexpr uint32_t NB_MAX_BUCKETS         = NB_MAX_SYMBOLS;
    static constexpr uint32_t NB_MAX_CHAINS          = 1000000;
    static constexpr uint32_t NB_MAX_SECTION         = 10000;
    static constexpr uint32_t NB_MAX_SEGMENTS        = 10000;
    static constexpr uint32_t NB_MAX_RELOCATIONS     = 3000000;
    static constexpr uint32_t NB_MAX_DYNAMIC_ENTRIES = 1000;
    static constexpr uint32_t NB_MAX_MASKWORD        = 512;
    static constexpr uint32_t MAX_NOTE_DESCRIPTION   = 1_MB;
    static constexpr uint32_t MAX_SECTION_SIZE       = 100_MB;
    static constexpr uint32_t MAX_SEGMENT_SIZE       = MAX_SECTION_SIZE;


    //! @brief Parse an ELF file and return a LIEF::ELF::Binary object
    //!
    //! For unusual binaries (e.g. sectionless ones) you can choose which method
    //! is used to count the dynamic symbols.
    //!
    //! @param[in] file Path to the ELF binary
    //! @param[in] count_mtd Method used to count dynamic symbols. Default: LIEF::ELF::DYNSYM_COUNT_METHODS::COUNT_AUTO
    //! @return The parsed LIEF::ELF::Binary, wrapped in a std::unique_ptr
    static std::unique_ptr<Binary> parse(const std::string& file, DYNSYM_COUNT_METHODS count_mtd = DYNSYM_COUNT_METHODS::COUNT_AUTO);

    //! @brief Parse the given raw data as an ELF binary and return a LIEF::ELF::Binary object
    //!
    //! For unusual binaries (e.g. sectionless ones) you can choose which method
    //! is used to count the dynamic symbols.
    //!
    //! @param[in] data Raw ELF
    //! @param[in] name Binary name (optional, defaults to an empty string)
    //! @param[in] count_mtd Method used to count dynamic symbols. Default: LIEF::ELF::DYNSYM_COUNT_METHODS::COUNT_AUTO
    //! @return The parsed LIEF::ELF::Binary, wrapped in a std::unique_ptr
    static std::unique_ptr<Binary> parse(const std::vector<uint8_t>& data, const std::string& name = "", DYNSYM_COUNT_METHODS count_mtd = DYNSYM_COUNT_METHODS::COUNT_AUTO);

    // A Parser is non-copyable: both copy operations are deleted.
    Parser& operator=(const Parser& copy) = delete;
    Parser(const Parser& copy)            = delete;

  private:
    // Constructors and destructor are private, so only the members of this
    // class (e.g. the static parse() functions) and the friend OAT::Parser can
    // create or destroy a Parser.
    // NOTE(review): `output` presumably lets the caller provide an existing
    // Binary to be filled instead of a freshly created one -- confirm against
    // the implementation.
    Parser(void);
    Parser(const std::string& file, DYNSYM_COUNT_METHODS count_mtd = DYNSYM_COUNT_METHODS::COUNT_AUTO, Binary* output = nullptr);
    Parser(const std::vector<uint8_t>& data, const std::string& name, DYNSYM_COUNT_METHODS count_mtd = DYNSYM_COUNT_METHODS::COUNT_AUTO, Binary* output = nullptr);
    ~Parser(void);

    // NOTE(review): presumably the common initialisation step shared by the
    // constructors; `name` looks like the name given to the resulting binary
    // -- confirm against the implementation.
    void init(const std::string& name = "");

    // NOTE(review): presumably tells whether multi-byte values read from the
    // file must be byte-swapped (file endianness differs from the host's)
    // -- confirm against the implementation.
    bool should_swap(void) const;

    // Associate each dynamic symbol's version with its symbol_version
    // (dynamic_symbol.version <----> symbol_version).
    // The symbol_version objects come from the symbol version table.
    void link_symbol_version(void);

    // In the templates below, ELF_T is presumably a traits type that selects
    // the ELF32 or ELF64 structure definitions, and REL_T the relocation
    // structure (with or without addend) -- TODO confirm against the
    // implementation.
    template<typename ELF_T>
    void parse_binary(void);

    template<typename ELF_T>
    bool parse_header(void);

    //! @brief Parse the binary's sections
    //!
    //! Sections are parsed by using the ``e_shoff`` field as offset
    template<typename ELF_T>
    void parse_sections(void);

    //! @brief Parse the binary's segments
    //!
    //! Segments are parsed by using the ``e_phoff`` field as offset
    template<typename ELF_T>
    void parse_segments(void);

    //! @brief Return the offset of the dynamic string table
    uint64_t get_dynamic_string_table(void) const;

    // NOTE(review): the two variants below presumably look the same offset up
    // through the segments and through the sections respectively -- confirm.
    uint64_t get_dynamic_string_table_from_segments(void) const;

    uint64_t get_dynamic_string_table_from_sections(void) const;

    //! @brief Return the number of dynamic symbols using the given method
    template<typename ELF_T>
    uint32_t get_numberof_dynamic_symbols(DYNSYM_COUNT_METHODS mtd) const;

    //! @brief Count based on the hash table (reliable)
    template<typename ELF_T>
    uint32_t nb_dynsym_hash(void) const;

    //! @brief Count based on the SYSV hash table
    template<typename ELF_T>
    uint32_t nb_dynsym_sysv_hash(void) const;

    //! @brief Count based on the GNU hash table
    template<typename ELF_T>
    uint32_t nb_dynsym_gnu_hash(void) const;

    //! @brief Count based on sections (not very reliable)
    template<typename ELF_T>
    uint32_t nb_dynsym_section(void) const;

    //! @brief Count based on PLT/GOT relocations (very reliable but not accurate)
    template<typename ELF_T>
    uint32_t nb_dynsym_relocations(void) const;

    // Parse the dynamic entries located at `offset` and spanning `size`.
    // NOTE(review): units (file offset, size in bytes) are assumed -- confirm.
    template<typename ELF_T>
    void parse_dynamic_entries(uint64_t offset, uint64_t size);

    // Parse the dynamic symbols located at `offset`.
    template<typename ELF_T>
    void parse_dynamic_symbols(uint64_t offset);

    //! @brief Parse static symbols
    //!
    //! The parser finds the symbols' offset by using the file offset attribute
    //! of the ELF_SECTION_TYPES::SHT_SYMTAB Section.
    //!
    //! The number of symbols is taken from the `information` attribute in the section header.
    //!
    //! The section containing the symbols' names is found with the `link` attribute.
    template<typename ELF_T>
    void parse_static_symbols(uint64_t offset, uint32_t nbSymbols, const Section* string_section);

    //! @brief Parse dynamic relocations
    //!
    //! It uses the DT_REL/DT_RELA dynamic entries to parse them
    template<typename ELF_T, typename REL_T>
    void parse_dynamic_relocations(uint64_t relocations_offset, uint64_t size);

    // NOTE(review): in the ELF gABI, DT_JMPREL is the address of the PLT
    // relocations, DT_PLTRELSZ their total size and DT_PLTREL their entry type,
    // for both ELF classes. The ELF32/ELF64 split described below looks
    // inconsistent with that -- confirm against the implementation.
    //! @brief Parse `.plt.got`/`got` relocations
    //!
    //! For:
    //! * ELF32 it uses **DT_JMPREL** and **DT_PLTRELSZ**
    //! * ELF64 it uses **DT_PLTREL** and **DT_PLTRELSZ**
    template<typename ELF_T, typename REL_T>
    void parse_pltgot_relocations(uint64_t offset, uint64_t size);


    //! @brief Parse relocations using LIEF::ELF::Section.
    //!
    //! Parser::parse_dynamic_relocations and Parser::parse_pltgot_relocations
    //! parse relocations by using LIEF::ELF::Segment. This method parses relocations
    //! that are not reachable through segments (for example in an object file).
    template<typename ELF_T, typename REL_T>
    void parse_section_relocations(uint64_t offset, uint64_t size, Section *applies_to = nullptr);

    //! @brief Parse SymbolVersionRequirement
    //!
    //! We use the virtual address stored in the
    //! DYNAMIC_TAGS::DT_VERNEED entry to get the offset
    //! and DYNAMIC_TAGS::DT_VERNEEDNUM to get the number of entries
    template<typename ELF_T>
    void parse_symbol_version_requirement(uint64_t offset, uint32_t nb_entries);


    //! @brief Parse SymbolVersionDefinition.
    //!
    //! We use the virtual address stored in
    //! the DYNAMIC_TAGS::DT_VERDEF entry to get the offset.
    //! DYNAMIC_TAGS::DT_VERDEFNUM gives the number of entries
    template<typename ELF_T>
    void parse_symbol_version_definition(uint64_t offset, uint32_t nb_entries);


    //! @brief Parse @link SymbolVersion Symbol version @endlink.
    //!
    //! We use the virtual address stored in the
    //! DYNAMIC_TAGS::DT_VERSYM entry to parse it.
    //!
    //! @see http://dev.gentoo.org/~solar/elf/symbol-versioning
    void parse_symbol_version(uint64_t symbol_version_offset);

    //! @brief Parse the symbols' GNU hash table
    //!
    //! @see https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections
    template<typename ELF_T>
    void parse_symbol_gnu_hash(uint64_t offset);

    //! @brief Parse Note (.gnu.note)
    void parse_notes(uint64_t offset, uint64_t size);

    //! @brief Parse the symbols' SYSV hash table
    void parse_symbol_sysv_hash(uint64_t offset);


    // NOTE(review): presumably returns the highest symbol index referenced by
    // the relocation table described by `relocations_offset`/`size` -- confirm
    // against the implementation.
    template<typename ELF_T, typename REL_T>
    uint32_t max_relocation_index(uint64_t relocations_offset, uint64_t size) const;

    // Input stream, exclusively owned by the parser (std::unique_ptr).
    std::unique_ptr<VectorStream> stream_;
    // Binary being built. Raw pointer: ownership is not expressed by the type
    // and cannot be determined from this header.
    Binary*                       binary_;
    // ELF class of the file (NOTE(review): presumably ELF32 vs ELF64 -- confirm).
    ELF_CLASS                     type_;
    // Method used to count the dynamic symbols (see the parse() overloads).
    DYNSYM_COUNT_METHODS          count_mtd_;
};




} // namespace ELF
} // namespace LIEF
#endif