1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
|
//===- InputSection.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLD_MACHO_INPUT_SECTION_H
#define LLD_MACHO_INPUT_SECTION_H
#include "Config.h"
#include "Relocations.h"
#include "Symbols.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/BinaryFormat/MachO.h"
namespace lld {
namespace macho {
// Forward declarations: within this header these types are only referred to
// through pointers, so their full definitions are not needed here.
class InputFile;
class OutputSection;
// Common base class for every kind of input section. The concrete subclass is
// identified by kind(), which the subclasses' classof() hooks test. Subclasses
// implement the offset translation and liveness queries declared here.
class InputSection {
public:
  // Discriminator identifying the concrete subclass.
  enum Kind : uint8_t {
    ConcatKind,
    CStringLiteralKind,
    WordLiteralKind,
  };

  Kind kind() const { return sectionKind; }
  virtual ~InputSection() = default;

  // Size / emptiness of the raw contents held in `data`.
  virtual uint64_t getSize() const { return data.size(); }
  virtual bool empty() const { return data.empty(); }

  // Accessors that forward to the Section this InputSection was created from.
  InputFile *getFile() const { return section.file; }
  StringRef getName() const { return section.name; }
  StringRef getSegName() const { return section.segname; }
  uint32_t getFlags() const { return section.flags; }
  uint64_t getFileSize() const;

  // Translates \p off -- an offset relative to this InputSection -- into an
  // offset from the beginning of its parent OutputSection.
  virtual uint64_t getOffset(uint64_t off) const = 0;

  // NOTE(review): the comment previously attached to this declaration read
  // "The offset from the beginning of the file", but the name suggests it
  // returns the virtual address of the byte at \p off. The definition is not
  // in this header -- confirm against the implementation.
  uint64_t getVA(uint64_t off) const;

  // Return a user-friendly string for use in diagnostics.
  // Format: /path/to/object.o:(symbol _func+0x123)
  std::string getLocation(uint64_t off) const;

  // Return the source line corresponding to an address, or the empty string.
  // Format: Source.cpp:123 (/path/to/Source.cpp:123)
  std::string getSourceLocation(uint64_t off) const;

  // Return the relocation at \p off, if it exists. This does a linear search.
  const Reloc *getRelocAt(uint32_t off) const;

  // Whether the data at \p off in this InputSection is live.
  virtual bool isLive(uint64_t off) const = 0;
  // Mark the data at \p off in this InputSection as live.
  virtual void markLive(uint64_t off) = 0;

  // The section that stands in for this one. The base implementation returns
  // `this`; subclasses may override it.
  virtual InputSection *canonical() { return this; }
  virtual const InputSection *canonical() const { return this; }

protected:
  // \p section is stored by reference, so the referenced Section must outlive
  // this object.
  InputSection(Kind kind, const Section &section, ArrayRef<uint8_t> data,
               uint32_t align)
      : sectionKind(kind), keepUnique(false), hasAltEntry(false), align(align),
        data(data), section(section) {}

  // Copies only the kind, alignment, data view and Section reference from
  // \p rhs. keepUnique and hasAltEntry are reset to false, and the remaining
  // members (isFinal, parent, relocs, symbols) take their default values.
  InputSection(const InputSection &rhs)
      : sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false),
        align(rhs.align), data(rhs.data), section(rhs.section) {}

  Kind sectionKind;

public:
  // is address assigned?
  bool isFinal = false;
  // keep the address of the symbol(s) in this section unique in the final
  // binary ?
  bool keepUnique : 1;
  // Does this section have symbols at offsets other than zero? (NOTE: only
  // applies to ConcatInputSections.)
  bool hasAltEntry : 1;
  uint32_t align = 1;

  // The output section this input section is placed in; null until assigned.
  OutputSection *parent = nullptr;
  // Non-owning view of the section contents.
  ArrayRef<uint8_t> data;
  std::vector<Reloc> relocs;
  // The symbols that belong to this InputSection, sorted by value. With
  // .subsections_via_symbols, there is typically only one element here.
  llvm::TinyPtrVector<Defined *> symbols;

protected:
  // The Section this InputSection was created from (held by reference).
  const Section &section;

  const Defined *getContainingSymbol(uint64_t off) const;
};
// ConcatInputSections are combined into (Concat)OutputSections through simple
// concatenation, in contrast with literal sections which may have their
// contents merged before output.
class ConcatInputSection final : public InputSection {
public:
  // \p section is held by reference by the base class; \p align defaults to 1.
  ConcatInputSection(const Section &section, ArrayRef<uint8_t> data,
                     uint32_t align = 1)
      : InputSection(ConcatKind, section, data, align) {}

  // The whole section is placed contiguously, so an input offset maps to the
  // output offset by adding this section's own offset.
  uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }
  // Convenience overload: InputSection::getVA() for offset 0.
  uint64_t getVA() const { return InputSection::getVA(0); }

  // ConcatInputSections are entirely live or dead, so the offset is irrelevant.
  bool isLive(uint64_t off) const override { return live; }
  void markLive(uint64_t off) override { live = true; }

  // True when this section was coalesced and no symbols remain attached to it.
  bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); }
  // True when the section is dead or is a coalesced weak section.
  bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }

  void writeTo(uint8_t *buf);
  void foldIdentical(ConcatInputSection *redundant);

  // Returns `replacement` when it has been set, otherwise this section itself.
  ConcatInputSection *canonical() override {
    return replacement ? replacement : this;
  }
  const InputSection *canonical() const override {
    return replacement ? replacement : this;
  }

  // LLVM-style RTTI hook used by isa<>/cast<>/dyn_cast<>.
  static bool classof(const InputSection *isec) {
    return isec->kind() == ConcatKind;
  }

  // Points to the surviving section after this one is folded by ICF
  ConcatInputSection *replacement = nullptr;
  // Equivalence-class ID for ICF
  uint32_t icfEqClass[2] = {0, 0};

  // With subsections_via_symbols, most symbols have their own InputSection,
  // and for weak symbols (e.g. from inline functions), only the
  // InputSection from one translation unit will make it to the output,
  // while all copies in other translation units are coalesced into the
  // first and not copied to the output.
  bool wasCoalesced = false;
  // Sections start out live unless dead-stripping is enabled.
  bool live = !config->deadStrip;
  bool hasCallSites = false;
  // This variable has two usages. Initially, it represents the input order.
  // After assignAddresses is called, it represents the offset from the
  // beginning of the output section this section was assigned to.
  uint64_t outSecOff = 0;
};
// Initialize a fake InputSection that does not belong to any InputFile.
// \p segName and \p sectName name the segment and section. When omitted,
// \p flags defaults to 0, \p data to an empty array and \p align to 1.
// NOTE(review): the definition (and therefore the ownership of the returned
// pointer) is not visible in this header -- confirm against the implementation.
ConcatInputSection *makeSyntheticInputSection(StringRef segName,
StringRef sectName,
uint32_t flags = 0,
ArrayRef<uint8_t> data = {},
uint32_t align = 1);
// Free-function helpers that make it easy to sprinkle asserts over arbitrary
// InputSections.

// Returns true only when \p isec is a ConcatInputSection that reports it
// should be omitted from the output; any other kind of section yields false.
inline bool shouldOmitFromOutput(InputSection *isec) {
  if (!isa<ConcatInputSection>(isec))
    return false;
  return cast<ConcatInputSection>(isec)->shouldOmitFromOutput();
}
// Returns true only when \p isec is a ConcatInputSection that reports itself
// as a coalesced weak section; any other kind of section yields false.
inline bool isCoalescedWeak(InputSection *isec) {
  if (!isa<ConcatInputSection>(isec))
    return false;
  return cast<ConcatInputSection>(isec)->isCoalescedWeak();
}
// We allocate a lot of these and binary search on them, so they should be as
// compact as possible. Hence the use of 31 rather than 64 bits for the hash.
// The member order and bit-field widths below are what keep the struct at the
// 16 bytes enforced by the static_assert that follows it.
struct StringPiece {
// Offset from the start of the containing input section.
uint32_t inSecOff;
// Liveness flag; shares a 32-bit word with `hash`.
uint32_t live : 1;
// Only set if deduplicating literals
uint32_t hash : 31;
// Offset from the start of the containing output section.
uint64_t outSecOff = 0;
// \p off is narrowed to 32 bits and \p hash to 31 bits when stored. A piece
// starts out live unless dead-stripping is enabled.
StringPiece(uint64_t off, uint32_t hash)
: inSecOff(off), live(!config->deadStrip), hash(hash) {}
};
// Guards the compactness requirement described above.
static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
// CStringInputSections are composed of multiple null-terminated string
// literals, which we represent using StringPieces. These literals can be
// deduplicated and tail-merged, so translating offsets between the input and
// output sections is more complicated.
//
// NOTE: One significant difference between LLD and ld64 is that we merge all
// cstring literals, even those referenced directly by non-private symbols.
// ld64 is more conservative and does not do that. This was mostly done for
// implementation simplicity; if we find programs that need the more
// conservative behavior we can certainly implement that.
class CStringInputSection final : public InputSection {
public:
  // \p section is held by reference by the base class. \p dedupLiterals
  // records whether literals of this section are being deduplicated.
  CStringInputSection(const Section &section, ArrayRef<uint8_t> data,
                      uint32_t align, bool dedupLiterals)
      : InputSection(CStringLiteralKind, section, data, align),
        deduplicateLiterals(dedupLiterals) {}

  uint64_t getOffset(uint64_t off) const override;

  // Liveness is tracked per string piece rather than for the whole section.
  bool isLive(uint64_t off) const override { return getStringPiece(off).live; }
  void markLive(uint64_t off) override { getStringPiece(off).live = true; }

  // Find the StringPiece that contains this offset.
  StringPiece &getStringPiece(uint64_t off);
  const StringPiece &getStringPiece(uint64_t off) const;

  // Split at each null byte.
  void splitIntoPieces();

  // Returns the contents of the i'th piece, excluding its null terminator.
  // \p i must be a valid index into `pieces`.
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  StringRef getStringRef(size_t i) const {
    size_t begin = pieces[i].inSecOff;
    // The endpoint should be *at* the null terminator, not after. This matches
    // the behavior of StringRef(const char *Str).
    // A piece extends up to the start of the next piece, or to the end of the
    // section data for the last piece.
    size_t end =
        ((pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff) - 1;
    return toStringRef(data.slice(begin, end - begin));
  }

  // Returns the string of the piece located via getStringPieceIndex(off).
  StringRef getStringRefAtOffset(uint64_t off) const {
    return getStringRef(getStringPieceIndex(off));
  }

  // Returns i'th piece as a CachedHashStringRef. This function is very hot when
  // string merging is enabled, so we want to inline.
  LLVM_ATTRIBUTE_ALWAYS_INLINE
  llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {
    // The stored hash is only set when deduplicating literals.
    assert(deduplicateLiterals);
    return {getStringRef(i), pieces[i].hash};
  }

  // LLVM-style RTTI hook used by isa<>/cast<>/dyn_cast<>.
  static bool classof(const InputSection *isec) {
    return isec->kind() == CStringLiteralKind;
  }

  bool deduplicateLiterals = false;
  // The string pieces of this section. getStringRef() relies on consecutive
  // entries having increasing inSecOff values.
  std::vector<StringPiece> pieces;

private:
  size_t getStringPieceIndex(uint64_t off) const;
};
// An input section whose liveness is tracked in fixed-size units: one bit of
// `live` covers each group of (1 << power2LiteralSize) bytes of the data.
class WordLiteralInputSection final : public InputSection {
public:
  // Defined out of line. \p section is taken by const reference, matching the
  // other InputSection subclasses.
  WordLiteralInputSection(const Section &section, ArrayRef<uint8_t> data,
                          uint32_t align);

  uint64_t getOffset(uint64_t off) const override;

  // \p off is a byte offset; shifting it selects the bit of the unit that
  // contains that byte.
  bool isLive(uint64_t off) const override {
    return live[off >> power2LiteralSize];
  }
  void markLive(uint64_t off) override {
    live[off >> power2LiteralSize] = true;
  }

  // LLVM-style RTTI hook used by isa<>/cast<>/dyn_cast<>.
  static bool classof(const InputSection *isec) {
    return isec->kind() == WordLiteralKind;
  }

private:
  // Shift amount used to turn a byte offset into an index into `live`.
  // NOTE(review): presumably log2 of the literal size in bytes, set by the
  // out-of-line constructor -- confirm against the implementation.
  unsigned power2LiteralSize;
  // The liveness of data[off] is tracked by live[off >> power2LiteralSize].
  llvm::BitVector live;
};
// Extracts the section-type field from a section's flags word by masking it
// with SECTION_TYPE.
inline uint8_t sectionType(uint32_t flags) {
  const uint32_t typeBits = flags & llvm::MachO::SECTION_TYPE;
  return static_cast<uint8_t>(typeBits);
}
// Whether the section type encoded in \p flags is one that
// llvm::MachO::isVirtualSection() reports as virtual.
inline bool isZeroFill(uint32_t flags) {
  const uint8_t type = sectionType(flags);
  return llvm::MachO::isVirtualSection(type);
}
// Whether the section type encoded in \p flags is S_THREAD_LOCAL_VARIABLES.
inline bool isThreadLocalVariables(uint32_t flags) {
  const uint8_t type = sectionType(flags);
  return type == llvm::MachO::S_THREAD_LOCAL_VARIABLES;
}
// These sections contain the data for initializing thread-local variables:
// the type is either S_THREAD_LOCAL_REGULAR or S_THREAD_LOCAL_ZEROFILL.
inline bool isThreadLocalData(uint32_t flags) {
  const uint8_t type = sectionType(flags);
  return type == llvm::MachO::S_THREAD_LOCAL_REGULAR ||
         type == llvm::MachO::S_THREAD_LOCAL_ZEROFILL;
}
// Whether the user-settable attribute bits of \p flags (those selected by
// SECTION_ATTRIBUTES_USR) are exactly S_ATTR_DEBUG.
inline bool isDebugSection(uint32_t flags) {
  const uint32_t userAttrs = flags & llvm::MachO::SECTION_ATTRIBUTES_USR;
  return userAttrs == llvm::MachO::S_ATTR_DEBUG;
}
// Whether the section type encoded in \p flags is one of the fixed-size
// literal types (4-, 8- or 16-byte literals).
inline bool isWordLiteralSection(uint32_t flags) {
  switch (sectionType(flags)) {
  case llvm::MachO::S_4BYTE_LITERALS:
  case llvm::MachO::S_8BYTE_LITERALS:
  case llvm::MachO::S_16BYTE_LITERALS:
    return true;
  default:
    return false;
  }
}
// Predicates over an InputSection. They are only declared here; the
// definitions live elsewhere.
// NOTE(review): judging by the names, each one tests whether the section is of
// the named kind (code, __cfstring, class refs, selector refs, __eh_frame,
// __gcc_except_tab) -- confirm the exact criteria against the implementation.
bool isCodeSection(const InputSection *);
bool isCfStringSection(const InputSection *);
bool isClassRefsSection(const InputSection *);
bool isSelRefsSection(const InputSection *);
bool isEhFrameSection(const InputSection *);
bool isGccExceptTabSection(const InputSection *);
// Global list of ConcatInputSection pointers. This is only a declaration; the
// variable is defined elsewhere.
extern std::vector<ConcatInputSection *> inputSections;
// Well-known section names, available as compile-time character arrays so that
// they can be referred to symbolically instead of through string literals.
// `constexpr` already makes each array const, so no extra qualifier is needed.
namespace section_names {

constexpr char authGot[] = "__auth_got";
constexpr char authPtr[] = "__auth_ptr";
constexpr char binding[] = "__binding";
constexpr char bitcodeBundle[] = "__bundle";
constexpr char cString[] = "__cstring";
constexpr char cfString[] = "__cfstring";
constexpr char cgProfile[] = "__cg_profile";
constexpr char chainFixups[] = "__chainfixups";
constexpr char codeSignature[] = "__code_signature";
constexpr char common[] = "__common";
constexpr char compactUnwind[] = "__compact_unwind";
constexpr char data[] = "__data";
constexpr char debugAbbrev[] = "__debug_abbrev";
constexpr char debugInfo[] = "__debug_info";
constexpr char debugLine[] = "__debug_line";
constexpr char debugStr[] = "__debug_str";
constexpr char debugStrOffs[] = "__debug_str_offs";
constexpr char ehFrame[] = "__eh_frame";
constexpr char gccExceptTab[] = "__gcc_except_tab";
constexpr char export_[] = "__export";
constexpr char dataInCode[] = "__data_in_code";
constexpr char functionStarts[] = "__func_starts";
constexpr char got[] = "__got";
constexpr char header[] = "__mach_header";
constexpr char indirectSymbolTable[] = "__ind_sym_tab";
constexpr char initOffsets[] = "__init_offsets";
constexpr char const_[] = "__const";
constexpr char lazySymbolPtr[] = "__la_symbol_ptr";
constexpr char lazyBinding[] = "__lazy_binding";
constexpr char literals[] = "__literals";
constexpr char moduleInitFunc[] = "__mod_init_func";
constexpr char moduleTermFunc[] = "__mod_term_func";
constexpr char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr char objcCatList[] = "__objc_catlist";
constexpr char objcClassList[] = "__objc_classlist";
constexpr char objcClassRefs[] = "__objc_classrefs";
constexpr char objcConst[] = "__objc_const";
constexpr char objCImageInfo[] = "__objc_imageinfo";
constexpr char objcStubs[] = "__objc_stubs";
constexpr char objcSelrefs[] = "__objc_selrefs";
constexpr char objcMethname[] = "__objc_methname";
constexpr char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr char objcProtoList[] = "__objc_protolist";
constexpr char pageZero[] = "__pagezero";
constexpr char pointers[] = "__pointers";
constexpr char rebase[] = "__rebase";
constexpr char staticInit[] = "__StaticInit";
constexpr char stringTable[] = "__string_table";
constexpr char stubHelper[] = "__stub_helper";
constexpr char stubs[] = "__stubs";
constexpr char swift[] = "__swift";
constexpr char symbolTable[] = "__symbol_table";
constexpr char textCoalNt[] = "__textcoal_nt";
constexpr char text[] = "__text";
constexpr char threadPtrs[] = "__thread_ptrs";
constexpr char threadVars[] = "__thread_vars";
constexpr char unwindInfo[] = "__unwind_info";
constexpr char weakBinding[] = "__weak_binding";
constexpr char zeroFill[] = "__zerofill";
constexpr char addrSig[] = "__llvm_addrsig";

} // namespace section_names
} // namespace macho
// Returns a string describing the given input section. Declared in namespace
// lld rather than lld::macho.
// NOTE(review): the exact format is determined by the definition, which is not
// visible in this header.
std::string toString(const macho::InputSection *);
} // namespace lld
#endif
|