1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
|
//===- MarkLive.cpp -------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements --gc-sections, which is a feature to remove unused
// chunks from the output. Unused chunks are those that are not reachable from
// known root symbols or chunks. This feature is implemented as a mark-sweep
// garbage collector.
//
// Here's how it works. Each InputChunk has a "Live" bit. The bit is off by
// default. Starting with the GC-roots, visit all reachable chunks and set their
// Live bits. The Writer will then ignore chunks whose Live bits are off, so
// that such chunks do not appear in the output.
//
//===----------------------------------------------------------------------===//
#include "MarkLive.h"
#include "Config.h"
#include "InputChunks.h"
#include "InputElement.h"
#include "SymbolTable.h"
#include "Symbols.h"
#define DEBUG_TYPE "lld"
using namespace llvm;
using namespace llvm::wasm;
namespace lld::wasm {
namespace {

// Worklist-based marker behind --gc-sections. run() seeds the worklist with
// the GC roots and then drains it, marking every symbol and chunk reached
// through relocations as live.
class MarkLive {
public:
  // Enqueue the GC roots, propagate liveness, then decide whether
  // `__wasm_call_ctors` has to be kept.
  void run();

private:
  // Mark `sym` live and queue the chunk backing it, if there is one.
  void enqueue(Symbol *sym);
  // Mark `chunk` live and queue it so its relocations get scanned.
  void enqueue(InputChunk *chunk);
  // Enqueue the non-discarded init functions listed by `obj`.
  void enqueueInitFunctions(const ObjFile *obj);
  // Enqueue the segments of `file` that report isRetained().
  void enqueueRetainedSegments(const ObjFile *file);
  // Drain the worklist, following relocations out of each queued chunk.
  void mark();
  // Whether `__wasm_call_ctors` must be marked live after marking.
  bool isCallCtorsLive();

  // A list of chunks to visit.
  SmallVector<InputChunk *, 256> queue;
};

} // namespace
// Mark `sym` live and schedule the chunk that backs it (if any) for a
// relocation scan. Null symbols and symbols that are already live are ignored,
// which is what terminates the traversal.
void MarkLive::enqueue(Symbol *sym) {
  if (!sym || sym->isLive())
    return;
  LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");

  // Sample the defining file's liveness *before* marking the symbol, so the
  // implicit dependencies below are only pulled in the first time a defined
  // symbol from a not-yet-live file is reached.
  // NOTE(review): presumably sym->markLive() also marks the file live, which
  // is why the order matters here -- confirm against Symbol::markLive.
  InputFile *file = sym->getFile();
  bool markImplicitDeps = file && !file->isLive() && sym->isDefined();

  sym->markLive();

  if (markImplicitDeps) {
    // Both helpers below operate on ObjFile. Use a checked downcast so that a
    // symbol defined by some other kind of input file is simply skipped
    // rather than tripping the cast<> assertion (or invoking undefined
    // behaviour in builds without assertions).
    if (auto *obj = dyn_cast<ObjFile>(file)) {
      // Mark ctor functions in the object that defines this symbol live.
      // The ctor functions are all referenced by the synthetic callCtors
      // function. However, this function does not contain relocations so we
      // have to manually mark the ctors as live.
      enqueueInitFunctions(obj);
      // Mark retained segments in the object that defines this symbol live.
      enqueueRetainedSegments(obj);
    }
  }

  if (InputChunk *chunk = sym->getChunk())
    queue.push_back(chunk);
}
// Chunk flavour of enqueue(): flags `chunk` as live and puts it on the
// worklist so that mark() will walk its relocations. No "already live" check
// is made here; the chunk is queued unconditionally.
void MarkLive::enqueue(InputChunk *chunk) {
  LLVM_DEBUG(dbgs() << "markLive: " << toString(chunk) << "\n");
  queue.push_back(chunk);
  chunk->live = true;
}
// Enqueue every init function recorded in the linking data of `obj`, except
// those whose symbol has been discarded.
//
// These functions are only referenced by the synthetic callCtors function,
// and that function carries no relocations for mark() to follow, so they
// have to be marked live by hand.
void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
  const WasmLinkingData &linking = obj->getWasmObj()->linkingData();
  for (const WasmInitFunc &init : linking.InitFunctions) {
    auto *ctor = obj->getFunctionSymbol(init.Symbol);
    if (ctor->isDiscarded())
      continue;
    enqueue(ctor);
  }
}
// Mark segments flagged by segment-level no-strip. Segment-level no-strip is
// usually used to retain segments without having symbol table entry.
void MarkLive::enqueueRetainedSegments(const ObjFile *file) {
for (InputChunk *chunk : file->segments)
if (chunk->isRetained())
enqueue(chunk);
}
// Seed the worklist with all GC roots, propagate liveness through
// relocations, and finally decide whether `__wasm_call_ctors` must be kept.
void MarkLive::run() {
  // Root: the configured entry point, if any. enqueue() tolerates a null
  // result from the lookup.
  if (!config->entry.empty())
    enqueue(symtab->find(config->entry));

  // Roots: every symbol that is no-strip or exported must be preserved.
  for (Symbol *sym : symtab->symbols()) {
    const bool mustKeep = sym->isNoStrip() || sym->isExported();
    if (mustKeep)
      enqueue(sym);
  }

  // Root: the synthetic callDtors symbol, when it exists.
  if (WasmSym::callDtors)
    enqueue(WasmSym::callDtors);

  // Objects that are explicitly live from the command-line contribute their
  // constructors and their retained segments as roots.
  for (const ObjFile *obj : symtab->objectFiles) {
    if (!obj->isLive())
      continue;
    enqueueInitFunctions(obj);
    enqueueRetainedSegments(obj);
  }

  mark();

  // With at least one surviving init function, `__wasm_call_ctors` has to be
  // live so that it is assigned an index and gets called.
  if (isCallCtorsLive())
    WasmSym::callCtors->markLive();
}
void MarkLive::mark() {
// Follow relocations to mark all reachable chunks.
while (!queue.empty()) {
InputChunk *c = queue.pop_back_val();
for (const WasmRelocation reloc : c->getRelocations()) {
if (reloc.Type == R_WASM_TYPE_INDEX_LEB)
continue;
Symbol *sym = c->file->getSymbol(reloc.Index);
// If the function has been assigned the special index zero in the table,
// the relocation doesn't pull in the function body, since the function
// won't actually go in the table (the runtime will trap attempts to call
// that index, since we don't use it). A function with a table index of
// zero is only reachable via "call", not via "call_indirect". The stub
// functions used for weak-undefined symbols have this behaviour (compare
// equal to null pointer, only reachable via direct call).
if (reloc.Type == R_WASM_TABLE_INDEX_SLEB ||
reloc.Type == R_WASM_TABLE_INDEX_SLEB64 ||
reloc.Type == R_WASM_TABLE_INDEX_I32 ||
reloc.Type == R_WASM_TABLE_INDEX_I64) {
auto *funcSym = cast<FunctionSymbol>(sym);
if (funcSym->isStub)
continue;
}
enqueue(sym);
}
}
}
void markLive() {
if (!config->gcSections)
return;
LLVM_DEBUG(dbgs() << "markLive\n");
MarkLive marker;
marker.run();
// Report garbage-collected sections.
if (config->printGcSections) {
for (const ObjFile *obj : symtab->objectFiles) {
for (InputChunk *c : obj->functions)
if (!c->live)
message("removing unused section " + toString(c));
for (InputChunk *c : obj->segments)
if (!c->live)
message("removing unused section " + toString(c));
for (InputGlobal *g : obj->globals)
if (!g->live)
message("removing unused section " + toString(g));
for (InputTag *t : obj->tags)
if (!t->live)
message("removing unused section " + toString(t));
for (InputTable *t : obj->tables)
if (!t->live)
message("removing unused section " + toString(t));
}
for (InputChunk *c : symtab->syntheticFunctions)
if (!c->live)
message("removing unused section " + toString(c));
for (InputGlobal *g : symtab->syntheticGlobals)
if (!g->live)
message("removing unused section " + toString(g));
for (InputTable *t : symtab->syntheticTables)
if (!t->live)
message("removing unused section " + toString(t));
}
}
// Decide whether `__wasm_call_ctors` has to be kept. Meant to be consulted
// after mark(), since the answer depends on which init functions ended up
// live.
bool MarkLive::isCallCtorsLive() {
  // A relocatable link never calls `__wasm_call_ctors`.
  if (config->relocatable)
    return false;

  // Emscripten-style PIC always calls `__wasm_call_ctors`, which in turn
  // calls `__wasm_apply_data_relocs`.
  if (config->isPic)
    return true;

  // Otherwise it is needed only if some init function survived: one that is
  // neither discarded nor dead.
  for (const ObjFile *file : symtab->objectFiles) {
    const WasmLinkingData &linking = file->getWasmObj()->linkingData();
    for (const WasmInitFunc &init : linking.InitFunctions) {
      auto *ctor = file->getFunctionSymbol(init.Symbol);
      if (ctor->isDiscarded())
        continue;
      if (ctor->isLive())
        return true;
    }
  }
  return false;
}
} // namespace lld::wasm
|