1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
|
//===- bolt/Core/HashUtilities.cpp - Misc hash utilities ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Computation of hash values over BinaryFunction and BinaryBasicBlock.
//
//===----------------------------------------------------------------------===//
#include "bolt/Core/HashUtilities.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Utils/NameResolver.h"
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
namespace bolt {
std::string hashInteger(uint64_t Value) {
std::string HashString;
if (Value == 0)
HashString.push_back(0);
while (Value) {
uint8_t LSB = Value & 0xff;
HashString.push_back(LSB);
Value >>= 8;
}
return HashString;
}
std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
std::string HashString;
// Ignore function references.
if (BC.getFunctionForSymbol(&Symbol))
return HashString;
llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
if (!ErrorOrValue)
return HashString;
// Ignore jump table references.
if (BC.getJumpTableContainingAddress(*ErrorOrValue))
return HashString;
return HashString.append(hashInteger(*ErrorOrValue));
}
std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
switch (Expr.getKind()) {
case MCExpr::Constant:
return hashInteger(cast<MCConstantExpr>(Expr).getValue());
case MCExpr::SymbolRef:
return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
case MCExpr::Unary: {
const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
return hashInteger(UnaryExpr.getOpcode())
.append(hashExpr(BC, *UnaryExpr.getSubExpr()));
}
case MCExpr::Binary: {
const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
return hashExpr(BC, *BinaryExpr.getLHS())
.append(hashInteger(BinaryExpr.getOpcode()))
.append(hashExpr(BC, *BinaryExpr.getRHS()));
}
case MCExpr::Target:
return std::string();
}
llvm_unreachable("invalid expression kind");
}
std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
if (Operand.isImm())
return hashInteger(Operand.getImm());
if (Operand.isReg())
return hashInteger(Operand.getReg());
if (Operand.isExpr())
return hashExpr(BC, *Operand.getExpr());
return std::string();
}
std::string hashBlock(BinaryContext &BC, const BinaryBasicBlock &BB,
OperandHashFuncTy OperandHashFunc) {
const bool IsX86 = BC.isX86();
// The hash is computed by creating a string of all instruction opcodes and
// possibly their operands and then hashing that string with std::hash.
std::string HashString;
for (const MCInst &Inst : BB) {
if (BC.MIB->isPseudo(Inst))
continue;
unsigned Opcode = Inst.getOpcode();
// Ignore unconditional jumps since we check CFG consistency by processing
// basic blocks in order and do not rely on branches to be in-sync with
// CFG. Note that we still use condition code of conditional jumps.
if (BC.MIB->isUnconditionalBranch(Inst))
continue;
if (IsX86 && BC.MIB->isConditionalBranch(Inst))
Opcode = BC.MIB->getShortBranchOpcode(Opcode);
if (Opcode == 0) {
HashString.push_back(0);
} else {
StringRef OpcodeName = BC.InstPrinter->getOpcodeName(Opcode);
HashString.append(OpcodeName.str());
}
for (const MCOperand &Op : MCPlus::primeOperands(Inst))
HashString.append(OperandHashFunc(Op));
}
return HashString;
}
/// A "loose" hash of a basic block to use with the stale profile matching. The
/// computed value will be the same for blocks with minor changes (such as
/// reordering of instructions or using different operands) but may result in
/// collisions that need to be resolved by a stronger hashing.
std::string hashBlockLoose(BinaryContext &BC, const BinaryBasicBlock &BB) {
// The hash is computed by creating a string of all lexicographically ordered
// instruction opcodes, which is then hashed with std::hash.
std::set<std::string> Opcodes;
for (const MCInst &Inst : BB) {
// Skip pseudo instructions and nops.
if (BC.MIB->isPseudo(Inst) || BC.MIB->isNoop(Inst))
continue;
// Ignore unconditional jumps, as they can be added / removed as a result
// of basic block reordering.
if (BC.MIB->isUnconditionalBranch(Inst))
continue;
// Do not distinguish different types of conditional jumps.
if (BC.MIB->isConditionalBranch(Inst)) {
Opcodes.insert("JMP");
continue;
}
std::string Mnemonic = BC.InstPrinter->getMnemonic(&Inst).first;
llvm::erase_if(Mnemonic, [](unsigned char ch) { return std::isspace(ch); });
Opcodes.insert(Mnemonic);
}
std::string HashString;
for (const std::string &Opcode : Opcodes)
HashString.append(Opcode);
return HashString;
}
/// An even looser hash level relative to $ hashBlockLoose to use with stale
/// profile matching, composed of the names of a block's called functions in
/// lexicographic order.
std::string hashBlockCalls(BinaryContext &BC, const BinaryBasicBlock &BB) {
// The hash is computed by creating a string of all lexicographically ordered
// called function names.
std::vector<std::string> FunctionNames;
for (const MCInst &Instr : BB) {
// Skip non-call instructions.
if (!BC.MIB->isCall(Instr))
continue;
const MCSymbol *CallSymbol = BC.MIB->getTargetSymbol(Instr);
if (!CallSymbol)
continue;
FunctionNames.push_back(std::string(CallSymbol->getName()));
}
std::sort(FunctionNames.begin(), FunctionNames.end());
std::string HashString;
for (const std::string &FunctionName : FunctionNames)
HashString.append(FunctionName);
return HashString;
}
/// The same as the $hashBlockCalls function, but for profiled functions.
std::string
hashBlockCalls(const DenseMap<uint32_t, yaml::bolt::BinaryFunctionProfile *>
&IdToYamlFunction,
const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
std::vector<std::string> FunctionNames;
for (const yaml::bolt::CallSiteInfo &CallSiteInfo : YamlBB.CallSites) {
auto It = IdToYamlFunction.find(CallSiteInfo.DestId);
if (It == IdToYamlFunction.end())
continue;
StringRef Name = NameResolver::dropNumNames(It->second->Name);
FunctionNames.push_back(std::string(Name));
}
std::sort(FunctionNames.begin(), FunctionNames.end());
std::string HashString;
for (const std::string &FunctionName : FunctionNames)
HashString.append(FunctionName);
return HashString;
}
} // namespace bolt
} // namespace llvm
|