File: StructuralHash.cpp

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,998,520 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (166 lines) | stat: -rw-r--r-- 5,877 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
//===-- StructuralHash.cpp - IR Hashing -------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/StructuralHash.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"

using namespace llvm;

namespace {

// Basic hashing mechanism to detect structural change to the IR, used to verify
// pass return status consistency with actual change. In addition to being used
// by the MergeFunctions pass.

class StructuralHashImpl {
  uint64_t Hash;

  void hash(uint64_t V) { Hash = hashing::detail::hash_16_bytes(Hash, V); }

  // This will produce different values on 32-bit and 64-bit systens as
  // hash_combine returns a size_t. However, this is only used for
  // detailed hashing which, in-tree, only needs to distinguish between
  // differences in functions.
  template <typename T> void hashArbitaryType(const T &V) {
    hash(hash_combine(V));
  }

  void hashType(Type *ValueType) {
    hash(ValueType->getTypeID());
    if (ValueType->isIntegerTy())
      hash(ValueType->getIntegerBitWidth());
  }

public:
  StructuralHashImpl() : Hash(4) {}

  void updateOperand(Value *Operand) {
    hashType(Operand->getType());

    // The cases enumerated below are not exhaustive and are only aimed to
    // get decent coverage over the function.
    if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(Operand)) {
      hashArbitaryType(ConstInt->getValue());
    } else if (ConstantFP *ConstFP = dyn_cast<ConstantFP>(Operand)) {
      hashArbitaryType(ConstFP->getValue());
    } else if (Argument *Arg = dyn_cast<Argument>(Operand)) {
      hash(Arg->getArgNo());
    } else if (Function *Func = dyn_cast<Function>(Operand)) {
      // Hashing the name will be deterministic as LLVM's hashing infrastructure
      // has explicit support for hashing strings and will not simply hash
      // the pointer.
      hashArbitaryType(Func->getName());
    }
  }

  void updateInstruction(const Instruction &Inst, bool DetailedHash) {
    hash(Inst.getOpcode());

    if (!DetailedHash)
      return;

    hashType(Inst.getType());

    // Handle additional properties of specific instructions that cause
    // semantic differences in the IR.
    if (const auto *ComparisonInstruction = dyn_cast<CmpInst>(&Inst))
      hash(ComparisonInstruction->getPredicate());

    for (const auto &Op : Inst.operands())
      updateOperand(Op);
  }

  // A function hash is calculated by considering only the number of arguments
  // and whether a function is varargs, the order of basic blocks (given by the
  // successors of each basic block in depth first order), and the order of
  // opcodes of each instruction within each of these basic blocks. This mirrors
  // the strategy FunctionComparator::compare() uses to compare functions by
  // walking the BBs in depth first order and comparing each instruction in
  // sequence. Because this hash currently does not look at the operands, it is
  // insensitive to things such as the target of calls and the constants used in
  // the function, which makes it useful when possibly merging functions which
  // are the same modulo constants and call targets.
  //
  // Note that different users of StructuralHash will want different behavior
  // out of it (i.e., MergeFunctions will want something different from PM
  // expensive checks for pass modification status). When modifying this
  // function, most changes should be gated behind an option and enabled
  // selectively.
  void update(const Function &F, bool DetailedHash) {
    // Declarations don't affect analyses.
    if (F.isDeclaration())
      return;

    hash(0x62642d6b6b2d6b72); // Function header

    hash(F.isVarArg());
    hash(F.arg_size());

    SmallVector<const BasicBlock *, 8> BBs;
    SmallPtrSet<const BasicBlock *, 16> VisitedBBs;

    // Walk the blocks in the same order as
    // FunctionComparator::cmpBasicBlocks(), accumulating the hash of the
    // function "structure." (BB and opcode sequence)
    BBs.push_back(&F.getEntryBlock());
    VisitedBBs.insert(BBs[0]);
    while (!BBs.empty()) {
      const BasicBlock *BB = BBs.pop_back_val();

      // This random value acts as a block header, as otherwise the partition of
      // opcodes into BBs wouldn't affect the hash, only the order of the
      // opcodes
      hash(45798);
      for (auto &Inst : *BB)
        updateInstruction(Inst, DetailedHash);

      for (const BasicBlock *Succ : successors(BB))
        if (VisitedBBs.insert(Succ).second)
          BBs.push_back(Succ);
    }
  }

  void update(const GlobalVariable &GV) {
    // Declarations and used/compiler.used don't affect analyses.
    // Since there are several `llvm.*` metadata, like `llvm.embedded.object`,
    // we ignore anything with the `.llvm` prefix
    if (GV.isDeclaration() || GV.getName().starts_with("llvm."))
      return;
    hash(23456); // Global header
    hash(GV.getValueType()->getTypeID());
  }

  void update(const Module &M, bool DetailedHash) {
    for (const GlobalVariable &GV : M.globals())
      update(GV);
    for (const Function &F : M)
      update(F, DetailedHash);
  }

  uint64_t getHash() const { return Hash; }
};

} // namespace

IRHash llvm::StructuralHash(const Function &F, bool DetailedHash) {
  StructuralHashImpl H;
  H.update(F, DetailedHash);
  return H.getHash();
}

IRHash llvm::StructuralHash(const Module &M, bool DetailedHash) {
  StructuralHashImpl H;
  H.update(M, DetailedHash);
  return H.getHash();
}