//===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This program finds similar sections of a Module, and exports them as a JSON
// file.
//
// To find similarities contained across multiple modules, please use llvm-link
// first to merge the modules.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"
using namespace llvm;
using namespace IRSimilarity;
/// Command-line option `-o <filename>`: the path handed to exportToFile() as
/// the destination for the JSON report. Defaults to "-".
// NOTE(review): "-" is presumably interpreted as stdout by ToolOutputFile, per
// the usual LLVM tool convention — confirm.
static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
cl::init("-"),
cl::value_desc("filename"));
/// Positional command-line argument: the file handed to parseIRFile() in
/// main() as the module to analyze. Defaults to "-".
// NOTE(review): "-" is presumably interpreted as stdin by parseIRFile, per the
// usual LLVM tool convention — confirm.
static cl::opt<std::string> InputSourceFile(cl::Positional,
cl::desc("<Source file>"),
cl::init("-"),
cl::value_desc("filename"));
/// Look up the number that \p I was assigned in \p LLVMInstNum.
///
/// \param I - The Instruction whose number is requested; must not be null
/// (checked with an assert).
/// \param LLVMInstNum - The caller-supplied mapping from Instructions to the
/// unsigned integer that identifies their position in the module.
/// \returns The number recorded for \p I, or None when \p I has no entry in
/// \p LLVMInstNum.
Optional<unsigned>
getPositionInModule(const Instruction *I,
                    const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
  assert(I && "Instruction is nullptr!");

  const auto Entry = LLVMInstNum.find(I);
  if (Entry != LLVMInstNum.end())
    return Entry->second;

  // No number was ever recorded for this instruction.
  return None;
}
/// Exports the given SimilarityGroups to a JSON file at \p FilePath.
///
/// \param FilePath - The path to the output location.
/// \param SimSections - The similarity groups to process.
/// \param LLVMInstNum - The mapping of Instructions to their location in the
/// module represented by an unsigned integer.
/// \returns A nonzero error code if there was a failure creating the file.
std::error_code
exportToFile(const StringRef FilePath,
const SimilarityGroupList &SimSections,
const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
std::error_code EC;
std::unique_ptr<ToolOutputFile> Out(
new ToolOutputFile(FilePath, EC, sys::fs::OF_None));
if (EC)
return EC;
json::OStream J(Out->os(), 1);
J.objectBegin();
unsigned SimOption = 1;
// Process each list of SimilarityGroups organized by the Module.
for (const SimilarityGroup &G : SimSections) {
std::string SimOptionStr = std::to_string(SimOption);
J.attributeBegin(SimOptionStr);
J.arrayBegin();
// For each file there is a list of the range where the similarity
// exists.
for (const IRSimilarityCandidate &C : G) {
Optional<unsigned> Start =
getPositionInModule((*C.front()).Inst, LLVMInstNum);
Optional<unsigned> End =
getPositionInModule((*C.back()).Inst, LLVMInstNum);
assert(Start &&
"Could not find instruction number for first instruction");
assert(End && "Could not find instruction number for last instruction");
J.object([&] {
J.attribute("start", Start.value());
J.attribute("end", End.value());
});
}
J.arrayEnd();
J.attributeEnd();
SimOption++;
}
J.objectEnd();
Out->keep();
return EC;
}
int main(int argc, const char *argv[]) {
InitLLVM X(argc, argv);
cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");
LLVMContext CurrContext;
SMDiagnostic Err;
std::unique_ptr<Module> ModuleToAnalyze =
parseIRFile(InputSourceFile, Err, CurrContext);
if (!ModuleToAnalyze) {
Err.print(argv[0], errs());
return 1;
}
// Mapping from an Instruction pointer to its occurrence in a sequential
// list of all the Instructions in a Module.
DenseMap<Instruction *, unsigned> LLVMInstNum;
// We give each instruction a number, which gives us a start and end value
// for the beginning and end of each IRSimilarityCandidate.
unsigned InstructionNumber = 1;
for (Function &F : *ModuleToAnalyze)
for (BasicBlock &BB : F)
for (Instruction &I : BB.instructionsWithoutDebug())
LLVMInstNum[&I]= InstructionNumber++;
// The similarity identifier we will use to find the similar sections.
IRSimilarityIdentifier SimIdent;
SimilarityGroupList SimilaritySections =
SimIdent.findSimilarity(*ModuleToAnalyze);
std::error_code E =
exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
if (E) {
errs() << argv[0] << ": " << E.message() << '\n';
return 2;
}
return 0;
}
|