/*===- DataFlow.cpp - a standalone DataFlow tracer -------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// An experimental data-flow tracer for fuzz targets.
// It is based on DFSan and SanitizerCoverage.
// https://clang.llvm.org/docs/DataFlowSanitizer.html
// https://clang.llvm.org/docs/SanitizerCoverage.html#tracing-data-flow
//
// It executes the fuzz target on the given input while monitoring the
// data flow for every instrumented comparison instruction.
//
// The output shows which functions depend on which bytes of the input.
//
// Build:
// 1. Compile this file with -fsanitize=dataflow
// 2. Build the fuzz target with -g -fsanitize=dataflow
// -fsanitize-coverage=trace-pc-guard,pc-table,func,trace-cmp
// 3. Link those together with -fsanitize=dataflow
//
// -fsanitize-coverage=trace-cmp inserts callbacks around every comparison
// instruction, DFSan modifies the calls to pass the data flow labels.
// The callbacks update the data flow label for the current function.
// See e.g. __dfsw___sanitizer_cov_trace_cmp1 below.
//
// -fsanitize-coverage=trace-pc-guard,pc-table,func instruments function
// entries so that the comparison callback knows the current function.
//
//
// Run:
// # Collect data flow for bytes [BEG, END) of INPUT_FILE,
// # write to OUTPUT_FILE (default: stdout)
// ./a.out BEG END INPUT_FILE [OUTPUT_FILE]
//
// # Print all instrumented functions. llvm-symbolizer must be present in PATH
// ./a.out
//
// Example output:
// ===============
// F0 11111111111111
// F1 10000000000000
// ===============
// "FN xxxxxxxxxx": tells which bytes of the input function N depends on.
// The byte string is LEN+1 bytes. The last byte is set if the function
// depends on the input length.
//===----------------------------------------------------------------------===*/
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <execinfo.h> // backtrace_symbols_fd
#include <sanitizer/dfsan_interface.h>
// Entry points supplied by the fuzz target that this tool is linked with.
extern "C" {
// Required. main() calls it once with the contents of the input file.
extern int LLVMFuzzerTestOneInput(const unsigned char *Data, size_t Size);
// Optional (weak). If the target defines it, main() calls it once, before
// looking at the command line, and lets it modify argc/argv.
__attribute__((weak)) extern int LLVMFuzzerInitialize(int *argc, char ***argv);
} // extern "C"
// Size of the input file in bytes; set in main().
static size_t InputLen;
// Number of coverage guards numbered by __sanitizer_cov_trace_pc_guard_init.
static size_t NumFuncs;
// Start of the PC table passed to __sanitizer_cov_pcs_init. PrintFunctions
// reads the PC of function I from FuncsBeg[I * 2].
static const uintptr_t *FuncsBeg;
// Per-thread, zero-based function index stored by
// __sanitizer_cov_trace_pc_guard (guard value minus one).
static __thread size_t CurrentFunc;
// Label accumulated for each function by the comparison/switch callbacks.
static dfsan_label *FuncLabels; // Array of NumFuncs elements.
// Scratch buffer returned by GetPrintableStringForLabel; allocated in main().
static char *PrintableStringForLabel; // InputLen + 2 bytes.
// Visited set used by SetBytesForLabel; one slot per possible dfsan_label
// value (2^(8 * sizeof(dfsan_label)) entries).
static bool LabelSeen[1 << 8 * sizeof(dfsan_label)];
// Prints all instrumented functions.
//
// The entry PC of every instrumented function (FuncsBeg[I * 2]) is written
// with backtrace_symbols_fd into a shell pipeline that runs llvm-symbolizer
// and keeps only the symbols carrying the "dfs$" prefix, with that prefix
// stripped.
//
// Returns 0 on success, 1 if the PC table is missing or the pipeline cannot
// be opened or closed.
static int PrintFunctions() {
  // FuncsBeg is only set by __sanitizer_cov_pcs_init; without it there is
  // nothing to read the PCs from.
  if (NumFuncs != 0 && !FuncsBeg) {
    fprintf(stderr, "ERROR: no PC table was registered\n");
    return 1;
  }
  // We don't have the symbolizer integrated with dfsan yet.
  // So use backtrace_symbols_fd and pipe it through llvm-symbolizer.
  // TODO(kcc): this is pretty ugly and may break in lots of ways.
  // We'll need to make a proper in-process symbolizer work with DFSan.
  FILE *Pipe = popen("sed 's/(+/ /g; s/).*//g' "
                     "| llvm-symbolizer "
                     "| grep 'dfs\\$' "
                     "| sed 's/dfs\\$//g'", "w");
  if (!Pipe) {
    // popen failed; fileno(NULL) below would crash.
    perror("popen");
    return 1;
  }
  const int PipeFd = fileno(Pipe);
  for (size_t I = 0; I < NumFuncs; I++) {
    uintptr_t PC = FuncsBeg[I * 2];
    void *const Buf[1] = {(void*)PC};
    backtrace_symbols_fd(Buf, 1, PipeFd);
  }
  // Only a failure of pclose itself is an error; the exit status of the
  // pipeline is deliberately not interpreted.
  if (pclose(Pipe) == -1) {
    perror("pclose");
    return 1;
  }
  return 0;
}
// Marks in Bytes every position that label L depends on.
//
// A label K with K <= InputLen + 1 is a base label and sets Bytes[K - 1] to
// '1'. Any larger label is expanded recursively through the two labels
// (l1, l2) stored in its dfsan label info. LabelSeen guarantees that each
// label is expanded at most once; GetPrintableStringForLabel clears it before
// every traversal.
extern "C"
void SetBytesForLabel(dfsan_label L, char *Bytes) {
  if (LabelSeen[L])
    return;
  LabelSeen[L] = true;
  assert(L);

  const bool IsBaseLabel = L <= InputLen + 1;
  if (IsBaseLabel) {
    Bytes[L - 1] = '1';
    return;
  }

  const auto *Info = dfsan_get_label_info(L);
  SetBytesForLabel(Info->l1, Bytes);
  SetBytesForLabel(Info->l2, Bytes);
}
// Renders label L as a NUL-terminated string of InputLen + 1 characters, each
// '0' or '1', as filled in by SetBytesForLabel.
//
// The result lives in the shared PrintableStringForLabel buffer and is
// overwritten by the next call.
static char *GetPrintableStringForLabel(dfsan_label L) {
  char *Str = PrintableStringForLabel;
  const size_t NumChars = InputLen + 1;

  // Start every traversal from a clean visited set and an all-'0' string.
  memset(LabelSeen, 0, sizeof(LabelSeen));
  memset(Str, '0', NumChars);
  Str[NumChars] = '\0';

  SetBytesForLabel(L, Str);
  return Str;
}
// Writes one "F<index> <bytes>" line to Out for every function whose
// accumulated label is non-zero. Functions with a zero label are skipped.
static void PrintDataFlow(FILE *Out) {
  for (size_t I = 0; I < NumFuncs; I++) {
    if (!FuncLabels[I])
      continue;
    // I is a size_t, so the matching conversion is %zu (not %zd).
    fprintf(Out, "F%zu %s\n", I, GetPrintableStringForLabel(FuncLabels[I]));
  }
}
// Entry point.
//
//   ./a.out                                   list instrumented functions
//   ./a.out BEG END INPUT_FILE [OUTPUT_FILE]  trace INPUT_FILE
//
// In tracing mode the whole input file is read, one DFSan label is created
// per input byte (label I for byte I - 1) plus one more for the input length,
// and the per-byte labels are attached only to bytes whose index lies in
// [BEG, END). The fuzz target is then run once and the collected per-function
// labels are written to OUTPUT_FILE, or to stdout when it is omitted.
//
// Returns 0 on success and 1 on a usage, I/O or allocation error.
int main(int argc, char **argv) {
  if (LLVMFuzzerInitialize)
    LLVMFuzzerInitialize(&argc, &argv);
  if (argc == 1)
    return PrintFunctions();
  if (argc != 4 && argc != 5) {
    fprintf(stderr, "Usage: %s [BEG END INPUT_FILE [OUTPUT_FILE]]\n", argv[0]);
    return 1;
  }
  size_t Beg = atoi(argv[1]);
  size_t End = atoi(argv[2]);
  if (Beg >= End) {
    fprintf(stderr, "ERROR: BEG (%zu) must be less than END (%zu)\n", Beg, End);
    return 1;
  }

  const char *Input = argv[3];
  fprintf(stderr, "INFO: reading '%s'\n", Input);
  FILE *In = fopen(Input, "r");
  if (!In) {
    perror(Input);
    return 1;
  }
  // ftell reports failure as -1; check it before converting to size_t.
  long FileSize = -1;
  if (fseek(In, 0, SEEK_END) == 0)
    FileSize = ftell(In);
  if (FileSize < 0 || fseek(In, 0, SEEK_SET) != 0) {
    fprintf(stderr, "ERROR: cannot determine the size of '%s'\n", Input);
    fclose(In);
    return 1;
  }
  InputLen = (size_t)FileSize;

  unsigned char *Buf = (unsigned char*)malloc(InputLen);
  PrintableStringForLabel = (char*)malloc(InputLen + 2);
  // malloc(0) may legitimately return NULL, so only a non-empty input
  // requires a non-NULL Buf.
  if ((InputLen && !Buf) || !PrintableStringForLabel) {
    fprintf(stderr, "ERROR: out of memory\n");
    fclose(In);
    free(Buf);
    return 1;
  }
  size_t NumBytesRead = fread(Buf, 1, InputLen, In);
  fclose(In);
  if (NumBytesRead != InputLen) {
    fprintf(stderr, "ERROR: short read from '%s'\n", Input);
    free(Buf);
    return 1;
  }

  fprintf(stderr, "INFO: running '%s'\n", Input);
  // A label is created for every byte so that label I always denotes byte
  // I - 1, but it is attached only to bytes inside [Beg, End).
  for (size_t I = 1; I <= InputLen; I++) {
    dfsan_label L = dfsan_create_label("", nullptr);
    assert(L == I);
    size_t Idx = I - 1;
    if (Idx >= Beg && Idx < End)
      dfsan_set_label(L, Buf + Idx, 1);
  }
  // The last label tracks dependence on the input length itself.
  dfsan_label SizeL = dfsan_create_label("", nullptr);
  assert(SizeL == InputLen + 1);
  dfsan_set_label(SizeL, &InputLen, sizeof(InputLen));

  LLVMFuzzerTestOneInput(Buf, InputLen);
  free(Buf);

  bool OutIsStdout = argc == 4;
  fprintf(stderr, "INFO: writing dataflow to %s\n",
          OutIsStdout ? "<stdout>" : argv[4]);
  FILE *Out = OutIsStdout ? stdout : fopen(argv[4], "w");
  if (!Out) {
    perror(argv[4]);
    return 1;
  }
  PrintDataFlow(Out);
  // fclose flushes buffered output, so its result matters for a file.
  if (!OutIsStdout && fclose(Out) != 0) {
    perror(argv[4]);
    return 1;
  }
  return 0;
}
extern "C" {
// Coverage callback that receives the guard range [start, stop).
//
// On the first call every guard is numbered 1, 2, ... (NumFuncs ends up as
// the number of guards) and FuncLabels is allocated with one zeroed label per
// guard. A repeated call for an already numbered range returns immediately;
// a call for a second, not yet numbered range trips the "no DSOs" assertion.
void __sanitizer_cov_trace_pc_guard_init(uint32_t *start,
                                         uint32_t *stop) {
  assert(start < stop && "The code is not instrumented for coverage");
  // This check must precede the NumFuncs assertion: on a repeated call for
  // the same range NumFuncs is already non-zero, and asserting first would
  // abort instead of returning.
  if (start == stop || *start) return;  // Initialize only once.
  assert(NumFuncs == 0 && "This tool does not support DSOs");
  for (uint32_t *x = start; x < stop; x++)
    *x = ++NumFuncs;  // The first index is 1.
  FuncLabels = (dfsan_label*)calloc(NumFuncs, sizeof(dfsan_label));
  if (!FuncLabels) {
    fprintf(stderr, "ERROR: failed to allocate labels for %zu function(s)\n",
            NumFuncs);
    abort();
  }
  fprintf(stderr, "INFO: %zu instrumented function(s) observed\n", NumFuncs);
}
// Coverage callback that receives the PC table [pcs_beg, pcs_end).
//
// The table is expected to hold two words per instrumented function, so its
// length in words divided by two must equal NumFuncs. Only the start pointer
// is kept; PrintFunctions later reads the PCs through FuncsBeg.
void __sanitizer_cov_pcs_init(const uintptr_t *pcs_beg,
                              const uintptr_t *pcs_end) {
  const size_t NumTableEntries = (pcs_end - pcs_beg) / 2;
  assert(NumTableEntries == NumFuncs);
  (void)NumTableEntries;  // Only referenced by the assertion.
  FuncsBeg = pcs_beg;
}
// Indirect-call coverage callback. This tool does nothing with it.
void __sanitizer_cov_trace_pc_indir(uint64_t x) {
  (void)x;  // unused.
}
// Coverage callback invoked with the guard of the code being entered.
//
// *guard holds the one-based number assigned in
// __sanitizer_cov_trace_pc_guard_init; the matching zero-based index is
// stored in the per-thread CurrentFunc for the comparison callbacks to use.
void __sanitizer_cov_trace_pc_guard(uint32_t *guard) {
  const uint32_t Index = *guard - 1;  // Guards start from 1.
  assert(Index < NumFuncs);
  CurrentFunc = Index;
}
void __dfsw___sanitizer_cov_trace_switch(uint64_t Val, uint64_t *Cases,
dfsan_label L1, dfsan_label UnusedL) {
assert(CurrentFunc < NumFuncs);
FuncLabels[CurrentFunc] = dfsan_union(FuncLabels[CurrentFunc], L1);
}
#define HOOK(Name, Type) \
void Name(Type Arg1, Type Arg2, dfsan_label L1, dfsan_label L2) { \
assert(CurrentFunc < NumFuncs); \
FuncLabels[CurrentFunc] = \
dfsan_union(FuncLabels[CurrentFunc], dfsan_union(L1, L2)); \
}
HOOK(__dfsw___sanitizer_cov_trace_const_cmp1, uint8_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp2, uint16_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp4, uint32_t)
HOOK(__dfsw___sanitizer_cov_trace_const_cmp8, uint64_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp1, uint8_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp2, uint16_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp4, uint32_t)
HOOK(__dfsw___sanitizer_cov_trace_cmp8, uint64_t)
} // extern "C"
|