1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
|
#!/bin/bash
# Validate the command line. Both positional arguments are mandatory:
#   $1 - path to the clang binary to use
#   $2 - path to the llvm-profdata binary to use
if [ "$#" -lt 2 ]; then
  # Diagnostics go to stderr so they are never mistaken for regular output.
  echo "Path to clang and llvm-profdata required!" >&2
  echo "Usage: update_memprof_inputs.sh /path/to/updated/clang /path/to/updated/llvm-profdata" >&2
  exit 1
fi
CLANG=$1
LLVMPROFDATA=$2
# Allows the script to be invoked from other directories: OUTDIR is the
# directory containing this script (symlinks are not resolved, per -s).
# Every expansion is quoted so a script path containing whitespace or glob
# characters is passed to realpath/dirname as a single argument.
OUTDIR=$(dirname "$(realpath -s "$0")")
# Write the main memprof test program to ${OUTDIR}/memprof.cc.
# Note that changes in the code below which affect relative line number
# offsets of calls from their parent function can affect callsite matching in
# the LLVM IR.
# The heredoc delimiter is quoted so the shell copies the program text
# verbatim (no parameter, command or arithmetic expansion), and the target
# path is quoted so an OUTDIR containing whitespace still works.
cat > "${OUTDIR}/memprof.cc" << 'EOF'
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
char *foo() {
return new char[10];
}
char *foo2() {
return foo();
}
char *bar() {
return foo2();
}
char *baz() {
return foo2();
}
char *recurse(unsigned n) {
if (!n)
return foo();
return recurse(n-1);
}
int main(int argc, char **argv) {
// Test allocations with different combinations of stack contexts and
// coldness (based on lifetime, since they are all accessed a single time
// per byte via the memset).
char *a = new char[10];
char *b = new char[10];
char *c = foo();
char *d = foo();
char *e = bar();
char *f = baz();
memset(a, 0, 10);
memset(b, 0, 10);
memset(c, 0, 10);
memset(d, 0, 10);
memset(e, 0, 10);
memset(f, 0, 10);
// a and c have short lifetimes
delete[] a;
delete[] c;
// b, d, e, and f have long lifetimes and will be detected as cold by default.
sleep(200);
delete[] b;
delete[] d;
delete[] e;
delete[] f;
// Loop ensures the two calls to recurse have stack contexts that only differ
// in one level of recursion. We should get two stack contexts reflecting the
// different levels of recursion and different allocation behavior (since the
// first has a very long lifetime and the second has a short lifetime).
for (unsigned i = 0; i < 2; i++) {
char *g = recurse(i + 3);
memset(g, 0, 10);
if (!i)
sleep(200);
delete[] g;
}
return 0;
}
EOF
# Compiler/linker flags shared by the builds in this script.
COMMON_FLAGS="-fuse-ld=lld -Wl,--no-rosegment -gmlt -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id -no-pie"

# NOTE: ${COMMON_FLAGS} is intentionally left unquoted everywhere so that it
# word-splits into individual flags. All paths are quoted so that tool paths
# or an OUTDIR containing whitespace are passed as single arguments.

# Build memprof.cc with memory profiling and capture the raw profile, which
# the runtime is told to emit on stdout (log_path=stdout).
"${CLANG}" ${COMMON_FLAGS} -fmemory-profile "${OUTDIR}/memprof.cc" -o "${OUTDIR}/memprof.exe"
env MEMPROF_OPTIONS=log_path=stdout "${OUTDIR}/memprof.exe" > "${OUTDIR}/memprof.memprofraw"

# Generate another profile without any column numbers.
"${CLANG}" ${COMMON_FLAGS} -gno-column-info -fmemory-profile "${OUTDIR}/memprof.cc" -o "${OUTDIR}/memprof.nocolinfo.exe"
env MEMPROF_OPTIONS=log_path=stdout "${OUTDIR}/memprof.nocolinfo.exe" > "${OUTDIR}/memprof.nocolinfo.memprofraw"

# Build the same source with -fprofile-generate, run it with the raw profile
# directed to OUTDIR via LLVM_PROFILE_FILE, and merge it into a text profile.
"${CLANG}" ${COMMON_FLAGS} -fprofile-generate=. \
  "${OUTDIR}/memprof.cc" -o "${OUTDIR}/pgo.exe"
env LLVM_PROFILE_FILE="${OUTDIR}/memprof_pgo.profraw" "${OUTDIR}/pgo.exe"
"${LLVMPROFDATA}" merge --text "${OUTDIR}/memprof_pgo.profraw" -o "${OUTDIR}/memprof_pgo.proftext"

# Remove the generated source and the PGO intermediates; the memprof
# executables, raw memprof profiles and the text PGO profile are kept.
rm "${OUTDIR}/memprof.cc"
rm "${OUTDIR}/pgo.exe"
rm "${OUTDIR}/memprof_pgo.profraw"
# Use musttail to simulate a missing leaf debug frame in the profiled binary.
# Note we don't currently match onto explicit ::operator new calls, which is
# why the non-musttail case uses implicit new (which doesn't support musttail).
# Note that changes in the code below which affect relative line number
# offsets of calls from their parent function can affect callsite matching in
# the LLVM IR.
# The heredoc delimiter is quoted so the program text is written verbatim
# (no shell expansion); paths are quoted so whitespace in OUTDIR or the clang
# path does not split them. ${COMMON_FLAGS} stays unquoted on purpose so it
# word-splits into individual flags.
cat > "${OUTDIR}/memprof_missing_leaf.cc" << 'EOF'
#include <new>
#ifndef USE_MUSTTAIL
#define USE_MUSTTAIL 0
#endif
// clang::musttail requires that the argument signature matches that of the caller.
void *bar(std::size_t s) {
#if USE_MUSTTAIL
[[clang::musttail]] return ::operator new (s);
#else
return new char[s];
#endif
}
int main() {
char *a = (char *)bar(1);
delete a;
return 0;
}
EOF
# Build the musttail variant (-DUSE_MUSTTAIL=1) and capture its raw profile
# from stdout.
"${CLANG}" ${COMMON_FLAGS} -fmemory-profile -DUSE_MUSTTAIL=1 "${OUTDIR}/memprof_missing_leaf.cc" -o "${OUTDIR}/memprof_missing_leaf.exe"
env MEMPROF_OPTIONS=log_path=stdout "${OUTDIR}/memprof_missing_leaf.exe" > "${OUTDIR}/memprof_missing_leaf.memprofraw"
# Only the generated source is removed; the executable and profile are kept.
rm "${OUTDIR}/memprof_missing_leaf.cc"
# Generate a profile for a program whose allocation happens inside a function
# with internal linkage (static foo), built with
# -funique-internal-linkage-names.
# The heredoc delimiter is quoted so the program text is written verbatim
# (no shell expansion); paths are quoted so whitespace in OUTDIR or the clang
# path does not split them. ${COMMON_FLAGS} stays unquoted on purpose so it
# word-splits into individual flags.
cat > "${OUTDIR}/memprof_internal_linkage.cc" << 'EOF'
#include <cstring>
#include <unistd.h>
static void foo() {
int *a = new int[5];
memset(a, 0, 5);
}
int main(int argc, char **argv) {
foo();
return 0;
}
EOF
"${CLANG}" ${COMMON_FLAGS} -fmemory-profile -funique-internal-linkage-names "${OUTDIR}/memprof_internal_linkage.cc" -o "${OUTDIR}/memprof_internal_linkage.exe"
env MEMPROF_OPTIONS=log_path=stdout "${OUTDIR}/memprof_internal_linkage.exe" > "${OUTDIR}/memprof_internal_linkage.memprofraw"
# Only the generated source is removed; the executable and profile are kept.
rm "${OUTDIR}/memprof_internal_linkage.cc"
# Generate a profile from a two-file program: memprof_loop_unroll_a.cc holds
# main() and is compiled at -O0, while memprof_loop_unroll_b.cc holds the
# allocating loop in foo() and is compiled at -O3 (presumably so that loop is
# unrolled, as the file names suggest). The two objects are then linked into
# one executable.
# The heredoc delimiters are quoted so the program text is written verbatim
# (no shell expansion); paths are quoted so whitespace in OUTDIR or the clang
# path does not split them. ${COMMON_FLAGS} stays unquoted on purpose so it
# word-splits into individual flags.
cat > "${OUTDIR}/memprof_loop_unroll_a.cc" << 'EOF'
int* a[2];
extern void foo();
int main() {
foo();
for (int i = 0; i < 1000000; ++i) {
*a[0] = 1;
}
return 0;
}
EOF
cat > "${OUTDIR}/memprof_loop_unroll_b.cc" << 'EOF'
#include <string>
extern int* a[2];
void foo() {
for (int i = 0; i < 2; ++i) {
a[i] = new int[1];
}
}
EOF
"${CLANG}" ${COMMON_FLAGS} -fmemory-profile "${OUTDIR}/memprof_loop_unroll_a.cc" -O0 -o "${OUTDIR}/memprof_loop_unroll_a.o" -c
"${CLANG}" ${COMMON_FLAGS} -fmemory-profile "${OUTDIR}/memprof_loop_unroll_b.cc" -O3 -o "${OUTDIR}/memprof_loop_unroll_b.o" -c
"${CLANG}" ${COMMON_FLAGS} -fmemory-profile "${OUTDIR}/memprof_loop_unroll_a.o" "${OUTDIR}/memprof_loop_unroll_b.o" -o "${OUTDIR}/memprof_loop_unroll.exe"
env MEMPROF_OPTIONS=log_path=stdout "${OUTDIR}/memprof_loop_unroll.exe" > "${OUTDIR}/memprof_loop_unroll.memprofraw"
# Remove the generated sources and object files; the linked executable and
# its raw profile are kept.
rm "${OUTDIR}/memprof_loop_unroll_a.cc"
rm "${OUTDIR}/memprof_loop_unroll_a.o"
rm "${OUTDIR}/memprof_loop_unroll_b.cc"
rm "${OUTDIR}/memprof_loop_unroll_b.o"
|