// Copyright 2015 The Gemmlowp Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// instrumentation.h: contains the definitions needed to
// instrument code for profiling:
// ScopedProfilingLabel, RegisterCurrentThreadForProfiling.
//
// profiler.h is only needed to drive the profiler:
// StartProfiling, FinishProfiling.
//
// See the usage example in profiler.h.
#ifndef GEMMLOWP_PROFILING_INSTRUMENTATION_H_
#define GEMMLOWP_PROFILING_INSTRUMENTATION_H_
#include <cstdio>
#ifndef GEMMLOWP_USE_STLPORT
#include <cstdint>
#else
#include <stdint.h>
namespace std {
using ::int16_t;
using ::int32_t;
using ::int8_t;
using ::size_t;
using ::uint16_t;
using ::uint32_t;
using ::uint8_t;
using ::uintptr_t;
} // namespace std
#endif
#include <algorithm>
#include <cassert>
#include <cstdlib>
#ifdef GEMMLOWP_PROFILING
#include <cstring>
#include <set>
#endif
#include "./pthread_everywhere.h"
namespace gemmlowp {
// Assertion that is not compiled out in release builds: when `condition` is
// false, writes "gemmlowp error: <msg>" to stderr and aborts the process.
// Does nothing when `condition` is true.
inline void ReleaseBuildAssertion(bool condition, const char* msg) {
  if (condition) {
    return;
  }
  fprintf(stderr, "gemmlowp error: %s\n", msg);
  abort();
}
// Minimal non-copyable wrapper around a pthread mutex. The underlying
// pthread_mutex_t is initialized with default attributes in the constructor
// and destroyed in the destructor. Return codes of the pthread calls are not
// inspected.
class Mutex {
 public:
  Mutex() { pthread_mutex_init(&handle_, nullptr); }
  ~Mutex() { pthread_mutex_destroy(&handle_); }

  // The object wraps a pthread_mutex_t by value, so copying is disabled.
  Mutex(const Mutex&) = delete;
  Mutex& operator=(const Mutex&) = delete;

  // Acquires the mutex (pthread_mutex_lock).
  void Lock() { pthread_mutex_lock(&handle_); }
  // Releases the mutex (pthread_mutex_unlock).
  void Unlock() { pthread_mutex_unlock(&handle_); }

 private:
  pthread_mutex_t handle_;
};
// Accessors for process-wide mutexes. Each mutex is a function-local static,
// so it is constructed on first use and every call returns the same object.
class GlobalMutexes {
 public:
  // Mutex used in this file to guard the set of threads under profiling.
  static Mutex* Profiler() {
    static Mutex profiler_mutex;
    return &profiler_mutex;
  }

  // Mutex named for the EightBitIntGemm code; it is not used in this file.
  static Mutex* EightBitIntGemm() {
    static Mutex eight_bit_int_gemm_mutex;
    return &eight_bit_int_gemm_mutex;
  }
};
// A very simple RAII helper to lock and unlock a Mutex: the constructor
// locks the given Mutex and the destructor unlocks it.
// The guard only stores the pointer it was given. The pointer is
// dereferenced unconditionally, so it must be non-null and the Mutex must
// still exist when the guard is destroyed.
struct ScopedLock {
  ScopedLock(Mutex* m) : _m(m) { _m->Lock(); }
  ~ScopedLock() { _m->Unlock(); }

  // A copied guard would unlock the same Mutex a second time when it goes
  // out of scope, so copying is disabled (as it already is for Mutex).
  ScopedLock(const ScopedLock&) = delete;
  ScopedLock& operator=(const ScopedLock&) = delete;

 private:
  Mutex* _m;
};
// Profiling definitions. Two paths: when profiling is enabled,
// and when profiling is disabled.
#ifdef GEMMLOWP_PROFILING
// This code path is when profiling is enabled.
// A pseudo-call-stack. Contrary to a real call-stack, this only
// contains pointers to literal strings that were manually entered
// in the instrumented code (see ScopedProfilingLabel).
struct ProfilingStack {
static const std::size_t kMaxSize = 30;
typedef const char* LabelsArrayType[kMaxSize];
LabelsArrayType labels;
std::size_t size;
Mutex* lock;
ProfilingStack() { memset(this, 0, sizeof(ProfilingStack)); }
~ProfilingStack() { delete lock; }
void Push(const char* label) {
ScopedLock sl(lock);
ReleaseBuildAssertion(size < kMaxSize, "ProfilingStack overflow");
labels[size] = label;
size++;
}
void Pop() {
ScopedLock sl(lock);
ReleaseBuildAssertion(size > 0, "ProfilingStack underflow");
size--;
}
void UpdateTop(const char* new_label) {
ScopedLock sl(lock);
assert(size);
labels[size - 1] = new_label;
}
ProfilingStack& operator=(const ProfilingStack& other) {
memcpy(this, &other, sizeof(ProfilingStack));
return *this;
}
bool operator==(const ProfilingStack& other) const {
return !memcmp(this, &other, sizeof(ProfilingStack));
}
};
static_assert(
!(sizeof(ProfilingStack) & (sizeof(ProfilingStack) - 1)),
"ProfilingStack should have power-of-two size to fit in cache lines");
struct ThreadInfo;

// Returns the global set of threads being profiled. The set is a
// function-local static, so all callers share one instance. The function
// itself takes no lock.
inline std::set<ThreadInfo*>& ThreadsUnderProfiling() {
  typedef std::set<ThreadInfo*> ThreadSet;
  static ThreadSet profiled_threads;
  return profiled_threads;
}
// Per-thread profiling state: the thread's ProfilingStack plus a pthread key
// registered with ThreadExitCallback.
struct ThreadInfo {
  pthread_key_t key;  // used only to get a callback at thread exit.
  ProfilingStack stack;

  // Callback registered with `key`. `ptr` is the ThreadInfo that was stored
  // under the key; it is removed from the global set of profiled threads
  // while holding the profiler mutex. The pointer is not dereferenced.
  static void ThreadExitCallback(void* ptr) {
    ThreadInfo* const exiting = static_cast<ThreadInfo*>(ptr);
    ScopedLock registry_guard(GlobalMutexes::Profiler());
    ThreadsUnderProfiling().erase(exiting);
  }

  // Creates the key, stores this object under it for the calling thread,
  // and gives the stack a freshly allocated Mutex (which ProfilingStack's
  // destructor deletes).
  ThreadInfo() {
    pthread_key_create(&key, ThreadExitCallback);
    pthread_setspecific(key, this);
    stack.lock = new Mutex();
  }
};
// Returns the calling thread's ThreadInfo, allocating it on first use.
// The object is stored under a pthread key whose destructor callback
// deletes it.
inline ThreadInfo& ThreadLocalThreadInfo() {
  static pthread_key_t key;
  // `delete` on a null pointer is a no-op, so no explicit check is needed.
  static auto destroy_info = [](void* stored) {
    delete static_cast<ThreadInfo*>(stored);
  };
  // key_result is unused. It exists only so that its initializer, the
  // pthread_key_create call, runs exactly once: initialization of a
  // function-local static is guaranteed (since C++11) to be thread-safe.
  static const int key_result = pthread_key_create(&key, destroy_info);
  (void)key_result;

  void* const existing = pthread_getspecific(key);
  if (existing) {
    return *static_cast<ThreadInfo*>(existing);
  }
  ThreadInfo* const created = new ThreadInfo();
  pthread_setspecific(key, created);
  return *created;
}
// ScopedProfilingLabel is how one instruments code for profiling
// with this profiler. Construct local ScopedProfilingLabel variables,
// passing a literal string describing the local code. Profile
// samples will then be annotated with this label, while it is in scope
// (whence the name --- also known as RAII).
// See the example in profiler.h.
class ScopedProfilingLabel {
ProfilingStack* profiling_stack_;
public:
explicit ScopedProfilingLabel(const char* label)
: profiling_stack_(&ThreadLocalThreadInfo().stack) {
profiling_stack_->Push(label);
}
~ScopedProfilingLabel() { profiling_stack_->Pop(); }
void Update(const char* new_label) { profiling_stack_->UpdateTop(new_label); }
};
// To be called once on each thread to be profiled.
inline void RegisterCurrentThreadForProfiling() {
ScopedLock sl(GlobalMutexes::Profiler());
ThreadsUnderProfiling().insert(&ThreadLocalThreadInfo());
}
#else // not GEMMLOWP_PROFILING
// This code path is when profiling is disabled.
// Stand-in for ScopedProfilingLabel when profiling is disabled. It has no
// data members and its member functions have empty bodies, so instrumented
// code pays no runtime cost.
struct ScopedProfilingLabel {
  // The label is ignored.
  explicit ScopedProfilingLabel(const char* /*label*/) {}

  // The new label is ignored.
  void Update(const char* /*new_label*/) {}
};
// Profiling-disabled counterpart of RegisterCurrentThreadForProfiling:
// does nothing.
inline void RegisterCurrentThreadForProfiling() {
}
#endif
} // end namespace gemmlowp
#endif // GEMMLOWP_PROFILING_INSTRUMENTATION_H_