1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
|
// -*- mode: C++; c-file-style: "cc-mode" -*-
//=============================================================================
//
// Code available from: https://verilator.org
//
// Copyright 2012-2025 by Wilson Snyder. This program is free software; you can
// redistribute it and/or modify it under the terms of either the GNU
// Lesser General Public License Version 3 or the Perl Artistic License
// Version 2.0.
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0
//
//=============================================================================
///
/// \file
/// \brief Verilated run-time profiling implementation code
///
//=============================================================================
#include "verilatedos.h"
#include "verilated_profiler.h"
#include "verilated_threads.h"
#include <fstream>
#include <string>
//=============================================================================
// Globals
// Internal note: Globals may multi-construct, see verilated.cpp top.
thread_local VlExecutionProfiler::ExecutionTrace VlExecutionProfiler::t_trace;
constexpr const char* const VlExecutionRecord::s_ascii[];
//=============================================================================
// VlExecutionProfiler implementation
template <size_t N>
static size_t roundUptoMultipleOf(size_t value) {
static_assert((N & (N - 1)) == 0, "'N' must be a power of 2");
size_t mask = N - 1;
return (value + mask) & ~mask;
}
VlExecutionProfiler::VlExecutionProfiler(VerilatedContext& context)
: m_context{context} {
// Setup profiling on main thread
setupThread(0);
}
void VlExecutionProfiler::configure() {
if (VL_UNLIKELY(m_enabled)) {
--m_windowCount;
if (VL_UNLIKELY(m_windowCount == m_context.profExecWindow())) {
VL_DEBUG_IF(VL_DBG_MSGF("+ profile start collection\n"););
clear(); // Clear the profile after the cache warm-up cycles.
m_tickBegin = VL_CPU_TICK();
} else if (VL_UNLIKELY(m_windowCount == 0)) {
const uint64_t tickEnd = VL_CPU_TICK();
VL_DEBUG_IF(VL_DBG_MSGF("+ profile end\n"););
const std::string& fileName = m_context.profExecFilename();
dump(fileName.c_str(), tickEnd);
m_enabled = false;
}
return;
}
const uint64_t startReq = m_context.profExecStart() + 1; // + 1, so we can start at time 0
if (VL_UNLIKELY(m_lastStartReq < startReq && VL_TIME_Q() >= m_context.profExecStart())) {
VL_DEBUG_IF(VL_DBG_MSGF("+ profile start warmup\n"););
VL_DEBUG_IF(assert(m_windowCount == 0););
m_enabled = true;
m_windowCount = m_context.profExecWindow() * 2;
m_lastStartReq = startReq;
}
}
VerilatedVirtualBase* VlExecutionProfiler::construct(VerilatedContext& context) {
VlExecutionProfiler* const selfp = new VlExecutionProfiler{context};
if (VlThreadPool* const threadPoolp = static_cast<VlThreadPool*>(context.threadPoolp())) {
for (int i = 0; i < threadPoolp->numThreads(); ++i) {
// Data to pass to worker thread initialization
struct Data final {
VlExecutionProfiler* const selfp;
const uint32_t threadId;
} data{selfp, static_cast<uint32_t>(i + 1)};
// Initialize worker thread
threadPoolp->workerp(i)->addTask(
[](void* userp, bool) {
Data* const datap = static_cast<Data*>(userp);
datap->selfp->setupThread(datap->threadId);
},
&data);
// Wait until initialization is complete
threadPoolp->workerp(i)->wait();
}
}
return selfp;
}
void VlExecutionProfiler::setupThread(uint32_t threadId) {
// Reserve some space in the thread-local profiling buffer, in order to try to avoid malloc
// while profiling.
t_trace.reserve(RESERVED_TRACE_CAPACITY);
// Register thread-local buffer in list of all buffers
bool exists;
{
const VerilatedLockGuard lock{m_mutex};
exists = !m_traceps.emplace(threadId, &t_trace).second;
}
if (VL_UNLIKELY(exists)) {
VL_FATAL_MT(__FILE__, __LINE__, "", "multiple initialization of profiler on some thread");
}
}
void VlExecutionProfiler::clear() VL_MT_SAFE_EXCLUDES(m_mutex) {
const VerilatedLockGuard lock{m_mutex};
for (const auto& pair : m_traceps) {
ExecutionTrace* const tracep = pair.second;
const size_t reserve = roundUptoMultipleOf<RESERVED_TRACE_CAPACITY>(tracep->size());
tracep->clear();
tracep->reserve(reserve);
}
}
void VlExecutionProfiler::dump(const char* filenamep, uint64_t tickEnd)
VL_MT_SAFE_EXCLUDES(m_mutex) {
const VerilatedLockGuard lock{m_mutex};
VL_DEBUG_IF(VL_DBG_MSGF("+prof+exec writing to '%s'\n", filenamep););
FILE* const fp = std::fopen(filenamep, "w");
if (VL_UNLIKELY(!fp)) VL_FATAL_MT(filenamep, 0, "", "+prof+exec+file file not writable");
// TODO Perhaps merge with verilated_coverage output format, so can
// have a common merging and reporting tool, etc.
fprintf(fp, "VLPROFVERSION 2.2 # Verilator execution profile version 2.2\n");
fprintf(fp, "VLPROF arg +verilator+prof+exec+start+%" PRIu64 "\n",
Verilated::threadContextp()->profExecStart());
fprintf(fp, "VLPROF arg +verilator+prof+exec+window+%u\n",
Verilated::threadContextp()->profExecWindow());
std::string numa = "no threads";
if (VlThreadPool* const threadPoolp
= static_cast<VlThreadPool*>(Verilated::threadContextp()->threadPoolp())) {
numa = threadPoolp->numaStatus();
}
fprintf(fp, "VLPROF info numa %s\n", numa.c_str());
// Note that VerilatedContext will by default create as many threads as there are hardware
// processors, but not all of them might be utilized. Report the actual number that has trace
// entries to avoid over-counting.
unsigned threads = 0;
for (const auto& pair : m_traceps) {
if (!pair.second->empty()) ++threads;
}
fprintf(fp, "VLPROF stat threads %u\n", threads);
fprintf(fp, "VLPROF stat yields %" PRIu64 "\n", VlMTaskVertex::yields());
// Copy /proc/cpuinfo into this output so verilator_gantt can be run on
// a different machine
{
const std::unique_ptr<std::ifstream> ifp{new std::ifstream{"/proc/cpuinfo"}};
if (!ifp->fail()) {
std::string line;
while (std::getline(*ifp, line)) { fprintf(fp, "VLPROFPROC %s\n", line.c_str()); }
}
}
for (const auto& pair : m_traceps) {
const uint32_t threadId = pair.first;
ExecutionTrace* const tracep = pair.second;
if (tracep->empty()) continue;
fprintf(fp, "VLPROFTHREAD %" PRIu32 "\n", threadId);
for (const VlExecutionRecord& er : *tracep) {
const char* const name = VlExecutionRecord::s_ascii[static_cast<uint8_t>(er.m_type)];
const uint64_t time = er.m_tick - m_tickBegin;
fprintf(fp, "VLPROFEXEC %s %" PRIu64, name, time);
switch (er.m_type) {
case VlExecutionRecord::Type::SECTION_POP:
case VlExecutionRecord::Type::EXEC_GRAPH_BEGIN:
case VlExecutionRecord::Type::EXEC_GRAPH_END:
// No payload
fprintf(fp, "\n");
break;
case VlExecutionRecord::Type::MTASK_BEGIN: {
const auto& payload = er.m_payload.mtaskBegin;
if (payload.m_hierBlock[0] != '\0') {
fprintf(fp, " id %u predictStart %u cpu %u hierBlock %s\n", payload.m_id,
payload.m_predictStart, payload.m_cpu, payload.m_hierBlock);
} else {
fprintf(fp, " id %u predictStart %u cpu %u\n", payload.m_id,
payload.m_predictStart, payload.m_cpu);
}
break;
}
case VlExecutionRecord::Type::MTASK_END: {
const auto& payload = er.m_payload.mtaskEnd;
fprintf(fp, " predictCost %u\n", payload.m_predictCost);
break;
}
case VlExecutionRecord::Type::THREAD_SCHEDULE_WAIT_BEGIN:
case VlExecutionRecord::Type::THREAD_SCHEDULE_WAIT_END: {
const auto& payload = er.m_payload.threadScheduleWait;
fprintf(fp, " cpu %u\n", payload.m_cpu);
break;
}
case VlExecutionRecord::Type::SECTION_PUSH: {
const auto& payload = er.m_payload.sectionPush;
fprintf(fp, " %s\n", payload.m_name);
break;
}
default: abort(); // LCOV_EXCL_LINE
}
}
}
fprintf(fp, "VLPROF stat ticks %" PRIu64 "\n", tickEnd - m_tickBegin);
std::fclose(fp);
}
|