1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
|
#include "threading.h"
#include "cmdline/cmdline.h"
#include "object/objcollide.h"
#include "globalincs/pstypes.h"
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>
#ifdef WIN32
#include <windows.h>
#elif defined(__APPLE__)
#include <sys/sysctl.h>
#endif
namespace threading {
//Number of worker threads the pool should run. Starts at 1, is overwritten by init_task_pool() and is what is_threading() tests.
static size_t num_threads = 1;
//Condition variable the worker threads block on inside mp_worker_thread_main() until they are notified.
static std::condition_variable wait_for_task;
//Mutex paired with wait_for_task; every write to wait_for_task_condition in this file happens while it is held.
static std::mutex wait_for_task_mutex;
//Wake-up predicate for wait_for_task: set by spin_up_threaded_task(), cleared by spin_down_threaded_task().
//Has static storage duration and no initializer, so it starts out false.
static bool wait_for_task_condition;
//The task a worker dispatches on after waking up; stored by spin_up_threaded_task().
static std::atomic<WorkerThreadTask> worker_task;
//Handles of the spawned worker threads; filled by init_task_pool() and joined by shut_down_task_pool().
static SCP_vector<std::thread> worker_threads;
//Internal Functions
static void mp_worker_thread_main(size_t threadIdx) {
while(true) {
{
std::unique_lock<std::mutex> lk(wait_for_task_mutex);
wait_for_task.wait(lk, []() { return wait_for_task_condition; });
}
switch (worker_task.load(std::memory_order_acquire)) {
case WorkerThreadTask::EXIT:
return;
case WorkerThreadTask::COLLISION:
collide_mp_worker_thread(threadIdx);
break;
default:
UNREACHABLE("Invalid threaded worker task!");
}
}
}
//Last-resort core count based on std::thread::hardware_concurrency().
//Note that this is the number of hardware threads rather than physical cores, which is why the platform-specific
//queries are tried first. Returns 1 (and raises a Warning) when the standard library reports 0, i.e. cannot tell.
static size_t get_number_of_physical_cores_fallback() {
	const unsigned int logical_count = std::thread::hardware_concurrency();

	if (logical_count == 0) {
		Warning(LOCATION, "Could not autodetect available number of threads! Disabling multithreading...");
		return 1;
	}

	return logical_count;
}
//We don't want to rely on std::thread::hardware_concurrency() unless we have to, as it reports threads, not physical cores, and FSO doesn't gain much from hyperthreaded threads at the moment.
#ifdef WIN32
static size_t get_number_of_physical_cores() {
auto glpi = (BOOL (WINAPI *)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION, PDWORD)) GetProcAddress(
GetModuleHandle(TEXT("kernel32")),
"GetLogicalProcessorInformation");
if (glpi == nullptr)
return get_number_of_physical_cores_fallback();
DWORD length = 0;
glpi(nullptr, &length);
SCP_vector<SYSTEM_LOGICAL_PROCESSOR_INFORMATION> infoBuffer(length / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION));
DWORD error = glpi(infoBuffer.data(), &length);
if (error == 0)
return get_number_of_physical_cores_fallback();
size_t num_cores = 0;
for (const auto& info : infoBuffer) {
if (info.Relationship == RelationProcessorCore && info.ProcessorMask != 0)
num_cores++;
}
if (num_cores < 1) {
//invalid results, try fallback
return get_number_of_physical_cores_fallback();
}
else {
return num_cores;
}
}
#elif defined __APPLE__
//macOS: asks sysctl for the physical core count.
//"hw.perflevel0.physicalcpu" is queried first (apple silicon, performance cores only); if that query fails,
//"hw.physicalcpu" is used (intel). Any failure or non-positive result defers to the fallback.
static size_t get_number_of_physical_cores() {
	int core_count = 0;
	size_t value_size = sizeof(core_count);

	// apple silicon (performance cores only)
	int status = sysctlbyname("hw.perflevel0.physicalcpu", &core_count, &value_size, nullptr, 0);

	// intel
	if (status != 0) {
		status = sysctlbyname("hw.physicalcpu", &core_count, &value_size, nullptr, 0);
	}

	// invalid results, try fallback
	if (status != 0 || core_count <= 0) {
		return get_number_of_physical_cores_fallback();
	}

	return core_count;
}
#elif defined SCP_UNIX
static size_t get_number_of_physical_cores() {
try {
std::ifstream cpuinfo("/proc/cpuinfo");
SCP_string line;
while (std::getline(cpuinfo, line)) {
//Looking for a cpu cores property is fine assuming a user has only one physical CPU socket. If they have multiple CPU's, this'll underreport the core count, but that should be very rare in typical configurations
if (line.find("cpu cores") != SCP_string::npos){
size_t numberpos = line.find(": ");
if (numberpos == SCP_string::npos)
return get_number_of_physical_cores_fallback();
int num_cores = std::stoi(line.substr(numberpos + 2));
if (num_cores < 1) {
//invalid results, try fallback
return get_number_of_physical_cores_fallback();
}
else {
return num_cores;
}
}
}
return get_number_of_physical_cores_fallback();
}
catch (const std::exception&) {
return get_number_of_physical_cores_fallback();
}
}
#else
//No platform-specific core query is implemented for the remaining platforms, so the name maps directly onto the hardware_concurrency()-based fallback.
#define get_number_of_physical_cores() get_number_of_physical_cores_fallback()
#endif
//External Functions
//Posts a task to the pool and wakes every worker.
//The task is stored first, so a worker that wakes up because of the flag below reads the new value.
//The wake-up flag stays set until spin_down_threaded_task() clears it.
void spin_up_threaded_task(WorkerThreadTask task) {
	worker_task.store(task, std::memory_order_seq_cst);

	//The flag is written and the workers are notified while the mutex is held.
	std::lock_guard<std::mutex> guard(wait_for_task_mutex);
	wait_for_task_condition = true;
	wait_for_task.notify_all();
}
//Clears the wake-up flag so that workers block again the next time they reach the wait in mp_worker_thread_main().
//This function only writes the flag; it does not itself signal or wait for any worker.
void spin_down_threaded_task() {
	std::lock_guard<std::mutex> guard(wait_for_task_mutex);
	wait_for_task_condition = false;
}
//Decides how many worker threads to run and spawns them.
//  Cmdline_multithreading == 0 : autodetect, using one worker per physical core minus one, capped at 7 workers.
//  Cmdline_multithreading >  1 : use the requested value minus one as the worker count.
//  anything else               : no workers; the pool stays empty and is_threading() reports false.
//Each worker runs mp_worker_thread_main() with its index in worker_threads as argument.
void init_task_pool() {
	if (Cmdline_multithreading == 0) {
		//At least given the current collision-detection threading, 8 cores (if available) seems like a sweetspot, with more cores adding too much overhead.
		//This could be improved in the future.
		//This could also be made task-dependent, if stuff like parallelized loading benefits from more cores.
		constexpr size_t max_auto_workers = 7;
		num_threads = std::min(get_number_of_physical_cores() - 1, max_auto_workers);
	}
	else if (Cmdline_multithreading > 1) {
		num_threads = static_cast<size_t>(Cmdline_multithreading) - 1;
	}
	else {
		//A value of 1 asks for zero workers. A negative value (possible if the option is a signed integer; it is
		//declared outside this file) must not be allowed to wrap around to a huge size_t, so it is treated the same way.
		num_threads = 0;
	}

	if (!is_threading())
		return;

	mprintf(("Spinning up threadpool with %d threads...\n", static_cast<int>(num_threads)));

	for (size_t i = 0; i < num_threads; i++) {
		worker_threads.emplace_back([i](){ mp_worker_thread_main(i); });
	}
}
void shut_down_task_pool() {
spin_up_threaded_task(WorkerThreadTask::EXIT);
for(auto& thread : worker_threads) {
thread.join();
}
}
//True when the pool is configured to use at least one worker thread (num_threads is non-zero).
bool is_threading() {
	return num_threads != 0;
}
//Number of worker thread handles currently held in worker_threads.
size_t get_num_workers() {
	const size_t spawned_workers = worker_threads.size();
	return spawned_workers;
}
}
|