/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#ifndef __UVM_GLOBAL_H__
#define __UVM_GLOBAL_H__
#include "nv_uvm_types.h"
#include "uvm_extern_decl.h"
#include "uvm_linux.h"
#include "uvm_common.h"
#include "uvm_processors.h"
#include "uvm_gpu.h"
#include "uvm_lock.h"
#include "uvm_ats_ibm.h"
// Global state of the uvm driver
struct uvm_global_struct
{
    // Mask of retained GPUs.
    // Note that GPUs are added to this mask as the last step of add_gpu() and
    // removed from it as the first step of remove_gpu(), implying that a GPU
    // that's being initialized or deinitialized will not be in it.
    uvm_processor_mask_t retained_gpus;

    // Array of the parent GPUs registered with UVM. Note that GPUs will have
    // ids offset by 1 to accommodate the UVM_ID_CPU, so e.g. parent_gpus[0]
    // will have GPU id = 1. A GPU entry is unused iff it does not exist
    // (is a NULL pointer) in this table.
    uvm_parent_gpu_t *parent_gpus[UVM_PARENT_ID_MAX_GPUS];

    // A global RM session (RM client).
    // Created on module load and destroyed on module unload.
    uvmGpuSessionHandle rm_session_handle;

    // Peer-to-peer table.
    // Peer info is added to and removed from this table when the usermode
    // driver calls UvmEnablePeerAccess and UvmDisablePeerAccess,
    // respectively.
    uvm_gpu_peer_t peers[UVM_MAX_UNIQUE_GPU_PAIRS];

    // Peer-to-peer copy mode.
    // Pascal+ GPUs support virtual addresses in p2p copies.
    // Ampere+ GPUs add support for physical addresses in p2p copies.
    uvm_gpu_peer_copy_mode_t peer_copy_mode;

    // Stores an NV_STATUS. Once it becomes != NV_OK, the driver should refuse
    // to do anything other than try to clean up as much as possible.
    // An example of a fatal error is an unrecoverable ECC error on one of the
    // GPUs.
    atomic_t fatal_error;

    // A flag to disable the assert on fatal error.
    // To be used by tests and only consulted if tests are enabled.
    bool disable_fatal_error_assert;

    // Lock protecting the global state
    uvm_mutex_t global_lock;

    struct
    {
        // Lock synchronizing user threads with power management activity
        uvm_rw_semaphore_t lock;

        // Power management state flag; tested by UVM_GPU_WRITE_ONCE()
        // and UVM_GPU_READ_ONCE() to detect accesses to GPUs when
        // UVM is suspended.
        bool is_suspended;
    } pm;

    // This lock synchronizes addition and removal of GPUs from UVM's global
    // table. It must be held whenever g_uvm_global.parent_gpus[] is written.
    // In order to read from this table, you must hold either the
    // gpu_table_lock or the global_lock.
    //
    // This is a leaf lock.
    uvm_spinlock_irqsave_t gpu_table_lock;

    // Number of simulated/emulated devices that have registered with UVM
    unsigned num_simulated_devices;

    // A single queue for deferred work that is non-GPU-specific.
    nv_kthread_q_t global_q;

    // A single queue for deferred f_ops->release() handling. Items scheduled
    // to run on it may block for the duration of system sleep cycles,
    // stalling the queue and preventing any other items from running.
    nv_kthread_q_t deferred_release_q;

    struct
    {
        // Indicates whether the system HW supports ATS. This field is set
        // once during global initialization (uvm_global_init), and can be
        // read afterwards without acquiring any locks.
        bool supported;

        // On top of HW platform support, ATS support can be overridden using
        // the module parameter uvm_ats_mode. This field is set once during
        // global initialization (uvm_global_init), and can be read afterwards
        // without acquiring any locks.
        bool enabled;
    } ats;

#if UVM_IBM_NPU_SUPPORTED()
    // On IBM systems this array tracks the active NPUs (the NPUs which are
    // attached to retained GPUs).
    uvm_ibm_npu_t npus[NV_MAX_NPUS];
#endif

    // List of all active VA spaces
    struct
    {
        uvm_mutex_t lock;
        struct list_head list;
    } va_spaces;

    // Notify a registered process about the driver state after the driver is
    // unloaded. The intent is to systematically report any error during
    // driver teardown. unload_state is used for testing only.
    struct
    {
        // ptr points to an 8-byte buffer within page.
        NvU64 *ptr;
        struct page *page;
    } unload_state;

    // True if AMD's SEV, or an equivalent HW security extension such as
    // Intel's TDX, is enabled in the VM. The flag is always false on the
    // host.
    //
    // This value moves in tandem with that of Confidential Computing in the
    // GPU(s) in all supported configurations, so it is used as a proxy for
    // the Confidential Computing state.
    //
    // This field is set once during global initialization (uvm_global_init),
    // and can be read afterwards without acquiring any locks.
    bool conf_computing_enabled;
};
// Initialize global uvm state
NV_STATUS uvm_global_init(void);
// Deinitialize global state (called from module exit)
void uvm_global_exit(void);
// Prepare for entry into a system sleep state
NV_STATUS uvm_suspend_entry(void);
// Recover after exit from a system sleep state
NV_STATUS uvm_resume_entry(void);
// Add parent GPU to the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_add_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);

    UVM_ASSERT(!g_uvm_global.parent_gpus[gpu_index]);
    g_uvm_global.parent_gpus[gpu_index] = parent_gpu;
}
// Remove parent GPU from the global table.
//
// LOCKING: requires that you hold the global lock and gpu_table_lock
static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);

    uvm_assert_mutex_locked(&g_uvm_global.global_lock);
    uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);

    UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == NULL || g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
    g_uvm_global.parent_gpus[gpu_index] = NULL;
}
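
// Illustrative sketch (not part of the driver): the lock order expected by
// the add/remove helpers above is global_lock first, then gpu_table_lock,
// matching the asserts in their bodies. Assuming the mutex/spinlock wrappers
// from uvm_lock.h, a caller would look roughly like:
//
//     uvm_mutex_lock(&g_uvm_global.global_lock);
//     uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
//     uvm_global_add_parent_gpu(parent_gpu);
//     uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
//     uvm_mutex_unlock(&g_uvm_global.global_lock);
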
// Get a parent gpu by its id.
// Returns a pointer to the parent GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global lock, or have
// retained at least one of the child GPUs.
static uvm_parent_gpu_t *uvm_parent_gpu_get(uvm_parent_gpu_id_t id)
{
    return g_uvm_global.parent_gpus[uvm_parent_id_gpu_index(id)];
}
// Get a gpu by its GPU id.
// Returns a pointer to the GPU object, or NULL if not found.
//
// LOCKING: requires that you hold the gpu_table_lock, the global_lock, or have
// retained the gpu.
static uvm_gpu_t *uvm_gpu_get(uvm_gpu_id_t gpu_id)
{
    uvm_parent_gpu_t *parent_gpu;

    parent_gpu = g_uvm_global.parent_gpus[uvm_parent_id_gpu_index_from_gpu_id(gpu_id)];
    if (!parent_gpu)
        return NULL;

    return parent_gpu->gpus[uvm_id_sub_processor_index(gpu_id)];
}
static uvmGpuSessionHandle uvm_global_session_handle(void)
{
    return g_uvm_global.rm_session_handle;
}
// Use these READ_ONCE()/WRITE_ONCE() wrappers when accessing GPU resources
// in BAR0/BAR1 to detect cases in which GPUs are accessed when UVM is
// suspended.
#define UVM_GPU_WRITE_ONCE(x, val) do { \
    UVM_ASSERT(!uvm_global_is_suspended()); \
    UVM_WRITE_ONCE(x, val); \
} while (0)
#define UVM_GPU_READ_ONCE(x) ({ \
    UVM_ASSERT(!uvm_global_is_suspended()); \
    UVM_READ_ONCE(x); \
})
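
// Illustrative sketch: accessing a BAR-mapped GPU location through the
// wrappers so that a read or write issued while UVM is suspended trips the
// assert. "reg" is a hypothetical pointer into a BAR0/BAR1 mapping:
//
//     volatile NvU32 *reg = ...;
//     NvU32 val = UVM_GPU_READ_ONCE(*reg);
//     UVM_GPU_WRITE_ONCE(*reg, val | 0x1);
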
static bool global_is_fatal_error_assert_disabled(void)
{
    // Only allow the assert to be disabled if tests are enabled
    if (!uvm_enable_builtin_tests)
        return false;

    return g_uvm_global.disable_fatal_error_assert;
}
// Set a global fatal error
// Once that happens, the driver should refuse to do anything other than try
// to clean up as much as possible.
// An example of a fatal error is an unrecoverable ECC error on one of the
// GPUs.
// Use a macro so that the assert below provides precise file and line info and
// a backtrace.
#define uvm_global_set_fatal_error(error) \
    do {                                                                      \
        if (!global_is_fatal_error_assert_disabled())                         \
            UVM_ASSERT_MSG(0, "Fatal error: %s\n", nvstatusToString(error));  \
        uvm_global_set_fatal_error_impl(error);                               \
    } while (0)
void uvm_global_set_fatal_error_impl(NV_STATUS error);
// Get the global status
static NV_STATUS uvm_global_get_status(void)
{
    return atomic_read(&g_uvm_global.fatal_error);
}
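
// Illustrative sketch: a caller that hits an unrecoverable condition raises
// the fatal error, and later code observes it through uvm_global_get_status().
// The condition and the status code below are for illustration only:
//
//     NV_STATUS status;
//
//     if (ecc_error_is_unrecoverable)               // hypothetical condition
//         uvm_global_set_fatal_error(NV_ERR_ECC_ERROR);
//
//     status = uvm_global_get_status();
//     if (status != NV_OK)
//         return status;
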
// Reset the global fatal error
// This is to be used only by tests that trigger the global error on purpose.
// Returns the value of the global error field that existed just before this
// reset call was made.
NV_STATUS uvm_global_reset_fatal_error(void);
static uvm_gpu_t *uvm_processor_mask_find_first_gpu(const uvm_processor_mask_t *gpus)
{
    uvm_gpu_t *gpu;
    uvm_gpu_id_t gpu_id = uvm_processor_mask_find_first_gpu_id(gpus);

    if (UVM_ID_IS_INVALID(gpu_id))
        return NULL;

    gpu = uvm_gpu_get(gpu_id);

    // If there is a valid GPU id in the mask, assert that the corresponding
    // uvm_gpu_t is present. Otherwise it would stop a for_each_gpu_in_mask()
    // loop prematurely. Today, this could only happen in remove_gpu(), because
    // the GPU being removed is deleted from the global table very early.
    UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));

    return gpu;
}
static uvm_gpu_t *__uvm_processor_mask_find_next_gpu(const uvm_processor_mask_t *gpus, uvm_gpu_t *gpu)
{
    uvm_gpu_id_t gpu_id;

    UVM_ASSERT(gpu);

    gpu_id = uvm_processor_mask_find_next_id(gpus, uvm_gpu_id_next(gpu->id));
    if (UVM_ID_IS_INVALID(gpu_id))
        return NULL;

    gpu = uvm_gpu_get(gpu_id);

    // See comment in uvm_processor_mask_find_first_gpu().
    UVM_ASSERT_MSG(gpu, "gpu_id %u\n", uvm_id_value(gpu_id));

    return gpu;
}
// Helper to iterate over all GPUs in the input mask
#define for_each_gpu_in_mask(gpu, mask) \
    for (gpu = uvm_processor_mask_find_first_gpu(mask);    \
         gpu != NULL;                                       \
         gpu = __uvm_processor_mask_find_next_gpu(mask, gpu))
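
// Illustrative sketch: iterating the GPUs in a caller-owned mask. "mask" and
// "process_gpu" are hypothetical:
//
//     uvm_gpu_t *gpu;
//     for_each_gpu_in_mask(gpu, &mask)
//         process_gpu(gpu);
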
// Helper to iterate over all GPUs retained by the UVM driver
// (across all va spaces).
#define for_each_gpu(gpu) \
    for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock);                      \
           gpu = uvm_processor_mask_find_first_gpu(&g_uvm_global.retained_gpus);}); \
         gpu != NULL;                                                               \
         gpu = __uvm_processor_mask_find_next_gpu(&g_uvm_global.retained_gpus, gpu))
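
// Illustrative sketch: for_each_gpu() walks g_uvm_global.retained_gpus and
// asserts that the global lock is held, so a caller must take it first:
//
//     uvm_mutex_lock(&g_uvm_global.global_lock);
//     for_each_gpu(gpu)
//         process_gpu(gpu);                 // hypothetical per-GPU work
//     uvm_mutex_unlock(&g_uvm_global.global_lock);
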
// LOCKING: Must hold either the global_lock or the gpu_table_lock
static uvm_parent_gpu_t *uvm_global_find_next_parent_gpu(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 i;

    if (parent_gpu) {
        NvU32 gpu_index = uvm_parent_id_gpu_index(parent_gpu->id);

        i = gpu_index + 1;
    }
    else {
        i = 0;
    }

    parent_gpu = NULL;

    while (i < UVM_PARENT_ID_MAX_GPUS) {
        if (g_uvm_global.parent_gpus[i]) {
            parent_gpu = g_uvm_global.parent_gpus[i];
            break;
        }

        i++;
    }

    return parent_gpu;
}
// LOCKING: Must hold the global_lock
static uvm_gpu_t *uvm_gpu_find_next_valid_gpu_in_parent(uvm_parent_gpu_t *parent_gpu, uvm_gpu_t *cur_gpu)
{
    uvm_gpu_t *gpu = NULL;
    uvm_gpu_id_t gpu_id;
    NvU32 sub_processor_index;
    NvU32 cur_sub_processor_index;

    UVM_ASSERT(parent_gpu);

    gpu_id = uvm_gpu_id_from_parent_gpu_id(parent_gpu->id);
    cur_sub_processor_index = cur_gpu ? uvm_id_sub_processor_index(cur_gpu->id) : -1;

    sub_processor_index = find_next_bit(parent_gpu->valid_gpus,
                                        UVM_PARENT_ID_MAX_SUB_PROCESSORS,
                                        cur_sub_processor_index + 1);
    if (sub_processor_index < UVM_PARENT_ID_MAX_SUB_PROCESSORS) {
        gpu = uvm_gpu_get(uvm_id_from_value(uvm_id_value(gpu_id) + sub_processor_index));
        UVM_ASSERT(gpu != NULL);
    }

    return gpu;
}
// LOCKING: Must hold either the global_lock or the gpu_table_lock
#define for_each_parent_gpu(parent_gpu) \
    for ((parent_gpu) = uvm_global_find_next_parent_gpu(NULL); \
         (parent_gpu) != NULL;                                  \
         (parent_gpu) = uvm_global_find_next_parent_gpu((parent_gpu)))
// LOCKING: Must hold the global_lock
#define for_each_gpu_in_parent(parent_gpu, gpu) \
    for (({uvm_assert_mutex_locked(&g_uvm_global.global_lock);                   \
           (gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), NULL);}); \
         (gpu) != NULL;                                                          \
         (gpu) = uvm_gpu_find_next_valid_gpu_in_parent((parent_gpu), (gpu)))
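
// Illustrative sketch: walking every parent GPU and then each of its valid
// child GPUs. With the global lock held, the nested form would look roughly
// like:
//
//     uvm_parent_gpu_t *parent_gpu;
//     uvm_gpu_t *gpu;
//
//     for_each_parent_gpu(parent_gpu) {
//         for_each_gpu_in_parent(parent_gpu, gpu)
//             process_gpu(gpu);             // hypothetical per-GPU work
//     }
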
// Helper which calls uvm_gpu_retain() on each GPU in mask.
void uvm_global_gpu_retain(const uvm_processor_mask_t *mask);
// Helper which calls uvm_gpu_release_locked on each GPU in mask.
//
// LOCKING: this function takes and releases the global lock if the input mask
// is not empty
void uvm_global_gpu_release(const uvm_processor_mask_t *mask);
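
// Illustrative sketch: retain/release are expected to pair around a span in
// which the caller needs the GPUs in "mask" to stay registered. "mask" is
// hypothetical:
//
//     uvm_global_gpu_retain(&mask);
//     ...                                   // use the GPUs
//     uvm_global_gpu_release(&mask);
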
// Check for ECC errors for all GPUs in a mask.
// Note that this check cannot be performed in contexts where it's not safe to
// call into RM.
NV_STATUS uvm_global_gpu_check_ecc_error(uvm_processor_mask_t *gpus);
// Pre-allocate fault service contexts.
NV_STATUS uvm_service_block_context_init(void);
// Release fault service contexts if any exist.
void uvm_service_block_context_exit(void);
// Allocate a service block context
uvm_service_block_context_t *uvm_service_block_context_alloc(struct mm_struct *mm);
// Free a service block context
void uvm_service_block_context_free(uvm_service_block_context_t *service_context);
#endif // __UVM_GLOBAL_H__