File: uvm_hal_types.h

/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_HAL_TYPES_H__
#define __UVM_HAL_TYPES_H__

#include "uvm_common.h"
#include "uvm_forward_decl.h"
#include "uvm_processors.h"

#define UVM_GPU_MMU_MAX_FAULT_PACKET_SIZE 32

typedef enum
{
    UVM_APERTURE_PEER_0,
    UVM_APERTURE_PEER_1,
    UVM_APERTURE_PEER_2,
    UVM_APERTURE_PEER_3,
    UVM_APERTURE_PEER_4,
    UVM_APERTURE_PEER_5,
    UVM_APERTURE_PEER_6,
    UVM_APERTURE_PEER_7,
    UVM_APERTURE_PEER_MAX,
    UVM_APERTURE_SYS,
    UVM_APERTURE_VID,

    // DEFAULT is a special value to let MMU pick the location of page tables
    UVM_APERTURE_DEFAULT,

    UVM_APERTURE_MAX
} uvm_aperture_t;

const char *uvm_aperture_string(uvm_aperture_t aperture);

static bool uvm_aperture_is_peer(uvm_aperture_t aperture)
{
    return (aperture >= UVM_APERTURE_PEER_0) && (aperture < UVM_APERTURE_PEER_MAX);
}

static inline NvU32 UVM_APERTURE_PEER_ID(uvm_aperture_t aperture)
{
    UVM_ASSERT(uvm_aperture_is_peer(aperture));

    return (NvU32)aperture;
}

static inline uvm_aperture_t UVM_APERTURE_PEER(NvU32 id)
{
    uvm_aperture_t aperture = (uvm_aperture_t)id;

    UVM_ASSERT(UVM_APERTURE_PEER_ID(aperture) == id);

    return aperture;
}
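
// Illustrative sketch (editorial, not part of the driver API): because
// UVM_APERTURE_PEER_0 is the first enumerator, a peer aperture and its index
// round-trip through the two helpers above, e.g.:
//
//     uvm_aperture_t aperture = UVM_APERTURE_PEER(3); // UVM_APERTURE_PEER_3
//     NvU32 id = UVM_APERTURE_PEER_ID(aperture);      // id == 3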

// A physical GPU address
typedef struct
{
    NvU64 address;

    uvm_aperture_t aperture;
} uvm_gpu_phys_address_t;

// Create a physical GPU address
static uvm_gpu_phys_address_t uvm_gpu_phys_address(uvm_aperture_t aperture, NvU64 address)
{
    return (uvm_gpu_phys_address_t){ address, aperture };
}

// Compare two gpu physical addresses
static int uvm_gpu_phys_addr_cmp(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b)
{
    int result = UVM_CMP_DEFAULT(a.aperture, b.aperture);
    if (result != 0)
        return result;

    return UVM_CMP_DEFAULT(a.address, b.address);
}
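
// Illustrative sketch (editorial, hypothetical addresses): the comparison
// orders by aperture first and address second, so a sysmem address sorts
// before a vidmem one even when its offset is larger:
//
//     uvm_gpu_phys_address_t a = uvm_gpu_phys_address(UVM_APERTURE_SYS, 0x2000);
//     uvm_gpu_phys_address_t b = uvm_gpu_phys_address(UVM_APERTURE_VID, 0x1000);
//     // uvm_gpu_phys_addr_cmp(a, b) < 0 since UVM_APERTURE_SYS < UVM_APERTURE_VID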

// A physical or virtual address directly accessible by a GPU.
// This implies that the address already went through identity mapping and IOMMU
// translations and is only valid for a specific GPU.
typedef struct
{
    // Physical or virtual address
    // In general, only valid for a specific GPU
    NvU64 address;

    // Aperture for a physical address
    uvm_aperture_t aperture;

    // Whether the address is virtual
    bool is_virtual;

    // Whether the address resides in a non-protected memory region when the
    // Confidential Computing feature is enabled. Default is protected. If the
    // feature is disabled, this field is ignored and should not be used.
    bool is_unprotected;
} uvm_gpu_address_t;

// Create a virtual GPU address
static uvm_gpu_address_t uvm_gpu_address_virtual(NvU64 va)
{
    uvm_gpu_address_t address = {0};
    address.address = va;
    address.aperture = UVM_APERTURE_MAX;
    address.is_virtual = true;
    return address;
}

static uvm_gpu_address_t uvm_gpu_address_virtual_unprotected(NvU64 va)
{
    uvm_gpu_address_t address = uvm_gpu_address_virtual(va);
    address.is_unprotected = true;
    return address;
}

// Create a physical GPU address
static uvm_gpu_address_t uvm_gpu_address_physical(uvm_aperture_t aperture, NvU64 pa)
{
    uvm_gpu_address_t address = {0};
    address.aperture = aperture;
    address.address = pa;
    return address;
}

// Create a GPU address from a physical GPU address
static uvm_gpu_address_t uvm_gpu_address_from_phys(uvm_gpu_phys_address_t phys_address)
{
    return uvm_gpu_address_physical(phys_address.aperture, phys_address.address);
}
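
// Illustrative usage sketch (editorial, hypothetical addresses): the
// constructors zero-initialize every field they do not set, so physical
// addresses default to non-virtual and protected:
//
//     uvm_gpu_address_t virt = uvm_gpu_address_virtual(0x7f0000000000ULL);
//     uvm_gpu_address_t phys = uvm_gpu_address_physical(UVM_APERTURE_VID, 0x1000);
//     // virt.is_virtual == true,  virt.aperture == UVM_APERTURE_MAX
//     // phys.is_virtual == false, phys.is_unprotected == false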

static const char *uvm_gpu_address_aperture_string(uvm_gpu_address_t addr)
{
    if (addr.is_virtual)
        return "VIRTUAL";
    return uvm_aperture_string(addr.aperture);
}

// Compare two gpu addresses
static int uvm_gpu_addr_cmp(uvm_gpu_address_t a, uvm_gpu_address_t b)
{
    int result = UVM_CMP_DEFAULT(a.is_virtual, b.is_virtual);
    if (result != 0)
        return result;

    if (a.is_virtual) {
        return UVM_CMP_DEFAULT(a.address, b.address);
    }
    else {
        uvm_gpu_phys_address_t phys_a = { a.address, a.aperture };
        uvm_gpu_phys_address_t phys_b = { b.address, b.aperture };

        return uvm_gpu_phys_addr_cmp(phys_a, phys_b);
    }
}
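
// Illustrative sketch (editorial): physical addresses compare below virtual
// ones (false orders before true), and two physical addresses fall back to
// uvm_gpu_phys_addr_cmp() above:
//
//     // uvm_gpu_addr_cmp(uvm_gpu_address_physical(UVM_APERTURE_SYS, 0x1000),
//     //                  uvm_gpu_address_virtual(0x1000)) < 0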

// For processors with no concept of an atomic fault (the CPU and pre-Pascal
// GPUs), UVM_PROT_READ_WRITE and UVM_PROT_READ_WRITE_ATOMIC are
// interchangeable.
typedef enum
{
    UVM_PROT_NONE,
    UVM_PROT_READ_ONLY,
    UVM_PROT_READ_WRITE,
    UVM_PROT_READ_WRITE_ATOMIC,
    UVM_PROT_MAX
} uvm_prot_t;

const char *uvm_prot_string(uvm_prot_t prot);

typedef enum
{
    UVM_MEMBAR_NONE,
    UVM_MEMBAR_GPU,
    UVM_MEMBAR_SYS,
} uvm_membar_t;

const char *uvm_membar_string(uvm_membar_t membar);

// Types of memory accesses that can cause a replayable fault on the GPU. They
// are ordered by access "intrusiveness" to simplify fault preprocessing (e.g.
// to implement fault coalescing)
typedef enum
{
    UVM_FAULT_ACCESS_TYPE_PREFETCH = 0,
    UVM_FAULT_ACCESS_TYPE_READ,
    UVM_FAULT_ACCESS_TYPE_WRITE,
    UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK,
    UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG,
    UVM_FAULT_ACCESS_TYPE_COUNT
} uvm_fault_access_type_t;

const char *uvm_fault_access_type_string(uvm_fault_access_type_t fault_access_type);

static NvU32 uvm_fault_access_type_mask_bit(uvm_fault_access_type_t fault_access_type)
{
    BUILD_BUG_ON(UVM_FAULT_ACCESS_TYPE_COUNT >= 32);

    UVM_ASSERT(fault_access_type >= 0);
    UVM_ASSERT(fault_access_type < UVM_FAULT_ACCESS_TYPE_COUNT);

    return (NvU32)1 << fault_access_type;
}

static bool uvm_fault_access_type_mask_test(NvU32 mask, uvm_fault_access_type_t fault_access_type)
{
    return uvm_fault_access_type_mask_bit(fault_access_type) & mask;
}

static void uvm_fault_access_type_mask_set(NvU32 *mask, uvm_fault_access_type_t fault_access_type)
{
    *mask |= uvm_fault_access_type_mask_bit(fault_access_type);
}

static uvm_fault_access_type_t uvm_fault_access_type_mask_highest(NvU32 mask)
{
    int pos;

    UVM_ASSERT((1 << UVM_FAULT_ACCESS_TYPE_COUNT) > mask);
    UVM_ASSERT(mask != 0);

    pos = __fls(mask);
    UVM_ASSERT(pos < UVM_FAULT_ACCESS_TYPE_COUNT);

    return pos;
}

static uvm_fault_access_type_t uvm_fault_access_type_mask_lowest(NvU32 mask)
{
    int pos;

    UVM_ASSERT((1 << UVM_FAULT_ACCESS_TYPE_COUNT) > mask);
    UVM_ASSERT(mask != 0);

    pos = __ffs(mask);
    UVM_ASSERT(pos < UVM_FAULT_ACCESS_TYPE_COUNT);

    return pos;
}
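
// Illustrative sketch (editorial): coalescing a read and a strong atomic to
// the same page yields a mask whose extremes recover both access types:
//
//     NvU32 mask = 0;
//     uvm_fault_access_type_mask_set(&mask, UVM_FAULT_ACCESS_TYPE_READ);
//     uvm_fault_access_type_mask_set(&mask, UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG);
//     // uvm_fault_access_type_mask_highest(mask) == UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG
//     // uvm_fault_access_type_mask_lowest(mask)  == UVM_FAULT_ACCESS_TYPE_READ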

typedef enum
{
    // Cancel all accesses on the page
    UVM_FAULT_CANCEL_VA_MODE_ALL = 0,

    // Cancel write and atomic accesses on the page
    UVM_FAULT_CANCEL_VA_MODE_WRITE_AND_ATOMIC,

    UVM_FAULT_CANCEL_VA_MODE_COUNT,
} uvm_fault_cancel_va_mode_t;

// Types of faults that can show up in the fault buffer. Non-UVM related faults
// are grouped in the FATAL category since we don't care about the specific
// type.
typedef enum
{
    UVM_FAULT_TYPE_INVALID_PDE = 0,
    UVM_FAULT_TYPE_INVALID_PTE,
    UVM_FAULT_TYPE_ATOMIC,

    // WRITE to READ-ONLY
    UVM_FAULT_TYPE_WRITE,

    // READ to WRITE-ONLY (ATS)
    UVM_FAULT_TYPE_READ,

    // The next values are considered fatal and are not handled by the UVM
    // driver
    UVM_FAULT_TYPE_FATAL,

    // Values required for tools
    UVM_FAULT_TYPE_PDE_SIZE = UVM_FAULT_TYPE_FATAL,
    UVM_FAULT_TYPE_VA_LIMIT_VIOLATION,
    UVM_FAULT_TYPE_UNBOUND_INST_BLOCK,
    UVM_FAULT_TYPE_PRIV_VIOLATION,
    UVM_FAULT_TYPE_PITCH_MASK_VIOLATION,
    UVM_FAULT_TYPE_WORK_CREATION,
    UVM_FAULT_TYPE_UNSUPPORTED_APERTURE,
    UVM_FAULT_TYPE_COMPRESSION_FAILURE,
    UVM_FAULT_TYPE_UNSUPPORTED_KIND,
    UVM_FAULT_TYPE_REGION_VIOLATION,
    UVM_FAULT_TYPE_POISONED,

    UVM_FAULT_TYPE_COUNT
} uvm_fault_type_t;

const char *uvm_fault_type_string(uvm_fault_type_t fault_type);

// Main MMU client type that triggered the fault
typedef enum
{
    UVM_FAULT_CLIENT_TYPE_GPC = 0,
    UVM_FAULT_CLIENT_TYPE_HUB,
    UVM_FAULT_CLIENT_TYPE_COUNT
} uvm_fault_client_type_t;

const char *uvm_fault_client_type_string(uvm_fault_client_type_t fault_client_type);

typedef enum
{
    UVM_MMU_ENGINE_TYPE_GRAPHICS = 0,
    UVM_MMU_ENGINE_TYPE_HOST,
    UVM_MMU_ENGINE_TYPE_CE,
    UVM_MMU_ENGINE_TYPE_COUNT,
} uvm_mmu_engine_type_t;

typedef enum
{
    // Allow this entry to be fetched before the previous entry finishes ESCHED
    // execution.
    UVM_GPFIFO_SYNC_PROCEED = 0,

    // Fetching of this entry has to wait until ESCHED has finished executing
    // the previous entry.
    // For a complete engine sync, the previous entry needs to include a
    // WAIT_FOR_IDLE command or other engine synchronization.
    UVM_GPFIFO_SYNC_WAIT,
} uvm_gpfifo_sync_t;

const char *uvm_mmu_engine_type_string(uvm_mmu_engine_type_t mmu_engine_type);

// HW unit that triggered the fault. We include the fields required for fault
// cancelling. Including more information might be useful for performance
// heuristics in the future.
typedef struct
{
    uvm_fault_client_type_t                client_type  : order_base_2(UVM_FAULT_CLIENT_TYPE_COUNT) + 1;

    uvm_mmu_engine_type_t              mmu_engine_type  : order_base_2(UVM_MMU_ENGINE_TYPE_COUNT) + 1;

    NvU16                                    client_id;

    NvU16                                mmu_engine_id;

    union
    {
        struct
        {
            NvU16                              utlb_id;

            NvU8                                gpc_id;
        };

        // TODO: Bug 3283289: the channel ID, which is only populated for
        // non-replayable faults, is never consumed.
        NvU16                               channel_id;
    };

    // Identifier of the subcontext that caused the fault. HW uses it as an
    // offset in the instance block to obtain the GPU VA space PDB of the
    // faulting process.
    NvU8                                         ve_id;
} uvm_fault_source_t;

struct uvm_fault_buffer_entry_struct
{
    //
    // The next fields are filled by the fault buffer parsing code
    //

    // Virtual address of the faulting request aligned to CPU page size
    NvU64                                fault_address;

    // GPU timestamp (in nanoseconds) when the fault was inserted in the fault
    // buffer
    NvU64                                    timestamp;

    uvm_gpu_phys_address_t                instance_ptr;

    uvm_fault_source_t                    fault_source;

    uvm_fault_type_t                        fault_type : order_base_2(UVM_FAULT_TYPE_COUNT) + 1;

    uvm_fault_access_type_t          fault_access_type : order_base_2(UVM_FAULT_ACCESS_TYPE_COUNT) + 1;

    //
    // The next fields are managed by the fault handling code
    //

    uvm_va_space_t                           *va_space;

    // This is set to true when some fault could not be serviced and a
    // cancel command needs to be issued
    bool                                      is_fatal : 1;

    // This is set to true for all GPU faults on a page that is thrashing
    bool                                  is_throttled : 1;

    // This is set to true if the fault has prefetch access type and the
    // address or the access privileges are not valid
    bool                           is_invalid_prefetch : 1;

    bool                                 is_replayable : 1;

    bool                                    is_virtual : 1;

    bool                             in_protected_mode : 1;

    bool                                      filtered : 1;

    // Reason for the fault to be fatal
    UvmEventFatalReason                   fatal_reason : order_base_2(UvmEventNumFatalReasons) + 1;

    // Mode to be used to cancel faults. This must be set according to the
    // fatal fault reason and the fault access types of the merged fault
    // instances.
    union
    {
        struct
        {
            uvm_fault_cancel_va_mode_t  cancel_va_mode : order_base_2(UVM_FAULT_CANCEL_VA_MODE_COUNT) + 1;
        } replayable;

        struct
        {
            NvU32                         buffer_index;
        } non_replayable;
    };

    // List of duplicate fault buffer entries that have been merged into this
    // one
    struct list_head        merged_instances_list;

    // Access types to this page for all accesses that have been coalesced at
    // fetch time. It must include, at least, fault_access_type
    NvU32                        access_type_mask;

    // Number of faults with the same properties that have been coalesced at
    // fetch time
    NvU16                           num_instances;
};

typedef enum
{
    // Completes when all fault replays are in-flight
    UVM_FAULT_REPLAY_TYPE_START = 0,

    // Completes when all faulting accesses have been correctly translated or
    // faulted again
    UVM_FAULT_REPLAY_TYPE_START_ACK_ALL,

    UVM_FAULT_REPLAY_TYPE_MAX
} uvm_fault_replay_type_t;

static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
{
    BUILD_BUG_ON(UVM_MEMBAR_NONE >= UVM_MEMBAR_GPU);
    BUILD_BUG_ON(UVM_MEMBAR_GPU >= UVM_MEMBAR_SYS);
    return max(membar_1, membar_2);
}
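
// Illustrative sketch (editorial): the BUILD_BUG_ONs above guarantee the enum
// is ordered by strength, so combining requirements keeps the stronger one:
//
//     // uvm_membar_max(UVM_MEMBAR_GPU, UVM_MEMBAR_SYS)  == UVM_MEMBAR_SYS
//     // uvm_membar_max(UVM_MEMBAR_NONE, UVM_MEMBAR_GPU) == UVM_MEMBAR_GPU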

typedef enum
{
    UVM_ACCESS_COUNTER_TYPE_MIMC = 0,
    UVM_ACCESS_COUNTER_TYPE_MOMC,

    UVM_ACCESS_COUNTER_TYPE_MAX,
} uvm_access_counter_type_t;

const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type);

struct uvm_access_counter_buffer_entry_struct
{
    // Whether this counter refers to outbound accesses to remote GPUs or
    // sysmem (MIMC), or to inbound accesses to vidmem from the CPU or a
    // non-peer GPU, whose accesses are also routed through the CPU (MOMC)
    uvm_access_counter_type_t counter_type;

    // Address of the region for which a notification was sent
    uvm_gpu_address_t address;

    // These fields are only valid if address.is_virtual is true
    union
    {
        struct
        {
            // Instance pointer of one of the channels in the TSG that triggered
            // the notification.
            uvm_gpu_phys_address_t instance_ptr;

            uvm_mmu_engine_type_t mmu_engine_type;

            NvU32 mmu_engine_id;

            // Identifier of the subcontext that performed the memory accesses
            // that triggered the notification. This value, combined with the
            // instance_ptr, is needed to obtain the GPU VA space of the process
            // that triggered the notification.
            NvU32 ve_id;

            // VA space for the address that triggered the notification
            uvm_va_space_t *va_space;
        } virtual_info;

        // These fields are only valid if address.is_virtual is false
        struct
        {
            // Processor id where data is resident
            //
            // Although this information is not tied to a VA space, we can use
            // a regular processor id because P2P is not allowed between
            // partitioned GPUs.
            uvm_processor_id_t resident_id;
        } physical_info;
    };

    // Number of times the tracked region was accessed since the last time it
    // was cleared. Counter values saturate at the maximum value supported by
    // the GPU (2^16 - 1 in Volta)
    NvU32 counter_value;

    // When the granularity of the tracked regions is greater than 64KB, the
    // region is split into 32 equal subregions. Each bit in this field
    // represents one of those subregions. 1 means that the subregion has been
    // accessed
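    //
    // Example (editorial): for a region tracked at 2MB granularity, each bit
    // covers 2MB / 32 = 64KB.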
    NvU32 sub_granularity;

    // Opaque fields provided by HW, required for targeted clear of a counter
    NvU32 bank;
    NvU32 tag;
};

static uvm_prot_t uvm_fault_access_type_to_prot(uvm_fault_access_type_t access_type)
{
    switch (access_type) {
        case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
            return UVM_PROT_READ_WRITE_ATOMIC;

        case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
        case UVM_FAULT_ACCESS_TYPE_WRITE:
            return UVM_PROT_READ_WRITE;

        default:
            // Prefetch faults, if not ignored, are handled like read faults and
            // require a mapping with, at least, READ_ONLY access permission.
            return UVM_PROT_READ_ONLY;
    }
}
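
// Illustrative sketch (editorial): only strong atomics demand atomic mapping
// permissions; weak atomics are satisfied by a read-write mapping:
//
//     // uvm_fault_access_type_to_prot(UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK)
//     //     == UVM_PROT_READ_WRITE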

#endif // __UVM_HAL_TYPES_H__