File: uvm_gpu.h

/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#ifndef __UVM_GPU_H__
#define __UVM_GPU_H__

#include "nvtypes.h"
#include "nvmisc.h"
#include "uvm_types.h"
#include "nv_uvm_types.h"
#include "uvm_linux.h"
#include "nv-kref.h"
#include "uvm_common.h"
#include "ctrl2080mc.h"
#include "uvm_forward_decl.h"
#include "uvm_processors.h"
#include "uvm_pmm_gpu.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_mmu.h"
#include "uvm_gpu_replayable_faults.h"
#include "uvm_gpu_isr.h"
#include "uvm_hal_types.h"
#include "uvm_hmm.h"
#include "uvm_va_block_types.h"
#include "uvm_perf_module.h"
#include "uvm_rb_tree.h"
#include "uvm_perf_prefetch.h"
#include "nv-kthread-q.h"
#include <linux/mmu_notifier.h>
#include "uvm_conf_computing.h"

// Buffer length to store uvm gpu id, RM device name and gpu uuid.
#define UVM_GPU_NICE_NAME_BUFFER_LENGTH (sizeof("ID 999: : ") + \
            UVM_GPU_NAME_LENGTH + UVM_GPU_UUID_TEXT_BUFFER_LENGTH)
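
// The buffer is sized for a printable name of the form
// "ID <uvm gpu id>: <RM name>: <uuid>", for example (values illustrative
// only):
//
//     ID 1: Tesla V100-SXM2-16GB: GPU-00000000-1111-2222-3333-444444444444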

#define UVM_GPU_MAGIC_VALUE 0xc001d00d12341993ULL

typedef struct
{
    // Number of faults from this uTLB that have been fetched but have not been
    // serviced yet.
    NvU32 num_pending_faults;

    // Whether the uTLB contains fatal faults
    bool has_fatal_faults;

    // We have issued a replay of type START_ACK_ALL while containing fatal
    // faults. This puts the uTLB in lockdown mode and no new translations are
    // accepted.
    bool in_lockdown;

    // We have issued a cancel on this uTLB
    bool cancelled;

    uvm_fault_buffer_entry_t prev_fatal_fault;

    // Last fetched fault that was originated from this uTLB. Used for fault
    // filtering.
    uvm_fault_buffer_entry_t *last_fault;
} uvm_fault_utlb_info_t;

struct uvm_service_block_context_struct
{
    //
    // Fields initialized by CPU/GPU fault handling and access counter routines
    //

    // Whether the information refers to replayable/non-replayable faults or
    // access counters
    uvm_service_operation_t operation;

    // Processors that will be the residency of pages after the operation has
    // been serviced
    uvm_processor_mask_t resident_processors;

    // VA block region that contains all the pages affected by the operation
    uvm_va_block_region_t region;

    // Array of type uvm_fault_access_type_t that contains the type of the
    // access that caused the fault/access_counter notification to be serviced
    // for each page.
    NvU8 access_type[PAGES_PER_UVM_VA_BLOCK];

    // Number of times the service operation has been retried
    unsigned num_retries;

    // Pages that need to be pinned due to thrashing
    uvm_page_mask_t thrashing_pin_mask;

    // Number of pages that need to be pinned due to thrashing. This is the same
    // value as the result of bitmap_weight(thrashing_pin_mask)
    unsigned thrashing_pin_count;

    // Pages that can be read-duplicated
    uvm_page_mask_t read_duplicate_mask;

    // Number of pages that can be read-duplicated. This is the same value as
    // the result of bitmap_weight(read_duplicate_mask)
    unsigned read_duplicate_count;

    //
    // Fields used by the CPU fault handling routine
    //

    struct
    {
        // Node of the list of fault service contexts used by the CPU
        struct list_head service_context_list;

        // A mask of GPUs that need to be checked for ECC errors before the CPU
        // fault handler returns, but after the VA space lock has been unlocked
        // to avoid the RM/UVM VA space lock deadlocks.
        uvm_processor_mask_t gpus_to_check_for_ecc;

        // This is set to throttle page fault thrashing.
        NvU64 wakeup_time_stamp;

        // This is set if the page was migrated between the GPU and the CPU.
        bool did_migrate;

        // Sequence number used to start a mmu notifier read side critical
        // section.
        unsigned long notifier_seq;

        struct vm_fault *vmf;
    } cpu_fault;

    //
    // Fields managed by the common operation servicing routine
    //

    uvm_prot_page_mask_array_t mappings_by_prot;

    // Mask with the pages that did not migrate to the processor (they were
    // already resident) in the last call to uvm_va_block_make_resident.
    // This is used to compute the pages that need to revoke mapping permissions
    // from other processors.
    uvm_page_mask_t did_not_migrate_mask;

    // Pages whose permissions need to be revoked from other processors
    uvm_page_mask_t revocation_mask;

    struct
    {
        // Per-processor mask with the pages that will be resident after
        // servicing. We need one mask per processor because we may coalesce
        // faults that trigger migrations to different processors.
        uvm_page_mask_t new_residency;
    } per_processor_masks[UVM_ID_MAX_PROCESSORS];

    // State used by the VA block routines called by the servicing routine
    uvm_va_block_context_t block_context;

    // Prefetch state hint
    uvm_perf_prefetch_hint_t prefetch_hint;

    // Prefetch temporary state.
    uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
};

typedef struct
{
    // Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
    // VMA. Used for batching ATS faults in a vma. This is unused for access
    // counter service requests.
    uvm_page_mask_t read_fault_mask;

    // Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
    // SAM VMA. Used for batching ATS faults in a vma. This is unused for access
    // counter service requests.
    uvm_page_mask_t write_fault_mask;

    // Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
    // of a SAM VMA. Used to return ATS fault status. This is unused for access
    // counter service requests.
    uvm_page_mask_t faults_serviced_mask;

    // Mask of successfully serviced read faults on pages in write_fault_mask.
    // This is unused for access counter service requests.
    uvm_page_mask_t reads_serviced_mask;

    // Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
    // VMA. This is used as input for access counter service requests and output
    // of fault service requests.
    uvm_page_mask_t accessed_mask;

    // Client type of the service requestor.
    uvm_fault_client_type_t client_type;

    // New residency ID of the faulting region.
    uvm_processor_id_t residency_id;

    // New residency NUMA node ID of the faulting region.
    int residency_node;

    struct
    {
        // True if preferred_location was set on this faulting region.
        // The UVM_VA_BLOCK_SIZE sized region within the faulting region bound
        // by the VMA is prefetched if preferred_location was set and if
        // first_touch is true.
        bool has_preferred_location;

        // True if the UVM_VA_BLOCK_SIZE sized region isn't resident on any
        // node. False if any page in the region is resident somewhere.
        bool first_touch;

        // Mask of prefetched pages in a UVM_VA_BLOCK_SIZE aligned region of a
        // SAM VMA.
        uvm_page_mask_t prefetch_pages_mask;

        // PFN info of the faulting region
        unsigned long pfns[PAGES_PER_UVM_VA_BLOCK];

        // Faulting/preferred processor residency mask of the faulting region.
        uvm_page_mask_t residency_mask;

#if defined(NV_MMU_INTERVAL_NOTIFIER)
        // MMU notifier used to compute residency of this faulting region.
        struct mmu_interval_notifier notifier;
#endif

        uvm_va_space_t *va_space;

        // Prefetch temporary state.
        uvm_perf_prefetch_bitmap_tree_t bitmap_tree;
    } prefetch_state;

} uvm_ats_fault_context_t;

struct uvm_fault_service_batch_context_struct
{
    // Array of elements fetched from the GPU fault buffer. The number of
    // elements in this array is exactly max_batch_size
    uvm_fault_buffer_entry_t *fault_cache;

    // Array of pointers to elements in fault cache used for fault
    // preprocessing. The number of elements in this array is exactly
    // max_batch_size
    uvm_fault_buffer_entry_t **ordered_fault_cache;

    // Per uTLB fault information. Used for replay policies and fault
    // cancellation on Pascal
    uvm_fault_utlb_info_t *utlbs;

    // Largest uTLB id seen in a GPU fault
    NvU32 max_utlb_id;

    NvU32 num_cached_faults;

    NvU32 num_coalesced_faults;

    // One of the VA spaces in this batch which had fatal faults. If NULL, no
    // faults were fatal. More than one VA space could have fatal faults, but we
    // pick one to be the target of the cancel sequence.
    uvm_va_space_t *fatal_va_space;

    bool has_throttled_faults;

    NvU32 num_invalid_prefetch_faults;

    NvU32 num_duplicate_faults;

    NvU32 num_replays;

    uvm_ats_fault_context_t ats_context;

    // Unique id (per-GPU) generated for tools events recording
    NvU32 batch_id;

    uvm_tracker_t tracker;

    // Boolean used to avoid sorting the fault batch by instance_ptr if we
    // determine at fetch time that all the faults in the batch report the same
    // instance_ptr
    bool is_single_instance_ptr;

    // Last fetched fault. Used for fault filtering.
    uvm_fault_buffer_entry_t *last_fault;
};

struct uvm_ats_fault_invalidate_struct
{
    bool            tlb_batch_pending;
    uvm_tlb_batch_t tlb_batch;
};

typedef struct
{
    // Fault buffer information and structures provided by RM
    UvmGpuFaultInfo rm_info;

    // Maximum number of faults to be processed in batch before fetching new
    // entries from the GPU buffer
    NvU32 max_batch_size;

    struct uvm_replayable_fault_buffer_info_struct
    {
        // Maximum number of fault entries that can be stored in the buffer
        NvU32 max_faults;

        // Cached value of the GPU GET register to minimize the round-trips
        // over PCIe
        NvU32 cached_get;

        // Cached value of the GPU PUT register to minimize the round-trips over
        // PCIe
        NvU32 cached_put;

        // Policy that determines when GPU replays are issued during normal
        // fault servicing
        uvm_perf_fault_replay_policy_t replay_policy;

        // Tracker used to aggregate replay operations, needed for fault cancel
        // and GPU removal
        uvm_tracker_t replay_tracker;

        // If the ratio of duplicate faults in a batch exceeds
        // replay_update_put_ratio, the PUT pointer is updated before the
        // buffer flush that precedes the replay method.
        NvU32 replay_update_put_ratio;

        // Fault statistics. These fields are per-GPU and most of them are only
        // updated during fault servicing, so they can be incremented safely.
        // Migrations may be triggered by different GPUs, so those counters
        // need to be incremented using atomics
        struct
        {
            NvU64 num_prefetch_faults;

            NvU64 num_read_faults;

            NvU64 num_write_faults;

            NvU64 num_atomic_faults;

            NvU64 num_duplicate_faults;

            atomic64_t num_pages_out;

            atomic64_t num_pages_in;

            NvU64 num_replays;

            NvU64 num_replays_ack_all;
        } stats;

        // Number of uTLBs in the chip
        NvU32 utlb_count;

        // Context structure used to service a GPU fault batch
        uvm_fault_service_batch_context_t batch_service_context;

        // Structure used to coalesce fault servicing in a VA block
        uvm_service_block_context_t block_service_context;

        // Information required to invalidate stale ATS PTEs from the GPU TLBs
        uvm_ats_fault_invalidate_t ats_invalidate;
    } replayable;

    struct uvm_non_replayable_fault_buffer_info_struct
    {
        // Maximum number of fault entries that can be stored in the buffer
        NvU32 max_faults;

        // Tracker used to aggregate clear faulted operations, needed for GPU
        // removal
        uvm_tracker_t clear_faulted_tracker;

        // Buffer used to store elements popped out from the queue shared with
        // RM for fault servicing.
        void *shadow_buffer_copy;

        // Array of elements fetched from the GPU fault buffer. The number of
        // elements in this array is exactly max_batch_size
        uvm_fault_buffer_entry_t *fault_cache;

        // Fault statistics. See replayable fault stats for more details.
        struct
        {
            NvU64 num_read_faults;

            NvU64 num_write_faults;

            NvU64 num_atomic_faults;

            NvU64 num_physical_faults;

            atomic64_t num_pages_out;

            atomic64_t num_pages_in;
        } stats;

        // Tracker which temporarily holds the work pushed to service faults
        uvm_tracker_t fault_service_tracker;

        // Structure used to coalesce fault servicing in a VA block
        uvm_service_block_context_t block_service_context;

        // Unique id (per-GPU) generated for tools events recording
        NvU32 batch_id;

        // Information required to service ATS faults.
        uvm_ats_fault_context_t ats_context;

        // Information required to invalidate stale ATS PTEs from the GPU TLBs
        uvm_ats_fault_invalidate_t ats_invalidate;
    } non_replayable;

    // Flag that tells if prefetch faults are enabled in HW
    bool prefetch_faults_enabled;

    // Timestamp of the last time prefetch faults were disabled
    NvU64 disable_prefetch_faults_timestamp;
} uvm_fault_buffer_info_t;
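
// Illustrative sketch of how the cached GET/PUT indices above are meant to be
// used (read_fault_entry() and write_get_register() are hypothetical helpers,
// not the driver's actual fetch path): the fault buffer behaves as a ring
// buffer indexed by the GET/PUT registers, and caching them lets a whole batch
// be fetched with a single register write instead of one round-trip per entry.
//
//     uvm_fault_buffer_info_t *bi = &parent_gpu->fault_buffer_info;
//     NvU32 get = bi->replayable.cached_get;
//     NvU32 fetched = 0;
//
//     while (get != bi->replayable.cached_put && fetched < bi->max_batch_size) {
//         read_fault_entry(parent_gpu, get, &batch->fault_cache[fetched++]);
//         get = (get + 1) % bi->replayable.max_faults;
//     }
//
//     bi->replayable.cached_get = get;
//     write_get_register(parent_gpu, get);   // One PCIe write for the whole batch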

struct uvm_access_counter_service_batch_context_struct
{
    uvm_access_counter_buffer_entry_t *notification_cache;

    NvU32 num_cached_notifications;

    struct
    {
        uvm_access_counter_buffer_entry_t   **notifications;

        NvU32                             num_notifications;

        // Boolean used to avoid sorting the notification batch by instance_ptr
        // if we
        // determine at fetch time that all the access counter notifications in
        // the batch report the same instance_ptr
        bool is_single_instance_ptr;
    } virt;

    struct
    {
        uvm_access_counter_buffer_entry_t    **notifications;
        uvm_reverse_map_t                      *translations;

        NvU32                              num_notifications;

        // Boolean used to avoid sorting the notification batch by aperture if
        // we
        // determine at fetch time that all the access counter notifications in
        // the batch report the same aperture
        bool                              is_single_aperture;
    } phys;

    // Helper page mask to compute the accessed pages within a VA block
    uvm_page_mask_t accessed_pages;

    // Structure used to coalesce access counter servicing in a VA block
    uvm_service_block_context_t block_service_context;

    // Structure used to service access counter migrations in an ATS block.
    uvm_ats_fault_context_t ats_context;

    // Unique id (per-GPU) generated for tools events recording
    NvU32 batch_id;
};

typedef struct
{
    // Values used to configure access counters in RM
    struct
    {
        UVM_ACCESS_COUNTER_GRANULARITY  granularity;
        UVM_ACCESS_COUNTER_USE_LIMIT    use_limit;
    } rm;

    // The following values are precomputed by the access counter notification
    // handling code. See comments for UVM_MAX_TRANSLATION_SIZE in
    // uvm_gpu_access_counters.c for more details.
    NvU64 translation_size;

    NvU64 translations_per_counter;

    NvU64 sub_granularity_region_size;

    NvU64 sub_granularity_regions_per_translation;
} uvm_gpu_access_counter_type_config_t;

typedef struct
{
    UvmGpuAccessCntrInfo rm_info;

    NvU32 max_notifications;

    NvU32 max_batch_size;

    // Cached value of the GPU GET register to minimize the round-trips
    // over PCIe
    NvU32 cached_get;

    // Cached value of the GPU PUT register to minimize the round-trips over
    // PCIe
    NvU32 cached_put;

    // Tracker used to aggregate access counters clear operations, needed for
    // GPU removal
    uvm_tracker_t clear_tracker;

    // Current access counter configuration. During normal operation this
    // information is computed once during GPU initialization. However, tests
    // may override it to try different configuration values.
    struct
    {
        uvm_gpu_access_counter_type_config_t mimc;
        uvm_gpu_access_counter_type_config_t momc;

        NvU32                                threshold;
    } current_config;

    // Access counter statistics
    struct
    {
        atomic64_t num_pages_out;

        atomic64_t num_pages_in;
    } stats;

    // Ignoring access counters means that notifications are left in the HW
    // buffer without being serviced.  Requests to ignore access counters
    // are counted since the suspend path inhibits access counter interrupts,
    // and the resume path needs to know whether to reenable them.
    NvU32 notifications_ignored_count;

    // Context structure used to service a GPU access counter batch
    uvm_access_counter_service_batch_context_t batch_service_context;

    // VA space that reconfigured the access counters configuration, if any.
    // Used in builtin tests only, to avoid reconfigurations from different
    // processes
    //
    // Locking: both readers and writers must hold the access counters ISR lock
    uvm_va_space_t *reconfiguration_owner;
} uvm_access_counter_buffer_info_t;
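
// One plausible usage sketch of notifications_ignored_count (the helper names
// are hypothetical): ignore requests are counted so that the resume path only
// reenables the interrupts once the last outstanding ignore is dropped.
//
//     access_counters_ignore(gpu);        // ++notifications_ignored_count; leave
//                                         // notifications unserviced in the HW buffer
//     ...
//     access_counters_unignore(gpu);      // --notifications_ignored_count; reenable
//                                         // interrupts only when the count reaches zero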

typedef struct
{
    // VA where the identity mapping should be mapped in the internal VA
    // space managed by uvm_gpu_t.address_space_tree (see below).
    NvU64 base;

    // Page tables with the mapping.
    uvm_page_table_range_vec_t *range_vec;

    // Used during init to indicate whether the mapping has been fully
    // initialized.
    bool ready;
} uvm_gpu_identity_mapping_t;

// Root chunk mapping
typedef struct
{
    // Page table range representation of the mapping. Because a root chunk
    // fits into a single 2MB page, in practice the range consists of a single
    // 2MB PTE.
    uvm_page_table_range_t *range;

    // Number of mapped pages of size PAGE_SIZE.
    NvU32 num_mapped_pages;
} uvm_gpu_root_chunk_mapping_t;
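
// Illustrative sketch (it assumes a root chunk spans 2MB, matching the single
// 2MB PTE noted above, and uses the dynamic flat mapping fields declared in
// uvm_gpu_t below): the mapping for a vidmem physical address can be located
// by direct indexing.
//
//     size_t index = (size_t)(pa / (2ull * 1024 * 1024));
//     uvm_gpu_root_chunk_mapping_t *rcm = &gpu->root_chunk_mappings.array[index];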

typedef enum
{
    UVM_GPU_LINK_INVALID = 0,
    UVM_GPU_LINK_PCIE,
    UVM_GPU_LINK_NVLINK_1,
    UVM_GPU_LINK_NVLINK_2,
    UVM_GPU_LINK_NVLINK_3,
    UVM_GPU_LINK_NVLINK_4,
    UVM_GPU_LINK_C2C,
    UVM_GPU_LINK_MAX
} uvm_gpu_link_type_t;

// UVM does not support P2P copies on pre-Pascal GPUs. Pascal+ GPUs only
// support virtual addresses in P2P copies. Therefore, a peer identity mapping
// needs to be created.
// Ampere+ GPUs support physical peer copies, too, so identity mappings are not
// needed
typedef enum
{
    UVM_GPU_PEER_COPY_MODE_UNSUPPORTED,
    UVM_GPU_PEER_COPY_MODE_VIRTUAL,
    UVM_GPU_PEER_COPY_MODE_PHYSICAL,
    UVM_GPU_PEER_COPY_MODE_COUNT
} uvm_gpu_peer_copy_mode_t;
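
// Illustrative sketch (the selection logic is hypothetical and peer_va/peer_pa
// are placeholders): a copy path can use peer_copy_mode to decide how to
// address a peer GPU's memory.
//
//     switch (gpu->parent->peer_copy_mode) {
//         case UVM_GPU_PEER_COPY_MODE_VIRTUAL:
//             // Requires the peer identity mapping described above
//             src = uvm_gpu_address_virtual(peer_va);
//             break;
//         case UVM_GPU_PEER_COPY_MODE_PHYSICAL:
//             src = uvm_gpu_address_physical(UVM_APERTURE_PEER_0, peer_pa);
//             break;
//         default:
//             return NV_ERR_NOT_SUPPORTED;
//     }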

struct uvm_gpu_struct
{
    uvm_parent_gpu_t *parent;

    // Refcount of the gpu, i.e. how many times it has been retained. This is
    // roughly a count of how many times it has been registered with a VA space,
    // except that some paths retain the GPU temporarily without a VA space.
    //
    // While this is >0, the GPU can't be removed. This differs from gpu_kref,
    // which merely prevents the uvm_gpu_t object from being freed.
    //
    // In most cases this count is protected by the global lock: retaining a GPU
    // from a UUID and any release require the global lock to be taken. But it's
    // also useful for a caller to retain a GPU they've already retained, in
    // which case there's no need to take the global lock. This can happen when
    // an operation needs to drop the VA space lock but continue operating on a
    // GPU. This is an atomic variable to handle those cases.
    //
    // Security note: keep it as a 64-bit counter to prevent overflow cases (a
    // user can create a lot of va spaces and register the gpu with them).
    atomic64_t retained_count;

    // A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS); this is a copy
    // of the parent's id.
    uvm_gpu_id_t id;

    // A unique uvm global_gpu id in range [1, UVM_GLOBAL_ID_MAX_PROCESSORS)
    uvm_global_gpu_id_t global_id;

    // Should be UVM_GPU_MAGIC_VALUE. Used for memory checking.
    NvU64 magic;

    struct
    {
        // The amount of memory the GPU has in total, in bytes. If the GPU is in
        // ZeroFB testing mode, this will be 0.
        NvU64 size;

        // Max (inclusive) physical address of this GPU's memory that the driver
        // can allocate through PMM (PMA).
        NvU64 max_allocatable_address;

        struct
        {
            // True if the platform supports HW coherence and the GPU's memory
            // is exposed as a NUMA node to the kernel.
            bool enabled;
            unsigned int node_id;
        } numa;
    } mem_info;

    struct
    {
        // Big page size used by the internal UVM VA space
        // Notably it may be different than the big page size used by a user's
        // VA space in general.
        NvU32 internal_size;
    } big_page;

    // Mapped registers needed to obtain the current GPU timestamp
    struct
    {
        volatile NvU32 *time0_register;
        volatile NvU32 *time1_register;
    } time;

    // Identity peer mappings are only defined when
    // peer_copy_mode == UVM_GPU_PEER_COPY_MODE_VIRTUAL
    uvm_gpu_identity_mapping_t peer_mappings[UVM_ID_MAX_GPUS];

    struct
    {
        // Mask of peer_gpus set
        //
        // We can use a regular processor id because P2P is not allowed between
        // partitioned GPUs when SMC is enabled
        uvm_processor_mask_t peer_gpu_mask;

        // lazily-populated array of peer GPUs, indexed by the peer's GPU index
        uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];

        // Leaf spinlock used to synchronize access to the peer_gpus table so
        // that it can be safely accessed from the access counters bottom half
        uvm_spinlock_t peer_gpus_lock;
    } peer_info;

    // Maximum number of subcontexts supported
    NvU32 max_subcontexts;

    // RM address space handle used in many of the UVM/RM APIs
    // Represents a GPU VA space within rm_device.
    //
    // In SR-IOV heavy, proxy channels are not associated with this address
    // space.
    uvmGpuAddressSpaceHandle rm_address_space;

    // Page tree used for the internal UVM VA space shared with RM
    uvm_page_tree_t address_space_tree;

    // Set to true during add_gpu() as soon as the RM's address space is moved
    // to the address_space_tree.
    bool rm_address_space_moved_to_page_tree;

    uvm_gpu_semaphore_pool_t *semaphore_pool;

    uvm_gpu_semaphore_pool_t *secure_semaphore_pool;

    uvm_channel_manager_t *channel_manager;

    uvm_pmm_gpu_t pmm;

    // Flat linear mapping covering vidmem. This is a kernel mapping that is
    // only created in certain configurations.
    //
    // There are two mutually exclusive versions of the mapping. The simplest
    // version covers the entire GPU memory, and it is created during GPU
    // initialization. The dynamic version is a partial vidmem mapping that
    // creates and destroys mappings to GPU root chunks on demand.
    union
    {
        // Static mapping covering the whole GPU memory.
        uvm_gpu_identity_mapping_t static_flat_mapping;

        // Dynamic mapping of GPU memory.
        struct
        {
            // Array of root chunk mappings.
            uvm_gpu_root_chunk_mapping_t *array;

            // Number of elements in the array.
            size_t count;

            // Each bit in the bitlock protects a single root chunk mapping.
            uvm_bit_locks_t bitlocks;

        } root_chunk_mappings;
    };

    // Linear sysmem mappings. Mappings are added on demand, and removed upon
    // GPU deinitialization. The mappings are added to UVM's internal address
    // space i.e. they are kernel mappings.
    //
    // Only used in SR-IOV heavy.
    struct
    {
        // Size of each mapping, in bytes.
        NvU64 mapping_size;

        // Array of sysmem mappings.
        uvm_gpu_identity_mapping_t *array;

        // Number of elements in the array.
        size_t count;

        // Each bit in the bitlock protects a sysmem mapping.
        uvm_bit_locks_t bitlocks;
    } sysmem_mappings;

    // Reverse lookup table used to query the user mapping associated with a
    // sysmem (DMA) physical address.
    //
    // The system memory mapping information referred to by this field is
    // different from that of sysmem_mappings, because it relates to user
    // mappings (instead of kernel), and it is used in most configurations.
    uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;

    struct
    {
        uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;

        // Dummy memory used to store the IV contents during CE encryption.
        // This memory location also only becomes available after the CE
        // channels exist, because we use them to write PTEs for allocations
        // such as this one.
        // This location is used when a physical addressing for the IV buffer
        // is required. See uvm_hal_hopper_ce_encrypt().
        uvm_mem_t *iv_mem;

        // Dummy memory used to store the IV contents during CE encryption.
        // Because of the limitations of `iv_mem', and the need to have such a
        // buffer at channel initialization, we use an RM allocation.
        // This location is used when a virtual addressing for the IV buffer
        // is required. See uvm_hal_hopper_ce_encrypt().
        uvm_rm_mem_t *iv_rm_mem;
    } conf_computing;

    // ECC handling
    // In order to trap ECC errors as soon as possible the driver has the hw
    // interrupt register mapped directly. If an ECC interrupt is ever noticed
    // to be pending, then the UVM driver needs to:
    //
    //   1) ask RM to service interrupts, and then
    //   2) inspect the ECC error notifier state.
    //
    // Notably, checking for channel errors is not enough, because ECC errors
    // can be pending, even after a channel has become idle.
    //
    // See more details in uvm_gpu_check_ecc_error().
    struct
    {
        // Does the GPU have ECC enabled?
        bool enabled;

        // Direct mapping of the 32-bit part of the hw interrupt tree that has
        // the ECC bits.
        volatile NvU32 *hw_interrupt_tree_location;

        // Mask to get the ECC interrupt bits from the 32-bits above.
        NvU32 mask;

        // Set to true by RM when a fatal ECC error is encountered (requires
        // asking RM to service pending interrupts to be current).
        NvBool *error_notifier;
    } ecc;

    struct
    {
        NvU32 swizz_id;

        uvmGpuSessionHandle rm_session_handle;

        // RM device handle used in many of the UVM/RM APIs.
        //
        // Do not read this field directly, use uvm_gpu_device_handle instead.
        uvmGpuDeviceHandle rm_device;
    } smc;

    struct
    {
        struct proc_dir_entry *dir;

        struct proc_dir_entry *dir_symlink;

        struct proc_dir_entry *info_file;

        struct proc_dir_entry *dir_peers;
    } procfs;

    // Placeholder for per-GPU performance heuristics information
    uvm_perf_module_data_desc_t perf_modules_data[UVM_PERF_MODULE_TYPE_COUNT];

    // Force pushbuffer's GPU VA to be >= 1TB; used only for testing purposes.
    bool uvm_test_force_upper_pushbuffer_segment;
};

struct uvm_parent_gpu_struct
{
    // Reference count for how many places are holding on to a parent GPU
    // (internal to the UVM driver).  This includes any GPUs we know about, not
    // just GPUs that are registered with a VA space.  Most GPUs end up being
    // registered, but there are brief periods when they are not registered,
    // such as during interrupt handling, and in add_gpu() or remove_gpu().
    nv_kref_t gpu_kref;

    // The number of uvm_gpu_ts referencing this uvm_parent_gpu_t.
    NvU32 num_retained_gpus;

    uvm_gpu_t *gpus[UVM_ID_MAX_SUB_PROCESSORS];

    // Bitmap of valid child entries in the gpus[] table.  Used to retrieve a
    // usable child GPU in bottom-halves.
    DECLARE_BITMAP(valid_gpus, UVM_ID_MAX_SUB_PROCESSORS);

    // The gpu's uuid
    NvProcessorUuid uuid;

    // Nice printable name including the uvm gpu id, ascii name from RM and uuid
    char name[UVM_GPU_NICE_NAME_BUFFER_LENGTH];

    // GPU information provided by RM (architecture, implementation,
    // hardware classes, etc.).
    UvmGpuInfo rm_info;

    // A unique uvm gpu id in range [1, UVM_ID_MAX_PROCESSORS)
    uvm_gpu_id_t id;

    // Reference to the Linux PCI device
    //
    // The reference to the PCI device remains valid as long as the GPU is
    // registered with RM's Linux layer (between nvUvmInterfaceRegisterGpu() and
    // nvUvmInterfaceUnregisterGpu()).
    struct pci_dev *pci_dev;

    // NVLINK Processing Unit (NPU) on PowerPC platforms. The NPU is a
    // collection of CPU-side PCI devices which bridge GPU NVLINKs and the CPU
    // memory bus.
    //
    // There is one PCI device per NVLINK. A set of NVLINKs connects to a single
    // GPU, and all NVLINKs for a given socket are collected logically under
    // this UVM NPU because some resources (such as register mappings) are
    // shared by all those NVLINKs. This means multiple GPUs may connect to the
    // same UVM NPU.
    uvm_ibm_npu_t *npu;

    // On kernels with NUMA support, this entry contains the closest CPU NUMA
    // node to this GPU. Otherwise, the value will be -1.
    int closest_cpu_numa_node;

    // RM device handle used in many of the UVM/RM APIs.
    //
    // Do not read this field directly, use uvm_gpu_device_handle instead.
    uvmGpuDeviceHandle rm_device;

    // The physical address range addressable by the GPU
    //
    // The GPU has its NV_PFB_XV_UPPER_ADDR register set by RM to
    // dma_addressable_start (in bifSetupDmaWindow_IMPL()) and hence when
    // referencing sysmem from the GPU, dma_addressable_start should be
    // subtracted from the physical address. The DMA mapping helpers like
    // uvm_gpu_map_cpu_pages() and uvm_gpu_dma_alloc_page() take care of that.
    NvU64 dma_addressable_start;
    NvU64 dma_addressable_limit;
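
    // Worked example with hypothetical values: if dma_addressable_start is 4GB
    // and a sysmem page lives at physical address 6GB, the GPU references it
    // at DMA address 2GB. uvm_gpu_map_cpu_pages() and uvm_gpu_dma_alloc_page()
    // return addresses with this offset already applied.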

    // Total size (in bytes) of physically mapped (with uvm_gpu_map_cpu_pages)
    // sysmem pages, used for leak detection.
    atomic64_t mapped_cpu_pages_size;

    // Hardware Abstraction Layer
    uvm_host_hal_t *host_hal;
    uvm_ce_hal_t *ce_hal;
    uvm_arch_hal_t *arch_hal;
    uvm_fault_buffer_hal_t *fault_buffer_hal;
    uvm_access_counter_buffer_hal_t *access_counter_buffer_hal;
    uvm_sec2_hal_t *sec2_hal;

    // Whether CE supports physical addressing mode for writes to vidmem
    bool ce_phys_vidmem_write_supported;

    uvm_gpu_peer_copy_mode_t peer_copy_mode;

    // Virtualization mode of the GPU.
    UVM_VIRT_MODE virt_mode;

    // Whether the GPU can trigger faults on prefetch instructions
    bool prefetch_fault_supported;

    // Number of membars required to flush out HSHUB following a TLB invalidate
    NvU32 num_hshub_tlb_invalidate_membars;

    // Whether the channels can configure GPFIFO in vidmem
    bool gpfifo_in_vidmem_supported;

    bool replayable_faults_supported;

    bool non_replayable_faults_supported;

    bool access_counters_supported;

    bool fault_cancel_va_supported;

    // True if the GPU has hardware support for scoped atomics
    bool scoped_atomics_supported;

    // If true, a HW method can be used to clear a faulted channel.
    // If false, then the GPU supports clearing faulted channels using registers
    // instead of a HW method.
    // This value is only defined for GPUs that support non-replayable faults.
    bool has_clear_faulted_channel_method;

    // If true, a SW method can be used to clear a faulted channel.
    // If false, the HW method or the registers (whichever is available
    // according to has_clear_faulted_channel_method) needs to be used.
    //
    // This value is only defined for GPUs that support non-replayable faults.
    bool has_clear_faulted_channel_sw_method;

    bool sparse_mappings_supported;

    // Ampere(GA100) requires map->invalidate->remap->invalidate for page size
    // promotion
    bool map_remap_larger_page_promotion;

    bool plc_supported;

    // If true, page_tree initialization pre-populates no_ats_ranges. It only
    // affects ATS systems.
    bool no_ats_range_required;

    // Parameters used by the TLB batching API
    struct
    {
        // Is the targeted (single page) VA invalidate supported at all?
        NvBool va_invalidate_supported;

        // Is the VA range invalidate supported?
        NvBool va_range_invalidate_supported;

        union
        {
            // Maximum (inclusive) number of single page invalidations before
            // falling back to invalidate all
            NvU32 max_pages;

            // Maximum (inclusive) number of range invalidations before falling
            // back to invalidate all
            NvU32 max_ranges;
        };
    } tlb_batch;
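
    // Illustrative sketch (the helpers are hypothetical and the single-page
    // limit is assumed to apply): the batching code falls back to a full
    // invalidate once a batch exceeds the limits above, or when targeted
    // invalidates are not supported at all.
    //
    //     if (!parent_gpu->tlb_batch.va_invalidate_supported ||
    //         batch_page_count > parent_gpu->tlb_batch.max_pages)
    //         issue_invalidate_all();
    //     else
    //         issue_per_page_invalidates();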

    // Largest VA (exclusive) which can be used for channel buffer mappings
    NvU64 max_channel_va;

    // Largest VA (exclusive) that Host can operate on.
    NvU64 max_host_va;

    // Indicates whether the GPU can map sysmem with pages larger than 4k
    bool can_map_sysmem_with_large_pages;

    // VA base and size of the RM managed part of the internal UVM VA space.
    //
    // The internal UVM VA is shared with RM by RM controlling some of the top
    // level PDEs and leaving the rest for UVM to control.
    // On Pascal a single top level PDE covers 128 TB of VA and given that
    // semaphores and other allocations limited to 40bit are currently allocated
    // through RM, RM needs to control the [0, 128TB) VA range at least for now.
    // On Maxwell, RM's VA is limited to [0, 128GB), which should easily fit
    // all RM allocations and leave enough space for UVM.
    NvU64 rm_va_base;
    NvU64 rm_va_size;

    // Base and size of the GPU VA used for uvm_mem_t allocations mapped in the
    // internal address_space_tree.
    NvU64 uvm_mem_va_base;
    NvU64 uvm_mem_va_size;

    // Base of the GPU VAs used for the vidmem and sysmem flat mappings.
    NvU64 flat_vidmem_va_base;
    NvU64 flat_sysmem_va_base;

    // Bitmap of allocation sizes for user memory supported by a GPU. PAGE_SIZE
    // is guaranteed to be both present and the smallest size.
    uvm_chunk_sizes_mask_t mmu_user_chunk_sizes;

    // Bitmap of allocation sizes that could be requested by the page tree for
    // a GPU
    uvm_chunk_sizes_mask_t mmu_kernel_chunk_sizes;

    struct
    {
        struct proc_dir_entry *dir;

        struct proc_dir_entry *fault_stats_file;

        struct proc_dir_entry *access_counters_file;
    } procfs;

    // Interrupt handling state and locks
    uvm_isr_info_t isr;

    // Fault buffer info. This is only valid if supports_replayable_faults is
    // set to true.
    uvm_fault_buffer_info_t fault_buffer_info;

    // PMM lazy free processing queue.
    // TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
    nv_kthread_q_t lazy_free_q;

    // Access counter buffer info. This is only valid if
    // supports_access_counters is set to true.
    uvm_access_counter_buffer_info_t access_counter_buffer_info;

    // Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
    NvU32 utlb_per_gpc_count;

    // In order to service GPU faults, UVM must be able to obtain the VA
    // space for each reported fault. The fault packet contains the
    // instance_ptr of the channel that was bound when the SMs triggered
    // the fault. On fault any instance pointer in the TSG may be
    // reported. This is a problem on Volta, which allows different channels
    // in the TSG to be bound to different VA spaces in order to support
    // subcontexts. In order to be able to obtain the correct VA space, HW
    // provides the subcontext id (or VEID) in addition to the instance_ptr.
    //
    // Summary:
    //
    // 1) Channels in a TSG may be in different VA spaces, identified by their
    // subcontext ID.
    // 2) Different subcontext IDs may map to the same or different VA spaces.
    // 3) On fault, any instance pointer in the TSG may be reported. The
    // reported subcontext ID identifies which VA space within the TSG actually
    // encountered the fault.
    //
    // Thus, UVM needs to keep track of all the instance pointers that belong
    // to the same TSG. We use two tables:
    //
    // - instance_ptr_table (instance_ptr -> subctx_info) this table maps
    // instance pointers to the subcontext info descriptor for the channel. If
    // the channel belongs to a subcontext, this descriptor will contain all
    // the VA spaces for the subcontexts in the same TSG. If the channel does
    // not belong to a subcontext, it will only contain a pointer to its VA
    // space.
    // - tsg_table (tsg_id -> subctx_info): this table also stores the
    // subctx information, but in this case it is indexed by TSG ID. Thus,
    // when a new channel bound to a subcontext is registered, it will check
    // first in this table if the subcontext information descriptor for its TSG
    // already exists, otherwise it will create it. Channels not bound to
    // subcontexts will not use this table.
    //
    // The bottom half reads the tables under
    // isr.replayable_faults_handler.lock, but a separate lock is necessary
    // because entries are added and removed from the table under the va_space
    // lock, and we can't take isr.replayable_faults_handler.lock while holding
    // the va_space lock.
    uvm_rb_tree_t tsg_table;

    uvm_rb_tree_t instance_ptr_table;
    uvm_spinlock_t instance_ptr_table_lock;
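
    // Conceptual sketch of the lookup described above (the subctx_info field
    // names and helpers here are hypothetical): the bottom half resolves a
    // fault's VA space by instance pointer first and, for subcontext channels,
    // by the reported VEID.
    //
    //     info = instance_ptr_table_lookup(parent_gpu, fault->instance_ptr);
    //     va_space = channel_is_in_subctx(info) ? info->subctx_va_spaces[veid]
    //                                           : info->va_space;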

    // This is set to true if the GPU belongs to an SLI group.
    bool sli_enabled;

    struct
    {
        bool supported;

        bool enabled;
    } smc;

    // Global statistics. These fields are per-GPU and most of them are only
    // updated during fault servicing, and can be safely incremented.
    struct
    {
        NvU64          num_replayable_faults;

        NvU64      num_non_replayable_faults;

        atomic64_t             num_pages_out;

        atomic64_t              num_pages_in;
    } stats;

    // Structure to hold nvswitch specific information. In an nvswitch
    // environment, rather than using the peer-id field of the PTE (which can
    // only address 8 gpus), all gpus are assigned a 47-bit physical address
    // space by the fabric manager. Accesses to these physical address spaces
    // are routed through the switch to the
    // corresponding peer.
    struct
    {
        bool is_nvswitch_connected;

        // 47-bit fabric memory physical offset that peer gpus need to access
        // to read a peer's memory
        NvU64 fabric_memory_window_start;
    } nvswitch_info;

    struct
    {
        // Note that this represents the link to system memory, not the link the
        // system used to discover the GPU. There are some cases such as NVLINK2
        // where the GPU is still on the PCIe bus, but it accesses memory over
        // this link rather than PCIe.
        uvm_gpu_link_type_t link;
        NvU32 link_rate_mbyte_per_s;

        // Range in the system physical address space where the memory of this
        // GPU is exposed as coherent. memory_window_end is inclusive.
        // memory_window_start == memory_window_end indicates that no window is
        // present (coherence is not supported).
        NvU64 memory_window_start;
        NvU64 memory_window_end;
    } system_bus;

    // WAR to issue ATS TLB invalidation commands ourselves.
    struct
    {
        uvm_mutex_t smmu_lock;
        struct page *smmu_cmdq;
        void __iomem *smmu_cmdqv_base;
        unsigned long smmu_prod;
        unsigned long smmu_cons;
    } smmu_war;
};

static const char *uvm_gpu_name(uvm_gpu_t *gpu)
{
    return gpu->parent->name;
}

static const NvProcessorUuid *uvm_gpu_uuid(uvm_gpu_t *gpu)
{
    return &gpu->parent->uuid;
}

static uvmGpuDeviceHandle uvm_gpu_device_handle(uvm_gpu_t *gpu)
{
    if (gpu->parent->smc.enabled)
        return gpu->smc.rm_device;
    return gpu->parent->rm_device;
}

struct uvm_gpu_peer_struct
{
    // The fields in this global structure can only be inspected under one of
    // the following conditions:
    //
    // - The VA space lock is held for either read or write, both GPUs are
    //   registered in the VA space, and the corresponding bit in the
    //   va_space.enabled_peers bitmap is set.
    //
    // - The global lock is held.
    //
    // - While the global lock was held in the past, the two GPUs were detected
    //   to be NVLINK peers and were both retained.
    //
    // - While the global lock was held in the past, the two GPUs were detected
    //   to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was called.
    //
    // - The peer_gpus_lock is held on one of the GPUs. In this case, the other
    //   GPU must be read from the original GPU's peer_gpus table. The fields
    //   will not change while the lock is held, but they may no longer be valid
    //   because the other GPU might be in teardown.

    // Peer Id associated with this device w.r.t. a peer GPU.
    // Note: peerId (A -> B) != peerId (B -> A)
    // peer_id[0] from min(gpu_id_1, gpu_id_2) -> max(gpu_id_1, gpu_id_2)
    // peer_id[1] from max(gpu_id_1, gpu_id_2) -> min(gpu_id_1, gpu_id_2)
    NvU8 peer_ids[2];
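
    // Illustrative sketch: following the convention above, the peer id for
    // accesses from local_gpu to remote_gpu is selected by id order
    // (local_gpu/remote_gpu/peer_caps are placeholders).
    //
    //     NvU8 peer_id = uvm_id_value(local_gpu->id) < uvm_id_value(remote_gpu->id) ?
    //                        peer_caps->peer_ids[0] : peer_caps->peer_ids[1];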

    // Indirect peers are GPUs which can coherently access each other's memory
    // over NVLINK, but are routed through the CPU using the SYS aperture rather
    // than a PEER aperture
    NvU8 is_indirect_peer : 1;

    // The link type between the peer GPUs, currently either PCIe or NVLINK.
    // This field is used to determine whether this peer struct has been
    // initialized (link_type != UVM_GPU_LINK_INVALID). NVLink peers are
    // initialized at GPU registration time. PCIe peers are initialized when
    // the refcount below goes from 0 to 1.
    uvm_gpu_link_type_t link_type;

    // Maximum unidirectional bandwidth between the peers in megabytes per
    // second, not taking into account the protocols' overhead. The reported
    // bandwidth for indirect peers is zero. See UvmGpuP2PCapsParams.
    NvU32 total_link_line_rate_mbyte_per_s;

    // For PCIe, the number of times that this has been retained by a VA space.
    // For NVLINK this will always be 1.
    NvU64 ref_count;

    // This handle gets populated when enable_peer_access successfully creates
    // an NV50_P2P object. disable_peer_access resets the same on the object
    // deletion.
    NvHandle p2p_handle;

    struct
    {
        struct proc_dir_entry *peer_file[2];
        struct proc_dir_entry *peer_symlink_file[2];

        // GPU-A <-> GPU-B link is bidirectional, pairs[x][0] is always the
        // local GPU, while pairs[x][1] is the remote GPU. The table shall be
        // filled like so: [[GPU-A, GPU-B], [GPU-B, GPU-A]].
        uvm_gpu_t *pairs[2][2];
    } procfs;
};

// Initialize global gpu state
NV_STATUS uvm_gpu_init(void);

// Deinitialize global state (called from module exit)
void uvm_gpu_exit(void);

NV_STATUS uvm_gpu_init_va_space(uvm_va_space_t *va_space);

void uvm_gpu_exit_va_space(uvm_va_space_t *va_space);

static unsigned int uvm_gpu_numa_node(uvm_gpu_t *gpu)
{
    UVM_ASSERT(gpu->mem_info.numa.enabled);
    return gpu->mem_info.numa.node_id;
}

static uvm_gpu_phys_address_t uvm_gpu_page_to_phys_address(uvm_gpu_t *gpu, struct page *page)
{
    unsigned long sys_addr = page_to_pfn(page) << PAGE_SHIFT;
    unsigned long gpu_offset = sys_addr - gpu->parent->system_bus.memory_window_start;

    UVM_ASSERT(page_to_nid(page) == uvm_gpu_numa_node(gpu));
    UVM_ASSERT(sys_addr >= gpu->parent->system_bus.memory_window_start);
    UVM_ASSERT(sys_addr + PAGE_SIZE - 1 <= gpu->parent->system_bus.memory_window_end);

    return uvm_gpu_phys_address(UVM_APERTURE_VID, gpu_offset);
}

// Note that there is a uvm_gpu_get() function defined in uvm_global.h to break
// a circular dep between global and gpu modules.

// Get a uvm_gpu_t by UUID.  This returns NULL if the GPU is not present.  This
// is the general purpose call that should be used normally.
// That is, unless a uvm_gpu_t for a specific SMC partition needs to be
// retrieved, in which case uvm_gpu_get_by_parent_and_swizz_id() must be used
// instead.
//
// LOCKING: requires the global lock to be held
uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);

// Get a uvm_parent_gpu_t by UUID.  Like uvm_gpu_get_by_uuid(), this function
// returns NULL if the GPU has not been registered.
//
// LOCKING: requires the global lock to be held
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);

// Like uvm_parent_gpu_get_by_uuid(), but this variant does not assertion-check
// that the caller is holding the global_lock.  This is a narrower-purpose
// function, and is only intended for use by the top-half ISR, or other very
// limited cases.
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);

// Get the uvm_gpu_t for a partition by parent and swizzId. This returns NULL if
// the partition hasn't been registered. This call needs to be used instead of
// uvm_gpu_get_by_uuid() when a specific partition is targeted.
//
// LOCKING: requires the global lock to be held
uvm_gpu_t *uvm_gpu_get_by_parent_and_swizz_id(uvm_parent_gpu_t *parent_gpu, NvU32 swizz_id);

// Retain a gpu by uuid
// Returns the retained uvm_gpu_t in gpu_out on success
//
// LOCKING: Takes and releases the global lock for the caller.
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
                                 const uvm_rm_user_object_t *user_rm_device,
                                 uvm_gpu_t **gpu_out);

// Retain a gpu which is known to already be retained. Does NOT require the
// global lock to be held.
void uvm_gpu_retain(uvm_gpu_t *gpu);

// Release a gpu
// LOCKING: requires the global lock to be held
void uvm_gpu_release_locked(uvm_gpu_t *gpu);

// Like uvm_gpu_release_locked, but takes and releases the global lock for the
// caller.
void uvm_gpu_release(uvm_gpu_t *gpu);

static NvU64 uvm_gpu_retained_count(uvm_gpu_t *gpu)
{
    return atomic64_read(&gpu->retained_count);
}
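
// Illustrative usage sketch: a caller that already holds a retained GPU can
// keep it across dropping the VA space lock by pairing the calls above (the
// VA space locking is shown only as an assumption about the caller's context).
//
//     uvm_gpu_retain(gpu);               // No global lock needed: gpu is already retained
//     uvm_va_space_up_read(va_space);
//     ...                                // gpu cannot be removed here
//     uvm_gpu_release(gpu);              // Takes and releases the global lock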

// Decrease the refcount on the parent GPU object, and actually delete the object
// if the refcount hits zero.
void uvm_parent_gpu_kref_put(uvm_parent_gpu_t *gpu);

// Calculates peer table index using GPU ids.
NvU32 uvm_gpu_peer_table_index(uvm_gpu_id_t gpu_id1, uvm_gpu_id_t gpu_id2);

// Either retains an existing PCIe peer entry or creates a new one. In both
// cases the two GPUs are also each retained.
// LOCKING: requires the global lock to be held
NV_STATUS uvm_gpu_retain_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);

// Releases a PCIe peer entry and the two GPUs.
// LOCKING: requires the global lock to be held
void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);

// Get the aperture for local_gpu to use to map memory resident on remote_gpu.
// They must not be the same gpu.
uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);

// Get the processor id accessible by the given GPU for the given physical
// address.
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);

// Get the P2P capabilities between the gpus with the given indexes
uvm_gpu_peer_t *uvm_gpu_index_peer_caps(uvm_gpu_id_t gpu_id1, uvm_gpu_id_t gpu_id2);

// Get the P2P capabilities between the given gpus
static uvm_gpu_peer_t *uvm_gpu_peer_caps(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1)
{
    return uvm_gpu_index_peer_caps(gpu0->id, gpu1->id);
}

static bool uvm_gpus_are_nvswitch_connected(uvm_gpu_t *gpu1, uvm_gpu_t *gpu2)
{
    if (gpu1->parent->nvswitch_info.is_nvswitch_connected && gpu2->parent->nvswitch_info.is_nvswitch_connected) {
        UVM_ASSERT(uvm_gpu_peer_caps(gpu1, gpu2)->link_type >= UVM_GPU_LINK_NVLINK_2);
        return true;
    }

    return false;
}

static bool uvm_gpus_are_indirect_peers(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
{
    uvm_gpu_peer_t *peer_caps = uvm_gpu_peer_caps(gpu0, gpu1);

    if (peer_caps->link_type != UVM_GPU_LINK_INVALID && peer_caps->is_indirect_peer) {
        UVM_ASSERT(gpu0->mem_info.numa.enabled);
        UVM_ASSERT(gpu1->mem_info.numa.enabled);
        UVM_ASSERT(peer_caps->link_type != UVM_GPU_LINK_PCIE);
        UVM_ASSERT(!uvm_gpus_are_nvswitch_connected(gpu0, gpu1));
        return true;
    }

    return false;
}

// Retrieve the virtual address corresponding to the given vidmem physical
// address, according to the linear vidmem mapping in the GPU kernel address
// space.
//
// The actual GPU mapping only exists if a full flat mapping, or a partial flat
// mapping covering the passed address, has been previously created.
static uvm_gpu_address_t uvm_gpu_address_virtual_from_vidmem_phys(uvm_gpu_t *gpu, NvU64 pa)
{
    UVM_ASSERT(uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu));
    UVM_ASSERT(pa <= gpu->mem_info.max_allocatable_address);

    if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu))
        UVM_ASSERT(gpu->static_flat_mapping.ready);

    return uvm_gpu_address_virtual(gpu->parent->flat_vidmem_va_base + pa);
}

// Retrieve the virtual address corresponding to the given sysmem physical
// address, according to the linear sysmem mapping in the GPU kernel address
// space.
//
// The actual GPU mapping only exists if a linear mapping covering the passed
// address has been previously created.
static uvm_gpu_address_t uvm_gpu_address_virtual_from_sysmem_phys(uvm_gpu_t *gpu, NvU64 pa)
{
    UVM_ASSERT(uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu));
    UVM_ASSERT(pa <= (gpu->parent->dma_addressable_limit - gpu->parent->dma_addressable_start));

    return uvm_gpu_address_virtual(gpu->parent->flat_sysmem_va_base + pa);
}

// Given a GPU or CPU physical address (not peer), retrieve an address suitable
// for CE access.
static uvm_gpu_address_t uvm_gpu_address_copy(uvm_gpu_t *gpu, uvm_gpu_phys_address_t phys_addr)
{
    UVM_ASSERT(phys_addr.aperture == UVM_APERTURE_VID || phys_addr.aperture == UVM_APERTURE_SYS);

    if (phys_addr.aperture == UVM_APERTURE_VID) {
        if (uvm_mmu_gpu_needs_static_vidmem_mapping(gpu) || uvm_mmu_gpu_needs_dynamic_vidmem_mapping(gpu))
            return uvm_gpu_address_virtual_from_vidmem_phys(gpu, phys_addr.address);
    }
    else if (uvm_mmu_gpu_needs_dynamic_sysmem_mapping(gpu)) {
        return uvm_gpu_address_virtual_from_sysmem_phys(gpu, phys_addr.address);
    }

    return uvm_gpu_address_from_phys(phys_addr);
}
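
// Illustrative sketch only, not part of the UVM API: building a CE-usable
// address for a vidmem physical offset. The function name is hypothetical and
// the offset is assumed to be within the GPU's allocatable vidmem.
static uvm_gpu_address_t uvm_gpu_example_vidmem_copy_address(uvm_gpu_t *gpu, NvU64 vidmem_pa)
{
    uvm_gpu_phys_address_t phys = { .aperture = UVM_APERTURE_VID, .address = vidmem_pa };

    // Resolves to the flat virtual mapping when the GPU requires one,
    // otherwise falls back to the raw physical address.
    return uvm_gpu_address_copy(gpu, phys);
}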

static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_gpu_id_t peer_id)
{
    return &gpu->peer_mappings[uvm_id_gpu_index(peer_id)];
}

// Check for ECC errors
//
// Note that this check calls into RM, so it cannot be performed in contexts
// where it's not safe to call into RM.
NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);

// Check for ECC errors without calling into RM
//
// Calling into RM is problematic in many places; this check, by contrast, is
// always safe to do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be
// an ECC error, in which case uvm_gpu_check_ecc_error() must be called to be
// sure.
NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
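
// Illustrative sketch only, not part of the UVM API: the two-step ECC check.
// The RM-free check runs first, and only when it reports that more processing
// is required does the full RM-backed check run, which must happen in a
// context where calling into RM is safe. The function name is hypothetical.
static NV_STATUS uvm_gpu_example_check_ecc(uvm_gpu_t *gpu)
{
    NV_STATUS status = uvm_gpu_check_ecc_error_no_rm(gpu);

    if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
        status = uvm_gpu_check_ecc_error(gpu);

    return status;
}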

// Map size bytes of contiguous sysmem on the GPU for physical access
//
// size has to be aligned to PAGE_SIZE.
//
// Returns, in dma_address_out, the DMA address the GPU can use to access the
// pages.
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);

// Unmap size bytes previously mapped with uvm_gpu_map_cpu_pages().
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);

static NV_STATUS uvm_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
{
    return uvm_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
}

static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
{
    uvm_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
}
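
// Illustrative sketch only, not part of the UVM API: mapping a single CPU page
// for GPU access and unmapping it afterwards. The page is assumed to be
// provided by the caller and to stay alive while mapped; the function name is
// hypothetical.
static NV_STATUS uvm_gpu_example_with_mapped_page(uvm_parent_gpu_t *parent_gpu, struct page *page)
{
    NvU64 dma_address;
    NV_STATUS status;

    status = uvm_gpu_map_cpu_page(parent_gpu, page, &dma_address);
    if (status != NV_OK)
        return status;

    // ... dma_address can now be used as a sysmem physical address on the GPU ...

    uvm_gpu_unmap_cpu_page(parent_gpu, dma_address);

    return NV_OK;
}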

// Allocate and map a page of system DMA memory on the GPU for physical access
//
// Returns:
// - the DMA address the GPU can use to access the page, in the
//   dma_address_out parameter.
// - the CPU virtual address of the allocated memory as the return value.
void *uvm_gpu_dma_alloc_page(uvm_parent_gpu_t *parent_gpu,
                             gfp_t gfp_flags,
                             NvU64 *dma_address_out);

// Unmap and free a page of sysmem DMA previously allocated with
// uvm_gpu_dma_alloc_page().
void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_address);
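
// Illustrative sketch only, not part of the UVM API: allocating a page of
// DMA-addressable sysmem, touching it from the CPU, and freeing it. GFP_KERNEL
// is assumed to be an acceptable allocation context, a NULL return is assumed
// to indicate allocation failure, and the function name is hypothetical.
static NV_STATUS uvm_gpu_example_dma_page(uvm_parent_gpu_t *parent_gpu)
{
    NvU64 dma_address;
    void *cpu_va = uvm_gpu_dma_alloc_page(parent_gpu, GFP_KERNEL, &dma_address);

    if (!cpu_va)
        return NV_ERR_NO_MEMORY;

    // cpu_va is accessible from the CPU, dma_address from the GPU.
    memset(cpu_va, 0, PAGE_SIZE);

    uvm_gpu_dma_free_page(parent_gpu, cpu_va, dma_address);

    return NV_OK;
}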

// Returns whether the given range is within the GPU's addressable VA ranges.
// The input 'addr' must be in canonical form on platforms that use canonical
// form addresses, i.e., ARM64 and x86.
// Warning: This only checks whether the GPU's MMU can support the given
// address. Some HW units on that GPU might only support a smaller range.
//
// The GPU must be initialized before calling this function.
bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);

// Returns whether the given range is within the GPU's addressable VA ranges in
// the internal GPU VA "kernel" address space, which is a linear address space.
// Therefore, the input 'addr' must not be in canonical form, even on platforms
// that use canonical form addresses, i.e., ARM64 and x86.
// Warning: This only checks whether the GPU's MMU can support the given
// address. Some HW units on that GPU might only support a smaller range.
//
// The GPU must be initialized before calling this function.
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);

bool uvm_platform_uses_canonical_form_address(void);

// Returns addr's canonical form for host systems that use canonical form
// addresses.
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
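
// Illustrative sketch only, not part of the UVM API: validating that a
// user-provided range is addressable by the GPU. The address is first brought
// into canonical form, since uvm_gpu_can_address() expects canonical input on
// platforms that use it. The function name and error code choice are
// hypothetical.
static NV_STATUS uvm_gpu_example_check_user_range(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
{
    NvU64 canonical_addr = uvm_parent_gpu_canonical_address(gpu->parent, addr);

    if (!uvm_gpu_can_address(gpu, canonical_addr, size))
        return NV_ERR_OUT_OF_RANGE;

    return NV_OK;
}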

static bool uvm_gpu_is_coherent(const uvm_parent_gpu_t *parent_gpu)
{
    return parent_gpu->system_bus.memory_window_end > parent_gpu->system_bus.memory_window_start;
}

static bool uvm_gpu_has_pushbuffer_segments(uvm_gpu_t *gpu)
{
    return gpu->parent->max_host_va > (1ull << 40);
}

static bool uvm_gpu_supports_eviction(uvm_gpu_t *gpu)
{
    // Eviction is supported only if the GPU supports replayable faults
    return gpu->parent->replayable_faults_supported;
}

static bool uvm_gpu_is_virt_mode_sriov_heavy(const uvm_gpu_t *gpu)
{
    return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_HEAVY;
}

static bool uvm_gpu_is_virt_mode_sriov_standard(const uvm_gpu_t *gpu)
{
    return gpu->parent->virt_mode == UVM_VIRT_MODE_SRIOV_STANDARD;
}

// Returns true if the virtualization mode is SR-IOV heavy or SR-IOV standard.
static bool uvm_gpu_is_virt_mode_sriov(const uvm_gpu_t *gpu)
{
    return uvm_gpu_is_virt_mode_sriov_heavy(gpu) || uvm_gpu_is_virt_mode_sriov_standard(gpu);
}

static bool uvm_gpu_uses_proxy_channel_pool(const uvm_gpu_t *gpu)
{
    return uvm_gpu_is_virt_mode_sriov_heavy(gpu);
}

uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu);

// Debug print of GPU properties
void uvm_gpu_print(uvm_gpu_t *gpu);

// Add the given instance pointer -> user_channel mapping to this GPU. The
// bottom half GPU page fault handler uses this to look up the VA space for GPU
// faults.
NV_STATUS uvm_gpu_add_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);

// Looks up an entry added by uvm_gpu_add_user_channel. Return codes:
//  NV_OK                        Translation successful
//  NV_ERR_INVALID_CHANNEL       Entry's instance pointer was not found
//  NV_ERR_PAGE_TABLE_NOT_AVAIL  Entry's instance pointer is valid but the entry
//                               targets an invalid subcontext
//
// out_va_space is valid if NV_OK is returned, otherwise it's NULL. The caller
// is responsible for ensuring that the returned va_space can't be destroyed,
// so these functions should only be called from the bottom half.
NV_STATUS uvm_gpu_fault_entry_to_va_space(uvm_gpu_t *gpu,
                                          uvm_fault_buffer_entry_t *fault,
                                          uvm_va_space_t **out_va_space);

NV_STATUS uvm_gpu_access_counter_entry_to_va_space(uvm_gpu_t *gpu,
                                                   uvm_access_counter_buffer_entry_t *entry,
                                                   uvm_va_space_t **out_va_space);
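
// Illustrative sketch only, not part of the UVM API: translating a fault entry
// to its VA space in the bottom half. Servicing and cancellation are elided,
// and the function name is hypothetical.
static NV_STATUS uvm_gpu_example_fault_lookup(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault)
{
    uvm_va_space_t *va_space;
    NV_STATUS status = uvm_gpu_fault_entry_to_va_space(gpu, fault, &va_space);

    // The instance pointer is no longer registered, e.g. the channel was torn
    // down after the fault was generated: nothing to service here.
    if (status == NV_ERR_INVALID_CHANNEL)
        return NV_OK;

    if (status != NV_OK)
        return status;

    // ... service the fault against va_space. This is safe only from the
    // bottom half, which guarantees the VA space can't be destroyed ...

    return NV_OK;
}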

typedef enum
{
    UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT,
    UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
    UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
} uvm_gpu_buffer_flush_mode_t;

#endif // __UVM_GPU_H__