File: index.html

package info (click to toggle)
nvidia-cuda-toolkit 12.4.1-3
  • links: PTS, VCS
  • area: non-free
  • in suites: forky, sid
  • size: 18,505,836 kB
  • sloc: ansic: 203,477; cpp: 64,769; python: 34,699; javascript: 22,006; xml: 13,410; makefile: 3,085; sh: 2,343; perl: 352
file content (1636 lines) | stat: -rw-r--r-- 147,368 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
  <meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
<meta content="Nsight Compute Release Notes." name="description" />
<meta content="Release notes" name="keywords" />

  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>1. Release Notes &mdash; NsightCompute 12.4 documentation</title>
      <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
      <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
      <link rel="stylesheet" href="../_static/design-style.b7bb847fb20b106c3d81b95245e65545.min.css" type="text/css" />
      <link rel="stylesheet" href="../_static/omni-style.css" type="text/css" />
      <link rel="stylesheet" href="../_static/api-styles.css" type="text/css" />
    <link rel="shortcut icon" href="../_static/nsight-compute.ico"/>
  <!--[if lt IE 9]>
    <script src="../_static/js/html5shiv.min.js"></script>
  <![endif]-->
  
        <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
        <script src="../_static/jquery.js"></script>
        <script src="../_static/underscore.js"></script>
        <script src="../_static/doctools.js"></script>
        <script src="../_static/mermaid-init.js"></script>
        <script src="../_static/design-tabs.js"></script>
        <script src="../_static/version.js"></script>
        <script src="../_static/social-media.js"></script>
    <script src="../_static/js/theme.js"></script>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="next" title="2. Kernel Profiling Guide" href="../ProfilingGuide/index.html" />
    <link rel="prev" title="Nsight Compute Documentation" href="../index.html" />
 


</head>

<body class="wy-body-for-nav"> 
  <div class="wy-grid-for-nav">
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >


  <a href="../index.html">
  <img src="../_static/nsight-compute.png" class="logo" alt="Logo"/>
</a>

<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>
        </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
              <p class="caption" role="heading"><span class="caption-text">Nsight Compute</span></p>
<ul class="current">
<li class="toctree-l1 current"><a class="current reference internal" href="#">1. Release Notes</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#id1">1.1. Release Notes</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2024-1-1">1.1.1. Updates in 2024.1.1</a></li>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2024-1">1.1.2. Updates in 2024.1</a></li>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2023-3-1">1.1.3. Updates in 2023.3.1</a></li>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2023-3">1.1.4. Updates in 2023.3</a></li>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2023-2-2">1.1.5. Updates in 2023.2.2</a></li>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2023-2-1">1.1.6. Updates in 2023.2.1</a></li>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2023-2">1.1.7. Updates in 2023.2</a></li>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2023-1-1">1.1.8. Updates in 2023.1.1</a></li>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2023-1">1.1.9. Updates in 2023.1</a></li>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2022-4-1">1.1.10. Updates in 2022.4.1</a></li>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2022-4">1.1.11. Updates in 2022.4</a></li>
<li class="toctree-l3"><a class="reference internal" href="#updates-in-2021-2-9">1.1.12. Updates in 2021.2.9</a></li>
<li class="toctree-l3"><a class="reference internal" href="#older-versions">1.1.13. Older Versions</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2022-3">Updates in 2022.3</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2022-2-1">Updates in 2022.2.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2022-2">Updates in 2022.2</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2022-1-1">Updates in 2022.1.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2022-1">Updates in 2022.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-3-1">Updates in 2021.3.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-3">Updates in 2021.3</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-2-8">Updates in 2021.2.8</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-2-7">Updates in 2021.2.7</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-2-6">Updates in 2021.2.6</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-2-5">Updates in 2021.2.5</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-2-4">Updates in 2021.2.4</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-2-3">Updates in 2021.2.3</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-2-2">Updates in 2021.2.2</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-2-1">Updates in 2021.2.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-2">Updates in 2021.2</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-1-1">Updates in 2021.1.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2021-1">Updates in 2021.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2020-3-1">Updates in 2020.3.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2020-3">Updates in 2020.3</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2020-2-1">Updates in 2020.2.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2020-2">Updates in 2020.2</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2020-1-2">Updates in 2020.1.2</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2020-1-1">Updates in 2020.1.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2020-1">Updates in 2020.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2019-5-3">Updates in 2019.5.3</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2019-5-2">Updates in 2019.5.2</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2019-5-1">Updates in 2019.5.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2019-5">Updates in 2019.5</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2019-4">Updates in 2019.4</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2019-3-1">Updates in 2019.3.1</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2019-3">Updates in 2019.3</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2019-2">Updates in 2019.2</a></li>
<li class="toctree-l4"><a class="reference internal" href="#updates-in-2019-1">Updates in 2019.1</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#known-issues">1.2. Known Issues</a></li>
<li class="toctree-l2"><a class="reference internal" href="#support">1.3. Support</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#platform-support">1.3.1. Platform Support</a></li>
<li class="toctree-l3"><a class="reference internal" href="#gpu-support">1.3.2. GPU Support</a></li>
<li class="toctree-l3"><a class="reference internal" href="#library-support">1.3.3. Library Support</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#optix">OptiX</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#system-requirements">1.3.4. System Requirements</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../ProfilingGuide/index.html">2. Kernel Profiling Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="../NsightCompute/index.html">3. Nsight Compute</a></li>
<li class="toctree-l1"><a class="reference internal" href="../NsightComputeCli/index.html">4. Nsight Compute CLI</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Developer Interfaces</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../CustomizationGuide/index.html">1. Customization Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="../NvRulesAPI/index.html">2. NvRules API</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Training</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../Training/index.html">Training</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Release Information</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../Archives/index.html">Archives</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Copyright and Licenses</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../CopyrightAndLicenses/index.html">Copyright and Licenses</a></li>
</ul>

        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../index.html">NsightCompute</a>
      </nav>

      <div class="wy-nav-content">
        <div class="rst-content">
          <div role="navigation" aria-label="Page navigation">
  <ul class="wy-breadcrumbs">


<li><a href="../index.html" class="icon icon-home"></a> &raquo;</li>
<li><span class="section-number">1. </span>Release Notes</li>

      <li class="wy-breadcrumbs-aside">
      </li>
<li class="wy-breadcrumbs-aside">


  <span>v2024.1.1 |</span>



  <a href="https://developer.nvidia.com/nsight-compute-history" class="reference external">Archive</a>


  <span>&nbsp;</span>
</li>

  </ul>
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
             
  <section id="release-notes">
<h1><span class="section-number">1. </span>Release Notes<a class="headerlink" href="#release-notes" title="Permalink to this headline"></a></h1>
<p>Nsight Compute Release Notes.</p>
<p>Release notes, including new features and important bug fixes. Supported platforms and GPUs. List of known issues for the current release.</p>
<section id="id1">
<h2><span class="section-number">1.1. </span>Release Notes<a class="headerlink" href="#id1" title="Permalink to this headline"></a></h2>
<section id="updates-in-2024-1-1">
<h3><span class="section-number">1.1.1. </span>Updates in 2024.1.1<a class="headerlink" href="#updates-in-2024-1-1" title="Permalink to this headline"></a></h3>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added clarification that, when profiling a range with multiple active CUDA Green Contexts, counter values that are not attributable to SMs will be aggregated over all of these Green Contexts.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Changed the way the PerfWorks library is loaded into the target application’s process space. This addresses possible connection errors in case the library search path includes other directories with PerfWorks libraries.</p></li>
<li><p>Fixed an issue that caused PM sampling data to be missing from the results of a Profile Series.</p></li>
<li><p>Fixed the incorrect calculation of the percentage values in the Inline Function table.</p></li>
<li><p>Fixed a potential crash of the NVIDIA Nsight Compute UI when PM sampling data was requested, but no sample was collected.</p></li>
</ul>
</section>
<section id="updates-in-2024-1">
<h3><span class="section-number">1.1.2. </span>Updates in 2024.1<a class="headerlink" href="#updates-in-2024-1" title="Permalink to this headline"></a></h3>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Switched to using OpenSSL version 3.0.10.</p></li>
<li><p>Added new metrics available when profiling on CUDA Green Contexts.</p></li>
<li><p>Reduced the number of passes required for collecting <a class="reference external" href="../ProfilingGuide/index.html#pm-sampling">PM sampling</a> sections.</p></li>
<li><p>Counter domains can now be specified for PM sampling metrics in section files.</p></li>
<li><p>PM sampling metrics can now be queried in the command line and Metric Details window by specifying the respective <code class="docutils literal notranslate"><span class="pre">collection</span></code> option.</p></li>
<li><p>Added a new optional <a class="reference external" href="../ProfilingGuide/index.html#sections-and-rules">PmSampling_WarpStates</a> section for understanding warp stall reasons over the workload duration.</p></li>
<li><p>Added a new rule for detecting load imbalances.</p></li>
<li><p>Improved the performance of graph-level profiling on new drivers.</p></li>
<li><p>Updated the <a class="reference external" href="../ProfilingGuide/index.html#compatibility">metrics compatibility</a> table for OptiX cmdlists and instruction-level SASS metrics.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added SASS view and Source Markers support in <a class="reference external" href="../NsightCompute/index.html#source-comparison">Source Comparison</a>.</p></li>
<li><p>Improved <a class="reference external" href="../NsightCompute/index.html#source-comparison">Source Comparison</a> diff visualization by adding empty lines on other side of inserted/deleted lines.</p></li>
<li><p>The Source page <a class="reference external" href="../NsightCompute/index.html#profiler-report-source-page-metrics">column chooser</a> can now be opened directly from the Navigation drop down.</p></li>
<li><p>Added a <a class="reference external" href="../NsightCompute/index.html#tool-window-launch-details">Launch Details</a> tool window for showing information about individual launches within larger workloads like OptiX command lists.</p></li>
<li><p>Added support for CUDA Green Contexts in the <a class="reference external" href="../NsightCompute/index.html#tool-window-resources">Resources</a> tool window, the Launch Statistics section and the report header.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Improved documentation on NVTX expressions and command line output when a potentially incorrect expression led to no workloads being profiled.</p></li>
<li><p>Improved checking for invalid expressions when using the <code class="docutils literal notranslate"><span class="pre">--target-processes-filter</span></code> option.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed that the L1 cache achieved roofline value was missing when profiling on GH100.</p></li>
<li><p>Fixed several “Launch Failed” errors when collecting instruction-level SASS metrics.</p></li>
<li><p>Fixed that <a class="reference external" href="../NsightCompute/index.html#profiler-report-source-page-metrics">Live Register</a> values would be too high for some workloads.</p></li>
<li><p>Fixed a scrolling issue on the Source page when collapsing a multi-file view.</p></li>
<li><p>Fixed an issue where no PM sampling data would be shown in the timeline when context switch trace was not available.</p></li>
<li><p>Fixed a display issue in the memory chart when adding baselines.</p></li>
<li><p>Fixed a crash when adding baselines.</p></li>
<li><p>Fixed a crash in timeline views when not all configured data was available.</p></li>
<li><p>Fixed that the application history was not always deleted when selecting Reset Application Data.</p></li>
<li><p>Fixed an error in the metric compatibility documentation.</p></li>
</ul>
</section>
<section id="updates-in-2023-3-1">
<h3><span class="section-number">1.1.3. </span>Updates in 2023.3.1<a class="headerlink" href="#updates-in-2023-3-1" title="Permalink to this headline"></a></h3>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Switched to using OpenSSL version 1.1.1w.</p></li>
<li><p>Improved the speedup estimates for rule IssueSlotUtilization as well as its child rules.</p></li>
<li><p>Updated report files and documentation for the samples located at <code class="docutils literal notranslate"><span class="pre">extras/samples/</span></code>.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed collection of context switch data during <a class="reference external" href="../ProfilingGuide/index.html#pm-sampling">PM Sampling</a> when using <a class="reference external" href="../ProfilingGuide/index.html#range-replay">Range Replay</a>.</p></li>
<li><p>Fixed a potential crash of NVIDIA Nsight Compute when an invalid regular expression was provided as a requested metric.</p></li>
<li><p>Improved the performance of NVIDIA Nsight Compute in cases where only a single process is being profiled and <code class="docutils literal notranslate"><span class="pre">--target-processes</span> <span class="pre">all</span></code> was specified.</p></li>
<li><p>Fixed an issue of reporting too high register counts on the Source Page.</p></li>
<li><p>Fixed a bug that could cause a GPU fault while collecting SW counters through PerfWorks.</p></li>
<li><p>Fixed showing incorrect baseline values for the Runtime Improvement values on the Summary Page.</p></li>
</ul>
</section>
<section id="updates-in-2023-3">
<h3><span class="section-number">1.1.4. </span>Updates in 2023.3<a class="headerlink" href="#updates-in-2023-3" title="Permalink to this headline"></a></h3>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>NVIDIA Nsight Compute now supports collecting many metrics by <a class="reference external" href="../ProfilingGuide/index.html#pm-sampling">sampling the GPU’s performance monitors (PM)</a> periodically at fixed intervals. The results can be visualized on a <a class="reference external" href="../NsightCompute/index.html#details-page">timeline</a>.</p></li>
<li><p>Added WSL profiling support on Windows 10 WSL with OS build version 19044 and greater. WSL profiling is not supported on Windows 10 WSL for systems that exceed 1 TB of system memory.</p></li>
<li><p>Rule outputs are prioritized to improve the accuracy of estimated speedups. The <a class="reference external" href="../NsightCompute/index.html#summary-page">Summary</a> page now shows the most actionable optimization advice when a result row is selected.</p></li>
<li><p>Improved the handling and reporting for unavailable metrics during collection and when applying rules.</p></li>
<li><p>Added <em>instructionMix</em> sample CUDA application and document to show how to use NVIDIA Nsight Compute to analyze and identify the performance bottleneck due to an imbalanced instruction mix. Refer to the <code class="docutils literal notranslate"><span class="pre">README.TXT</span></code> file, sample code, and document under <code class="docutils literal notranslate"><span class="pre">extras/samples/instructionMix</span></code>.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added support to see the source files of two profile results side by side using <a class="reference external" href="../NsightCompute/index.html#source-comparison">Source Comparison</a>. This allows you to quickly identify source differences and understand changes in metric values.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#summary-page">Summary</a> page is now the default page when a report is opened. Previous behavior can be enabled in the <a class="reference external" href="../NsightCompute/index.html#profile">options</a> dialog.</p></li>
<li><p>On the <a class="reference external" href="../NsightCompute/index.html#summary-page">Summary</a> and <a class="reference external" href="../NsightCompute/index.html#raw-page">Raw</a> pages, values from all/selected rows are automatically aggregated in the column header for applicable metrics. Selected individual cells are aggregated in the bottom status bar.</p></li>
<li><p>Added <code class="docutils literal notranslate"><span class="pre">Launch</span> <span class="pre">Name</span></code> and <code class="docutils literal notranslate"><span class="pre">Device</span></code> options in the filter dialog launched by <code class="docutils literal notranslate"><span class="pre">Apply</span> <span class="pre">Filters</span></code> button in the <a class="reference external" href="../NsightCompute/index.html#profiler-report-header">report header</a>.</p></li>
<li><p>Added support for <a class="reference external" href="../NsightCompute/index.html#profiles">source view profiles</a> that persist the Source page configuration and allow you to re-apply it to other reports.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#metric-details">Metric Details</a> tool window now supports querying metrics beyond the current report by using the <code class="docutils literal notranslate"><span class="pre">chip:&lt;chipname&gt;</span></code> tag in the search.</p></li>
<li><p>Added support for <em>CUDA Graph Edge Data</em> (such as port and dependency type) and <em>CUDA Graph Conditional Handles</em> in the <a class="reference external" href="../NsightCompute/index.html#resources">Resources</a> tool window.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#acceleration-structure-viewer">Acceleration Structure Viewer</a> and <a class="reference external" href="../NsightCompute/index.html#resources">Resources</a> tool window now support OptiX Opacity Micromaps.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Tracking and profiling all child processes (<code class="docutils literal notranslate"><span class="pre">--target-processes</span> <span class="pre">all</span></code>) is now the default for ncu.</p></li>
<li><p>Improved reporting of requested but unavailable metrics. Metrics requested in section files are by default considered optional and only cause a warning to be shown.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Support for tracking child processes launched with <code class="docutils literal notranslate"><span class="pre">system()</span></code> is available on Linux ppc64le.</p></li>
<li><p>Improved the behavior of following SASS navigation links on the Source page.</p></li>
<li><p>Fixed issues with profiling CUDA graphs in graph-profiling mode when nodes are associated with a non-current CUDA context.</p></li>
<li><p>Fixed an issue in L2 bandwidth calculations in the hierarchical roofline sections.</p></li>
</ul>
</section>
<section id="updates-in-2023-2-2">
<h3><span class="section-number">1.1.5. </span>Updates in 2023.2.2<a class="headerlink" href="#updates-in-2023-2-2" title="Permalink to this headline"></a></h3>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed possible crash when profiling CUDA graphs on multiple GPUs.</p></li>
<li><p>Fixed the detection mechanism of the C2C interface, i.e. the metric <code class="docutils literal notranslate"><span class="pre">c2clink__present</span></code>. The fix requires the display driver shipping with this release or any newer driver.</p></li>
</ul>
</section>
<section id="updates-in-2023-2-1">
<h3><span class="section-number">1.1.6. </span>Updates in 2023.2.1<a class="headerlink" href="#updates-in-2023-2-1" title="Permalink to this headline"></a></h3>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed a crash during application replay while having the temporary directory located on a network file system (NFS).</p></li>
<li><p>Improved detection mechanism for C2C interface. Added caching of the detected configuration to reduce overhead.</p></li>
</ul>
</section>
<section id="updates-in-2023-2">
<h3><span class="section-number">1.1.7. </span>Updates in 2023.2<a class="headerlink" href="#updates-in-2023-2" title="Permalink to this headline"></a></h3>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Extended the rules system to show estimates of the potential speedup that can be achieved by addressing the corresponding performance bottleneck. These speedups allow prioritizing applicable rules and help focus first on optimization strategies with the highest potential performance gain.</p></li>
<li><p>Added support for rules to highlight individual source lines. Lines with global/local memory access with high excessive sector counts and shared accesses with many bank conflicts are automatically detected and highlighted.</p></li>
<li><p>Added the ability to query metric attributes in NvRules API.</p></li>
<li><p>Added support for creating instanced metrics through the NvRules API.</p></li>
<li><p>For Orin+ mobile chips on the Linux aarch64 platform, added metrics (<code class="docutils literal notranslate"><span class="pre">mcc__*</span></code>) support for memory controller channel (MC Channel) unit which connects to the DRAM.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added hyperlinks to the SASS View of the Source Page for instructions that reference others by address or offset. This makes it possible to quickly jump to the target instruction of a branch.</p></li>
<li><p>Improved the search bar in the Metric Details tool window. The search string now matches any part of the metric names, and the matching results are shown in a sorted order.</p></li>
<li><p>Added a visual indication of the scale of metric value changes when baselines are used. The background bars in the table cells of the Details Page make it possible to quickly identify which metric values increased or decreased the most. The color scheme can be configured in the <a class="reference external" href="../NsightCompute/index.html#tool-window-baselines">Baselines tool window</a>.</p></li>
<li><p>Added a rules toggle button on the Summary Page. It allows hiding the bottom pane with the rules output for the selected kernel launch.</p></li>
<li><p>Added support for allowing users to configure properties on <a class="reference external" href="../NsightCompute/index.html#profiler-report-summary-page">Summary Page</a> using <a class="reference external" href="../NsightCompute/index.html#options-profile">Metrics/Properties profile option</a>.</p></li>
<li><p>Added percentage bars on <a class="reference external" href="../NsightCompute/index.html#profiler-report-summary-page">Summary Page</a>.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Added support for tracking child processes launched with <code class="docutils literal notranslate"><span class="pre">posix_spawn(p)</span></code> when using <code class="docutils literal notranslate"><span class="pre">--target-processes</span> <span class="pre">all</span></code>.</p></li>
<li><p>Added support for tracking child processes launched with <code class="docutils literal notranslate"><span class="pre">system()</span></code> on Windows and Linux (aarch64, x86_64) when using <code class="docutils literal notranslate"><span class="pre">--target-processes</span> <span class="pre">all</span></code>.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed table alignment in the output of the NVIDIA Nsight Compute CLI on Windows when printing Unicode characters.</p></li>
<li><p>Fixed view corruption in the Source Page after switching from the collapsed view to the expanded view.</p></li>
<li><p>Fixed missing tooltip descriptions for some SASS instructions.</p></li>
<li><p>Fixed potential crash when copying from the Resources tool window using CTRL+C.</p></li>
<li><p>Fixed possible crash when restoring sections in the Sections tool window.</p></li>
</ul>
</section>
<section id="updates-in-2023-1-1">
<h3><span class="section-number">1.1.8. </span>Updates in 2023.1.1<a class="headerlink" href="#updates-in-2023-1-1" title="Permalink to this headline"></a></h3>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added new configuration options to set the default view mode and precision for the Source page.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Added support for the <code class="docutils literal notranslate"><span class="pre">DT_RUNPATH</span></code> attribute when intercepting calls to <code class="docutils literal notranslate"><span class="pre">dlopen</span></code>. Fixed issue for applications or libraries relying on <code class="docutils literal notranslate"><span class="pre">DT_RUNPATH</span></code> not finding all dynamic libraries when launched by NVIDIA Nsight Compute.</p></li>
<li><p>Improved interaction between custom additional metrics and the selected metric set. Adding custom metrics no longer forces switching to the custom metric set.</p></li>
<li><p>Added ability to gracefully skip folders with insufficient access permissions while importing source code.</p></li>
<li><p>Fixed the calculation of the peak values for the L1 and L2 cache bandwidths in the hierarchical roofline charts.</p></li>
<li><p>Fixed issue that prevented modules loaded with function <code class="docutils literal notranslate"><span class="pre">optixModuleCreateFromPTX</span></code> showing up in the <em>Optix: Modules</em> table of the <em>Resources</em> tool window.</p></li>
<li><p>Fixed handling of deprecated functions when querying function pointers from the OptiX interception library.</p></li>
<li><p>Fixed that sometimes sections or rules couldn’t be easily selected in the tool window.</p></li>
<li><p>Fixed issue with <em>Reset Application Data</em> that prevented some settings from correctly resetting.</p></li>
<li><p>Fixed potential crash of NVIDIA Nsight Compute when <em>Reset Application Data</em> was executed multiple times in a row.</p></li>
<li><p>Fixed a crash when saving or loading baselines for non-kernel results.</p></li>
<li><p>Fixed that memory written while executing a CUDA graph was not properly restored in single-pass graph profiling.</p></li>
<li><p>Fixed potential memory leak while collecting SW counters for modules with unpatched kernel functions.</p></li>
</ul>
</section>
<section id="updates-in-2023-1">
<h3><span class="section-number">1.1.9. </span>Updates in 2023.1<a class="headerlink" href="#updates-in-2023-1" title="Permalink to this headline"></a></h3>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the CUDA toolkit 12.1.</p></li>
<li><p>Added a new <a class="reference external" href="../ProfilingGuide/index.html#application-range-replay">app-range</a> replay mode to profile ranges without API capture by relaunching the entire application multiple times.</p></li>
<li><p>Added <em>sharedBankConflicts</em> sample CUDA application and document to show how NVIDIA Nsight Compute can be used to analyze and identify the shared memory bank conflicts which result in inefficient shared memory accesses. Refer to the <code class="docutils literal notranslate"><span class="pre">README.TXT</span></code> file, sample code and document under <code class="docutils literal notranslate"><span class="pre">extras/samples/sharedBankConflicts</span></code>.</p></li>
<li><p>Jupyter notebook samples are available in the Nsight training <a class="reference external" href="https://github.com/NVIDIA/nsight-training/blob/master/cuda/nsight_compute/python_report_interface">github repository</a>.</p></li>
<li><p>The equivalent of the <a class="reference external" href="../CustomizationGuide/index.html#python-report-interface-high-level">high-level Python report interface</a> is now available in rule files.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added support for profiling individual metrics in <a class="reference external" href="../NsightCompute/index.html#connection-activity-interactive">Interactive Profile activity</a>. A new input field for metrics was added in the <a class="reference external" href="../NsightCompute/index.html#tool-window-sections-info">Metric Selection</a> tool window.</p></li>
<li><p>Files on remote systems can be opened directly from the <a class="reference external" href="../NsightCompute/index.html#main-menu">menu</a>.</p></li>
<li><p>Metric- and section-related entries in the menu, <a class="reference external" href="../NsightCompute/index.html#connection-activity-non-interactive">Profile activity</a> and <a class="reference external" href="../NsightCompute/index.html#tool-window-sections-info">Metric Selection</a> tool window were renamed to make them more clear.</p></li>
<li><p>CPU and GPU <a class="reference external" href="../ProfilingGuide/index.html#metrics-reference">NUMA topology metrics</a> can be collected on applicable systems. Topology information is shown in a new <a class="reference external" href="../ProfilingGuide/index.html#sections-and-rules">NUMA Affinity section</a>.</p></li>
<li><p>Added content-aware suggestions to the Details page to provide suggestions based on the selected profiling options.</p></li>
<li><p>Added support for <a class="reference external" href="../NsightCompute/index.html#profiler-report-source-page-navigation">re-resolving source files</a> on the Source page.</p></li>
<li><p>Not-issued warp stall reasons are removed from the Source Counters section tables and hidden by default on the Source page. Users should focus on regular warp stall reasons by default and only inspect not-issued samples if this distinction is needed.</p></li>
<li><p>Added support to search missing CUDA source files to permanently import into the report using <a class="reference external" href="../NsightCompute/index.html#options-source-lookup">Source Lookup options</a> in the <a class="reference external" href="../NsightCompute/index.html#connection-activity-interactive">Interactive Profile activity</a>.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#profiler-report-source-page-metrics">source page</a> now shows metric values as percentages by default. New buttons are added to support switching between different value modes.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Added support for config files in the current working or user directory to set default ncu parameters. See the <a class="reference external" href="../NsightComputeCli/index.html#command-line-options-general">General options</a> for more details.</p></li>
<li><p>Added <code class="docutils literal notranslate"><span class="pre">--range-filter</span></code> <a class="reference external" href="../NsightComputeCli/index.html#command-line-options-console-output">command line option</a>, which allows selecting a subset of the enabled profile ranges.</p></li>
<li><p>Added a new <code class="docutils literal notranslate"><span class="pre">--source-folders</span></code> <a class="reference external" href="../NsightComputeCli/index.html#command-line-options-profile">command line option</a> that allows recursively searching for missing CUDA source files to permanently import into the report.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed performance issues on the Summary and Raw pages for large reports.</p></li>
<li><p>Improved support for non-ASCII characters in filenames.</p></li>
<li><p>Fixed an issue with delayed updates of assembly analysis information on the Source page’s Source and PTX views.</p></li>
<li><p>Fixed potential crashes when using the Python report interface.</p></li>
</ul>
</section>
<section id="updates-in-2022-4-1">
<h3><span class="section-number">1.1.10. </span>Updates in 2022.4.1<a class="headerlink" href="#updates-in-2022-4-1" title="Permalink to this headline"></a></h3>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Improved the documentation for the NvRules API.</p></li>
<li><p>The python report interface links libstdc++ statically.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed an issue that enabled profiling on CUDA Graph uploads.</p></li>
<li><p>Fixed formatting issues during unit conversion of metric instances.</p></li>
<li><p>Fixed an issue that could lead to a crash during application replay.</p></li>
<li><p>Fixed an issue that could lead to a crash in the python report interface.</p></li>
<li><p>Fixed typos in the metrics reference documentation and descriptions.</p></li>
</ul>
</section>
<section id="updates-in-2022-4">
<h3><span class="section-number">1.1.11. </span>Updates in 2022.4<a class="headerlink" href="#updates-in-2022-4" title="Permalink to this headline"></a></h3>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the CUDA toolkit 12.0.</p></li>
</ul>
<ul class="simple">
<li><p>Added support for profiling <a class="reference external" href="https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__GRAPH.html#group__CUDA__GRAPH">CUDA graphs</a> as complete workloads instead of as single kernel nodes. Enable this using the <em>Graph Profiling</em> option in the <a class="reference external" href="../NsightCompute/index.html#connection-dialog">activities</a>. Similarly to <a class="reference external" href="../ProfilingGuide/index.html#range-replay">range replay</a> results, selected metrics are not available when profiling graphs.</p></li>
</ul>
<ul class="simple">
<li><p>Added support for profiling on Windows Subsystem for Linux (WSL2). See the <a class="reference external" href="index.html#system-requirements">System Requirements</a> for more details.</p></li>
<li><p>Deprecated <code class="docutils literal notranslate"><span class="pre">nv-nsight-cu</span></code> and <code class="docutils literal notranslate"><span class="pre">nv-nsight-cu-cli</span></code> aliases are removed in favor of <code class="docutils literal notranslate"><span class="pre">ncu-ui</span></code> and <code class="docutils literal notranslate"><span class="pre">ncu</span></code>.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>The <a class="reference external" href="../NsightCompute/index.html#profiler-report-source-page">Source page</a> now loads disassembly and static analysis results asynchronously in the background.</p></li>
<li><p>Added a new <a class="reference external" href="../NsightCompute/index.html#tool-window-metric-details">Metric Details</a> tool window to inspect metric information such as raw value, unit, description or instance values. Open the tool window and select a metric on the <em>Details</em> or <em>Raw</em> page or lookup any metric in the focused report directly in the tool window’s search bar.</p></li>
<li><p>In the Source page PTX view, the source name will be shown as a list of comma-separated files.</p></li>
<li><p>Added flexibility with NVTX based filtering in the <em>Next Trigger</em> filter, similar to the command line. Filters can now use nvtx-include and nvtx-exclude expressions by adding the <code class="docutils literal notranslate"><span class="pre">nvtx-include:</span></code> or <code class="docutils literal notranslate"><span class="pre">nvtx-exclude:</span></code> prefix.</p></li>
<li><p>NVTX views now show the payload type.</p></li>
<li><p>Simplified the command line generated by the <a class="reference external" href="../NsightCompute/index.html#connection-activity-non-interactive">Profile activity</a>.</p></li>
<li><p>Reduced the number of steps required to re-run the <a class="reference external" href="../NsightCompute/index.html#connection-activity-non-interactive">Profile activity</a>.</p></li>
<li><p>The way to rename <a class="reference external" href="../NsightCompute/index.html#baselines">Baselines</a> in-place has been improved.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#tool-window-resources">Resources</a> tool window now shows the CUDA Dynamic Parallelism state for CUDA functions and modules.</p></li>
<li><p>OptiX traversable handles can now be exported as <a class="reference external" href="https://graphviz.org">Graphviz</a> DOT or SVG files for visualization from the Resources tool window.</p></li>
<li><p>All OptiX build, instance and geometry flags can be viewed in the <a class="reference external" href="../NsightCompute/index.html#as-viewer">Acceleration Structure Viewer</a>.</p></li>
<li><p>Added OptiX-specific highlight filters to the Acceleration Structure Viewer.</p></li>
<li><p>Added support for user-specified index strides to the Acceleration Structure Viewer.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Added new option <code class="docutils literal notranslate"><span class="pre">--graph-profiling</span> <span class="pre">graph</span></code> to enable profiling of complete CUDA graphs as single workloads.</p></li>
</ul>
<ul class="simple">
<li><p>Added new option <code class="docutils literal notranslate"><span class="pre">--filter-mode</span> <span class="pre">per-launch-config</span></code> to enable filtering of kernel launches for each GPU launch parameter separately.</p></li>
<li><p>Added support to print section body item metrics on the details page with the new <code class="docutils literal notranslate"><span class="pre">--print-details</span></code> <a class="reference external" href="../NsightComputeCli/index.html#command-line-options-console-output">command line option</a>.</p></li>
<li><p>Added support to select what to show in the Metric Name column on the details page with the new <code class="docutils literal notranslate"><span class="pre">--print-metric-name</span></code> <a class="reference external" href="../NsightComputeCli/index.html#command-line-options-console-output">command line option</a>.</p></li>
<li><p>Removed deprecated options: <code class="docutils literal notranslate"><span class="pre">--units</span></code>, <code class="docutils literal notranslate"><span class="pre">--fp</span></code>, <code class="docutils literal notranslate"><span class="pre">--summary</span></code> and <code class="docutils literal notranslate"><span class="pre">--kernel-base</span></code>.</p></li>
<li><p>Added support to print launch, session, process and device attributes on session page with the new <code class="docutils literal notranslate"><span class="pre">--page</span> <span class="pre">session</span></code> option.</p></li>
<li><p>Added <code class="docutils literal notranslate"><span class="pre">--kill</span> <span class="pre">yes</span></code> support for application replay mode.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed an issue that NVIDIA Nsight Compute could crash when continuing profiling after transposing the <em>Raw</em> page table.</p></li>
<li><p>Fixed an issue that caused closing a report document to be delayed by pending source analysis.</p></li>
<li><p>Fixed support for profiling applications with older OptiX versions.</p></li>
<li><p>Fixed display of OptiX module inputs for IR and built-in modules.</p></li>
</ul>
</section>
<section id="updates-in-2021-2-9">
<h3><span class="section-number">1.1.12. </span>Updates in 2021.2.9<a class="headerlink" href="#updates-in-2021-2-9" title="Permalink to this headline"></a></h3>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Clarified when not all metrics for the roofline chart could be collected on the current chip.</p></li>
</ul>
</section>
<section id="older-versions">
<h3><span class="section-number">1.1.13. </span>Older Versions<a class="headerlink" href="#older-versions" title="Permalink to this headline"></a></h3>
<section id="updates-in-2022-3">
<h4>Updates in 2022.3<a class="headerlink" href="#updates-in-2022-3" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the CUDA toolkit 11.8.</p></li>
<li><p>Added support for the Ada GPU architecture.</p></li>
<li><p>Added support for the Hopper GPU architecture.</p></li>
<li><p>Added support for <a class="reference external" href="index.html#library-support-optix">OptiX 7.6</a>.</p></li>
<li><p>Added <em>uncoalescedGlobalAccesses</em> sample CUDA application and document to show how the NVIDIA Nsight Compute profiler can be used to analyze and identify the memory accesses which are uncoalesced and result in inefficient DRAM accesses. Refer to the README, sample code and document under <code class="docutils literal notranslate"><span class="pre">extras/samples/uncoalescedGlobalAccesses</span></code>.</p></li>
<li><p>Added <a class="reference external" href="../ProfilingGuide/index.html#metrics-reference">Metrics Reference</a> in the documentation that lists metrics not available through <code class="docutils literal notranslate"><span class="pre">--query-metrics</span></code>.</p></li>
<li><p>Reduced the overhead of collecting SASS-patching based metrics.</p></li>
<li><p>On <a class="reference external" href="../ProfilingGuide/index.html#special-configurations-mig">Multi-Instance GPU (MIG)</a> configurations, NVIDIA Nsight Compute cannot lock clocks anymore. Users are expected to lock clocks externally using nvidia-smi.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Wrapper script <code class="docutils literal notranslate"><span class="pre">nv-nsight-cu</span></code> is deprecated in favor of <code class="docutils literal notranslate"><span class="pre">ncu-ui</span></code> and will be removed in a future release.</p></li>
<li><p>Source page supports range replay results.</p></li>
<li><p>Added a second chart on the Compute Workload Analysis section to avoid mixing metrics with different meaning.</p></li>
<li><p>NVIDIA Nsight Compute now tracks traversable handles created with <code class="docutils literal notranslate"><span class="pre">optixAccelRelocate</span></code>.</p></li>
<li><p>NVIDIA Nsight Compute now tracks traversable handles created as updates from others.</p></li>
<li><p>The Acceleration Structure viewer now reports unsupported inputs.</p></li>
<li><p>The Acceleration Structure viewer now supports opening multiple traversable handles.</p></li>
<li><p>The Acceleration Structure viewer now uses OptiX naming for displayed elements.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Wrapper script <code class="docutils literal notranslate"><span class="pre">nv-nsight-cu-cli</span></code> is deprecated in favor of <code class="docutils literal notranslate"><span class="pre">ncu</span></code> and will be removed in a future release.</p></li>
<li><p>Added new option <code class="docutils literal notranslate"><span class="pre">--filter-mode</span> <span class="pre">per-gpu</span></code> to enable filtering of kernel launches on each GPU separately.</p></li>
<li><p>Added new option <code class="docutils literal notranslate"><span class="pre">--app-replay-mode</span> <span class="pre">relaxed</span></code> to produce profiling results for valid kernels even if the number of kernel launches is inconsistent across application replay passes.</p></li>
<li><p>Added a documentation section on supported <a class="reference external" href="../NsightComputeCli/index.html#environment-variables">environment variables</a>.</p></li>
<li><p>Improved the performance when loading existing reports on the command line.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed an issue when resolving files on the Source page.</p></li>
<li><p>Fixed an issue when profiling OptiX applications.</p></li>
<li><p>Fixed an issue in the OptiX traversable handle management caused by clashing handle values.</p></li>
<li><p>Fixed an issue in the Acceleration Structure viewer causing the display of invalid memory when viewing AABB buffers.</p></li>
</ul>
</section>
<section id="updates-in-2022-2-1">
<h4>Updates in 2022.2.1<a class="headerlink" href="#updates-in-2022-2-1" title="Permalink to this headline"></a></h4>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed an issue that caused some tooltips to not show up for the charts on the Details page.</p></li>
<li><p>Fixed the incorrect reporting of the accessed bytes for LDGSTS (access) traffic in the L1TEX memory table.</p></li>
<li><p>Fixed an issue that resulted in an empty view on the Source page after resolving multiple source files at once.</p></li>
<li><p>Fixed a failure to connect to remote machines over SSH due to a mismatch in the configuration of data compression.</p></li>
<li><p>Fixed a potential failure to profile kernels on multiple devices of the same type of chip. The failure occurred on the attempt to profile on the second device.</p></li>
</ul>
</section>
<section id="updates-in-2022-2">
<h4>Updates in 2022.2<a class="headerlink" href="#updates-in-2022-2" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the CUDA toolkit 11.7.</p></li>
<li><p>Improved performance for profiling and metric query.</p></li>
<li><p>Added Linux (aarch64 sbsa) as a supported <a class="reference external" href="index.html#platform-support">host platform</a>.</p></li>
<li><p>The NVIDIA Nsight Compute CLI stores the command line arguments, which can be viewed in the <a class="reference external" href="../NsightCompute/index.html#profiler-report-session-page">Session</a> report page.</p></li>
<li><p>Added an API to query the version of the <a class="reference external" href="../CustomizationGuide/index.html#python-report-interface">Python Report</a> and <a class="reference external" href="../NvRulesAPI/index.html#abstract">NvRules</a> interfaces.</p></li>
<li><p>Added an API to query the PTX in the <a class="reference external" href="../CustomizationGuide/index.html#python-report-interface">Python Report</a> and <a class="reference external" href="../NvRulesAPI/index.html#abstract">NvRules</a> interfaces.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>The <a class="reference external" href="../NsightCompute/index.html#as-viewer">Acceleration Structure Viewer</a> allows inspection of acceleration structures built using the OptiX API for debugging and performance optimization.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#profiler-report-source-page">Source page</a> column chooser now supports enabling or disabling groups of metrics. Note that not all metrics are enabled anymore by default to make the view easier to use.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#tool-window-resources">Resources</a> tool window now links to the exact target resource instances for <em>CUDA</em> resource types.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#tool-window-resources">Resources</a> tool window now shows the instanced nodes for CUDA graphs.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#tool-window-resources">Resources</a> tool window now shows the loading state and number of loaded functions for <em>CUDA Modules</em>.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#tool-window-resources">Resources</a> tool window now shows the graph node enablement state for applicable instanced graph nodes.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#tool-window-resources">Resources</a> tool window now shows the graph node priorities for instanced kernel graph nodes.</p></li>
<li><p>Added regex support in the <em>Next Trigger</em> filter for NVTX based filtering. The <em>Next Trigger</em> filter now considers the NVTX config as a regular expression if the <code class="docutils literal notranslate"><span class="pre">regex:</span></code> prefix is specified.</p></li>
<li><p>Added regex support in the report’s <em>Filter Results</em> dialog.</p></li>
<li><p>Added <a class="reference external" href="../NsightCompute/index.html#quick-start-navigate-report">keyboard shortcuts</a> to navigate between the pages in a report.</p></li>
<li><p>The behavior for selecting sets and sections is now consistent between the <a class="reference external" href="../NsightCompute/index.html#tool-window-sections-info">Sections/Rules Info window</a> and the <a class="reference external" href="../NsightCompute/index.html#quick-start-non-interactive">non-interactive profile activity</a>.</p></li>
<li><p>Reports can now be opened directly from the welcome dialog.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Added support for collecting <a class="reference external" href="../ProfilingGuide/index.html#sampling">sampling-based</a> warp stalls in range replay mode.</p></li>
<li><p>Added regex support in <a class="reference external" href="../NsightComputeCli/index.html#nvtx-filtering">NVTX filtering</a>.</p></li>
<li><p>The metric type is shown when querying metrics.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Reduced overhead of connecting to the host UI for non-interactive remote profiling sessions.</p></li>
<li><p>Fixed issues with persisting the Source page state when collapsing or switching between results.</p></li>
<li><p>Fixed an issue that locked GPU clocks were not reset when terminating the NVIDIA Nsight Compute CLI while profiling a kernel.</p></li>
<li><p>Fixed issues with selecting and copying text from the Details page tables.</p></li>
<li><p>Fixed an issue with opening report files in the UI on MacOSX.</p></li>
<li><p>Fixed an issue with the <em>Freeze API</em> option.</p></li>
</ul>
</section>
<section id="updates-in-2022-1-1">
<h4>Updates in 2022.1.1<a class="headerlink" href="#updates-in-2022-1-1" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Filtering kernel launches or profile results based on NVTX domains/ranges now takes registered strings in the payload field into account, if the range name is empty.</p></li>
<li><p>Added support for the suffix <code class="docutils literal notranslate"><span class="pre">.max_rate</span></code> for ratio metrics.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed a crash during the disassembly of the kernel’s SASS code for the Source page.</p></li>
<li><p>Fixed a crash on exit of the NVIDIA Nsight Compute UI.</p></li>
<li><p>Fixed a hang during profiling when CPU call stack collection is enabled.</p></li>
<li><p>Fixed a missing flush of UVM buffers before taking memory checkpoints during <a class="reference external" href="../ProfilingGuide/index.html#range-replay">Range Replay</a>.</p></li>
<li><p>Fixed tracking of memory during <a class="reference external" href="../ProfilingGuide/index.html#range-replay">Range Replay</a>, if the CUDA context has any device mapped memory allocations.</p></li>
<li><p>Fixed the maximum available shared memory sizes in the <a class="reference external" href="../NsightCompute/index.html#occupancy-calculator">Occupancy Calculator</a> for NVIDIA Ampere GPUs.</p></li>
<li><p>Fixed that the shared memory usage of the kernel is incorrectly initialized when opening the <a class="reference external" href="../NsightCompute/index.html#occupancy-calculator">Occupancy Calculator</a> from a profile report.</p></li>
</ul>
</section>
<section id="updates-in-2022-1">
<h4>Updates in 2022.1<a class="headerlink" href="#updates-in-2022-1" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the CUDA toolkit 11.6.</p></li>
</ul>
<ul class="simple">
<li><p>Added support for GA103 chips.</p></li>
</ul>
<ul class="simple">
<li><p>Added a new <a class="reference external" href="../ProfilingGuide/index.html#range-replay">Range Replay</a> mode to profile ranges of multiple, concurrent kernels. Range replay is available in the NVIDIA Nsight Compute CLI and the non-interactive Profile activity.</p></li>
<li><p>Added a new rule to detect non-fused floating-point instructions.</p></li>
<li><p>The Uncoalesced Memory access rules now show results in a dynamic table.</p></li>
<li><p>Unix Domain Sockets and Windows Named Pipes are used for local connection between the host and target processes on x86_64 Linux and Windows, respectively.</p></li>
<li><p>The <a class="reference external" href="../NvRulesAPI/index.html#abstract">NvRules API</a> now supports querying action names using different function name bases (e.g. demangled).</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>The default <a class="reference external" href="../NsightCompute/index.html#profiler-report-pages">report page</a> is now chosen automatically when opening a report.</p></li>
<li><p>Added coverage for ECC (Error Correction Code) operations in the L2 Cache table of the Memory Analysis section.</p></li>
<li><p>Added a new <a class="reference external" href="../ProfilingGuide/topics/memory-tables-l2-evict-policy.html">L2 Evict Policies</a> table to the Memory Analysis section.</p></li>
<li><p>The <a class="reference external" href="../NsightCompute/index.html#occupancy-calculator">Occupancy Calculator</a> now updates automatically when the input changes.</p></li>
<li><p>Added new metric <em>Thread Instructions Executed</em> to the Source page.</p></li>
<li><p>Added tooltips to the <a class="reference external" href="../NsightCompute/index.html#profiler-report-source-page">Register Dependency</a> columns in the Source page to identify the associated register more conveniently.</p></li>
<li><p>Improved the selection of Sections and Sets in the Profile activity connection dialog.</p></li>
<li><p>NVLink utilization is shown in the NVLink Tables section.</p></li>
<li><p>NVLink links are colored according to the measured throughput.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p><code class="docutils literal notranslate"><span class="pre">--kernel-regex</span></code> and <code class="docutils literal notranslate"><span class="pre">--kernel-regex-base</span></code> options are no longer supported. Alternate options are <code class="docutils literal notranslate"><span class="pre">--kernel-name</span></code> and <code class="docutils literal notranslate"><span class="pre">--kernel-name-base</span></code> respectively, added in 2021.1.0.</p></li>
<li><p>Added support to resolve CUDA source files in the <code class="docutils literal notranslate"><span class="pre">--page</span> <span class="pre">source</span></code> output with the new <code class="docutils literal notranslate"><span class="pre">--resolve-source-file</span></code> <a class="reference external" href="../NsightComputeCli/index.html#command-line-options-console-output">command line option</a>.</p></li>
<li><p>Added new option <code class="docutils literal notranslate"><span class="pre">--target-processes-filter</span></code> to filter the processes being profiled by name.</p></li>
<li><p>The CPU Stack Trace is shown in the NVIDIA Nsight Compute CLI output.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed the calculation of aggregated average instruction execution metrics in non-SASS views on the Source page.</p></li>
<li><p>Fixed that atomic instructions are counted as both loads and stores in the Memory Analysis tables.</p></li>
</ul>
</section>
<section id="updates-in-2021-3-1">
<h4>Updates in 2021.3.1<a class="headerlink" href="#updates-in-2021-3-1" title="Permalink to this headline"></a></h4>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed that kernels with the same name and launch configuration were in some scenarios associated with the wrong profiling results during application replay.</p></li>
<li><p>Fixed an issue with binary forward compatibility of the report format.</p></li>
<li><p>Fixed an issue with applications calling into the CUDA API during process teardown.</p></li>
<li><p>Fixed an issue with profiling applications that use pre-CUDA API 3.1 contexts.</p></li>
<li><p>Fixed a crash when resolving files on the Source page.</p></li>
<li><p>Fixed that opening reports with large embedded CUBINs would hang the UI.</p></li>
<li><p>Fixed an issue with remote profiling on a target where the UI is already launched.</p></li>
</ul>
</section>
<section id="updates-in-2021-3">
<h4>Updates in 2021.3<a class="headerlink" href="#updates-in-2021-3" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the CUDA toolkit 11.5.</p></li>
<li><p>Added a new rule for detecting inefficient memory access patterns in the L1TEX cache and L2 cache.</p></li>
<li><p>Added a new rule for detecting high usage of system or peer memory.</p></li>
<li><p>Added new <code class="docutils literal notranslate"><span class="pre">IAction::sass_by_pc</span></code> function to the <a class="reference external" href="../NvRulesAPI/index.html#abstract">NvRules API</a>.</p></li>
<li><p>The <a class="reference external" href="../CustomizationGuide/index.html#python-report-interface">Python-based report interface</a> is now available for Windows and MacOS hosts, too.</p></li>
<li><p>Added Hierarchical Roofline section files in a new “roofline” section set.</p></li>
<li><p>Added support for collecting CPU call stack information.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added support for new remote profiling <a class="reference external" href="../NsightCompute/index.html#remote-connections">SSH connection and authentication options</a> as well as local SSH configuration files.</p></li>
<li><p>Added an <a class="reference external" href="../NsightCompute/index.html#occupancy-calculator">Occupancy Calculator</a> which can be opened directly from a profile report or as a new activity. It offers feature parity to the CUDA Occupancy Calculator <a class="reference external" href="http://docs.nvidia.com/cuda/cuda-occupancy-calculator/index.html">spreadsheet</a>.</p></li>
<li><p>Added new <a class="reference external" href="../NsightCompute/index.html#tool-window-baselines">Baselines tool window</a> to manage (hide, update, re-order, save/load) baseline selections.</p></li>
<li><p>The Source page views now support multi-line/cell selection and copy/paste. Different colors are used for highlighting selections and correlated lines.</p></li>
<li><p>The search edit on the Source page now supports <em>Shift+Enter</em> to search in reverse direction.</p></li>
<li><p>The <a class="reference external" href="../ProfilingGuide/index.html#memory-chart">Memory Workload Analysis Chart</a> can be configured to show throughput values instead of transferred bytes.</p></li>
<li><p>The <em>Profile</em> activity now supports the <code class="docutils literal notranslate"><span class="pre">--devices</span></code> option.</p></li>
<li><p>The <em>NVLink Topology</em> diagram displays per NVLink metrics.</p></li>
<li><p>Added a new tool window showing the CPU call stack at the location where the current thread was suspended during interactive profiling activities.</p></li>
<li><p>If enabled, the <em>Call Stack / NVTX</em> page of the profile report shows the captured CPU call stack for the selected kernel launch.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Added support for printing source/metric content with the new <code class="docutils literal notranslate"><span class="pre">--page</span> <span class="pre">source</span></code> and <code class="docutils literal notranslate"><span class="pre">--print-source</span></code> <a class="reference external" href="../NsightComputeCli/index.html#command-line-options-console-output">command line options</a>.</p></li>
<li><p>Added new option <code class="docutils literal notranslate"><span class="pre">--call-stack</span></code> to enable collecting the CPU call stack for every profiled kernel launch.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed that <code class="docutils literal notranslate"><span class="pre">memory_*</span></code> metrics could not be collected with the <code class="docutils literal notranslate"><span class="pre">--metrics</span></code> option.</p></li>
<li><p>Fixed that selection and copy/paste was not supported for section header tables on the Details page.</p></li>
<li><p>Fixed issues with the Source page when collapsing the content.</p></li>
<li><p>Fixed that the UI could crash when applying rules to a new profile result.</p></li>
<li><p>Fixed that PC Sampling metrics were not available for <em>Profile Series</em>.</p></li>
<li><p>Fixed that local profiling did not work if no non-loopback address was configured for the system.</p></li>
<li><p>Fixed termination of remote-launched applications. On QNX, terminating an application profiled via <em>Remote Launch</em> is now supported. Canceling remote-launched <em>Profile</em> activities is now supported.</p></li>
</ul>
</section>
<section id="updates-in-2021-2-8">
<h4>Updates in 2021.2.8<a class="headerlink" href="#updates-in-2021-2-8" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Updated Python libraries to version 3.10.5.</p></li>
</ul>
</section>
<section id="updates-in-2021-2-7">
<h4>Updates in 2021.2.7<a class="headerlink" href="#updates-in-2021-2-7" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Enabled stack canaries with random canary values for L4T builds.</p></li>
</ul>
</section>
<section id="updates-in-2021-2-6">
<h4>Updates in 2021.2.6<a class="headerlink" href="#updates-in-2021-2-6" title="Permalink to this headline"></a></h4>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed an issue causing a hang on QNX after pressing <code class="docutils literal notranslate"><span class="pre">ctrl+c</span></code> while profiling a multi-process application.</p></li>
</ul>
</section>
<section id="updates-in-2021-2-5">
<h4>Updates in 2021.2.5<a class="headerlink" href="#updates-in-2021-2-5" title="Permalink to this headline"></a></h4>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Improved the handling of the performance monitor reservation on mobile target GPUs.</p></li>
</ul>
</section>
<section id="updates-in-2021-2-4">
<h4>Updates in 2021.2.4<a class="headerlink" href="#updates-in-2021-2-4" title="Permalink to this headline"></a></h4>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed an issue that prevented remote interactive profiling of kernels on NVIDIA GA10b chips.</p></li>
</ul>
</section>
<section id="updates-in-2021-2-3">
<h4>Updates in 2021.2.3<a class="headerlink" href="#updates-in-2021-2-3" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the NVIDIA GA10b chip.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Improved error message on QNX for failure to deploy stock section and rules files.</p></li>
</ul>
</section>
<section id="updates-in-2021-2-2">
<h4>Updates in 2021.2.2<a class="headerlink" href="#updates-in-2021-2-2" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Changes for profiling support on NVIDIA virtual GPUs (vGPUs) for an upcoming GRID/vGPU release.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed hang issue on QNX when using the <code class="docutils literal notranslate"><span class="pre">--target-processes</span> <span class="pre">all</span></code> option while profiling shell scripts.</p></li>
</ul>
</section>
<section id="updates-in-2021-2-1">
<h4>Updates in 2021.2.1<a class="headerlink" href="#updates-in-2021-2-1" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Reduced the memory overhead when loading reports in the <a class="reference external" href="../CustomizationGuide/index.html#python-report-interface">Python Report Interface</a>.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed that links in the <em>Memory Allocations</em> Resource view were not working correctly.</p></li>
<li><p>Fixed that NVTX state might not be correctly reset between interactive profiling activities.</p></li>
<li><p>Fixed that the UI could crash when opening baselines from different GPU architectures.</p></li>
</ul>
</section>
<section id="updates-in-2021-2">
<h4>Updates in 2021.2<a class="headerlink" href="#updates-in-2021-2" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the CUDA toolkit 11.4.</p></li>
<li><p>Added support for OptiX version 7.3.</p></li>
<li><p>Added support for profiling on <a class="reference external" href="https://www.nvidia.com/en-us/data-center/virtual-gpu-technology/">NVIDIA virtual GPUs</a> (vGPUs) on an upcoming GRID/vGPU release.</p></li>
<li><p>Added a new <a class="reference external" href="../CustomizationGuide/index.html#python-report-interface">Python-based report interface</a> for interacting with report files from Python scripts.</p></li>
<li><p>Added a new rule to warn users when sampling metrics were selected, but no sampling data was collected.</p></li>
<li><p>Renamed <em>SOL</em> to <em>Throughput</em> in the Speed of Light section.</p></li>
<li><p>Renamed several <code class="docutils literal notranslate"><span class="pre">memory_*</span></code> metrics used on the <em>Source</em> page, to better reflect the measured value. See the <a class="reference external" href="../NsightCompute/index.html#profiler-report-source-page">Source page</a> documentation for more details.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added support for opening <a class="reference external" href="https://docs.nvidia.com/cuda/cuda-binary-utilities/index.html#cuda-binary">cubin files</a> in a <a class="reference external" href="../NsightCompute/index.html#cubin-viewer">Standalone Source Viewer</a> without profiling the application.</p></li>
<li><p>Moved the output of all rules so that it is visible even if a section’s body is collapsed. Visibility of the rules’ output can be toggled by a new button in the report header.</p></li>
<li><p>The profiler report header now shows the report name for each baseline when ambiguous.</p></li>
<li><p>Rules can define <em>Focused Metrics</em> that were most important for triggering their result output. Metrics are provided per result message with additional information, such as the underlying conditions and thresholds.</p></li>
<li><p><a class="reference external" href="../ProfilingGuide/index.html#memory-tables">Memory tables</a> show tooltips for cells with derived metric calculations.</p></li>
<li><p>Added a knowledge base service to show more comprehensive background information on metric names and descriptions in their tooltips.</p></li>
<li><p>Following a link in the Source Counters hot spot tables automatically selects the corresponding metric in the Source page.</p></li>
<li><p>Added new columns for visualizing register dependencies in the SASS view of the <a class="reference external" href="../NsightCompute/index.html#profiler-report-source-page">Source page</a>.</p></li>
<li><p>Functions in the SASS view are now sorted by name.</p></li>
<li><p>Added support for OptiX 7.x resource tracking in the interactive profile activity. The <em>Resources</em> tool window will show information on instantiated <code class="docutils literal notranslate"><span class="pre">optixDeviceContexts,</span> <span class="pre">optixModules,</span> <span class="pre">optixProgramGroups,</span> <span class="pre">optixPipelines</span> <span class="pre">and</span> <span class="pre">optixDenoiser</span></code> objects.</p></li>
<li><p>Added support for new CUDA graph memory allocation APIs.</p></li>
<li><p>Improved consistency between command line parameters and the <em>Next Trigger</em> filter in the API Stream window for handling of regex inputs. The <em>Next Trigger</em> filter now considers kernel/API name as a regular expression only if string has <code class="docutils literal notranslate"><span class="pre">regex:</span></code> as prefix.</p></li>
<li><p>Added ability to select font settings in the options dialog.</p></li>
<li><p>Added ability to configure the metrics shown on the summary page via the options dialog.</p></li>
<li><p>The selected heatmap color scale now also applies to the <em>Memory chart</em>.</p></li>
<li><p>The ncu-ui script now checks for missing library dependencies, such as OpenGL or <a class="reference external" href="https://doc.qt.io/qt-5/linux-requirements.html">Qt</a>.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Added environment variable <a class="reference external" href="../NsightComputeCli/index.html#environment-variables">NV_COMPUTE_PROFILER_DISABLE_STOCK_FILE_DEPLOYMENT=1</a> to skip deployment of section and rule files.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed a performance issue in the NVIDIA Nsight Compute CLI when using <code class="docutils literal notranslate"><span class="pre">--page</span> <span class="pre">raw</span> <span class="pre">--csv</span> <span class="pre">--units</span> <span class="pre">auto</span></code>.</p></li>
<li><p>Fixed that the SSH passphrase key is no longer persisted in the project file.</p></li>
<li><p>Fixed state of restore button in connection dialog. The button now supports restoring the default settings, if the current settings differ from the default.</p></li>
<li><p>Fixed that the complete GPU name can be shown in the NVLINK topology diagram on MacOS.</p></li>
<li><p>Fixed that collapsing the Source view reset the selected metrics.</p></li>
<li><p>Fixed that correlated lines could differ between filtered and unfiltered views of the executed functions.</p></li>
<li><p>Fixed that two application icons were shown in the MacOS dock.</p></li>
<li><p>Improved HiDPI awareness.</p></li>
</ul>
</section>
<section id="updates-in-2021-1-1">
<h4>Updates in 2021.1.1<a class="headerlink" href="#updates-in-2021-1-1" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Updated OpenSSL library to version 1.1.1k.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Remote source resolution can now use the IP address, in addition to the hostname, to find the necessary SSH target.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Added support for the existing command line options for kernel filtering while importing data from an existing report file using <code class="docutils literal notranslate"><span class="pre">--import</span></code>.</p></li>
<li><p>Option <code class="docutils literal notranslate"><span class="pre">-k</span></code> is not considered as deprecated option <code class="docutils literal notranslate"><span class="pre">--kernel-regex</span></code> anymore.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed failure to profile kernels from applications that use the CUDA graphics interop APIs to share semaphores.</p></li>
<li><p>Fixed wavefront metric in the L1TEX table for writes to shared memory on GA10x chips.</p></li>
<li><p>Fixed an issue resulting in incomplete data collection for the interactive profile activity after switching from single-pass mode to collecting multiple passes in the same session.</p></li>
<li><p>Fixed values shown in the minimap of the Source page when all functions are collapsed.</p></li>
<li><p>Fixed an issue causing names set by the NVTX naming APIs of one application to be applied to all subsequent sessions of the same instance of NVIDIA Nsight Compute.</p></li>
<li><p>Fixed behavior of horizontal scroll bars when clicking in the source views on the Source page.</p></li>
<li><p>Fixed appearance of multi-line entries in column chooser on the Source page.</p></li>
<li><p>Fixed enablement state of the reset button on the Connection dialog.</p></li>
<li><p>Fixed potential crash of NVIDIA Nsight Compute when the window size becomes small while being on the Source page.</p></li>
<li><p>Fixed potential crash of NVIDIA Nsight Compute when relative paths for section/rules files could not be found.</p></li>
<li><p>Fixed potential crash of NVIDIA Nsight Compute after removing baselines.</p></li>
</ul>
</section>
<section id="updates-in-2021-1">
<h4>Updates in 2021.1<a class="headerlink" href="#updates-in-2021-1" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the CUDA toolkit 11.3.</p></li>
<li><p>Added support for the <a class="reference external" href="index.html#library-support-optix">OptiX 7 API</a>.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">GpuArch</span></code> enumeration values used for filtering in section files were renamed from architecture names to compute capabilities.</p></li>
<li><p>NVTX states can now be accessed via the <a class="reference external" href="../NvRulesAPI/index.html#abstract">NvRules API</a>.</p></li>
<li><p>Added a rule for the <em>Occupancy</em> section.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added support for new CUDA asynchronous allocator attributes in the <em>Memory Pools</em> resources view.</p></li>
<li><p>Added a topology chart and link properties table in the NVLink section.</p></li>
<li><p>The selected metric column is scrolled into view on the <em>Source</em> page when a new metric is selected.</p></li>
<li><p>Users can choose the <em>Source</em> heatmap color scale in the <em>Options</em> dialog.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Added file-based <a class="reference external" href="../ProfilingGuide/index.html#application-replay">application replay</a> as the new default application replay mode. File-based replay uses a temporary file for keeping replay data, instead of allocating them in memory. This keeps the required memory footprint close to constant, independent of the number of profiled kernels. Users can switch between buffer modes using the <code class="docutils literal notranslate"><span class="pre">--app-replay-buffer</span></code> option.</p></li>
<li><p>CLI output now shows NVTX color and message information.</p></li>
<li><p><code class="docutils literal notranslate"><span class="pre">--kernel-regex</span></code> and <code class="docutils literal notranslate"><span class="pre">--kernel-regex-base</span></code> options are deprecated and replaced by <code class="docutils literal notranslate"><span class="pre">--kernel-name</span></code> and <code class="docutils literal notranslate"><span class="pre">--kernel-name-base</span></code>, respectively.</p></li>
<li><p>All options which support regex need to provide <code class="docutils literal notranslate"><span class="pre">regex:</span></code> as a prefix before an argument to match per the regex, e.g. <code class="docutils literal notranslate"><span class="pre">&lt;option&gt;</span> <span class="pre">&lt;regex:expression&gt;</span></code>.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed that baselines were not updated properly on the <em>Comments</em> page.</p></li>
<li><p>Fixed that NVTX ranges named using their payloads can be used in <a class="reference external" href="../NsightComputeCli/index.html#nvtx-filtering">NVTX filtering</a> expressions.</p></li>
<li><p>Fixed crashes in MacOSX hosts when terminating the target application.</p></li>
<li><p>The NVLINK(<code class="docutils literal notranslate"><span class="pre">nvl*</span></code>) metrics are now added back.</p></li>
</ul>
</section>
<section id="updates-in-2020-3-1">
<h4>Updates in 2020.3.1<a class="headerlink" href="#updates-in-2020-3-1" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for LDSM instruction-level metrics.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>LDSM instruction-level metrics are shown in the <em>Source</em> page and memory tables.</p></li>
<li><p>Improved reporting and documentation for collecting <em>Profile Series</em>.</p></li>
<li><p>Frozen columns in the <em>Source</em> page are automatically scrolled into view.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed an issue when profiling multi-threaded applications.</p></li>
<li><p>Fixed an issue that NVIDIA Nsight Compute would not automatically restart when using <em>Reset Application Data</em>.</p></li>
<li><p>Fixed issues with target applications using libstdc++.</p></li>
<li><p>Fixed an issue when collecting single-pass metrics in multiple Nsight Compute instances.</p></li>
<li><p>Fixed an issue when using <em>Kernel ID</em> and setting <em>Launch Capture Count</em> as non-zero in the UI’s <em>Profile</em> activity.</p></li>
<li><p>Fixed an issue that prevented different users on the same Linux system from using NVIDIA Nsight Compute in shared instance mode.</p></li>
<li><p>Fixed an issue that prevented resources from being properly renamed using NVTX information in the UI.</p></li>
</ul>
</section>
<section id="updates-in-2020-3">
<h4>Updates in 2020.3<a class="headerlink" href="#updates-in-2020-3" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for <em>derived metrics</em> in section files. Derived metrics can be used to create new metrics based on existing metrics and constants. See the <a class="reference external" href="../CustomizationGuide/index.html#section-derived-metrics">Customization Guide</a> for details.</p></li>
<li><p>Added a new <em>Import Source</em> (<code class="docutils literal notranslate"><span class="pre">--import-source</span></code>) option to the UI and command line to permanently import source files into the report, when available.</p></li>
<li><p>Added a new section that shows selected <em>NVLink</em> metrics on supported systems.</p></li>
<li><p>Added a new <code class="docutils literal notranslate"><span class="pre">launch__func_cache_config</span></code> metric to the <em>Launch Statistics</em> section.</p></li>
<li><p>Added new branch efficiency metrics to the <em>Source Counters</em> section, including <code class="docutils literal notranslate"><span class="pre">smsp__sass_average_branch_targets_threads_uniform.pct</span></code> to replace nvprof’s <code class="docutils literal notranslate"><span class="pre">branch_efficiency</span></code>, as well as instruction-level metrics <code class="docutils literal notranslate"><span class="pre">smsp__branch_targets_threads_divergent</span></code>, <code class="docutils literal notranslate"><span class="pre">smsp__branch_targets_threads_uniform</span></code> and <code class="docutils literal notranslate"><span class="pre">branch_inst_executed</span></code>.</p></li>
<li><p>A warning is shown if kernel replay starts staging GPU memory to CPU memory or the file system.</p></li>
<li><p>Section and rule files are deployed to a versioned directory in the user’s home directory to allow easier editing of those files, and to prevent modifying the base installation.</p></li>
<li><p>Removed support for NVLINK(<code class="docutils literal notranslate"><span class="pre">nvl*</span></code>) metrics due to a potential application hang during data collection. The metrics will be added back in a future version of the driver/tool.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added support for <em>Profile Series</em>. Series allow you to profile a kernel with a range of configurable parameters to analyze the performance of each combination.</p></li>
<li><p>Added a new <em>Allocations</em> view to the <em>Resources</em> tool window which shows the state of all current memory allocations.</p></li>
<li><p>Added a new <em>Memory Pools</em> view to the <em>Resources</em> tool window which shows the state of all current memory pools.</p></li>
<li><p>Added coverage of peer memory to the <em>Memory Chart</em>.</p></li>
<li><p>The <em>Source</em> page now shows the number of excessive sectors requested from L1 or L2, e.g. due to uncoalesced memory accesses.</p></li>
<li><p>The <em>Source</em> column on the <em>Source</em> page can now be scrolled horizontally.</p></li>
<li><p>The kernel duration <code class="docutils literal notranslate"><span class="pre">gpu__time_duration.sum</span></code> was added as column on the <em>Summary</em> page.</p></li>
<li><p>Improved the performance of <em>application replay</em> when not all kernels in the application are profiled.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Added a new <code class="docutils literal notranslate"><span class="pre">--app-replay-match</span></code> option to select the mechanism used for matching kernel instances across application replay passes.</p></li>
<li><p>An error is shown if <code class="docutils literal notranslate"><span class="pre">--nvtx-include/exclude</span></code> are used without <code class="docutils literal notranslate"><span class="pre">--nvtx</span></code>.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>The <em>Grid Size</em> column on the <em>Raw</em> page now shows the CUDA grid size like the <em>Launch Statistics</em> section, rather than the combined grid and block sizes.</p></li>
<li><p>The <em>Branch Resolving</em> warp stall reason was added to the PC sampling metric groups and the <em>Warp State Statistics</em> section.</p></li>
<li><p>The <em>API Stream</em> tool window shows kernel names according to the selected Function Name Mode.</p></li>
<li><p>Fixed that an incorrect line could be shown after a heatmap selection on the <em>Source</em> page.</p></li>
<li><p>Fixed incorrect metric usage for system memory in the <em>Memory Chart</em>. Previously, all requested memory of L2 from system memory was reported instead of only the portion that missed in L2.</p></li>
</ul>
</section>
<section id="updates-in-2020-2-1">
<h4>Updates in 2020.2.1<a class="headerlink" href="#updates-in-2020-2-1" title="Permalink to this headline"></a></h4>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed several issues related to auto-profiling in the UI.</p></li>
<li><p>Fixed a metric collection issue when profiling kernels on different GPU architectures with application replay.</p></li>
<li><p>Fixed a performance problem related to profiling large process trees.</p></li>
<li><p>Fixed that occupancy charts would not render correctly when comparing against baselines.</p></li>
<li><p>Fixed that no memory metrics were shown on the <em>Source</em> page for <code class="docutils literal notranslate"><span class="pre">LDGSTS</span></code> instructions.</p></li>
<li><p>Fixed the automatic sorting on the <em>Summary</em> and <em>Raw</em> pages.</p></li>
<li><p>Fixed an issue that would cause the NVIDIA Nsight Compute CLI to consume too much memory when importing or printing reports.</p></li>
<li><p>Long kernel names are now elided in the <em>Details</em> page source hot spot tables.</p></li>
<li><p>Fixed that function names in the <em>Resources</em> tool window were demangled differently.</p></li>
</ul>
</section>
<section id="updates-in-2020-2">
<h4>Updates in 2020.2<a class="headerlink" href="#updates-in-2020-2" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the NVIDIA Ampere GPUs with compute capability 8.6 and CUDA toolkit 11.1.</p></li>
<li><p>Added support for application replay to collect metric results across multiple application runs, instead of replaying individual kernels.</p></li>
<li><p>Added new <code class="docutils literal notranslate"><span class="pre">launch__device_id</span></code> metric.</p></li>
<li><p>Added support for NVLink (<code class="docutils literal notranslate"><span class="pre">nvl*</span></code>) metrics for GPUs with compute capabilities 7.0, 7.5 and 8.0.</p></li>
<li><p>Added documentation for memory charts and tables in the <a class="reference external" href="../ProfilingGuide/index.html#memory-chart">Profiling Guide</a>.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Updated menu and toolbar layout.</p></li>
<li><p>Added support for zoom and pan on roofline charts.</p></li>
<li><p>The <em>Resources</em> tool window shows the current CUDA stream attributes.</p></li>
<li><p>The memory chart shows a heatmap for link and port utilization.</p></li>
<li><p>The hot-spot tables in the <em>Source Counters</em> section now show values as percentages, too.</p></li>
<li><p>On-demand resolve of remote CUDA-C source is now available for MacOS hosts.</p></li>
<li><p>Metric columns in the <em>Summary</em> and <em>Raw</em> pages are now sortable.</p></li>
<li><p>Added a new option to set the number of recent API calls shown in the <em>API Stream</em> tool window.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>CLI output now shows NVTX payload information.</p></li>
<li><p>CSV output now shows NVTX states.</p></li>
<li><p>Added a new <code class="docutils literal notranslate"><span class="pre">--replay-mode</span></code> option to select the mechanism used for replaying a kernel launch multiple times.</p></li>
<li><p>Added a new <code class="docutils literal notranslate"><span class="pre">--kill</span></code> option to terminate the application once all requested kernels were profiled.</p></li>
<li><p>Added a new <code class="docutils literal notranslate"><span class="pre">--log-file</span></code> option to decide the output stream for printing tool output.</p></li>
<li><p>Added a new <code class="docutils literal notranslate"><span class="pre">--check-exit-code</span></code> option to decide if the child application exit code should be checked.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>The profiling progress dialog is not dismissed automatically anymore after an error.</p></li>
<li><p>The inter-process lock is now automatically given write permissions for all users.</p></li>
<li><p>All project extensions are enabled in the default dialog filter.</p></li>
<li><p>Fixed handling of targets using <em>tcsh</em> during remote profiling.</p></li>
<li><p>Fixed handling of quoted application arguments on Windows.</p></li>
</ul>
</section>
<section id="updates-in-2020-1-2">
<h4>Updates in 2020.1.2<a class="headerlink" href="#updates-in-2020-1-2" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>The NVIDIA Nsight Compute installer for Mac is now code-signed and notarized.</p></li>
<li><p>Disabled the creation of the Python cache when executing rules to avoid permission issues and signing conflicts.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed the launcher script of the NVIDIA Nsight Compute CLI to no longer fail if <code class="docutils literal notranslate"><span class="pre">uname</span> <span class="pre">-p</span></code> is not available.</p></li>
<li><p>Fixed the API parameter capture for function <code class="docutils literal notranslate"><span class="pre">cuDeviceGetLuid</span></code>.</p></li>
</ul>
</section>
<section id="updates-in-2020-1-1">
<h4>Updates in 2020.1.1<a class="headerlink" href="#updates-in-2020-1-1" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the NVIDIA GA100/SM 8.x GPU architecture</p></li>
</ul>
<ul class="simple">
<li><p>Metrics passed to <code class="docutils literal notranslate"><span class="pre">--metrics</span></code> on the NVIDIA Nsight Compute CLI or in the respective <em>Profile</em> activity option are automatically expanded to all first-level sub-metrics if required. See the documentation on <code class="docutils literal notranslate"><span class="pre">--metrics</span></code> for more details.</p></li>
<li><p>Added new rules for detecting inefficiencies of using the sparse data compression on the NVIDIA Ampere architecture.</p></li>
<li><p>The version of the NVIDIA Nsight Compute target collecting the results is shown in the <em>Session</em> page.</p></li>
<li><p>Added new <code class="docutils literal notranslate"><span class="pre">launch__grid_dim_[x,y,z]</span></code> and <code class="docutils literal notranslate"><span class="pre">launch__block_dim_[x,y,z]</span></code> metrics.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>The <em>Break on API Error</em> functionality has been improved when auto profiling.</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>The full path to the report output file is printed after profiling.</p></li>
<li><p>Added and corrected metrics in the nvprof <em>Metric Comparison</em> table.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Documented the <em>breakdown:</em> metrics prefix.</p></li>
<li><p>Fixed handling of escaped domain delimiters in NVTX filter expressions.</p></li>
<li><p>Fixed issues with the occupancy charts for small block sizes.</p></li>
<li><p>Fixed an issue when choosing a default report page in the options dialog.</p></li>
<li><p>Fixed that the scroll bar could overlap the content when exporting the report page as an image.</p></li>
</ul>
</section>
<section id="updates-in-2020-1">
<h4>Updates in 2020.1<a class="headerlink" href="#updates-in-2020-1" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the NVIDIA GA100/SM 8.x GPU architecture</p></li>
<li><p>Removed support for the Pascal SM 6.x GPU architecture</p></li>
<li><p>Windows 7 is not a supported host or target platform anymore</p></li>
<li><p>Added a rule for reporting uncoalesced memory accesses as part of the <em>Source Counters</em> section</p></li>
<li><p>Added support for report name placeholders %p, %q, %i and %h</p></li>
<li><p>The <a class="reference external" href="../ProfilingGuide/index.html#abstract">Kernel Profiling Guide</a> was added to the documentation</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>The UI command was renamed from <code class="docutils literal notranslate"><span class="pre">nv-nsight-cu</span></code> to <code class="docutils literal notranslate"><span class="pre">ncu-ui</span></code>. Old names remain for backwards compatibility.</p></li>
<li><p>Added support for roofline analysis charts</p></li>
<li><p>Added linked hot spot tables in section bodies to indicate performance problems in the source code</p></li>
<li><p>Added section navigation links in rule results to quickly jump to the referenced section</p></li>
<li><p>Added a new option to select how kernel names are shown in the UI</p></li>
<li><p>Added new memory tables for the L1/TEX cache and the L2 cache. The old tables are still available for backwards compatibility and moved to a new section containing deprecated UI elements.</p></li>
<li><p>Memory tables now show the metric name as a tooltip</p></li>
<li><p>Source resolution now takes into account file properties when selecting a file from disk</p></li>
<li><p>Results in the profile report can now be filtered by NVTX range</p></li>
<li><p>The Source page now supports collapsing views even for single files</p></li>
<li><p>The UI shows profiler error messages as dismissible banners for increased visibility</p></li>
<li><p>Improved the baseline name control in the profiler report header</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>The CLI command was renamed from <code class="docutils literal notranslate"><span class="pre">nv-nsight-cu-cli</span></code> to <code class="docutils literal notranslate"><span class="pre">ncu</span></code>. Old names remain for backwards compatibility.</p></li>
<li><p>Queried metrics on GV100 and newer chips are sorted alphabetically</p></li>
<li><p>Multiple instances of NVIDIA Nsight Compute CLI can now run concurrently on the same system, e.g. for profiling individual MPI ranks. Profiled kernels are serialized across all processes using a system-wide file lock.</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>More C++ kernel names can be properly demangled</p></li>
<li><p>Fixed a <code class="docutils literal notranslate"><span class="pre">free():</span> <span class="pre">invalid</span> <span class="pre">pointer</span></code> error when profiling applications using pytorch &gt; 19.07</p></li>
<li><p>Fixed profiling IBM Spectrum MPI applications that require PAMI GPU hooks (<code class="docutils literal notranslate"><span class="pre">--smpiargs=&quot;-gpu&quot;</span></code>)</p></li>
<li><p>Fixed that the first kernel instruction was missed when computing <code class="docutils literal notranslate"><span class="pre">sass__inst_executed_per_opcode</span></code></p></li>
<li><p>Reduced surplus DRAM write traffic created from flushing caches during kernel replay</p></li>
<li><p>The <em>Compute Workload Analysis</em> section shows the IMMA pipeline on GV11b GPUs</p></li>
<li><p>Profile reports now scroll properly on MacOS when using a trackpad</p></li>
<li><p>Relative output filenames for the Profile activity now use the document directory, instead of the current working directory</p></li>
<li><p>Fixed path expansion of <code class="docutils literal notranslate"><span class="pre">~</span></code> on Windows</p></li>
<li><p>Memory access information is now shown properly for RED assembly instructions on the Source page</p></li>
<li><p>Fixed that user <code class="docutils literal notranslate"><span class="pre">PYTHONHOME</span></code> and <code class="docutils literal notranslate"><span class="pre">PYTHONPATH</span></code> environment variables would be picked up by NVIDIA Nsight Compute, resulting in locale encoding issues.</p></li>
</ul>
</section>
<section id="updates-in-2019-5-3">
<h4>Updates in 2019.5.3<a class="headerlink" href="#updates-in-2019-5-3" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>More C++ kernel names can be properly demangled</p></li>
</ul>
</section>
<section id="updates-in-2019-5-2">
<h4>Updates in 2019.5.2<a class="headerlink" href="#updates-in-2019-5-2" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Bug fixes</p></li>
</ul>
</section>
<section id="updates-in-2019-5-1">
<h4>Updates in 2019.5.1<a class="headerlink" href="#updates-in-2019-5-1" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for Nsight Compute Visual Studio Integration</p></li>
</ul>
</section>
<section id="updates-in-2019-5">
<h4>Updates in 2019.5<a class="headerlink" href="#updates-in-2019-5" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added <em>section sets</em> to reduce the default overhead and make it easier to configure metric sets for profiling</p></li>
<li><p>Reduced the size of the installation</p></li>
<li><p>Added support for CUDA Graphs Recapture API</p></li>
<li><p>The NvRules API now supports accessing correlation IDs for instanced metrics</p></li>
<li><p>Added breakdown tables for <em>SOL SM</em> and <em>SOL Memory</em> in the Speed Of Light section for Volta+ GPUs</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added a snap-select feature to the Source page heatmap to help navigate large files</p></li>
<li><p>Added support for loading remote CUDA-C source files via SSH on demand for Linux x86_64 targets</p></li>
<li><p>Charts on the Details page provide better help in tool tips when hovering metric names</p></li>
<li><p>Improved the performance of the Source page when scrolling or collapsing</p></li>
<li><p>The charts for Warp States and Compute pipelines are now sorted by value</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Added support for GPU cache control, see <code class="docutils literal notranslate"><span class="pre">--cache-control</span></code></p></li>
<li><p>Added support for setting the kernel name base in command line output, see <code class="docutils literal notranslate"><span class="pre">--kernel-base</span></code></p></li>
<li><p>Added support for listing the available names for <code class="docutils literal notranslate"><span class="pre">--chips</span></code>, see <code class="docutils literal notranslate"><span class="pre">--list-chips</span></code></p></li>
<li><p>Improved the stability on Windows when using <code class="docutils literal notranslate"><span class="pre">--target-processes</span> <span class="pre">all</span></code></p></li>
<li><p>Reduced the profiling overhead for small metric sets in applications with many kernels</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Reduced the overhead caused by demangling kernel names multiple times</p></li>
<li><p>Fixed an issue that kernel names were not demangled in CUDA Graph Nodes resources window</p></li>
<li><p>The connection dialog better disables unsupported combinations or warns of invalid entries</p></li>
<li><p>Fixed metric <em>thread_inst_executed_true</em> to derive from <em>smsp_not_predicated_off_thread_inst_executed</em> on Volta+ GPUs</p></li>
<li><p>Fixed an issue with computing the theoretical occupancy on GV100</p></li>
<li><p>Selecting an entry on the Source page heatmap no longer selects the respective source line, to avoid losing the current selection</p></li>
<li><p>Fixed the current view indicator of the Source page heatmap to be line-accurate</p></li>
<li><p>Fixed an issue when comparing metrics from Pascal and later architectures on the Summary page</p></li>
<li><p>Fixed an issue that metrics representing constant values on Volta+ couldn’t be collected without non-constant metrics</p></li>
</ul>
</section>
<section id="updates-in-2019-4">
<h4>Updates in 2019.4<a class="headerlink" href="#updates-in-2019-4" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Added support for the Linux PowerPC target platform</p></li>
<li><p>Reduced the profiling overhead, especially if no source metrics are collected</p></li>
<li><p>Reduced the overhead for non-profiled kernels</p></li>
<li><p>Improved the deployment performance during remote launches</p></li>
<li><p>Trying to profile on an unsupported GPU now shows an “Unsupported GPU” error message</p></li>
<li><p>Added support for the <code class="docutils literal notranslate"><span class="pre">%i</span></code> sequential number placeholder to generate unique report file names</p></li>
<li><p>Added support for <em>smsp__sass_*</em> metrics on Volta and newer GPUs</p></li>
<li><p>The <em>launch__occupancy_limit_shared_mem</em> now reports the device block limit if no shared memory is used by the kernel</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>The <em>Profile</em> activity shows the command line used to launch ncu</p></li>
<li><p>The heatmap on the Source page now shows the represented metric in its tooltip</p></li>
<li><p>The <em>Memory Workload Analysis Chart</em> on the Details page now supports baselines</p></li>
<li><p>When applying rules, a message displaying the number of new rule results is shown in the status bar</p></li>
<li><p>The Visual Profiler Transition Guide was added to the documentation</p></li>
<li><p>Connection dialog activity options were added to the documentation</p></li>
<li><p>A warning dialog is shown if the application is resumed without Auto-Profile enabled</p></li>
<li><p>Pausing the application now has immediate feedback in the toolbar controls</p></li>
<li><p>Added a <em>Close All</em> command to the <em>File</em> menu</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>The <code class="docutils literal notranslate"><span class="pre">--query-metrics</span></code> option now shows only metric base names for faster metric query. The new option <code class="docutils literal notranslate"><span class="pre">--query-metrics-mode</span></code> can be used to display the valid suffixes for each base metric.</p></li>
<li><p>Added support for passing response files using the <code class="docutils literal notranslate"><span class="pre">&#64;</span></code> operator to specify command line options through a file</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed an issue that reported the wrong executable name in the Session page when attaching</p></li>
<li><p>Fixed issues that chart labels were shown elided on the Details page</p></li>
<li><p>Fixed an issue that caused the cache hitrates to be shown incorrectly when baselines were added</p></li>
<li><p>Fixed an illegal memory access when collecting <em>sass__*_histogram</em> metrics for applications using PyTorch on Pascal GPUs</p></li>
<li><p>Fixed an issue when attempting to collect all <em>smsp__*</em> metrics on Volta and newer GPUs</p></li>
<li><p>Fixed an issue when profiling multi-context applications</p></li>
<li><p>Fixed that profiling start/stop settings from the connection dialog weren’t properly passed to the interactive profile activity</p></li>
<li><p>Fixed that certain <em>smsp__warp_cycles_per_issue_stall*</em> metrics returned negative values on Pascal GPUs</p></li>
<li><p>Fixed that metric names were truncated in the <code class="docutils literal notranslate"><span class="pre">--page</span> <span class="pre">details</span></code> non-CSV command line output</p></li>
<li><p>Fixed that the target application could crash if a connection port was used by another application with higher privileges</p></li>
</ul>
</section>
<section id="updates-in-2019-3-1">
<h4>Updates in 2019.3.1<a class="headerlink" href="#updates-in-2019-3-1" title="Permalink to this headline"></a></h4>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added ability to send bug reports and suggestions for features using <em>Send Feedback</em> in the <em>Help</em> menu</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Fixed calculation of theoretical occupancy for grids with blocks that are not a multiple of 32 threads</p></li>
<li><p>Fixed intercepting child processes launched through Python’s subprocess.Popen class</p></li>
<li><p>Fixed issue of NVTX push/pop ranges not showing up for child threads in NVIDIA Nsight Compute CLI</p></li>
<li><p>Fixed performance regression for metric lookups on the Source page</p></li>
<li><p>Fixed description in rule covering the IMC stall reason</p></li>
<li><p>Fixed cases where baseline values were not correctly calculated in the Memory tables when comparing reports of different architectures</p></li>
<li><p>Fixed incorrect calculation of baseline values in the Executed Instruction Mix chart</p></li>
<li><p>Fixed accessing instanced metrics in the NvRules API</p></li>
<li><p>Fixed a bug that could cause the collection of unnecessary metrics in the Interactive Profile activity</p></li>
<li><p>Fixed potential crash on exit of the profiled target application</p></li>
<li><p>Switched underlying metric for <code class="docutils literal notranslate"><span class="pre">SOL</span> <span class="pre">FB</span></code> in the GPU Speed Of Light section to be driven by <code class="docutils literal notranslate"><span class="pre">dram__throughput.avg.pct_of_peak_sustained_elapsed</span></code> instead of <code class="docutils literal notranslate"><span class="pre">fbpa__throughput.avg.pct_of_peak_sustained_elapsed</span></code></p></li>
</ul>
</section>
<section id="updates-in-2019-3">
<h4>Updates in 2019.3<a class="headerlink" href="#updates-in-2019-3" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Improved performance</p></li>
<li><p>Bug fixes</p></li>
<li><p>Kernel launch context and stream are reported as metrics</p></li>
<li><p>PC sampling configuration options are reported as metrics</p></li>
<li><p>The default base port for connections to the target changed</p></li>
<li><p>Section files support multiple, named Body fields</p></li>
<li><p>NvRules allows users to query metrics using any convertible data type</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Support for filtering kernel launches using their NVTX context</p></li>
<li><p>Support for new options to select the connection port range</p></li>
<li><p>The Profile activity supports configuring PC sampling parameters</p></li>
<li><p>Sections on the Details page support selecting individual bodies</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Support for stepping to kernel launches from specific NVTX contexts</p></li>
<li><p>Support for new <code class="docutils literal notranslate"><span class="pre">--port</span></code> and <code class="docutils literal notranslate"><span class="pre">--max-connections</span></code> options</p></li>
<li><p>Support for new <code class="docutils literal notranslate"><span class="pre">--sampling-*</span></code> options to configure PC sampling parameters</p></li>
<li><p>Section file errors are reported with <code class="docutils literal notranslate"><span class="pre">--list-sections</span></code></p></li>
<li><p>A warning is shown if some section files could not be loaded</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Using the <code class="docutils literal notranslate"><span class="pre">--summary</span></code> option works for reports that include invalid metrics</p></li>
<li><p>The full process executable filename is reported for QNX targets</p></li>
<li><p>The project system now properly stores the state of opened reports</p></li>
<li><p>Fixed PTX syntax highlighting</p></li>
<li><p>Fixed an issue when switching between manual and auto profiling in NVIDIA Nsight Compute</p></li>
<li><p>The source page in NVIDIA Nsight Compute now works with results from multiple processes</p></li>
<li><p>Charts on the NVIDIA Nsight Compute details page use proper localization for numbers</p></li>
<li><p>NVIDIA Nsight Compute no longer requires the system locale to be set to English</p></li>
</ul>
</section>
<section id="updates-in-2019-2">
<h4>Updates in 2019.2<a class="headerlink" href="#updates-in-2019-2" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Improved performance</p></li>
<li><p>Bug fixes</p></li>
<li><p>Kernel launch context and stream are reported as metrics</p></li>
<li><p>PC sampling configuration options are reported as metrics</p></li>
<li><p>The default base port for connections to the target changed</p></li>
<li><p>Section files support multiple, named Body fields</p></li>
<li><p>NvRules allows users to query metrics using any convertible data type</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Support for filtering kernel launches using their NVTX context</p></li>
<li><p>Support for new options to select the connection port range</p></li>
<li><p>The Profile activity supports configuring PC sampling parameters</p></li>
<li><p>Sections on the Details page support selecting individual bodies</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Support for stepping to kernel launches from specific NVTX contexts</p></li>
<li><p>Support for new <code class="docutils literal notranslate"><span class="pre">--port</span></code> and <code class="docutils literal notranslate"><span class="pre">--max-connections</span></code> options</p></li>
<li><p>Support for new <code class="docutils literal notranslate"><span class="pre">--sampling-*</span></code> options to configure PC sampling parameters</p></li>
<li><p>Section file errors are reported with <code class="docutils literal notranslate"><span class="pre">--list-sections</span></code></p></li>
<li><p>A warning is shown if some section files could not be loaded</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Using the <code class="docutils literal notranslate"><span class="pre">--summary</span></code> option works for reports that include invalid metrics</p></li>
<li><p>The full process executable filename is reported for QNX targets</p></li>
<li><p>The project system now properly stores the state of opened reports</p></li>
<li><p>Fixed PTX syntax highlighting</p></li>
<li><p>Fixed an issue when switching between manual and auto profiling in NVIDIA Nsight Compute</p></li>
<li><p>The source page in NVIDIA Nsight Compute now works with results from multiple processes</p></li>
<li><p>Charts on the NVIDIA Nsight Compute details page use proper localization for numbers</p></li>
<li><p>NVIDIA Nsight Compute no longer requires the system locale to be set to English</p></li>
</ul>
</section>
<section id="updates-in-2019-1">
<h4>Updates in 2019.1<a class="headerlink" href="#updates-in-2019-1" title="Permalink to this headline"></a></h4>
<p><strong>General</strong></p>
<ul class="simple">
<li><p>Support for CUDA 10.1</p></li>
<li><p>Improved performance</p></li>
<li><p>Bug fixes</p></li>
<li><p>Profiling on Volta GPUs now uses the same metric names as on Turing GPUs</p></li>
<li><p>Section files support descriptions</p></li>
<li><p>The default sections and rules directory has been renamed to <em>sections</em></p></li>
</ul>
<p><strong>NVIDIA Nsight Compute</strong></p>
<ul class="simple">
<li><p>Added new profiling options to the options dialog</p></li>
<li><p>Details page shows rule result icons in the section headers</p></li>
<li><p>Section descriptions are shown in the details page and in the sections tool window</p></li>
<li><p>Source page supports collapsing multiple source files or functions to show aggregated results</p></li>
<li><p>Source page heatmap color scale has changed</p></li>
<li><p>Invalid metric results are highlighted in the profiler report</p></li>
<li><p>Loaded section and rule files can be opened from the sections tool window</p></li>
</ul>
<p><strong>NVIDIA Nsight Compute CLI</strong></p>
<ul class="simple">
<li><p>Support for profiling child processes on Linux and Windows x86_64 targets</p></li>
<li><p>NVIDIA Nsight Compute CLI uses a temporary file if no output file is specified</p></li>
<li><p>Support for new <code class="docutils literal notranslate"><span class="pre">--quiet</span></code> option</p></li>
<li><p>Support for setting the GPU clock control mode using new <code class="docutils literal notranslate"><span class="pre">--clock-control</span></code> option</p></li>
<li><p>Details page output shows the NVTX context when <code class="docutils literal notranslate"><span class="pre">--nvtx</span></code> is enabled</p></li>
<li><p>Support for filtering kernel launches for profiling based on their NVTX context using new <code class="docutils literal notranslate"><span class="pre">--nvtx-include</span></code> and <code class="docutils literal notranslate"><span class="pre">--nvtx-exclude</span></code> options</p></li>
<li><p>Added new <code class="docutils literal notranslate"><span class="pre">--summary</span></code> options for aggregating profiling results</p></li>
<li><p>Added option <code class="docutils literal notranslate"><span class="pre">--open-in-ui</span></code> to open reports collected with NVIDIA Nsight Compute CLI directly in NVIDIA Nsight Compute</p></li>
</ul>
<p><strong>Resolved Issues</strong></p>
<ul class="simple">
<li><p>Installation directory scripts use absolute paths</p></li>
<li><p>OpenACC kernel names are correctly demangled</p></li>
<li><p>Profile activity report file supports a relative path</p></li>
<li><p>Source view can resolve all applicable files at once</p></li>
<li><p>UI font colors are improved</p></li>
<li><p>Details page layout and label elision issues are resolved</p></li>
<li><p>Turing metrics are properly reported on the Summary page</p></li>
<li><p>All byte-based metrics use a factor of 1000 when scaling units to follow SI standards</p></li>
<li><p>CSV exports properly align columns with empty entries</p></li>
</ul>
<ul class="simple">
<li><p>Fixed the metric computation for double_precision_fu_utilization on GV11b</p></li>
</ul>
<ul class="simple">
<li><p>Fixed incorrect ‘selected’ PC sampling counter values</p></li>
<li><p>The SpeedOfLight section uses ‘max’ instead of ‘avg’ cycles metrics for Elapsed Cycles</p></li>
</ul>
</section>
</section>
</section>
<section id="known-issues">
<h2><span class="section-number">1.2. </span>Known Issues<a class="headerlink" href="#known-issues" title="Permalink to this headline"></a></h2>
<p><strong>Installation</strong></p>
<ul>
<li><p>The installer might not show all patch-level version numbers during installation.</p></li>
<li><p>Some command line options listed in the help of a <em>.run</em> installer of NVIDIA Nsight Compute are affecting only the archive extraction, but not the installation stage. To pass command line options to the embedded installer script, specify those options after <code class="docutils literal notranslate"><span class="pre">--</span></code> in the form of <code class="docutils literal notranslate"><span class="pre">--</span> <span class="pre">-&lt;option&gt;</span></code>. The available options for the installer script are:</p>
<div class="highlight-text notranslate"><div class="highlight"><pre><span></span>-help               : Print help message
-targetpath=&lt;PATH&gt;  : Specify install path
-noprompt           : No prompts. Implies acceptance of the EULA
</pre></div>
</div>
<p>For example, specifying only option <code class="docutils literal notranslate"><span class="pre">--quiet</span></code> extracts the installer archive without any output to the console, but still prompts for user interaction during the installation. To install NVIDIA Nsight Compute without any console output or any user interaction, please specify <code class="docutils literal notranslate"><span class="pre">--quiet</span> <span class="pre">--</span> <span class="pre">-noprompt</span></code>.</p>
</li>
<li><p>After using the SDK Manager to install the NVIDIA Nsight Compute tools, their binary path needs to be manually added to your <code class="docutils literal notranslate"><span class="pre">PATH</span></code> environment variable.</p></li>
<li><p>See also the <a class="reference external" href="index.html#system-requirements">System Requirements</a> for more installation instructions.</p></li>
</ul>
<p><strong>Launch and Connection</strong></p>
<ul class="simple">
<li><p>Launching applications on remote targets/platforms is not supported for several combinations. See <a class="reference external" href="index.html#platform-support">Platform Support</a> for details. Manually launch the application using command line <code class="docutils literal notranslate"><span class="pre">ncu</span> <span class="pre">--mode=launch</span></code> on the remote system and connect using the UI or CLI afterwards.</p></li>
<li><p>In the NVIDIA Nsight Compute connection dialog, a remote system can only be specified for one target platform. Remove a connection from its current target platform in order to be able to add it to another.</p></li>
<li><p>Loading of CUDA sources via SSH requires that the remote connection is configured, and that the hostname/IP address of the connection matches the target (as seen in the report session details). For example, prefer my-machine.my-domain.com, instead of my-machine, even though the latter resolves to the same.</p></li>
<li><p>Other issues concerning remote connections are discussed in the documentation for <a class="reference external" href="../NsightCompute/index.html#remote-connections">remote connections</a>.</p></li>
<li><p>Local connections between NVIDIA Nsight Compute and the launched target application might not work on some ppc64le or aarch64 (sbsa) systems configured to only support IPv6. On these platforms, the <a class="reference external" href="../NsightComputeCli/index.html#environment-variables">NV_COMPUTE_PROFILER_LOCAL_CONNECTION_OVERRIDE=uds</a> environment variable can be set to use <em>Unix Domain Sockets</em> instead of <em>TCP</em> for local connections to work around the problem. On x86_64 Linux, Unix Domain Sockets are used by default, but local TCP connections can be forced using <a class="reference external" href="../NsightComputeCli/index.html#environment-variables">NV_COMPUTE_PROFILER_LOCAL_CONNECTION_OVERRIDE=tcp</a>.</p></li>
</ul>
<p><strong>Profiling and Metrics</strong></p>
<ul class="simple">
<li><p>Profiling of 32-bit processes is not supported.</p></li>
<li><p>Profiling kernels executed on a device that is part of an SLI group is not supported. An “Unsupported GPU” error is shown in this case.</p></li>
<li><p>Profiling a kernel while other contexts are active on the same device (e.g. X server, or secondary CUDA or graphics application) can result in varying metric values for L2/FB (Device Memory) related metrics. Specifically, L2/FB traffic from non-profiled contexts cannot be excluded from the metric results. To completely avoid this issue, profile the application on a GPU without secondary contexts accessing the same device (e.g. no X server on Linux).</p></li>
<li><p>In the current release, profiling a kernel while any other GPU work is executing on the same MIG compute instance can result in varying metric values for all units. NVIDIA Nsight Compute enforces serialization of the CUDA launches within the target application to ensure those kernels do not influence each other. See <a class="reference external" href="../ProfilingGuide/index.html#serialization">Serialization</a> for more details. However, GPU work issued through other APIs in the target process or workloads created by non-target processes running simultaneously in the same MIG compute instance will influence the collected metrics. Note that it is acceptable to run CUDA processes in other MIG compute instances as they will not influence the profiled MIG compute instance.</p></li>
<li><p>On Linux kernels with the setting <code class="docutils literal notranslate"><span class="pre">fs.protected_regular=1</span></code> (e.g. some Ubuntu 20.04 cloud service provider instances), root users may not be able to access the <a class="reference external" href="../ProfilingGuide/index.html#serialization">inter-process lock file</a>. See the <a class="reference external" href="../ProfilingGuide/index.html#faq">FAQ</a> for workarounds.</p></li>
<li><p>Profiling only supports up to 32 device instances, including instances of MIG partitions. Profiling the 33rd or higher device instance will result in indeterminate data.</p></li>
<li><p>Enabling certain metrics can cause GPU kernels to run longer than the driver’s watchdog time-out limit. In these cases the driver will terminate the GPU kernel resulting in an application error and profiling data will not be available. Please disable the driver watchdog time out before profiling such long running CUDA kernels.</p>
<ul>
<li><p>On Linux, setting the X Config option Interactive to false is recommended.</p></li>
<li><p>For Windows, detailed information on disabling the Windows TDR is available at <a class="reference external" href="https://docs.microsoft.com/en-us/windows-hardware/drivers/display/timeout-detection-and-recovery">https://docs.microsoft.com/en-us/windows-hardware/drivers/display/timeout-detection-and-recovery</a></p></li>
</ul>
</li>
<li><p>Collecting device-level metrics, such as the NVLink metrics (<code class="docutils literal notranslate"><span class="pre">nvl*</span></code>), is not supported on <a class="reference external" href="https://www.nvidia.com/en-us/data-center/virtual-gpu-technology/">NVIDIA virtual GPUs</a> (vGPUs).</p></li>
<li><p>As of CUDA 11.4 and R470 TRD1 driver release, NVIDIA Nsight Compute is supported in a vGPU environment which requires a vGPU license. If the license is not obtained after 20 minutes, the reported performance metrics data from the GPU will be inaccurate. This is because of a feature in vGPU environment which reduces performance but retains functionality as specified <a class="reference external" href="https://docs.nvidia.com/grid/latest/grid-licensing-user-guide/index.html#software-enforcement-grid-licensing">here</a>.</p></li>
<li><p>Profiling on <a class="reference external" href="https://www.nvidia.com/en-us/data-center/virtualization/virtual-gpu-migration/">NVIDIA live-migrated virtual machines</a> is not supported and can result in undefined behavior.</p></li>
<li><p>Profiling with enabled multi-process service (MPS) can result in undefined behavior.</p></li>
<li><p>When profiling using <em>Range Replay</em> or <em>Application Range Replay</em> with multiple CUDA Green Contexts active which belong to the same device context, the range result will contain counter values aggregated on all Green Contexts.</p></li>
<li><p>The NVLink Topology section is not supported for a configuration using NVSwitch.</p></li>
<li><p>NVIDIA Nsight Compute does not support per-NVLink metrics.</p></li>
<li><p>NVIDIA Nsight Compute does not support the <em>Logical NVLink Throughput</em> table.</p></li>
<li><p>Setting a reduced NvLink Bandwidth mode does not impact the reported peak values for NvLink metrics. All peak values and corresponding percentages are calculated off the non-reduced NvLink bandwidth. Reconfiguring the NvLink Bandwidth mode using <code class="docutils literal notranslate"><span class="pre">nvidia-smi</span></code> while profiling may lead to undefined tool behavior.</p></li>
</ul>
<ul class="simple">
<li><p>Profiling CUDA graph kernel nodes that can launch device graphs or are part of device-launchable graphs is not supported. Use <a class="reference external" href="../NsightComputeCli/index.html#command-line-options-profile">Graph Profiling</a> mode instead.</p></li>
<li><p>Profiling in <a class="reference external" href="../NsightComputeCli/index.html#command-line-options-profile">Graph Profiling</a> mode is performed on the context that is specified by the stream handle for the graph launch. Consequently, only memory allocated on this context is saved off and restored during graph replay and only kernel nodes executed on this context are profiled.</p></li>
</ul>
<ul class="simple">
<li><p>On CUDA drivers older than 530.x, profiling on Windows Subsystem for Linux (WSL) is not supported if the system has multiple physical NVIDIA GPUs. This is not affected by setting <code class="docutils literal notranslate"><span class="pre">CUDA_VISIBLE_DEVICES</span></code>.</p></li>
<li><p>Collecting software counters through PerfWorks currently forces all functions in the module of the profiled kernel to be loaded. This increases the host and device memory footprint of the target application for the remainder of the process lifetime.</p></li>
<li><p>PM Sampling is not supported when collecting a Profile Series.</p></li>
</ul>
<p><strong>Compatibility</strong></p>
<ul class="simple">
<li><p>Applications calling blocking functions on std input/output streams can cause the profiler to stop until the blocking function call is resolved.</p></li>
<li><p>NVIDIA Nsight Compute can hang on applications using RAPIDS in versions 0.6 and 0.7, due to an issue in cuDF.</p></li>
<li><p>Profiling child processes launched via <code class="docutils literal notranslate"><span class="pre">clone()</span></code> is not supported.</p></li>
<li><p>Profiling child processes launched from Python using <code class="docutils literal notranslate"><span class="pre">os.system()</span></code> is not supported on ppc64le.</p></li>
<li><p>Profiling of Cooperative Groups kernels launched with <code class="docutils literal notranslate"><span class="pre">cuLaunchCooperativeKernelMultiDevice</span></code> is not yet supported.</p></li>
<li><p>On Linux systems, when profiling <em>bsd-csh</em> scripts, the original application output will not be printed. As a workaround, use a different C-shell, e.g. <em>tcsh</em>.</p></li>
<li><p>Attempting to use the <code class="docutils literal notranslate"><span class="pre">--clock-control</span></code> option to set the GPU clocks will fail when profiling on a GPU partition. Please use <code class="docutils literal notranslate"><span class="pre">nvidia-smi</span></code> (installed with NVIDIA display driver) to control the clocks for the entire GPU. This will require administrative privileges when the GPU is partitioned.</p></li>
<li><p>On Linux aarch64, NVIDIA Nsight Compute does not work if the <em>HOME</em> environment variable is not set.</p></li>
<li><p>NVIDIA Nsight Compute versions 2020.1.0 to 2020.2.1 are not compatible with CUDA driver version 460+ if the application launches Cooperative Groups kernels. Profiling will fail with error “UnknownError”.</p></li>
<li><p>Collecting CPU call stack information on Windows Server 2016 can hang NVIDIA Nsight Compute in some cases. Currently, the only workaround is to skip CPU call stack collection on such systems by not specifying the option <code class="docutils literal notranslate"><span class="pre">--call-stack</span></code>.</p></li>
<li><p>When profiling a script, <code class="docutils literal notranslate"><span class="pre">--target-processes</span> <span class="pre">all</span></code> may target utility executables such as <em>xargs</em>, <em>uname</em> or <em>ls</em>. To avoid profiling these, use the <code class="docutils literal notranslate"><span class="pre">--target-processes-filter</span></code> option accordingly.</p></li>
<li><p>On mobile platforms, the <code class="docutils literal notranslate"><span class="pre">--kill</span></code> option is not supported with application replay mode.</p></li>
<li><p>NVIDIA Nsight Compute might show invalid characters for Unicode names and paths on Windows 10. As a workaround, use a third-party terminal emulator, e.g. Git bash.</p></li>
</ul>
<p><strong>User Interface</strong></p>
<ul class="simple">
<li><p>The API Statistics filter in NVIDIA Nsight Compute does not support units.</p></li>
<li><p>File size is the only property considered when resolving source files. Timestamps are currently ignored.</p></li>
<li><p>Terminating or disconnecting an application in the <em>Interactive Profiling</em> activity while the API Stream View is updated can lead to a crash.</p></li>
<li><p>See the <a class="reference external" href="index.html#optix">OptiX library support section</a> for limitations concerning the <a class="reference external" href="../NsightCompute/index.html#as-viewer">Acceleration Structure Viewer</a>.</p></li>
<li><p>After updating from a previous version of NVIDIA Nsight Compute on Linux, the file load dialog may not allow column resizing and sorting. As a workaround, the <em>~/.config/QtProject.conf</em> file can be edited to remove the <em>treeViewHeader</em> entry from the <em>[FileDialog]</em> section.</p></li>
</ul>
</section>
<section id="support">
<h2><span class="section-number">1.3. </span>Support<a class="headerlink" href="#support" title="Permalink to this headline"></a></h2>
<p>Information on supported platforms and GPUs.</p>
<section id="platform-support">
<h3><span class="section-number">1.3.1. </span>Platform Support<a class="headerlink" href="#platform-support" title="Permalink to this headline"></a></h3>
<p>Host denotes the UI can run on that platform. Target means that we can instrument applications on that platform for data collection. Applications launched with instrumentation on a target system can be connected to from most host platforms. The reports collected on one system can be opened on any other system.</p>
<table class="docutils align-default" id="id2">
<caption><span class="caption-text">Table 1. Platforms supported by NVIDIA Nsight Compute</span><a class="headerlink" href="#id2" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 31%" />
<col style="width: 5%" />
<col style="width: 64%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"></th>
<th class="head"><p>Host</p></th>
<th class="head"><p>Targets</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>Windows</p></td>
<td><p>Yes</p></td>
<td><p>Windows*, Linux (x86_64)</p></td>
</tr>
<tr class="row-odd"><td><p>Windows Subsystem for Linux (WSL2)</p></td>
<td><p>Yes</p></td>
<td><p>Windows Subsystem for Linux (WSL2) as part of the Linux (x86_64) package.</p></td>
</tr>
<tr class="row-even"><td><p>Linux (x86_64)</p></td>
<td><p>Yes</p></td>
<td><p>Windows*, Linux (x86_64), Linux (ppc64le), Linux (aarch64 sbsa)</p></td>
</tr>
<tr class="row-odd"><td><p>Linux (ppc64le)</p></td>
<td><p>No</p></td>
<td><p>Linux (ppc64le)</p></td>
</tr>
<tr class="row-even"><td><p>Linux (aarch64 sbsa)</p></td>
<td><p>Yes</p></td>
<td><p>Linux (aarch64 sbsa)</p></td>
</tr>
<tr class="row-odd"><td><p>Linux (x86_64) (Drive SDK)</p></td>
<td><p>Yes</p></td>
<td><p>Windows*, Linux (x86_64), Linux (aarch64), QNX</p></td>
</tr>
<tr class="row-even"><td><p>MacOSX 11+</p></td>
<td><p>Yes</p></td>
<td><p>Windows*, Linux (x86_64), Linux (ppc64le)</p></td>
</tr>
<tr class="row-odd"><td><p>Linux (aarch64)</p></td>
<td><p>No</p></td>
<td><p>Linux (aarch64)</p></td>
</tr>
<tr class="row-even"><td><p>QNX</p></td>
<td><p>No</p></td>
<td><p>QNX</p></td>
</tr>
</tbody>
</table>
<p>Target platforms marked with * do not support remote launch from the respective host. Remote launch means that the application can be launched on the target system from the host UI. For targets marked with *, the application must instead be launched manually on the target system.</p>
<p>Profiling of 32-bit processes is not supported.</p>
</section>
<section id="gpu-support">
<h3><span class="section-number">1.3.2. </span>GPU Support<a class="headerlink" href="#gpu-support" title="Permalink to this headline"></a></h3>
<table class="docutils align-default" id="id3">
<caption><span class="caption-text">Table 2. GPU architectures supported by NVIDIA Nsight Compute</span><a class="headerlink" href="#id3" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 63%" />
<col style="width: 37%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Architecture</p></th>
<th class="head"><p>Support</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>Kepler</p></td>
<td><p>No</p></td>
</tr>
<tr class="row-odd"><td><p>Maxwell</p></td>
<td><p>No</p></td>
</tr>
<tr class="row-even"><td><p>Pascal</p></td>
<td><p>No</p></td>
</tr>
<tr class="row-odd"><td><p>Volta GV100</p></td>
<td><p>Yes</p></td>
</tr>
<tr class="row-even"><td><p>Volta GV11b</p></td>
<td><p>Yes</p></td>
</tr>
<tr class="row-odd"><td><p>Turing TU1xx</p></td>
<td><p>Yes</p></td>
</tr>
<tr class="row-even"><td><p>NVIDIA GA100</p></td>
<td><p>Yes</p></td>
</tr>
<tr class="row-odd"><td><p>NVIDIA GA10x</p></td>
<td><p>Yes</p></td>
</tr>
<tr class="row-even"><td><p>NVIDIA GA10b</p></td>
<td><p>Yes</p></td>
</tr>
<tr class="row-odd"><td><p>NVIDIA GH100</p></td>
<td><p>Yes</p></td>
</tr>
</tbody>
</table>
<p>Most metrics used in NVIDIA Nsight Compute are identical to those of the PerfWorks Metrics API and follow the documented <a class="reference external" href="../ProfilingGuide/index.html#metrics-structure">Metrics Structure</a>. A comparison between the metrics used in nvprof and their equivalent in NVIDIA Nsight Compute can be found in the <a class="reference external" href="https://docs.nvidia.com/nsight-compute/NsightComputeCli/index.html#nvprof-metric-comparison">NVIDIA Nsight Compute CLI User Manual</a>.</p>
</section>
<section id="library-support">
<h3><span class="section-number">1.3.3. </span>Library Support<a class="headerlink" href="#library-support" title="Permalink to this headline"></a></h3>
<p>NVIDIA Nsight Compute can be used to profile CUDA applications, as well as applications that use CUDA via NVIDIA or third-party libraries. For most such libraries, the behavior is expected to be identical to applications using CUDA directly. However, for certain libraries, NVIDIA Nsight Compute has certain restrictions, alternate behavior, or requires non-default setup steps prior to profiling.</p>
<section id="optix">
<h4>OptiX<a class="headerlink" href="#optix" title="Permalink to this headline"></a></h4>
<p>NVIDIA Nsight Compute supports profiling of OptiX applications, but with certain restrictions.</p>
<ul>
<li><p><strong>Internal Kernels</strong></p>
<p>Kernels launched by OptiX that contain no user-defined code are given the generic name <em>NVIDIA internal</em>. These kernels show up on the API Stream in the NVIDIA Nsight Compute UI, and can be profiled in both the UI as well as the NVIDIA Nsight Compute CLI. However, no CUDA-C source, PTX or SASS is available for them.</p>
</li>
<li><p><strong>User Kernels</strong></p>
<p>Kernels launched by OptiX can contain user-defined code. OptiX identifies these kernels in the API Stream with a custom name. This name starts with <em>raygen__</em> (for “ray generation”). These kernels show up on the API Stream and can be profiled in the UI as well as the NVIDIA Nsight Compute CLI. The Source page displays CUDA-C source, PTX and SASS defined by the user. Certain parts of the kernel, including device functions that contain OptiX-internal code, will not be available in the Source page.</p>
</li>
<li><p><strong>SASS</strong></p>
<p>When SASS information is available in the profile report, certain instructions might not be available in the Source page and shown as <em>N/A</em>.</p>
</li>
</ul>
<p>The <a class="reference external" href="../NsightCompute/index.html#as-viewer">Acceleration Structure Viewer</a> for OptiX traversable handles currently has the following limitations:</p>
<ul class="simple">
<li><p>The <a class="reference external" href="../NsightCompute/index.html#as-viewer">Acceleration Structure Viewer</a> is not supported on MacOSX.</p></li>
<li><p>Viewing instance acceleration structures using multi-level instancing is not supported.</p></li>
<li><p>Applying motion traversables to acceleration structures is not supported.</p></li>
</ul>
<p>The following feature set is supported per OptiX API version:</p>
<table class="table-no-stripes docutils align-default">
<colgroup>
<col style="width: 26%" />
<col style="width: 24%" />
<col style="width: 24%" />
<col style="width: 26%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><strong>OptiX API Version</strong></p></td>
<td><p><strong>Kernel Profiling</strong></p></td>
<td><p><strong>API Interception</strong></p></td>
<td><p><strong>Resource Tracking</strong></p></td>
</tr>
<tr class="row-even"><td><p>6.x</p></td>
<td><p>Yes</p></td>
<td><p>No</p></td>
<td><p>No</p></td>
</tr>
<tr class="row-odd"><td><p>7.0 - 8.0</p></td>
<td><p>Yes</p></td>
<td><p>Yes</p></td>
<td><p>Yes</p></td>
</tr>
</tbody>
</table>
</section>
</section>
<section id="system-requirements">
<h3><span class="section-number">1.3.4. </span>System Requirements<a class="headerlink" href="#system-requirements" title="Permalink to this headline"></a></h3>
<p class="title sectiontitle rubric">Linux and WSL</p>
<p>On all Linux platforms, NVIDIA Nsight Compute CLI requires GLIBC version 2.15 or higher. On x86_64, the NVIDIA Nsight Compute UI requires GLIBC version 2.17 or higher. On Linux (aarch64 sbsa), the NVIDIA Nsight Compute UI requires GLIBC version 2.26 or higher.</p>
<p>The NVIDIA Nsight Compute UI requires several packages to be installed to enable Qt. Please refer to the <a class="reference external" href="https://doc.qt.io/qt-6/linux-requirements.html">Qt for X11 Requirements</a>. When executing <code class="docutils literal notranslate"><span class="pre">ncu-ui</span></code> with missing dependencies, an error message with information on the missing packages is shown. Note that only one package will be shown at a time, even though multiple may be missing from your system. For selected operating systems, the following commands install needed packages for NVIDIA Nsight Compute on X11:</p>
<ul>
<li><p><strong>Ubuntu 18.04</strong></p>
<p><code class="docutils literal notranslate"><span class="pre">apt</span> <span class="pre">install</span> <span class="pre">libopengl0</span> <span class="pre">libxcb-icccm4</span> <span class="pre">libxcb-image0</span> <span class="pre">libxcb-keysyms1</span> <span class="pre">libxcb-render-util0</span> <span class="pre">libxcb-xkb1</span> <span class="pre">libxkbcommon-x11-0</span></code></p>
</li>
<li><p><strong>Ubuntu 20.04</strong></p>
<p><code class="docutils literal notranslate"><span class="pre">apt</span> <span class="pre">install</span> <span class="pre">libopengl0</span> <span class="pre">libxcb-icccm4</span> <span class="pre">libxcb-image0</span> <span class="pre">libxcb-keysyms1</span> <span class="pre">libxcb-render-util0</span> <span class="pre">libxcb-xinerama0</span> <span class="pre">libxcb-xkb1</span> <span class="pre">libxkbcommon-x11-0</span></code></p>
</li>
<li><p><strong>RHEL 8.6</strong></p>
<p><code class="docutils literal notranslate"><span class="pre">yum</span> <span class="pre">install</span> <span class="pre">libglvnd-opengl</span> <span class="pre">libxcb</span> <span class="pre">libxkbcommon-x11</span></code></p>
</li>
</ul>
<p>Profiling on Windows Subsystem for Linux (WSL) is only supported with WSL version 2.
Profiling is supported on Windows 10 WSL with OS build version 19044 and greater, and NVIDIA display driver version 545 or higher.
It is not supported on Windows 10 WSL for systems that exceed 1 TB of system memory.
Profiling is supported on Windows 11 WSL with NVIDIA display driver version 525 or higher.</p>
<p>The Linux (x86_64) NVIDIA Nsight Compute package can be used and should be installed directly within WSL2.
Remote profiling to and from WSL2 works equivalently to regular Linux (x86_64) hosts and targets, as long as the WSL2 instance is accessible via SSH.
Access to NVIDIA GPU Performance Counters must be enabled in the NVIDIA Control Panel of the Windows host.
See also the <a class="reference external" href="https://docs.nvidia.com/cuda/wsl-user-guide/index.html">CUDA on WSL User Guide</a>.</p>
<p class="title sectiontitle rubric">Windows</p>
<p>Only Windows 10 and 11 are supported as host and target.</p>
<p>The Visual Studio 2017 redistributable is not automatically installed by the NVIDIA Nsight Compute installer. The workaround is to install the x64 version of the ‘Microsoft Visual C++ Redistributable for Visual Studio 2017’ manually. The installer is linked on the main download page for Visual Studio at <a class="reference external" href="https://www.visualstudio.com/downloads/">https://www.visualstudio.com/downloads/</a> or download directly from <a class="reference external" href="https://go.microsoft.com/fwlink/?LinkId=746572">https://go.microsoft.com/fwlink/?LinkId=746572</a>.</p>
<p class="rubric-h1 rubric">Notices</p>
<p class="rubric-h2 rubric">Notices</p>
<p>ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND SEPARATELY, “MATERIALS”) ARE BEING PROVIDED “AS IS.” NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.</p>
<p>Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the consequences of use of such information or for any infringement of patents or other rights of third parties that may result from its use. No license is granted by implication or otherwise under any patent rights of NVIDIA Corporation. Specifications mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems without express written approval of NVIDIA Corporation.</p>
<p class="rubric-h2 rubric">Trademarks</p>
<p>NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation in the U.S. and other countries. Other company and product names may be trademarks of the respective companies with which they are associated.</p>
</section>
</section>
</section>


           </div>
          </div>
          <footer>

  <hr/>

  <div role="contentinfo">
    <p>&#169; Copyright 2018-2024, NVIDIA Corporation &amp; Affiliates. All rights reserved.
      <span class="lastupdated">Last updated on Mar 06, 2024.
      </span></p>
  </div>

   

</footer>
        </div>
      </div>
    </section>
  </div>
  <script>
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>
 



</body>
</html>