File: index.html

package info (click to toggle)
nvidia-cuda-toolkit 12.4.1-2
  • links: PTS, VCS
  • area: non-free
  • in suites: trixie
  • size: 18,505,836 kB
  • sloc: ansic: 203,477; cpp: 64,769; python: 34,699; javascript: 22,006; xml: 13,410; makefile: 3,085; sh: 2,343; perl: 352
file content (2202 lines) | stat: -rw-r--r-- 205,216 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
  <meta charset="utf-8" /><meta name="generator" content="Docutils 0.17.1: http://docutils.sourceforge.net/" />
<meta content="The User Guide for Nsight Compute." name="description" />
<meta content="User Guide" name="keywords" />

  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>3. Nsight Compute &mdash; NsightCompute 12.4 documentation</title>
      <link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
      <link rel="stylesheet" href="../_static/css/theme.css" type="text/css" />
      <link rel="stylesheet" href="../_static/design-style.b7bb847fb20b106c3d81b95245e65545.min.css" type="text/css" />
      <link rel="stylesheet" href="../_static/omni-style.css" type="text/css" />
      <link rel="stylesheet" href="../_static/api-styles.css" type="text/css" />
    <link rel="shortcut icon" href="../_static/nsight-compute.ico"/>
  <!--[if lt IE 9]>
    <script src="../_static/js/html5shiv.min.js"></script>
  <![endif]-->
  
        <script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js"></script>
        <script src="../_static/jquery.js"></script>
        <script src="../_static/underscore.js"></script>
        <script src="../_static/doctools.js"></script>
        <script src="../_static/mermaid-init.js"></script>
        <script src="../_static/design-tabs.js"></script>
        <script src="../_static/version.js"></script>
        <script src="../_static/social-media.js"></script>
    <script src="../_static/js/theme.js"></script>
    <link rel="index" title="Index" href="../genindex.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="next" title="4. Nsight Compute CLI" href="../NsightComputeCli/index.html" />
    <link rel="prev" title="2. Kernel Profiling Guide" href="../ProfilingGuide/index.html" />
 


</head>

<body class="wy-body-for-nav"> 
  <div class="wy-grid-for-nav">
    <nav data-toggle="wy-nav-shift" class="wy-nav-side">
      <div class="wy-side-scroll">
        <div class="wy-side-nav-search" >


  <a href="../index.html">
  <img src="../_static/nsight-compute.png" class="logo" alt="Logo"/>
</a>

<div role="search">
  <form id="rtd-search-form" class="wy-form" action="../search.html" method="get">
    <input type="text" name="q" placeholder="Search docs" />
    <input type="hidden" name="check_keywords" value="yes" />
    <input type="hidden" name="area" value="default" />
  </form>
</div>
        </div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
              <p class="caption" role="heading"><span class="caption-text">Nsight Compute</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../ReleaseNotes/index.html">1. Release Notes</a></li>
<li class="toctree-l1"><a class="reference internal" href="../ProfilingGuide/index.html">2. Kernel Profiling Guide</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">3. Nsight Compute</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#introduction">3.1. Introduction</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#overview">3.1.1. Overview</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#quickstart">3.2. Quickstart</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#interactive-profile-activity">3.2.1. Interactive Profile Activity</a></li>
<li class="toctree-l3"><a class="reference internal" href="#non-interactive-profile-activity">3.2.2. Non-Interactive Profile Activity</a></li>
<li class="toctree-l3"><a class="reference internal" href="#system-trace-activity">3.2.3. System Trace Activity</a></li>
<li class="toctree-l3"><a class="reference internal" href="#navigate-the-report">3.2.4. Navigate the Report</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#connection-dialog">3.3. Connection Dialog</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#remote-connections">3.3.1. Remote Connections</a></li>
<li class="toctree-l3"><a class="reference internal" href="#id2">3.3.2. Interactive Profile Activity</a></li>
<li class="toctree-l3"><a class="reference internal" href="#profile-activity">3.3.3. Profile Activity</a></li>
<li class="toctree-l3"><a class="reference internal" href="#reset">3.3.4. Reset</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#main-menu-and-toolbar">3.4. Main Menu and Toolbar</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#id3">3.4.1. Main Menu</a></li>
<li class="toctree-l3"><a class="reference internal" href="#main-toolbar">3.4.2. Main Toolbar</a></li>
<li class="toctree-l3"><a class="reference internal" href="#status-banners">3.4.3. Status Banners</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#tool-windows">3.5. Tool Windows</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#api-statistics">3.5.1. API Statistics</a></li>
<li class="toctree-l3"><a class="reference internal" href="#api-stream">3.5.2. API Stream</a></li>
<li class="toctree-l3"><a class="reference internal" href="#baselines">3.5.3. Baselines</a></li>
<li class="toctree-l3"><a class="reference internal" href="#metric-details">3.5.4. Metric Details</a></li>
<li class="toctree-l3"><a class="reference internal" href="#launch-details">3.5.5. Launch Details</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#header">Header</a></li>
<li class="toctree-l4"><a class="reference internal" href="#body">Body</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#nvtx">3.5.6. NVTX</a></li>
<li class="toctree-l3"><a class="reference internal" href="#resources">3.5.7. Resources</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#memory-allocations">Memory Allocations</a></li>
<li class="toctree-l4"><a class="reference internal" href="#graphviz-dot-and-svg-exports">Graphviz DOT and SVG exports</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#metric-selection">3.5.8. Metric Selection</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#profiler-report">3.6. Profiler Report</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#profiler-report-header">3.6.1. Header</a></li>
<li class="toctree-l3"><a class="reference internal" href="#report-pages">3.6.2. Report Pages</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#session-page">Session Page</a></li>
<li class="toctree-l4"><a class="reference internal" href="#summary-page">Summary Page</a></li>
<li class="toctree-l4"><a class="reference internal" href="#details-page">Details Page</a></li>
<li class="toctree-l4"><a class="reference internal" href="#source-page">Source Page</a><ul>
<li class="toctree-l5"><a class="reference internal" href="#navigation">Navigation</a></li>
<li class="toctree-l5"><a class="reference internal" href="#metrics">Metrics</a></li>
<li class="toctree-l5"><a class="reference internal" href="#profiles">Profiles</a></li>
<li class="toctree-l5"><a class="reference internal" href="#limitations">Limitations</a></li>
</ul>
</li>
<li class="toctree-l4"><a class="reference internal" href="#comments-page">Comments Page</a></li>
<li class="toctree-l4"><a class="reference internal" href="#call-stack-nvtx-page">Call Stack / NVTX Page</a></li>
<li class="toctree-l4"><a class="reference internal" href="#raw-page">Raw Page</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#metrics-and-units">3.6.3. Metrics and Units</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#id8">3.7. Baselines</a></li>
<li class="toctree-l2"><a class="reference internal" href="#standalone-source-viewer">3.8. Standalone Source Viewer</a></li>
<li class="toctree-l2"><a class="reference internal" href="#source-comparison">3.9. Source Comparison</a></li>
<li class="toctree-l2"><a class="reference internal" href="#occupancy-calculator">3.10. Occupancy Calculator</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#tables">3.10.1. Tables</a></li>
<li class="toctree-l3"><a class="reference internal" href="#graphs">3.10.2. Graphs</a></li>
<li class="toctree-l3"><a class="reference internal" href="#gpu-data">3.10.3. GPU Data</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#acceleration-structure-viewer">3.11. Acceleration Structure Viewer</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#as-viewer-nav">3.11.1. Navigation</a></li>
<li class="toctree-l3"><a class="reference internal" href="#filtering-and-highlighting">3.11.2. Filtering and Highlighting</a></li>
<li class="toctree-l3"><a class="reference internal" href="#rendering-options">3.11.3. Rendering Options</a></li>
<li class="toctree-l3"><a class="reference internal" href="#exporting">3.11.4. Exporting</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#options">3.12. Options</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#profile">3.12.1. Profile</a></li>
<li class="toctree-l3"><a class="reference internal" href="#environment">3.12.2. Environment</a></li>
<li class="toctree-l3"><a class="reference internal" href="#connection">3.12.3. Connection</a><ul>
<li class="toctree-l4"><a class="reference internal" href="#target-connection-properties">Target Connection Properties</a></li>
<li class="toctree-l4"><a class="reference internal" href="#host-connection-properties">Host Connection Properties</a></li>
</ul>
</li>
<li class="toctree-l3"><a class="reference internal" href="#source-lookup">3.12.4. Source Lookup</a></li>
<li class="toctree-l3"><a class="reference internal" href="#send-feedback">3.12.5. Send Feedback</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#projects">3.13. Projects</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#project-dialogs">3.13.1. Project Dialogs</a></li>
<li class="toctree-l3"><a class="reference internal" href="#project-explorer">3.13.2. Project Explorer</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#visual-profiler-transition-guide">3.14. Visual Profiler Transition Guide</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#trace">3.14.1. Trace</a></li>
<li class="toctree-l3"><a class="reference internal" href="#sessions">3.14.2. Sessions</a></li>
<li class="toctree-l3"><a class="reference internal" href="#timeline">3.14.3. Timeline</a></li>
<li class="toctree-l3"><a class="reference internal" href="#analysis">3.14.4. Analysis</a></li>
<li class="toctree-l3"><a class="reference internal" href="#command-line-arguments">3.14.5. Command Line Arguments</a></li>
</ul>
</li>
<li class="toctree-l2"><a class="reference internal" href="#visual-studio-integration-guide">3.15. Visual Studio Integration Guide</a><ul>
<li class="toctree-l3"><a class="reference internal" href="#visual-studio-integration-overview">3.15.1. Visual Studio Integration Overview</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="../NsightComputeCli/index.html">4. Nsight Compute CLI</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Developer Interfaces</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../CustomizationGuide/index.html">1. Customization Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="../NvRulesAPI/index.html">2. NvRules API</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Training</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../Training/index.html">Training</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Release Information</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../Archives/index.html">Archives</a></li>
</ul>
<p class="caption" role="heading"><span class="caption-text">Copyright and Licenses</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="../CopyrightAndLicenses/index.html">Copyright and Licenses</a></li>
</ul>

        </div>
      </div>
    </nav>

    <section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
          <i data-toggle="wy-nav-top" class="fa fa-bars"></i>
          <a href="../index.html">NsightCompute</a>
      </nav>

      <div class="wy-nav-content">
        <div class="rst-content">
          <div role="navigation" aria-label="Page navigation">
  <ul class="wy-breadcrumbs">


<li><a href="../index.html" class="icon icon-home"></a> &raquo;</li>
<li><span class="section-number">3. </span>Nsight Compute</li>

      <li class="wy-breadcrumbs-aside">
      </li>
<li class="wy-breadcrumbs-aside">


  <span>v2024.1.1 |</span>



  <a href="https://developer.nvidia.com/nsight-compute-history" class="reference external">Archive</a>


  <span>&nbsp;</span>
</li>

  </ul>
  <hr/>
</div>
          <div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
           <div itemprop="articleBody">
             
  <section id="nsight-compute">
<h1><span class="section-number">3. </span>Nsight Compute<a class="headerlink" href="#nsight-compute" title="Permalink to this headline"></a></h1>
<p>The User Guide for Nsight Compute.</p>
<section id="introduction">
<h2><span class="section-number">3.1. </span>Introduction<a class="headerlink" href="#introduction" title="Permalink to this headline"></a></h2>
<p>For users migrating from Visual Profiler to NVIDIA Nsight Compute, please see the <a class="reference external" href="index.html#nvvp-guide">Visual Profiler Transition Guide</a> for comparison of features and workflows.</p>
<section id="overview">
<h3><span class="section-number">3.1.1. </span>Overview<a class="headerlink" href="#overview" title="Permalink to this headline"></a></h3>
<p id="about-nsight-compute">This document is a user guide to the next-generation NVIDIA Nsight Compute profiling tools. NVIDIA Nsight Compute is an interactive kernel profiler for CUDA applications. It provides detailed performance metrics and API debugging via a user interface and command line tool. In addition, its baseline feature allows users to compare results within the tool. NVIDIA Nsight Compute provides a customizable and data-driven user interface and metric collection and can be extended with analysis scripts for post-processing results.</p>
<p><strong>Important Features</strong></p>
<ul class="simple">
<li><p>Interactive kernel profiler and API debugger</p></li>
<li><p>Graphical profile report</p></li>
<li><p>Result comparison across one or multiple reports within the tool</p></li>
<li><p>Fast Data Collection</p></li>
<li><p>UI and Command Line interface</p></li>
<li><p>Fully customizable reports and analysis rules</p></li>
</ul>
</section>
</section>
<section id="quickstart">
<h2><span class="section-number">3.2. </span>Quickstart<a class="headerlink" href="#quickstart" title="Permalink to this headline"></a></h2>
<p>The following sections provide brief step-by-step guides of how to set up and run NVIDIA Nsight Compute to collect profile information. All directories are relative to the base directory of NVIDIA Nsight Compute, unless specified otherwise.</p>
<p>The UI executable is called ncu-ui. A shortcut with this name is located in the base directory of the NVIDIA Nsight Compute installation. The actual executable is located in the folder <code class="docutils literal notranslate"><span class="pre">host\windows-desktop-win7-x64</span></code> on Windows or <code class="docutils literal notranslate"><span class="pre">host/linux-desktop-glibc_2_11_3-x64</span></code> on Linux. By default, when installing from a Linux <code class="docutils literal notranslate"><span class="pre">.run</span></code> file, NVIDIA Nsight Compute is located in <code class="docutils literal notranslate"><span class="pre">/usr/local/cuda-&lt;cuda-version&gt;/nsight-compute-&lt;version&gt;</span></code>. When installing from a <code class="docutils literal notranslate"><span class="pre">.deb</span></code> or <code class="docutils literal notranslate"><span class="pre">.rpm</span></code> package, it is located in <code class="docutils literal notranslate"><span class="pre">/opt/nvidia/nsight-compute/&lt;version&gt;</span></code> to be consistent with <a class="reference external" href="https://developer.nvidia.com/nsight-systems">Nsight Systems</a>. In Windows, the default path is <code class="docutils literal notranslate"><span class="pre">C:\Program</span> <span class="pre">Files\NVIDIA</span> <span class="pre">Corporation\Nsight</span> <span class="pre">Compute</span> <span class="pre">&lt;version&gt;</span></code>.</p>
<p>After starting NVIDIA Nsight Compute, by default the <em>Welcome Page</em> is opened. The <em>Start</em> section allows the user to start a new activity, open an existing report, create a new project or load an existing project. The <em>Continue</em> section provides links to recently opened reports and projects. The <em>Explore</em> section provides information about what is new in the latest release, as well as links to additional training. See <a class="reference external" href="index.html#options-environment">Environment</a> on how to change the start-up action.</p>
<figure class="align-center" id="quick-start-fig-welcome-page">
<img alt="../_images/welcome-page.png" src="../_images/welcome-page.png" />
<figcaption>
<p><span class="caption-text">Welcome Page</span><a class="headerlink" href="#quick-start-fig-welcome-page" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<section id="interactive-profile-activity">
<span id="quick-start-interactive"></span><h3><span class="section-number">3.2.1. </span>Interactive Profile Activity<a class="headerlink" href="#interactive-profile-activity" title="Permalink to this headline"></a></h3>
<ol class="arabic">
<li><p><strong>Launch the target application from NVIDIA Nsight Compute</strong></p>
<p>When starting NVIDIA Nsight Compute, the <em>Welcome Page</em> will appear. Click on <em>Quick Launch</em> to open the <em>Connection</em> dialog. If the <em>Connection</em> dialog doesn’t appear, you can open it using the <em>Connect</em> button from the main toolbar, as long as you are not currently connected. Select your target platform on the left-hand side and your connection target (machine) from the <em>Connection</em> drop down. If you have your local target platform selected, <code class="docutils literal notranslate"><span class="pre">localhost</span></code> will become available as a connection. Use the + button to add a new connection target. Then, continue by filling in the details in the <em>Launch</em> tab. In the <em>Activity</em> panel, select the Interactive Profile activity to initiate a session that allows controlling the execution of the target application and selecting the kernels of interest interactively. Press <em>Launch</em> to start the session.</p>
<figure class="align-center" id="quick-start-interactive-profiling-connect">
<img alt="../_images/quick-start-interactive-profiling-connect.png" src="../_images/quick-start-interactive-profiling-connect.png" />
</figure>
&nbsp;</li>
<li><p><strong>Launch the target application with tools instrumentation from the command line</strong></p>
<p>The ncu can act as a simple wrapper that forces the target application to load the necessary libraries for tools instrumentation. The parameter <code class="docutils literal notranslate"><span class="pre">--mode=launch</span></code> specifies that the target application should be launched and suspended before the first instrumented API call. That way the application waits until we connect with the UI.</p>
<div class="highlight-text notranslate"><div class="highlight"><pre>$ ncu --mode=launch CuVectorAddDrv.exe
</pre></div>
</div>
</li>
<li><p><strong>Launch NVIDIA Nsight Compute and connect to target application</strong></p>
<figure class="align-center" id="quick-start-interactive-profiling-attach">
<img alt="../_images/quick-start-interactive-profiling-attach.png" src="../_images/quick-start-interactive-profiling-attach.png" />
</figure>
<p>Select the target machine at the top of the dialog to connect and update the list of attachable applications. By default, <em>localhost</em> is pre-selected if the target matches your current local platform. Select the <em>Attach</em> tab and the target application of interest and press <em>Attach</em>. Once connected, the layout of NVIDIA Nsight Compute changes into stepping mode that allows you to control the execution of any calls into the instrumented API. When connected, the <em>API Stream</em> window indicates that the target application waits before the very first API call.</p>
<figure class="align-center" id="quick-start-interactive-profiling-connected">
<img alt="../_images/quick-start-interactive-profiling-connected.png" src="../_images/quick-start-interactive-profiling-connected.png" />
</figure>
&nbsp;</li>
<li><p><strong>Control application execution</strong></p>
<p>Use the <em>API Stream</em> window to step the calls into the instrumented API. The dropdown at the top allows switching between different CPU threads of the application. <em>Step In</em> (F11), <em>Step Over</em> (F10), and <em>Step Out</em> (Shift + F11) are available from the <em>Debug</em> menu or the corresponding toolbar buttons. While stepping, function return values and function parameters are captured.</p>
<figure class="align-center" id="quick-start-interactive-profiling-api-stream">
<img alt="../_images/quick-start-interactive-profiling-api-stream.png" src="../_images/quick-start-interactive-profiling-api-stream.png" />
</figure>
<p>Use <em>Resume</em> (F5) and <em>Pause</em> to allow the program to run freely. Freeze control is available to define the behavior of threads that are currently not in focus, i.e. not selected in the thread drop down. By default, the <em>API Stream</em> stops on any API call that returns an error code. This can be toggled in the <em>Debug</em> menu by <em>Break On API Error</em>.</p>
</li>
<li><p><strong>Isolate a kernel launch</strong></p>
<p>To quickly isolate a kernel launch for profiling, use the <em>Run to Next Kernel</em> button in the toolbar of the <em>API Stream</em> window to jump to the next kernel launch. The execution will stop before the kernel launch is executed.</p>
<figure class="align-center" id="quick-start-interactive-profiling-next-launch">
<img alt="../_images/quick-start-interactive-profiling-next-launch.png" src="../_images/quick-start-interactive-profiling-next-launch.png" />
</figure>
&nbsp;</li>
<li><p><strong>Profile a kernel launch</strong></p>
<p>Once the execution of the target application is suspended at a kernel launch, additional actions become available in the UI. These actions are either available from the menu or from the toolbar. Please note that the actions are disabled if the API stream is not in a qualifying state (not at a kernel launch or launching on an unsupported GPU). To profile, press <em>Profile Kernel</em> and wait until the result is shown in the <a class="reference external" href="index.html#profiler-report">Profiler Report</a>. Profiling progress is reported in the lower right corner status bar.</p>
<p>Instead of manually selecting <em>Profile</em>, it is also possible to enable <em>Auto Profile</em> from the <em>Profile</em> menu. If enabled, each kernel matching the current kernel filter (if any) will be profiled using the current section configuration. This is especially useful if an application is to be profiled unattended, or the number of kernel launches to be profiled is very large. Sections can be enabled or disabled using the <a class="reference external" href="index.html#tool-window-sections-info">Metric Selection</a> tool window.</p>
<p><em>Profile Series</em> allows you to configure the collection of a set of profile results at once. Each result in the set is profiled with varying parameters. Series are useful to investigate the behavior of a kernel across a large set of parameters without the need to recompile and rerun the application many times.</p>
</li>
</ol>
<p>For a detailed description of the options available in this activity, see <a class="reference external" href="index.html#connection-activity-interactive">Interactive Profile Activity</a>.</p>
</section>
<section id="non-interactive-profile-activity">
<h3><span class="section-number">3.2.2. </span>Non-Interactive Profile Activity<a class="headerlink" href="#non-interactive-profile-activity" title="Permalink to this headline"></a></h3>
<ol class="arabic">
<li><p><strong>Launch the target application from NVIDIA Nsight Compute</strong></p>
<p>When starting NVIDIA Nsight Compute, the <em>Welcome Page</em> will appear. Click on <em>Quick Launch</em> to open the <em>Connection</em> dialog. If the <em>Connection</em> dialog doesn’t appear, you can open it using the <em>Connect</em> button from the main toolbar, as long as you are not currently connected. Select your target platform on the left-hand side and your localhost from the <em>Connection</em> drop down. Then, fill in the launch details. In the <em>Activity</em> panel, select the <em>Profile</em> activity to initiate a session that pre-configures the profile session and launches the command line profiler to collect the data. Provide the <em>Output File</em> name to enable starting the session with the <em>Launch</em> button.</p>
<figure class="align-center" id="quick-start-profiling-connect">
<img alt="../_images/quick-start-profiling-connect.png" src="../_images/quick-start-profiling-connect.png" />
</figure>
</li>
<li><p><strong>Additional Launch Options</strong></p>
<p>For more details on these options, see <a class="reference external" href="../NsightComputeCli/index.html#command-line-options">Command Line Options</a>. The options are grouped into tabs: The <em>Filter</em> tab exposes the options to specify which kernels should be profiled. Options include the kernel regex filter, the number of launches to skip, and the total number of launches to profile. The <em>Sections</em> tab allows you to select which sections should be collected for each kernel launch. Hover over a section to see its description as a tool-tip. To change the sections that are enabled by default, use the <a class="reference external" href="index.html#tool-window-sections-info">Metric Selection</a> tool window. The <em>Sampling</em> tab allows you to configure sampling options for each kernel launch. The <em>Other</em> tab includes the option to collect NVTX information or custom metrics via the <code class="docutils literal notranslate"><span class="pre">--metrics</span></code> option.</p>
<figure class="align-center" id="quick-start-profiling-options-sections">
<img alt="../_images/quick-start-profiling-options-sections.png" src="../_images/quick-start-profiling-options-sections.png" />
</figure>
</li>
</ol>
<p>For a detailed description of the options available in this activity, see <a class="reference external" href="index.html#connection-activity-non-interactive">Profile Activity</a>.</p>
</section>
<section id="system-trace-activity">
<span id="quickstart-system-trace-activity"></span><span id="quick-start-system-trace"></span><h3><span class="section-number">3.2.3. </span>System Trace Activity<a class="headerlink" href="#system-trace-activity" title="Permalink to this headline"></a></h3>
<ol class="arabic">
<li><p><strong>Launch the target application from NVIDIA Nsight Compute</strong></p>
<p>When starting NVIDIA Nsight Compute, the <em>Welcome Page</em> will appear. Click on <em>Quick Launch</em> to open the <em>Connection</em> dialog. If the <em>Connection</em> dialog doesn’t appear, you can open it using the <em>Connect</em> button from the main toolbar, as long as you are not currently connected. Select your local target platform on the left-hand side and your localhost from the <em>Connection</em> drop down. Then, fill in the launch details. In the <em>Activity</em> panel, select the <em>System Trace</em> activity to initiate a session with pre-configured settings. Press <em>Launch</em> to start the session.</p>
<figure class="align-center" id="quick-start-system-trace-connect">
<img alt="../_images/quick-start-system-trace-connect.png" src="../_images/quick-start-system-trace-connect.png" />
</figure>
</li>
<li><p><strong>Additional Launch Options</strong></p>
<p>For more details on these options, see <a class="reference external" href="https://docs.nvidia.com/nsight-systems/UserGuide/index.html#linux-system-wide-profiling-options">System-Wide Profiling Options</a>.</p>
<figure class="align-center" id="quick-start-system-trace-options">
<img alt="../_images/quick-start-system-trace-options.png" src="../_images/quick-start-system-trace-options.png" />
</figure>
</li>
<li><p>Once the session is completed, the <a class="reference external" href="https://docs.nvidia.com/nsight-systems/UserGuide/index.html">Nsight Systems</a> report is opened in a new document. By default, the timeline view is shown. It provides detailed information about the activity of the CPU and GPUs and helps in understanding the overall behavior and performance of the application. Once a CUDA kernel is identified to be on the critical path and not meeting the performance expectations, right click on the kernel launch on the timeline and select <em>Profile Kernel</em> from the context menu. A new <a class="reference external" href="index.html#connection-dialog">Connection Dialog</a> opens up that is already preconfigured to profile the selected kernel launch. Proceed with optimizing the selected kernel using <a class="reference external" href="index.html#quick-start-non-interactive">Non-Interactive Profile Activity</a>.</p>
<figure class="align-center" id="quick-start-system-trace-timeline">
<img alt="../_images/quick-start-system-trace-timeline.png" src="../_images/quick-start-system-trace-timeline.png" />
</figure>
</li>
</ol>
</section>
<section id="navigate-the-report">
<span id="quickstart-navigate-report"></span><span id="quick-start-navigate-report"></span><h3><span class="section-number">3.2.4. </span>Navigate the Report<a class="headerlink" href="#navigate-the-report" title="Permalink to this headline"></a></h3>
<ol class="arabic">
<li><p><strong>Navigate the report</strong></p>
<p>The profile report comes up by default on the <em>Details</em> page. You can switch between different <a class="reference external" href="index.html#profiler-report-pages">Report Pages</a> of the report with the dropdown labeled Page on the top-left of the report. You can also use the <em>Ctrl + Shift + N</em> and <em>Ctrl + Shift + P</em> shortcut keys or the corresponding toolbar buttons to navigate to the next and previous pages, respectively. A report can contain any number of results from kernel launches. The <em>Result</em> dropdown allows switching between the different results in a report.</p>
<figure class="align-center" id="quick-start-report">
<img alt="../_images/quick-start-report.png" src="../_images/quick-start-report.png" />
</figure>
</li>
<li><p><strong>Diffing multiple results</strong></p>
<p>On the <em>Details</em> page, press the button <em>Add Baseline</em> in order for the current result to become the baseline all other results from this report and any other report opened in the same instance of NVIDIA Nsight Compute get compared to. If a baseline is set, every element on the Details page shows two values: The current value of the result in focus and the corresponding value of the baseline or the percentage of change from the corresponding baseline value.</p>
<figure class="align-center" id="quick-start-baseline">
<img alt="../_images/quick-start-baseline.png" src="../_images/quick-start-baseline.png" />
</figure>
<p>Use the <em>Clear Baselines</em> entry from the dropdown button, the Profile menu or the corresponding toolbar button to remove all baselines. For more information see <a class="reference external" href="index.html#baselines">Baselines</a>.</p>
</li>
<li><p><strong>Executing rules</strong></p>
<p>On the <em>Details</em> page some sections may provide rules. Press the <em>Apply</em> button to execute an individual rule. The <em>Apply Rules</em> button on the top executes all available rules for the current result in focus. Rules can be user-defined too. For more information see the <a class="reference external" href="../CustomizationGuide/index.html#rule-system">Customization Guide</a>.</p>
<figure class="align-center" id="quick-start-rule">
<img alt="../_images/quick-start-rule.png" src="../_images/quick-start-rule.png" />
</figure>
</li>
</ol>
</section>
</section>
<section id="connection-dialog">
<h2><span class="section-number">3.3. </span>Connection Dialog<a class="headerlink" href="#connection-dialog" title="Permalink to this headline"></a></h2>
<p>Use the <em>Connection Dialog</em> to launch and attach to applications on your local and remote platforms. Start by selecting the <em>Target Platform</em> for profiling. By default (and if supported) your local platform will be selected. Select the platform on which you would like to start the target application or connect to a running process.</p>
<figure class="align-center" id="id1">
<img alt="../_images/connection-dialog.png" src="../_images/connection-dialog.png" />
</figure>
<p>When using a remote platform, you will be asked to select or create a <em>Connection</em> in the top drop down. To create a new connection, select <em>+</em> and enter your connection details. When using the local platform, <em>localhost</em> will be selected as the default and no further connection settings are required. You can still create or select a remote connection, if profiling will be on a remote system of the same platform.</p>
<p>Depending on your target platform, select either <em>Launch</em> or <em>Remote Launch</em> to launch an application for profiling on the target. Note that <em>Remote Launch</em> will only be available if supported on the target platform.</p>
<p>Fill in the following launch details for the application:</p>
<ul class="simple">
<li><p><strong>Application Executable:</strong> Specifies the root application to launch. Note that this may not be the final application that you wish to profile. It can be a script or launcher that creates other processes.</p></li>
<li><p><strong>Working Directory:</strong> The directory in which the application will be launched.</p></li>
<li><p><strong>Command Line Arguments:</strong> Specify the arguments to pass to the application executable.</p></li>
<li><p><strong>Environment:</strong> The environment variables to set for the launched application.</p></li>
</ul>
<p>Select <em>Attach</em> to attach the profiler to an application already running on the target platform. This application must have been started using another NVIDIA Nsight Compute CLI instance. The list will show all application processes running on the target system which can be attached. Select the refresh button to re-create this list.</p>
<p>Finally, select the <em>Activity</em> to be run on the target for the launched or attached application. Note that not all activities are necessarily compatible with all targets and connection options. Currently, the following activities exist:</p>
<ul class="simple">
<li><p><a class="reference external" href="index.html#connection-activity-interactive">Interactive Profile Activity</a></p></li>
<li><p><a class="reference external" href="index.html#connection-activity-non-interactive">Profile Activity</a></p></li>
<li><p><a class="reference external" href="index.html#quick-start-system-trace">System Trace Activity</a></p></li>
<li><p><a class="reference external" href="index.html#occupancy-calculator">Occupancy Calculator</a></p></li>
</ul>
<section id="remote-connections">
<h3><span class="section-number">3.3.1. </span>Remote Connections<a class="headerlink" href="#remote-connections" title="Permalink to this headline"></a></h3>
<p>Remote devices that support SSH can also be configured as a target in the <em>Connection Dialog</em>. To configure a remote device, ensure an SSH-capable <em>Target Platform</em> is selected, then press the <em>+</em> button. The following configuration dialog will be presented.</p>
<figure class="align-center" id="add-remote-connection">
<img alt="../_images/add-remote-connection.png" src="../_images/add-remote-connection.png" />
</figure>
<p>NVIDIA Nsight Compute supports both password and private key authentication methods. In this dialog, select the authentication method and enter the following information:</p>
<ul>
<li><p><strong>Password</strong></p>
<ul class="simple">
<li><p><strong>IP/Host Name:</strong> The IP address or host name of the target device.</p></li>
<li><p><strong>User Name:</strong> The user name to be used for the SSH connection.</p></li>
<li><p><strong>Password:</strong> The user password to be used for the SSH connection.</p></li>
<li><p><strong>Port:</strong> The port to be used for the SSH connection. (The default value is 22)</p></li>
<li><p><strong>Deployment Directory:</strong> The directory to use on the target device to deploy supporting files. The specified user must have write permissions to this location.</p></li>
<li><p><strong>Connection Name:</strong> The name of the remote connection that will show up in the <em>Connection Dialog</em>. If not set, it will default to &lt;User&gt;&#64;&lt;Host&gt;:&lt;Port&gt;.</p></li>
</ul>
</li>
<li><p><strong>Private Key</strong></p>
<figure class="align-center" id="add-remote-connection-private-key">
<img alt="../_images/add-remote-connection-private-key.png" src="../_images/add-remote-connection-private-key.png" />
</figure>
<ul class="simple">
<li><p><strong>IP/Host Name:</strong> The IP address or host name of the target device.</p></li>
<li><p><strong>User Name:</strong> The user name to be used for the SSH connection.</p></li>
<li><p><strong>SSH Private Key:</strong> The private key that is used to authenticate to SSH server.</p></li>
<li><p><strong>SSH Key Passphrase:</strong> The passphrase for your private key.</p></li>
<li><p><strong>Port:</strong> The port to be used for the SSH connection. (The default value is 22)</p></li>
<li><p><strong>Deployment Directory:</strong> The directory to use on the target device to deploy supporting files. The specified user must have write permissions to this location.</p></li>
<li><p><strong>Connection Name:</strong> The name of the remote connection that will show up in the <em>Connection Dialog</em>. If not set, it will default to &lt;User&gt;&#64;&lt;Host&gt;:&lt;Port&gt;.</p></li>
</ul>
</li>
</ul>
<p>In addition to keyfiles specified by path and plain password authentication, NVIDIA Nsight Compute supports keyboard-interactive authentication, standard keyfile path searching and SSH agents.</p>
<p>When all information is entered, click the <em>Add</em> button to make use of this new connection.</p>
<p>When a remote connection is selected in the <em>Connection Dialog</em>, the <em>Application Executable</em> file browser will browse the remote file system using the configured SSH connection, allowing the user to select the target application on the remote device.</p>
<p>When an activity is launched on a remote device, the following steps are taken:</p>
<ol class="arabic simple">
<li><p>The command line profiler and supporting files are copied into the <em>Deployment Directory</em> on the remote device. (Only files that do not exist or are out of date are copied.)</p></li>
<li><p>Communication channels are opened to prepare for the traffic between the UI and the <em>Application Executable</em>.</p>
<ul class="simple">
<li><p>For <em>Interactive Profile</em> activities, a <em>SOCKS proxy</em> is started on the host machine.</p></li>
<li><p>For <em>Non-Interactive Profile</em> activities, a remote forwarding channel is opened on the target machine to tunnel profiling information back to the host.</p></li>
</ul>
</li>
<li><p>The <em>Application Executable</em> is executed on the remote device.</p>
<ul class="simple">
<li><p>For <em>Interactive Profile</em> activities, a connection is established to the remote application and the profiling session begins.</p></li>
<li><p>For <em>Non-Interactive Profile</em> activities, the remote application is executed under the command line profiler and the specified report file is generated.</p></li>
</ul>
</li>
<li><p>For non-interactive profiling activities, the generated report file is copied back to the host, and opened.</p></li>
</ol>
<p>The progress of each of these steps is presented in the <em>Progress Log</em>.</p>
<figure class="align-center" id="progress-log">
<img alt="../_images/progress-log.png" src="../_images/progress-log.png" />
<figcaption>
<p><span class="caption-text">Progress Log</span><a class="headerlink" href="#progress-log" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>Note that once either activity type has been launched remotely, the tools necessary for further profiling sessions can be found in the <em>Deployment Directory</em> on the remote device.</p>
<p>On Linux and Mac host platforms, NVIDIA Nsight Compute supports SSH remote profiling on target machines which are not directly addressable from the machine the UI is running on through the <code class="docutils literal notranslate"><span class="pre">ProxyJump</span></code> and <code class="docutils literal notranslate"><span class="pre">ProxyCommand</span></code> SSH options.</p>
<p>These options can be used to specify intermediate hosts to connect to or actual commands to run to obtain a socket connected to the SSH server on the target host and can be added to your SSH configuration file.</p>
<p>Note that for both options, NVIDIA Nsight Compute runs external commands and does not implement any mechanism to authenticate to the intermediate hosts using the credentials entered in the <a class="reference external" href="index.html#connection-dialog">Connection Dialog</a>. These credentials will only be used to authenticate to the final target in the chain of machines.</p>
<p>When using the <code class="docutils literal notranslate"><span class="pre">ProxyJump</span></code> option NVIDIA Nsight Compute uses the <em>OpenSSH client</em> to establish the connection to the intermediate hosts. This means that in order to use <code class="docutils literal notranslate"><span class="pre">ProxyJump</span></code> or <code class="docutils literal notranslate"><span class="pre">ProxyCommand</span></code>, a version of OpenSSH supporting these options must be installed on the host machine.</p>
<p>A common way to authenticate to the intermediate hosts in this case is to use an <em>SSH agent</em> and have it hold the private keys used for authentication.</p>
<p>Since the <em>OpenSSH SSH client</em> is used, you can also use the <em>SSH askpass</em> mechanism to handle these authentications in an interactive manner.</p>
<p>It might happen on slow networks that connections used for remote profiling through SSH time out. If this is the case, the <code class="docutils literal notranslate"><span class="pre">ConnectTimeout</span></code> option can be used to set the desired timeout value.</p>
<p>A known limitation of the remote profiling through SSH is that problems may arise if NVIDIA Nsight Compute tries to do remote profiling through <em>SSH</em> by connecting to the same machine it is running on. In this case, the workaround is to do local profiling through <code class="docutils literal notranslate"><span class="pre">localhost</span></code>.</p>
<p>For more information about available options for the <em>OpenSSH client</em> and the ecosystem of tools it can be used with for authentication refer to the official <a class="reference external" href="https://www.openssh.com/manual.html">manual pages</a>.</p>
</section>
<section id="id2">
<h3><span class="section-number">3.3.2. </span>Interactive Profile Activity<a class="headerlink" href="#id2" title="Permalink to this headline"></a></h3>
<p>The <em>Interactive Profile</em> activity allows you to initiate a session that controls the execution of the target application, similar to a debugger. You can step API calls and workloads (CUDA kernels), pause and resume, and interactively select the kernels of interest and which metrics to collect.</p>
<p>This activity does not currently support profiling or attaching to child processes.</p>
<ul>
<li><p><strong>Enable CPU Call Stack</strong></p>
<p>Collect the CPU-sided Call Stack at the location of each profiled kernel launch.</p>
</li>
<li><p><strong>Enable NVTX Support</strong></p>
<p>Collect NVTX information provided by the application or its libraries. Required to support stepping to specific NVTX contexts.</p>
</li>
<li><p><strong>Disable Profiling Start/Stop</strong></p>
<p>Ignore calls to <code class="docutils literal notranslate"><span class="pre">cu(da)ProfilerStart</span></code> or <code class="docutils literal notranslate"><span class="pre">cu(da)ProfilerStop</span></code> made by the application.</p>
</li>
<li><p><strong>Enable Profiling From Start</strong></p>
<p>Enables profiling from the application start. Disabling this is useful if the application calls <code class="docutils literal notranslate"><span class="pre">cu(da)ProfilerStart</span></code> and kernels before the first call to this API should not be profiled. Note that disabling this does not prevent you from manually profiling kernels.</p>
</li>
<li><p><strong>Cache Control</strong></p>
<p>Control the behavior of the GPU caches during profiling. Allowed values: For <em>Flush All</em>, all GPU caches are flushed before each kernel replay iteration during profiling. While metric values in the execution environment of the application might be slightly different without invalidating the caches, this mode offers the most reproducible metric results across the replay passes and also across multiple runs of the target application.</p>
<p>For <em>Flush None</em>, no GPU caches are flushed during profiling. This can improve performance and better replicates the application behavior if only a single kernel replay pass is necessary for metric collection. However, some metric results will vary depending on prior GPU work, and between replay iterations. This can lead to inconsistent and out-of-bounds metric values.</p>
</li>
<li><p><strong>Clock Control</strong></p>
<p>Control the behavior of the GPU clocks during profiling. Allowed values: For <em>Base</em>, GPC and memory clocks are locked to their respective base frequency during profiling. This has no impact on thermal throttling. For <em>None</em>, no GPC or memory frequencies are changed during profiling.</p>
</li>
<li><p><strong>Import Source</strong></p>
<p>Enables permanently importing available source files into the report. Missing source files are searched in <a class="reference external" href="index.html#options-source-lookup">Source Lookup</a> folders. Source information must be embedded in the executable, e.g. via the <code class="docutils literal notranslate"><span class="pre">-lineinfo</span></code> compiler option. Imported files are used in the <em>CUDA-C</em> view on the <a class="reference external" href="index.html#profiler-report-source-page">Source Page</a>.</p>
</li>
</ul>
<ul>
<li><p><strong>Graph Profiling</strong></p>
<p>Set if CUDA graphs should be stepped and profiled as individual <em>Nodes</em> or as complete <em>Graphs</em>. See the <a class="reference external" href="../ProfilingGuide/index.html#graph-profiling">Kernel Profiling Guide</a> for more information on this mode.</p>
</li>
</ul>
</section>
<section id="profile-activity">
<span id="connection-activity-non-interactive"></span><h3><span class="section-number">3.3.3. </span>Profile Activity<a class="headerlink" href="#profile-activity" title="Permalink to this headline"></a></h3>
<p>The <em>Profile</em> activity provides a traditional, pre-configurable profiler. After configuring which kernels to profile, which metrics to collect, etc, the application is run under the profiler without interactive control. The activity completes once the application terminates. For applications that normally do not terminate on their own, e.g. interactive user interfaces, you can cancel the activity once all expected kernels are profiled.</p>
<p>This activity does not support attaching to processes previously launched via NVIDIA Nsight Compute. These processes will be shown grayed out in the <em>Attach</em> tab.</p>
<ul>
<li><p><strong>Output File</strong></p>
<p>Path to report file where the collected profile should be stored. If not present, the report extension <code class="docutils literal notranslate"><span class="pre">.ncu-rep</span></code> is added automatically. The placeholder <code class="docutils literal notranslate"><span class="pre">%i</span></code> is supported for the filename component. It is replaced by a sequentially increasing number to create a unique filename. This maps to the <code class="docutils literal notranslate"><span class="pre">--export</span></code> command line option.</p>
</li>
<li><p><strong>Force Overwrite</strong></p>
<p>If set, existing report files are overwritten. This maps to the <code class="docutils literal notranslate"><span class="pre">--force-overwrite</span></code> command line option.</p>
</li>
<li><p><strong>Target Processes</strong></p>
<p>Select the processes you want to profile. In mode <em>Application Only</em>, only the root application process is profiled. In mode <em>all</em>, the root application process and all its child processes are profiled. This maps to the <code class="docutils literal notranslate"><span class="pre">--target-processes</span></code> command line option.</p>
</li>
<li><p><strong>Replay Mode</strong></p>
<p>Select the method for replaying kernel launches multiple times. In mode <em>Kernel</em>, individual kernel launches are replayed transparently during the single execution of the target application. In mode <em>Application</em>, the entire target application is relaunched multiple times. In each iteration, additional data for the target kernel launches is collected. Application replay requires the program execution to be deterministic. This maps to the <code class="docutils literal notranslate"><span class="pre">--replay-mode</span></code> command line option. See the <a class="reference external" href="../ProfilingGuide/index.html#kernel-replay">Kernel Profiling Guide</a> for more details on the replay modes.</p>
</li>
</ul>
<ul>
<li><p><strong>Graph Profiling</strong></p>
<p>Set if CUDA graphs should be profiled as individual <em>Nodes</em> or as complete <em>Graphs</em>.</p>
</li>
</ul>
<ul>
<li><p><strong>Additional Options</strong></p>
<p>All remaining options map to their command line profiler equivalents. See the <a class="reference external" href="../NsightComputeCli/index.html#command-line-options">Command Line Options</a> for details.</p>
</li>
</ul>
</section>
<section id="reset">
<span id="connection-dialog-reset"></span><h3><span class="section-number">3.3.4. </span>Reset<a class="headerlink" href="#reset" title="Permalink to this headline"></a></h3>
<p>Entries in the connection dialog are saved as part of the current <a class="reference external" href="index.html#projects">project</a>. When working in a custom project, simply close the project to reset the dialog.</p>
<p>When not working in a custom project, entries are stored as part of the <em>default project</em>. You can delete all information from the default project by closing NVIDIA Nsight Compute and then <a class="reference external" href="index.html#projects">deleting the project file from disk</a>.</p>
</section>
</section>
<section id="main-menu-and-toolbar">
<h2><span class="section-number">3.4. </span>Main Menu and Toolbar<a class="headerlink" href="#main-menu-and-toolbar" title="Permalink to this headline"></a></h2>
<p>Information on the main menu and toolbar.</p>
<figure class="align-center" id="main-menu">
<img alt="../_images/main-menu.png" src="../_images/main-menu.png" />
</figure>
<section id="id3">
<h3><span class="section-number">3.4.1. </span>Main Menu<a class="headerlink" href="#id3" title="Permalink to this headline"></a></h3>
<ul>
<li><p>File</p>
<ul>
<li><p><strong>New Project</strong> Create new profiling <a class="reference external" href="index.html#projects">Projects</a> with the <a class="reference external" href="index.html#projects-dialog">New Project Dialog</a>.</p></li>
<li><p><strong>Open Project</strong> Open an existing profiling project.</p></li>
<li><p><strong>Recent Projects</strong> Open an existing profiling project from the list of recently used projects.</p></li>
<li><p><strong>Save Project</strong> Save the current profiling project.</p></li>
<li><p><strong>Save Project As</strong> Save the current profiling project with a new filename.</p></li>
<li><p><strong>Close Project</strong> Close the current profiling project.</p></li>
<li><p><strong>New File</strong> Create a new file.</p></li>
<li><p><strong>Open File</strong> Open an existing file.</p></li>
<li><p><strong>Open Remote File</strong></p>
<p>Download an existing file from a remote host and open it locally. The opened file will only exist in memory and will not be written to the local machine’s disk unless the user explicitly saves it. For more information concerning the selection of a remote host to download the file from, see the section about <a class="reference external" href="index.html#remote-connections">Remote Connections</a>.</p>
<p>Only a subset of file types that are supported locally can be opened from a remote target. The following table lists file types that can be opened remotely.</p>
<table class="table-no-stripes docutils align-default" id="id11">
<caption><span class="caption-text">Remote File Type Support</span><a class="headerlink" href="#id11" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 25%" />
<col style="width: 47%" />
<col style="width: 28%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Extensions</p></th>
<th class="head"><p>Description</p></th>
<th class="head"><p>Supported</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>ncu-rep</p></td>
<td><p>Nsight Compute Profiler Report</p></td>
<td><p>Yes</p></td>
</tr>
<tr class="row-odd"><td><p>ncu-occ</p></td>
<td><p>Occupancy Calculator File</p></td>
<td><p>Yes</p></td>
</tr>
<tr class="row-even"><td><p>ncu-bvh</p></td>
<td><p>OptiX AS Viewer File</p></td>
<td><p>Yes (except on MacOSX)</p></td>
</tr>
<tr class="row-odd"><td><p>section</p></td>
<td><p>Section Description</p></td>
<td><p>No</p></td>
</tr>
<tr class="row-even"><td><p>cubin</p></td>
<td><p>Cubin File</p></td>
<td><p>No</p></td>
</tr>
<tr class="row-odd"><td><p>cuh,h,hpp</p></td>
<td><p>Header File</p></td>
<td><p>No</p></td>
</tr>
<tr class="row-even"><td><p>c,cpp,cu</p></td>
<td><p>Source File</p></td>
<td><p>No</p></td>
</tr>
<tr class="row-odd"><td><p>txt</p></td>
<td><p>Text file</p></td>
<td><p>No</p></td>
</tr>
<tr class="row-even"><td><p>nsight-cuprof-report</p></td>
<td><p>Nsight Compute Profiler Report (legacy)</p></td>
<td><p>Yes</p></td>
</tr>
</tbody>
</table>
</li>
<li><p><strong>Save</strong> Save the current file.</p></li>
<li><p><strong>Save As</strong> Save a copy of the current file with a different name or type or in a different location.</p></li>
<li><p><strong>Save All Files</strong> Save all open files.</p></li>
<li><p><strong>Close</strong> Close the current file.</p></li>
<li><p><strong>Close All Files</strong> Close all open files.</p></li>
<li><p><strong>Recent Files</strong> Open an existing file from the list of recently used files.</p></li>
<li><p><strong>Exit</strong> Exit Nsight Compute.</p></li>
</ul>
</li>
<li><p>Connection</p>
<ul class="simple">
<li><p><strong>Connect</strong> Open the <a class="reference external" href="index.html#connection-dialog">Connection Dialog</a> to launch or attach to a target application. Disabled when already connected.</p></li>
<li><p><strong>Disconnect</strong> Disconnect from the current target application, allowing the application to continue normally and making it possible to re-attach later.</p></li>
<li><p><strong>Terminate</strong> Disconnect from and terminate the current target application immediately.</p></li>
</ul>
</li>
<li><p>Debug</p>
<ul>
<li><p><strong>Pause</strong> Pause the target application at the next intercepted API call or launch.</p></li>
<li><p><strong>Resume</strong> Resume the target application.</p></li>
<li><p><strong>Step In</strong> Step into the current API call or launch to the next nested call, if any, or the subsequent API call, otherwise.</p></li>
<li><p><strong>Step Over</strong> Step over the current API call or launch and suspend at the next, non-nested API call or launch.</p></li>
<li><p><strong>Step Out</strong> Step out of the current nested API call or launch to the next, non-parent API call or launch one level above.</p></li>
<li><p><strong>Freeze API</strong></p>
<p>When disabled, all CPU threads are enabled and continue to run during stepping or resume, and all threads stop as soon as at least one thread arrives at the next API call or launch. This also means that during stepping or resume the currently selected thread might change as the old selected thread makes no forward progress and the API Stream automatically switches to the thread with a new API call or launch. When enabled, only the currently selected CPU thread is enabled. All other threads are disabled and blocked.</p>
<p>Stepping now completes if the current thread arrives at the next API call or launch. The selected thread never changes. However, if the selected thread does not call any further API calls or waits at a barrier for another thread to make progress, stepping may not complete and hang indefinitely. In this case, pause, select another thread, and continue stepping until the original thread is unblocked. In this mode, only the selected thread will ever make forward progress.</p>
</li>
<li><p><strong>Break On API Error</strong> When enabled, during resume or stepping, execution is suspended as soon as an API call returns an error code.</p></li>
<li><p><strong>Run to Next Kernel</strong> See <a class="reference external" href="index.html#tool-window-api-stream">API Stream</a> tool window.</p></li>
<li><p><strong>Run to Next API Call</strong> See <a class="reference external" href="index.html#tool-window-api-stream">API Stream</a> tool window.</p></li>
<li><p><strong>Run to Next Range Start</strong> See <a class="reference external" href="index.html#tool-window-api-stream">API Stream</a> tool window.</p></li>
<li><p><strong>Run to Next Range End</strong> See <a class="reference external" href="index.html#tool-window-api-stream">API Stream</a> tool window.</p></li>
<li><p><strong>API Statistics</strong> Opens the <a class="reference external" href="index.html#tool-window-api-statistics">API Statistics</a> tool window.</p></li>
<li><p><strong>API Stream</strong> Opens the <a class="reference external" href="index.html#tool-window-api-stream">API Stream</a> tool window.</p></li>
<li><p><strong>Resources</strong> Opens the <a class="reference external" href="index.html#tool-window-resources">Resources</a> tool window.</p></li>
<li><p><strong>NVTX</strong> Opens the <a class="reference external" href="index.html#tool-window-nvtx">NVTX</a> tool window.</p></li>
</ul>
</li>
<li><p>Profile</p>
<ul class="simple">
<li><p><strong>Profile Kernel</strong> When suspended at a kernel launch, profile the kernel using the current configuration.</p></li>
<li><p><strong>Profile Series</strong> When suspended at a kernel launch, open the Profile Series configuration dialog to setup and collect a series of profile results.</p></li>
<li><p><strong>Auto Profile</strong> Enable or disable auto profiling. If enabled, each kernel matching the current kernel filter (if any) will be profiled using the current section configuration.</p></li>
<li><p><strong>Baselines</strong> Opens the <a class="reference external" href="index.html#tool-window-baselines">Baselines</a> tool window.</p></li>
<li><p><strong>Clear Baselines</strong> Clear all current baselines.</p></li>
<li><p><strong>Import Source</strong> Permanently import resolved source files into the report. Existing content may be overwritten.</p></li>
<li><p><strong>Section/Rules Info</strong> Opens the <a class="reference external" href="index.html#tool-window-sections-info">Metric Selection</a> tool window.</p></li>
</ul>
</li>
<li><p>Tools</p>
<ul class="simple">
<li><p><strong>Project Explorer</strong> Opens the <a class="reference external" href="index.html#projects-explorer">Project Explorer</a> tool window.</p></li>
<li><p><strong>Output Messages</strong> Opens the Output Messages tool window.</p></li>
<li><p><strong>Options</strong> Opens the <a class="reference external" href="index.html#options">Options</a> dialog.</p></li>
</ul>
</li>
<li><p>Window</p>
<ul class="simple">
<li><p><strong>Save Window Layout</strong> Allows you to specify a name for the current layout. The layouts are saved to a Layouts folder in the documents directory as named “.nvlayout” files.</p></li>
<li><p><strong>Apply Window Layout</strong> Once you have saved a layout, you can restore it by using the “Apply Window Layout” menu entry. Simply select the layout you want to apply from the sub-menu.</p></li>
<li><p><strong>Manage Window Layout</strong> Allows you to delete or rename old layouts.</p></li>
<li><p><strong>Restore Default Layout</strong> Restore views to their original size and position.</p></li>
<li><p><strong>Show Welcome Page</strong> Opens the <a class="reference external" href="index.html#quick-start__fig-welcome-page">Welcome Page</a>.</p></li>
</ul>
</li>
<li><p>Help</p>
<ul class="simple">
<li><p><strong>Documentation</strong> Opens the latest documentation for NVIDIA Nsight Compute online.</p></li>
<li><p><strong>Documentation (local)</strong> Opens the local HTML documentation for NVIDIA Nsight Compute that has shipped with the tool.</p></li>
<li><p><strong>Check For Updates</strong> Checks online if a newer version of NVIDIA Nsight Compute is available for download.</p></li>
<li><p><strong>Reset Application Data</strong> Reset all NVIDIA Nsight Compute configuration data saved on disk, including option settings, default paths, recent project references etc. This will not delete saved reports.</p></li>
<li><p><strong>Send Feedback</strong> Opens a dialog that allows you to send bug reports and suggestions for features. Optionally, the feedback includes basic system information, screenshots, or additional files (such as profile reports).</p></li>
<li><p><strong>About</strong> Opens the About dialog with information about the version of NVIDIA Nsight Compute.</p></li>
</ul>
</li>
</ul>
</section>
<section id="main-toolbar">
<h3><span class="section-number">3.4.2. </span>Main Toolbar<a class="headerlink" href="#main-toolbar" title="Permalink to this headline"></a></h3>
<p>The main toolbar shows commonly used operations from the main menu. See <a class="reference external" href="index.html#main-menu">Main Menu</a> for their description.</p>
</section>
<section id="status-banners">
<span id="status-banner"></span><h3><span class="section-number">3.4.3. </span>Status Banners<a class="headerlink" href="#status-banners" title="Permalink to this headline"></a></h3>
<p>Status banners are used to display important messages, such as profiler errors. The message can be dismissed by clicking the ‘X’ button. The number of banners shown at the same time is limited and old messages can get dismissed automatically if new ones appear. Use the <em>Output Messages</em> window to see the complete message history.</p>
<figure class="align-center" id="id4">
<img alt="../_images/status-banner.png" src="../_images/status-banner.png" />
</figure>
</section>
</section>
<section id="tool-windows">
<h2><span class="section-number">3.5. </span>Tool Windows<a class="headerlink" href="#tool-windows" title="Permalink to this headline"></a></h2>
<section id="api-statistics">
<span id="tool-window-api-statistics"></span><h3><span class="section-number">3.5.1. </span>API Statistics<a class="headerlink" href="#api-statistics" title="Permalink to this headline"></a></h3>
<p>The <em>API Statistics</em> window is available when NVIDIA Nsight Compute is connected to a target application. It opens by default as soon as the connection is established. It can be re-opened using <em>Debug &gt; API Statistics</em> from the main menu.</p>
<figure class="align-center" id="tool-window-api-statistics-fig">
<img alt="../_images/tool-window-api-statistics.png" src="../_images/tool-window-api-statistics.png" />
</figure>
<p>Whenever the target application is suspended, it shows a summary of tracked API calls with some statistical information, such as the number of calls, their total, average, minimum and maximum duration. Note that this view cannot be used as a replacement for <a class="reference external" href="https://developer.nvidia.com/nsight-systems">Nsight Systems</a> when trying to optimize CPU performance of your application.</p>
<p>The <em>Reset</em> button deletes all statistics collected to the current point and starts a new collection. Use the <em>Export to CSV</em> button to export the current statistics to a CSV file.</p>
</section>
<section id="api-stream">
<span id="tool-window-api-stream"></span><h3><span class="section-number">3.5.2. </span>API Stream<a class="headerlink" href="#api-stream" title="Permalink to this headline"></a></h3>
<p>The <em>API Stream</em> window is available when NVIDIA Nsight Compute is connected to a target application. It opens by default as soon as the connection is established. It can be re-opened using <em>Debug &gt; API Stream</em> from the main menu.</p>
<figure class="align-center" id="tool-window-api-stream-fig">
<img alt="../_images/tool-window-api-stream.png" src="../_images/tool-window-api-stream.png" />
</figure>
<p>Whenever the target application is suspended, the window shows the history of API calls and traced kernel launches. The currently suspended API call or kernel launch (activity) is marked with a yellow arrow. If the suspension is at a subcall, the parent call is marked with a green arrow. The API call or kernel is suspended before being executed.</p>
<p>For each activity, further information is shown such as the kernel name or the function parameters (<em>Func Parameters</em>) and return value (<em>Func Return</em>). Note that the function return value will only become available once you step out or over the API call.</p>
<p>Use the <em>Current Thread</em> dropdown to switch between the active threads. The dropdown shows the thread ID followed by the current API name. One of several options can be chosen in the trigger dropdown, which are executed by the adjacent <em>&gt;&gt;</em> button. <em>Run to Next Kernel</em> resumes execution until the next kernel launch is found in any enabled thread. <em>Run to Next API Call</em> resumes execution until the next API call matching <em>Next Trigger</em> is found in any enabled thread. <em>Run to Next Range Start</em> resumes execution until the next start of an active profiler range is found. Profiler ranges are defined by using the <code class="docutils literal notranslate"><span class="pre">cu(da)ProfilerStart/Stop</span></code> API calls. <em>Run to Next Range Stop</em> resumes execution until the next stop of an active profiler range is found. The <em>API Level</em> dropdown changes which API levels are shown in the stream. The <em>Export to CSV</em> button exports the currently visible stream to a CSV file.</p>
</section>
<section id="baselines">
<span id="tool-window-baselines"></span><h3><span class="section-number">3.5.3. </span>Baselines<a class="headerlink" href="#baselines" title="Permalink to this headline"></a></h3>
<p>The <em>Baselines</em> tool window can be opened by clicking the <em>Baselines</em> entry in the <em>Profile</em> menu. It provides a centralized place from which to manage configured baselines. (Refer to <a class="reference external" href="index.html#baselines">Baselines</a> for information on how to create baselines from profile results.)</p>
<figure class="align-center" id="tool-window-baselines-fig">
<img alt="../_images/tool-window-baselines.png" src="../_images/tool-window-baselines.png" />
</figure>
<p>The baseline visibility can be controlled by clicking on the check box in a table row. When the check box is checked, the baseline will be visible in the summary header as well as all graphs in all sections. When unchecked the baseline will be hidden and will not contribute to metric difference calculations.</p>
<p>The baseline color can be changed by double-clicking on the color swatch in the table row. The color dialog which is opened provides the ability to choose an arbitrary color as well as offers a palette of predefined colors associated with the stock baseline color rotation.</p>
<p>The baseline name can be changed by double-clicking on the <em>Name</em> column in the table row. The name must not be empty and must be less than the <em>Maximum Baseline Name Length</em> as specified in the options dialog.</p>
<p>The z-order of a selected baseline can be changed by clicking the <em>Move Baseline Up</em> and <em>Move Baseline Down</em> buttons in the tool bar. When a baseline is moved up or down its new position will be reflected in the report header as well as in each graph. Currently, only one baseline may be moved at a time.</p>
<p>The selected baselines may be removed by clicking on the <em>Clear Selected Baselines</em> button in the tool bar. All baselines can be removed at once by clicking on the <em>Clear All Baselines</em> button, from either the global tool bar or the tool window tool bar.</p>
<p>The configured baselines can be saved to a file by clicking on the <em>Save Baselines</em> button in the tool bar. By default baseline files use the <code class="docutils literal notranslate"><span class="pre">.ncu-bln</span></code> extension. Baseline files can be opened locally and/or shared with other users.</p>
<p>Baseline information can be loaded by clicking on the <em>Load Baselines</em> button in the tool bar. When a baseline file is loaded, currently configured baselines will be replaced. A dialog will be presented to the user to confirm this operation when necessary.</p>
<p>Differences between the current result and the baselines can be visualized with graphical bars for metrics in Details page section headers. Use the <em>Difference Bars</em> drop down to select the visualization mode. Bars extend from left to right and have a fixed maximum.</p>
</section>
<section id="metric-details">
<span id="tool-window-metric-details"></span><h3><span class="section-number">3.5.4. </span>Metric Details<a class="headerlink" href="#metric-details" title="Permalink to this headline"></a></h3>
<p>The <em>Metric Details</em> tool window can be opened using the <em>Metric Details</em> entry in the <em>Profile</em> menu or the respective tool bar button. When a report and the tool window are open, a metric can be selected in the report to display additional information in the tool window. It also contains a search bar to look up metrics in the focused report.</p>
<figure class="align-center" id="tool-window-metric-details-fig">
<img alt="../_images/tool-window-metric-details.png" src="../_images/tool-window-metric-details.png" />
</figure>
<p>Report metrics can be selected in the <a class="reference external" href="index.html#profiler-report-details-page">Details Page</a> or the <a class="reference external" href="index.html#profiler-report-raw-page">Raw Page</a>. The window will show basic information (name, unit and raw value of the metric) as well as additional information, such as its extended description.</p>
<p>The search bar can be used to open metrics in the focused report. It shows available matches as you type. The entered string must match from the start of the metric name.</p>
<p>By default, selecting or searching for a new metric updates the current <em>Default Tab</em>. You can click the <em>Pin Tab</em> button to create a copy of the default tab, unless the same metric is already pinned. This makes it possible to save multiple tabs and quickly switch between them to compare values.</p>
<p>Some metrics contain <a class="reference external" href="../ProfilingGuide/index.html#metrics-structure">Instance Values</a>. When available, they are listed in the tool window. Instance values can have a <em>Correlation ID</em> that allows correlating the individual value with its associated entity, e.g. a function address or instruction name.</p>
<p>For metrics collected with <a class="reference external" href="../ProfilingGuide/index.html#pm-sampling">PM sampling</a>, the correlation ID is the GPU timestamp in nanoseconds. It is shown as an absolute value and relative to the first timestamp for this metric.</p>
</section>
<section id="launch-details">
<span id="tool-window-launch-details"></span><h3><span class="section-number">3.5.5. </span>Launch Details<a class="headerlink" href="#launch-details" title="Permalink to this headline"></a></h3>
<p>The <em>Launch Details</em> tool window can be opened using the <em>Launch Details</em> entry
in the <em>Profile</em> menu or the respective tool bar button. When a result
containing multiple sub-launches is selected and this tool window is open, it
will display information about each sub-launch contained in the result.</p>
<figure class="align-center" id="tool-window-launch-details-fig">
<a class="reference internal image-reference" href="../_images/tool-window-launch-details.png"><img alt="../_images/tool-window-launch-details.png" src="../_images/tool-window-launch-details.png" style="width: 648.6px; height: 658.8px;" /></a>
</figure>
<p>This tool window is split into two sections:</p>
<ul class="simple">
<li><p>a header displaying information applying to the result as a whole</p></li>
<li><p>a body displaying information specific to the viewed sub-launch</p></li>
</ul>
<section id="header">
<h4>Header<a class="headerlink" href="#header" title="Permalink to this headline"></a></h4>
<p>On the left side of its header, this tool window displays the selected result’s
name and the number of sub-launches it is comprised of.</p>
<p>The right side contains a combo box that allows selection of the sub-launch the
body should represent. Each element of the combo box contains an index for the
sub-launch as well as the name of the function that it launched if available.</p>
</section>
<section id="body">
<h4>Body<a class="headerlink" href="#body" title="Permalink to this headline"></a></h4>
<p>The body of this tool window displays a table with sub-launch-specific metrics.
This table has four columns:</p>
<ul class="simple">
<li><p><em>Metric Name</em>: the name of the metric</p></li>
<li><p><em>Metric Unit</em>: the unit for metric values</p></li>
<li><p><em>Instance Value</em>: the value of this metric for the selected sub-launch</p></li>
<li><p><em>Aggregate Value</em>: the aggregate value for this metric over all sub-launches
in the selected result</p></li>
</ul>
</section>
</section>
<section id="nvtx">
<span id="tool-window-nvtx"></span><h3><span class="section-number">3.5.6. </span>NVTX<a class="headerlink" href="#nvtx" title="Permalink to this headline"></a></h3>
<p>The <em>NVTX</em> window is available when NVIDIA Nsight Compute is connected to a target application. If closed, it can be re-opened using <em>Debug &gt; NVTX</em> from the main menu. Whenever the target application is suspended, the window shows the state of all active NVTX domains and ranges in the currently selected thread. Note that <a class="reference external" href="https://devblogs.nvidia.com/cuda-pro-tip-generate-custom-application-profile-timelines-nvtx/">NVTX</a> information is only tracked if the launching command line profiler instance was started with <code class="docutils literal notranslate"><span class="pre">--nvtx</span></code> or NVTX was enabled in the NVIDIA Nsight Compute launch dialog.</p>
<figure class="align-center" id="tool-window-nvtx-fig">
<img alt="../_images/tool-window-nvtx.png" src="../_images/tool-window-nvtx.png" />
</figure>
<p>Use the <em>Current Thread</em> dropdown in the <a class="reference external" href="index.html#tool-window-api-stream">API Stream</a> window to change the currently selected thread. NVIDIA Nsight Compute supports NVTX named resources, such as threads, CUDA devices, CUDA contexts, etc. If a resource is named using NVTX, the appropriate UI elements will be updated.</p>
<figure class="align-center" id="tool-window-nvtx-resources">
<img alt="../_images/tool-window-nvtx-resources.png" src="../_images/tool-window-nvtx-resources.png" />
</figure>
</section>
<section id="resources">
<span id="tool-window-resources"></span><h3><span class="section-number">3.5.7. </span>Resources<a class="headerlink" href="#resources" title="Permalink to this headline"></a></h3>
<p>The <em>Resources</em> window is available when NVIDIA Nsight Compute is connected to a target application. It shows information about the currently known resources, such as CUDA devices, CUDA streams or kernels. The window is updated every time the target application is suspended. If closed, it can be re-opened using <em>Debug &gt; Resources</em> from the main menu.</p>
<figure class="align-center" id="tool-window-resources-fig">
<img alt="../_images/tool-window-resources.png" src="../_images/tool-window-resources.png" />
</figure>
<p>Using the dropdown on the top, different views can be selected, where each view is specific to one kind of resource (context, stream, kernel, …). The <em>Filter</em> edit allows you to create filter expressions using the column headers of the currently selected resource.</p>
<p>The resource table shows all information for each resource instance. Each instance has a unique ID, the <em>API Call ID</em> when this resource was created, its handle, associated handles, and further parameters. When a resource is destroyed, it is removed from its table.</p>
<section id="memory-allocations">
<span id="tool-window-resources-memory"></span><h4>Memory Allocations<a class="headerlink" href="#memory-allocations" title="Permalink to this headline"></a></h4>
<p>When using the asynchronous malloc/free APIs, the resource view for <em>Memory Allocation</em> will also include the memory objects created in this manner. These memory objects have a non-zero memory pool handle. The <em>Mode</em> column will indicate which code path was taken during the allocation of the corresponding object. The modes are:</p>
<ul class="simple">
<li><p>REUSE_STREAM_SUBPOOL: The memory object was allocated in memory that was previously freed. The memory was backed by the memory pool set as current for the stream on which the allocation was made.</p></li>
<li><p>USE_EXISTING_POOL_MEMORY: The memory object was allocated in memory that was previously freed. The memory is backed by the default memory pool of the stream on which the allocation was made.</p></li>
<li><p>REUSE_EVENT_DEPENDENCIES: The memory object was allocated in memory that was previously freed in another stream of the same context. A stream ordering dependency of the allocating stream on the free action existed. CUDA events and null stream interactions can create the required stream ordered dependencies.</p></li>
<li><p>REUSE_OPPORTUNISTIC: The memory object was allocated in memory that was previously freed in another stream of the same context. However, no dependency between the free and allocation existed. This mode requires that the free be already committed at the time the allocation is requested. Changes in execution behavior might result in different modes for multiple runs of the application.</p></li>
<li><p>REUSE_INTERNAL_DEPENDENCIES: The memory object was allocated in memory that was previously freed in another stream of the same context. New internal stream dependencies may have been added in order to establish the stream ordering required to reuse a piece of memory previously released.</p></li>
<li><p>REQUEST_NEW_ALLOCATION: New memory had to be allocated for this memory object as no viable reusable pool memory was found. The allocation performance is comparable to using the non-asynchronous malloc/free APIs.</p></li>
</ul>
</section>
<section id="graphviz-dot-and-svg-exports">
<span id="tool-window-resources-dot-svg"></span><h4>Graphviz DOT and SVG exports<a class="headerlink" href="#graphviz-dot-and-svg-exports" title="Permalink to this headline"></a></h4>
<p>Some of the shown <em>Resources</em> can also be exported to <em>GraphViz DOT</em> or <em>SVG</em> files using the <code class="docutils literal notranslate"><span class="pre">Export</span> <span class="pre">to</span> <span class="pre">GraphViz</span></code> or <code class="docutils literal notranslate"><span class="pre">Export</span> <span class="pre">to</span> <span class="pre">SVG</span></code> buttons.</p>
<p>When exporting <em>OptiX traversable handles</em>, the traversable graph node types will be encoded using shapes and colors as described in the following table.</p>
<table class="table-no-stripes colwidths-given docutils align-default" id="id12">
<caption><span class="caption-text">Table 2. OptiX Traversable Graph Node Types</span><a class="headerlink" href="#id12" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 44%" />
<col style="width: 44%" />
<col style="width: 11%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Node Type</p></th>
<th class="head"><p>Shape</p></th>
<th class="head"><p>Color</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>IAS</p></td>
<td><p>Hexagon</p></td>
<td><p>#8DD3C7</p>
<svg width="70" height="30">
   <rect x="5" y="5" rx="5" ry="5" width="60" height="20" stroke="black" stroke-width="2" fill="#8DD3C7" />
 </svg></td>
</tr>
<tr class="row-odd"><td><p>Triangle GAS</p></td>
<td><p>Box</p></td>
<td><p>#FFFFB3</p>
<svg width="70" height="30">
  <rect x="5" y="5" rx="5" ry="5" width="60" height="20" stroke="black" stroke-width="2" fill="#FFFFB3" />
</svg></td>
</tr>
<tr class="row-even"><td><p>AABB GAS</p></td>
<td><p>Box</p></td>
<td><p>#FCCDE5</p>
<svg width="70" height="30">
  <rect x="5" y="5" rx="5" ry="5" width="60" height="20" stroke="black" stroke-width="2" fill="#FCCDE5" />
</svg></td>
</tr>
<tr class="row-odd"><td><p>Curve GAS</p></td>
<td><p>Box</p></td>
<td><p>#CCEBC5</p>
<svg width="70" height="30">
  <rect x="5" y="5" rx="5" ry="5" width="60" height="20" stroke="black" stroke-width="2" fill="#CCEBC5" />
</svg></td>
</tr>
<tr class="row-even"><td><p>Sphere GAS</p></td>
<td><p>Box</p></td>
<td><p>#BEBADA</p>
<svg width="70" height="30">
  <rect x="5" y="5" rx="5" ry="5" width="60" height="20" stroke="black" stroke-width="2" fill="#BEBADA" />
</svg></td>
</tr>
<tr class="row-odd"><td><p>Static Transform</p></td>
<td><p>Diamond</p></td>
<td><p>#FB8072</p>
<svg width="70" height="30">
  <rect x="5" y="5" rx="5" ry="5" width="60" height="20" stroke="black" stroke-width="2" fill="#FB8072" />
</svg></td>
</tr>
<tr class="row-even"><td><p>SRT Transform</p></td>
<td><p>Diamond</p></td>
<td><p>#FDB462</p>
<svg width="70" height="30">
  <rect x="5" y="5" rx="5" ry="5" width="60" height="20" stroke="black" stroke-width="2" fill="#FDB462" />
</svg></td>
</tr>
<tr class="row-odd"><td><p>Matrix Motion Transform</p></td>
<td><p>Diamond</p></td>
<td><p>#80B1D3</p>
<svg width="70" height="30">
  <rect x="5" y="5" rx="5" ry="5" width="60" height="20" stroke="black" stroke-width="2" fill="#80B1D3" />
</svg></td>
</tr>
<tr class="row-even"><td><p>Error</p></td>
<td><p>Parallelogram</p></td>
<td><p>#D9D9D9</p>
<svg width="70" height="30">
  <rect x="5" y="5" rx="5" ry="5" width="60" height="20" stroke="black" stroke-width="2" fill="#D9D9D9" />
</svg></td>
</tr>
</tbody>
</table>
</section>
</section>
<section id="metric-selection">
<span id="tool-window-sections-info"></span><h3><span class="section-number">3.5.8. </span>Metric Selection<a class="headerlink" href="#metric-selection" title="Permalink to this headline"></a></h3>
<p>The <em>Metric Selection</em> window can be opened from the main menu using <em>Profile &gt; Metric Selection</em>. It tracks all metric sets, sections and rules currently loaded in NVIDIA Nsight Compute, independent from a specific connection or report. The directory to load those files from can be configured in the <a class="reference external" href="index.html#options-profile">Profile</a> options dialog. It is used to inspect available sets, sections and rules, as well as to configure which should be collected, and which rules should be applied. You can also specify a comma-separated list of individual metrics that should be collected. The window has two views, which can be selected using the dropdown in its header.</p>
<p>The <strong>Metric Sets</strong> view shows all available metric sets. Each set is associated with a number of metrics sections. You can choose a set appropriate to the level of detail for which you want to collect performance metrics. Sets which collect more detailed information normally incur higher runtime overhead during profiling.</p>
<figure class="align-center" id="tool-window-section-sets">
<img alt="../_images/tool-window-section-sets.png" src="../_images/tool-window-section-sets.png" />
</figure>
<p>When enabling a set in this view, the associated metric sections are enabled in the <em>Metric Sections/Rules</em> view. When disabling a set in this view, the associated sections in the <em>Metric Sections/Rules</em> view are disabled. If no set is enabled, or if sections are manually enabled/disabled in the <em>Metric Sections/Rules</em> view, the &lt;<em>custom</em>&gt; entry is marked active to represent that no section set is currently enabled. Note that the <em>basic</em> set is enabled by default.</p>
<p>Whenever a kernel is profiled manually, or when auto-profiling is enabled, only sections enabled in the <strong>Metric Sections/Rules</strong> view and individual metrics specified in the input box are collected. Similarly, whenever rules are applied, only rules enabled in this view are active.</p>
<figure class="align-center" id="tool-window-sections">
<img alt="../_images/tool-window-sections.png" src="../_images/tool-window-sections.png" />
</figure>
<p>The enabled states of sections and rules are persisted across NVIDIA Nsight Compute launches. The <em>Reload</em> button reloads all sections and rules from disk again. If a new section or rule is found, it will be enabled if possible. If any errors occur while loading a rule, they will be listed in an extra entry with a warning icon and a description of the error.</p>
<p>Use the <em>Enable All</em> and <em>Disable All</em> checkboxes to enable or disable all sections and rules at once. The Filter text box can be used to filter what is currently shown in the view. It does not alter activation of any entry.</p>
<p>The table shows sections and rules with their activation status, their relationship and further parameters, such as associated metrics or the original file on disk. Rules associated with a section are shown as children of their section entry. Rules independent of any section are shown under an additional <em>Independent Rules</em> entry.</p>
<p>Double-clicking an entry in the table’s <em>Filename</em> column opens this file as a document. It can be edited and saved directly in NVIDIA Nsight Compute. After editing the file, <em>Reload</em> must be selected to apply those changes.</p>
<p>When a section or rule file is modified, the entry in the <em>State</em> column will show <em>User Modified</em> to reflect that it has been modified from its default state. When a <em>User Modified</em> row is selected, the <em>Restore</em> button will be enabled. Clicking the Restore button will restore the entry to its default state and automatically <em>Reload</em> the sections and rules.</p>
<p>Similarly, when a stock section or rule file is removed from the configured <em>Sections Directory</em> (specified in the <a class="reference external" href="index.html#options-profile">Profile</a> options dialog), the <em>State</em> column will show <em>User Deleted</em>. <em>User Deleted</em> files can also be restored using the <em>Restore</em> button.</p>
<p>Section and rule files that are created by the user (and not shipped with NVIDIA Nsight Compute) will show up as <em>User Created</em> in the <em>State</em> column.</p>
<p>See the <a class="reference external" href="../ProfilingGuide/index.html#sections-and-rules">Sections and Rules</a> for the list of default sections for NVIDIA Nsight Compute.</p>
</section>
</section>
<section id="profiler-report">
<h2><span class="section-number">3.6. </span>Profiler Report<a class="headerlink" href="#profiler-report" title="Permalink to this headline"></a></h2>
<p>The profiler report contains all the information collected during profiling for each kernel launch. In the user interface, it consists of a header with general information, as well as controls to switch between report pages or individual collected launches.</p>
<section id="profiler-report-header">
<span id="id5"></span><h3><span class="section-number">3.6.1. </span>Header<a class="headerlink" href="#profiler-report-header" title="Permalink to this headline"></a></h3>
<p>The <em>Page</em> dropdown can be used to switch between the available report pages, which are explained in detail in the <a class="reference external" href="index.html#profiler-report-pages">next section</a>.</p>
<figure class="align-center" id="profiler-report-header-fig">
<img alt="../_images/profiler-report-header.png" src="../_images/profiler-report-header.png" />
</figure>
<p>The <em>Result</em> dropdown can be used to switch between all collected kernel launches. The information displayed in each page commonly represents the selected launch instance. On some pages (e.g. <em>Raw</em>), information for all launches is shown and the selected instance is highlighted. You can type in this dropdown to quickly filter and find a kernel launch.</p>
<p>The <em>Apply Filters</em> button opens the filter dialog. You can use more than one filter to narrow down your results. On the filter dialog, enter your filter parameters and press the OK button. The <em>Result</em> dropdown, <a class="reference external" href="index.html#profiler-report-summary-page">Summary Page</a> table, and <a class="reference external" href="index.html#profiler-report-raw-page">Raw Page</a> table will be filtered accordingly. Select the arrow dropdown to access the <em>Clear Filters</em> button, which removes all filters.</p>
<figure class="align-center" id="profiler-report-header-filter-dialog">
<img alt="../_images/profiler-report-header-filter-dialog.png" src="../_images/profiler-report-header-filter-dialog.png" />
<figcaption>
<p><span class="caption-text">Filter Dialog</span><a class="headerlink" href="#profiler-report-header-filter-dialog" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>The <em>Add Baseline</em> button promotes the current result in focus to become the baseline of all other results from this report and any other report opened in the same instance of NVIDIA Nsight Compute. Select the arrow dropdown to access the <em>Clear Baselines</em> button, which removes all currently active baselines.</p>
<p>The <em>Apply Rules</em> button applies all rules available for this report. If rules had been applied previously, those results will be replaced. By default, rules are applied immediately once the kernel launch has been profiled. This can be changed in the options under <em>Tools &gt; Options &gt; Profile &gt; Report UI &gt; Apply Applicable Rules Automatically</em>.</p>
<p>The <em>Occupancy Calculator</em> button navigates to the <a class="reference external" href="index.html#occupancy-calculator">Occupancy Calculator</a> which is opened in a new document.</p>
<p>The <em>Source Comparison</em> button navigates to the <a class="reference external" href="index.html#source-comparison">Source Comparison</a> document in case at least two profile results are available for comparison.</p>
<p>A button on the right-hand side offers multiple operations that may be performed on the page. Available operations include:</p>
<ul class="simple">
<li><p><strong>Copy as Image</strong> - Copies the contents of the page to the clipboard as an image.</p></li>
<li><p><strong>Save as Image</strong> - Saves the contents of the page to a file as an image.</p></li>
<li><p><strong>Save as PDF</strong> - Saves the contents of the page to a file as a PDF.</p></li>
<li><p><strong>Export to CSV</strong> - Exports the contents of the page to CSV format.</p></li>
<li><p><strong>Reset to Default</strong> - Resets the page to a default state by removing any persisted settings.</p></li>
</ul>
<p>Note that not all functions are available on all pages.</p>
<p>Below the buttons described above, a table is shown with information about the selected profile result (as <em>Current</em>) and potentially additional baselines.
For many values in this table tooltips provide additional information or data, e.g., the tooltip of the column <em>Attributes</em> provides
additional information about the context type and resources used for the launch.</p>
<p>The [+] and [-] buttons can be used to show or hide the section body content. The visibility of the output of the rules can be toggled with the <em>R</em> button. The info toggle button <em>i</em> changes the section description’s visibility.</p>
</section>
<section id="report-pages">
<span id="profiler-report-pages"></span><h3><span class="section-number">3.6.2. </span>Report Pages<a class="headerlink" href="#report-pages" title="Permalink to this headline"></a></h3>
<p>Use the <em>Page</em> dropdown in the header to switch between the report pages.</p>
<p>By default, when opening a report with a single profile result, the <a class="reference external" href="index.html#profiler-report-details-page">Details Page</a> is shown. When opening a report with multiple results, the <a class="reference external" href="index.html#profiler-report-summary-page">Summary Page</a> is selected instead. You can change the default report page in the <a class="reference external" href="index.html#options-profile">Profile</a> options.</p>
<section id="session-page">
<span id="profiler-report-session-page"></span><h4>Session Page<a class="headerlink" href="#session-page" title="Permalink to this headline"></a></h4>
<p>This <em>Session</em> page contains basic information about the report and the machine, as well as device attributes of all devices for which launches were profiled. When switching between launch instances, the respective device attributes are highlighted.</p>
</section>
<section id="summary-page">
<span id="profiler-report-summary-page"></span><h4>Summary Page<a class="headerlink" href="#summary-page" title="Permalink to this headline"></a></h4>
<p>The <em>Summary</em> page shows a table of all collected results in the report, as well as a list of the most important rule outputs (<em>Prioritized Rules</em>) which are ordered by the estimated speedup that could potentially be obtained by following their guidance. <em>Prioritized Rules</em> are shown by default and can be toggled with the [R] button on the upper right of the page.</p>
<figure class="align-center" id="profiler-report-pages-summary-fig">
<img alt="../_images/profiler-report-pages-summary.png" src="../_images/profiler-report-pages-summary.png" />
<figcaption>
<p><span class="caption-text">Summary page with Summary Table and Prioritized Rules.</span><a class="headerlink" href="#profiler-report-pages-summary-fig" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>The <em>Summary Table</em> gives you a quick comparison overview across all profiled workloads. It contains a number of important, pre-selected metrics which can be customized as explained below. Its columns can be sorted by clicking the column header. You can transpose the table with the <em>Transpose</em> button. An aggregate of all results for each counter metric is shown in the table header along with the column name. You can change the aggregated values by selecting the desired results for multiple metrics simultaneously. When selecting any entry by single-click, a list of its <em>Prioritized Rules</em> will be shown below the table. Double-click any entry to make the result the currently active one and switch to the <a class="reference external" href="index.html#profiler-report-details-page">Details Page</a> to inspect its performance data.</p>
<figure class="align-center" id="profiler-report-pages-summary-table">
<img alt="../_images/profiler-report-pages-summary-table.png" src="../_images/profiler-report-pages-summary-table.png" />
</figure>
<p>You can configure the list of metrics included in this table in the <a class="reference external" href="index.html#options-profile">Profile</a> options dialog. If a metric has multiple instance values, the number of instances is shown after its standard value. A metric with ten instance values could for example look like this: <code class="docutils literal notranslate"><span class="pre">35.48</span> <span class="pre">{10}</span></code>. In the <a class="reference external" href="index.html#options-profile">Profile</a> options dialog, you can select that all instance values should be shown individually. You can also inspect the instance values of a metric result in the <a class="reference external" href="index.html#tool-window-metric-details">Metric Details</a> tool window.</p>
<p>In addition to metrics, you can also configure the table to include any of the following properties:</p>
<blockquote>
<div><p class="rubric-h3 rubric" id="summary-page-property">Properties</p>
<table class="colwidths-auto table-no-stripes docutils align-default" id="id13">
<caption><span class="caption-text">Properties</span><a class="headerlink" href="#id13" title="Permalink to this table"></a></caption>
<tbody>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">property__api_call_id</span></code></p></td>
<td><p>ID of the API call associated with this profile result.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">property__block_size</span></code></p></td>
<td><p>Block Size.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">property__creation_time</span></code></p></td>
<td><p>Local collection time.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">property__demangled_name</span></code></p></td>
<td><p>Kernel demangled name.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">property__device_name</span></code></p></td>
<td><p>GPU device name.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">property__estimated_speedup</span></code></p></td>
<td><p>Maximal relative speedup achievable for this profile result as estimated by the guided analysis rules.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">property__function_name</span></code></p></td>
<td><p>Kernel function name or range name.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">property__grid_dimensions</span></code></p></td>
<td><p>Grid Dimensions.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">property__grid_offset</span></code></p></td>
<td><p>Grid Offset.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">property__grid_size</span></code></p></td>
<td><p>Grid Size.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">property__issues_detected</span></code></p></td>
<td><p>Number of issues detected by guided analysis rules for this profile result.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">property__kernel_id</span></code></p></td>
<td><p>Kernel ID.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">property__mangled_name</span></code></p></td>
<td><p>Kernel mangled name.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">property__process_name</span></code></p></td>
<td><p>Process name.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">property__runtime_improvement</span></code></p></td>
<td><p>Runtime improvement corresponding to the estimated speedup.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">property__series_id</span></code></p></td>
<td><p>ID of the profile series.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="docutils literal notranslate"><span class="pre">property__series_parameters</span></code></p></td>
<td><p>Profile series parameters.</p></td>
</tr>
<tr class="row-even"><td><p><code class="docutils literal notranslate"><span class="pre">property__thread_id</span></code></p></td>
<td><p>CPU thread ID.</p></td>
</tr>
</tbody>
</table>
</div></blockquote>
<p>For <a class="reference external" href="../ProfilingGuide/index.html#range-replay">Range Replay</a> reports, a smaller set of columns is shown by default, as not all apply to such results.</p>
<p>For the currently selected metric result the <em>Prioritized Rules</em> show the most impactful rule results with respect to the estimated potential speedup. Clicking on any of the rule names on the left allows you to easily navigate to the containing section on the details page. With the downward-facing arrow on the right a table with the relevant <em>key performance indicators</em> can be toggled. This table contains the metrics which should be tracked when optimizing performance according to the rule guidance.</p>
<figure class="align-center" id="profiler-report-pages-summary-rules">
<img alt="../_images/profiler-report-pages-summary-rules.png" src="../_images/profiler-report-pages-summary-rules.png" />
<figcaption>
<p><span class="caption-text"><em>Prioritized Rules</em> with key performance indicators table.</span><a class="headerlink" href="#profiler-report-pages-summary-rules" title="Permalink to this image"></a></p>
</figcaption>
</figure>
</section>
<section id="details-page">
<span id="profiler-report-details-page"></span><h4>Details Page<a class="headerlink" href="#details-page" title="Permalink to this headline"></a></h4>
<p class="rubric-h4 rubric">Overview</p>
<p>The <em>Details</em> page is the main page for all metric data collected during a kernel launch. The page is split into individual sections. Each section consists of a header table and an optional body that can be expanded. The sections are completely user defined and can be changed easily by updating their respective files. For more information on customizing sections, see the <a class="reference external" href="../CustomizationGuide/index.html#abstract">Customization Guide</a>. For a list of sections shipped with NVIDIA Nsight Compute, see <a class="reference external" href="../ProfilingGuide/index.html#sections-and-rules">Sections and Rules</a>.</p>
<p>By default, once a new profile result is collected, all applicable rules are applied. Any rule results will be shown as <em>Recommendations</em> on this page. Most rule results will contain optimization advice along with an estimate of the improvement that could be achieved when successfully implementing this advice. Other rule results will be purely informative or have a warning icon to indicate a problem that occurred during execution (e.g., an optional metric that could not be collected). Results with error icons typically indicate an error while applying the rule.</p>
<p>Estimates of potential improvement are shown below the rule result’s name and exist in two types. <em>Global estimates</em> (“Est. Speedup”) are an approximation of the decrease in workload runtime, whereas <em>local estimates</em> (“Est. Local Speedup”) are an approximation of the increase in efficiency of the hardware utilization of the particular performance problem the rule addresses.</p>
<figure class="align-center" id="profiler-report-pages-section-with-rule">
<img alt="../_images/profiler-report-pages-section-with-rule.png" src="../_images/profiler-report-pages-section-with-rule.png" />
<figcaption>
<p><span class="caption-text">Rule results often point out performance problems and guide through the analysis process.</span><a class="headerlink" href="#profiler-report-pages-section-with-rule" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>If a rule result references another report section, it will appear as a link in the recommendation. Select the link to scroll to the respective section. If the section was not collected in the same profile result, enable it in the <a class="reference external" href="index.html#tool-window-sections-info">Metric Selection</a> tool window.</p>
<p>You can add or edit comments in each section of the <em>Details</em> view by clicking on the comment button (speech bubble). The comment icon will be highlighted in sections that contain a comment. Comments are persisted in the report and are summarized in the <a class="reference external" href="index.html#profiler-report-comments-page">Comments Page</a>.</p>
<figure class="align-center" id="profiler-report-pages-details-comments">
<img alt="../_images/profiler-report-pages-details-comments.png" src="../_images/profiler-report-pages-details-comments.png" />
<figcaption>
<p><span class="caption-text">Use the Comments button to annotate sections.</span><a class="headerlink" href="#profiler-report-pages-details-comments" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>Besides their header, sections typically have one or more <em>bodies</em> with additional charts or tables. Click the triangle <em>Expander</em> icon in the top-left corner of each section to show or hide those. If a section has multiple bodies, a dropdown in their top-right corner allows you to switch between them.</p>
<figure class="align-center" id="profiler-report-pages-section-bodies">
<img alt="../_images/profiler-report-pages-section-bodies.png" src="../_images/profiler-report-pages-section-bodies.png" />
<figcaption>
<p><span class="caption-text">Sections with multiple bodies have a dropdown to switch between them.</span><a class="headerlink" href="#profiler-report-pages-section-bodies" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p class="rubric-h4 rubric" id="memory">Memory</p>
<p>If enabled, the <em>Memory Workload Analysis</em> section contains a Memory chart that visualizes data transfers, cache hit rates, instructions and memory requests. More information on how to use and read this chart can be found in the <a class="reference external" href="../ProfilingGuide/index.html#memory-chart">Kernel Profiling Guide</a>.</p>
<p class="rubric-h4 rubric">Occupancy</p>
<p>You can open the <a class="reference external" href="index.html#occupancy-calculator">Occupancy Calculator</a> by clicking on the calculator button in the report header or in the header of the <em>Occupancy Section</em>.</p>
<p class="rubric-h4 rubric">Range Replay</p>
<p>Note that for <a class="reference external" href="../ProfilingGuide/index.html#range-replay">Range Replay</a> results some UI elements, analysis rules, metrics or section body items such as charts or tables might not be available, as they only apply to kernel launch-based results. The filters can be checked in the corresponding section files.</p>
<p class="rubric-h4 rubric">Rooflines</p>
<p>If enabled, the <em>GPU Speed Of Light Roofline Chart</em> section contains a Roofline chart that is particularly helpful for visualizing kernel performance at a glance. (To enable roofline charts in the report, ensure that the section is enabled when profiling.) More information on how to use and read this chart can be found in <a class="reference external" href="../ProfilingGuide/index.html#roofline">Roofline Charts</a>. NVIDIA Nsight Compute ships with several different definitions for roofline charts, including hierarchical rooflines. These additional rooflines are defined in different section files. While not part of the <em>full</em> section set, a new section set called <em>roofline</em> was added to collect and show all rooflines in one report. The idea of hierarchical rooflines is that they define multiple ceilings that represent the limiters of a hardware hierarchy. For example, a hierarchical roofline focusing on the memory hierarchy could have ceilings for the throughputs of the L1 cache, L2 cache and device memory. If the achieved performance of a kernel is limited by one of the ceilings of a hierarchical roofline, it can indicate that the corresponding unit of the hierarchy is a potential bottleneck.</p>
<figure class="align-center" id="id14">
<img alt="../_images/profiler-report-pages-section-rooflines.png" src="../_images/profiler-report-pages-section-rooflines.png" />
<figcaption>
<p><span class="caption-text">Sample roofline chart.</span><a class="headerlink" href="#id14" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>The roofline chart can be zoomed and panned for more effective data analysis, using the controls in the table below.</p>
<table class="table-no-stripes docutils align-default" id="id15">
<caption><span class="caption-text">Table 3. Roofline Chart Zoom and Pan Controls</span><a class="headerlink" href="#id15" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 30%" />
<col style="width: 21%" />
<col style="width: 22%" />
<col style="width: 27%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Zoom In</p></th>
<th class="head"><p>Zoom Out</p></th>
<th class="head"><p>Zoom Reset</p></th>
<th class="head"><p>Pan</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><ul class="simple">
<li><p>Click the Zoom In button in the top right corner of the chart.</p></li>
<li><p>Click the left mouse button and drag to create a rectangle that bounds the area of interest.</p></li>
<li><p>Press the plus (+) key.</p></li>
<li><p>Use Ctrl + MouseWheel (Windows and Linux only)</p></li>
</ul>
</td>
<td><ul class="simple">
<li><p>Click the Zoom Out button in the top right corner of the chart.</p></li>
<li><p>Click the right mouse button.</p></li>
<li><p>Press the minus (-) key.</p></li>
<li><p>Use Ctrl + MouseWheel (Windows and Linux only)</p></li>
</ul>
</td>
<td><ul class="simple">
<li><p>Click the Zoom Reset button in the top right corner of the chart.</p></li>
<li><p>Press the Escape (Esc) key.</p></li>
</ul>
</td>
<td><ul class="simple">
<li><p>Use Ctrl (Command on Mac) + LeftMouseButton to grab the chart, then move the mouse.</p></li>
<li><p>Use the cursor keys.</p></li>
</ul>
</td>
</tr>
</tbody>
</table>
<p class="rubric-h4 rubric">Source</p>
<p>Sections such as <em>Source Counters</em> can contain source hot spot tables. These tables indicate the N highest or lowest values of one or more metrics in your kernel source code. Select the location links to navigate directly to this location in the <a class="reference external" href="index.html#profiler-report-source-page">Source Page</a>. Hover the mouse over a value to see which metrics contribute to it.</p>
<figure class="align-center" id="id16">
<img alt="../_images/profiler-report-pages-details-source-table.png" src="../_images/profiler-report-pages-details-source-table.png" />
<figcaption>
<p><span class="caption-text">Hot spot tables point out performance problems in your source.</span><a class="headerlink" href="#id16" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p class="rubric-h4 rubric">Timelines</p>
<p>When collecting metrics with <a class="reference external" href="../ProfilingGuide/index.html#pm-sampling">PM sampling</a>, they can be viewed in a <em>timeline</em>. The timeline shows metrics selected in the respective section file or on the command line with their labels/names and their values over time.</p>
<p>Different metrics may be collected in different passes (replays) of the workload, as only a limited number of them can be sampled in the same pass. Context switch trace is used to filter the collected data to only include samples from the profiled contexts and to align it in the timeline.</p>
<p>You can hover the mouse over a metric row label to see further information on the metrics in the row. Hovering over a sample on the timeline shows the metric values at that timestamp within the current row. With the <a class="reference external" href="index.html#tool-window-metric-details">Metric Details</a> tool window open, click to select a value on the timeline and show the metric and all of its raw, timestamp-correlated values (with absolute and relative timestamps) in the tool window.</p>
<p>You can also use the <a class="reference external" href="index.html#tool-window-metric-details">Metric Details</a> tool window to inspect profiler metrics generated during PM sampling. These provide information about the used sampling intervals, buffer sizes, dropped samples and other properties for each collection pass. A detailed list can be found in the <a class="reference external" href="../ProfilingGuide/index.html#metrics-reference">metrics reference</a>.</p>
<p>The timeline has a context menu for further actions regarding copying content and zooming.
In addition, the <em>Enable/Disable Context Switch Filter</em> option can be used to enable or disable the filtering of the timeline data with <a class="reference external" href="../ProfilingGuide/index.html#pm-sampling">context switch</a> information, if it is available.
When the context switch filter is enabled (the default), samples from each pass group are only shown for the active contexts.
When the context switch filter is disabled, the raw collected sampling data is shown along with a separate row for each pass group’s context switch trace.</p>
<p>When the context menu option is not available, the report does not include context switch trace data.
In this case, the option <em>Enable/Disable Trim Filter</em> is shown instead, which, when enabled, tries to align based on the first non-zero value in any sampling metric in this pass group.
However, this fallback does not take into account actual context switches.</p>
</section>
<section id="source-page">
<span id="profiler-report-source-page"></span><h4>Source Page<a class="headerlink" href="#source-page" title="Permalink to this headline"></a></h4>
<p>The <em>Source</em> page correlates assembly (SASS) with high-level code such as CUDA-C or PTX. In addition, it displays instruction-correlated metrics to help pinpoint performance problems in your code.</p>
<figure class="align-center" id="profiler-report-pages-source-fig">
<img alt="../_images/profiler-report-pages-source.png" src="../_images/profiler-report-pages-source.png" />
</figure>
<p>The page can be switched between different <em>Views</em> to focus on a specific source layer or see two layers side-by-side. This includes SASS, PTX and Source (CUDA-C, Fortran, Python, …), as well as their combinations. Which options are available depends on the source information embedded into the executable.</p>
<p>The high-level Source (CUDA-C) view is available if the application was built with the <code class="docutils literal notranslate"><span class="pre">-lineinfo</span></code> or <code class="docutils literal notranslate"><span class="pre">--generate-line-info</span></code> nvcc flag to correlate SASS and source. When using separate linking at the ELF level, there is no PTX available in the ELF that would correspond to the final SASS. As such, NVIDIA Nsight Compute does not show any PTX even though it would be available statically in the executable and could be shown with <code class="docutils literal notranslate"><span class="pre">cuobjdump</span> <span class="pre">-all</span> <span class="pre">-lptx</span></code>. However, this is a pre-linked version of the PTX and cannot be reliably used for correlation.</p>
<p>The code in the different <em>Views</em> can also contain warnings, errors or just notifications that are displayed as <em>Source Markers</em> in the left header, as shown below. These can be generated from multiple systems, but as of now only NvRules are supported.</p>
<figure class="align-center" id="profiler-report-pages-source-markers">
<img alt="../_images/profiler-report-pages-source-markers.png" src="../_images/profiler-report-pages-source-markers.png" />
</figure>
<section id="navigation">
<span id="profiler-report-source-page-navigation"></span><h5>Navigation<a class="headerlink" href="#navigation" title="Permalink to this headline"></a></h5>
<p>The <em>View</em> dropdown can be used to select different code (correlation) options: SASS, PTX and Source (CUDA-C, Fortran, Python, …).</p>
<p>In side-by-side views, when selecting a line in the left-hand- or right-hand-side, any correlated lines in the opposite view are highlighted. However, when the <a class="reference external" href="index.html#options-profile">Show Single File For Multi-File Sources</a> option is set to <em>Yes</em>, the target file or source object must already be selected in the respective view for those correlated lines to be shown.</p>
<p>The <em>Source</em> drop down allows you to switch between the files or functions that provide the content in the view. When a different source entry is selected, the view scrolls to the start of this file or function. If a view contains multiple source files or functions, [+] and [-] buttons are shown. These can be used to expand or collapse the view, thereby showing or hiding the file or function content except for its header. If collapsed, all <a class="reference external" href="index.html#profiler-report-source-page-metrics">metrics</a> are shown aggregated to provide a quick overview.</p>
<figure class="align-center" id="profiler-report-pages-source-collapse">
<img alt="../_images/profiler-report-pages-source-collapse.png" src="../_images/profiler-report-pages-source-collapse.png" />
</figure>
<p>You can use the <em>Find</em> (source code) line edit to search the <em>Source</em> column of each view. Enter the text to search and use the associated buttons to find the next or previous occurrence in this column. While the line edit is selected, you can also use the <em>Enter</em> or <em>Shift</em>+<em>Enter</em> keys to search for the next or previous occurrence, respectively.</p>
<p>The SASS view is filtered to only show functions that were executed in the launch. You can toggle the <a class="reference external" href="index.html#options-profile">Show Only Executed Functions</a> option to change this, but performance of this page may be negatively affected for large binaries. It is possible that some SASS instructions are shown as <em>N/A</em>. Those instructions are not currently exposed publicly.</p>
<p>Only filenames are shown in the view, together with a <em>File Not Found</em> error, if the source files cannot be found in their original location. This can occur, for example, if the report was moved to a different system. Select a filename and click the <em>Resolve</em> button above to specify where this source can be found on the local filesystem. However, the view always shows the source files if the <a class="reference external" href="index.html#connection-activity-interactive">import source</a> option was selected during profiling, and the files were available at that time. If a file is found in its original or any source lookup location, but its attributes don’t match, a <em>File Mismatch</em> error is shown. See the <a class="reference external" href="index.html#options-source-lookup">Source Lookup</a> options for changing file lookup behavior.</p>
<figure class="align-center" id="profiler-report-pages-source-resolve">
<img alt="../_images/profiler-report-pages-source-resolve.png" src="../_images/profiler-report-pages-source-resolve.png" />
</figure>
<p>If the report was collected using remote profiling, and automatic resolution of remote files is enabled in the <a class="reference external" href="index.html#options-profile">Profile</a> options, NVIDIA Nsight Compute will attempt to load the source from the remote target. If the connection credentials are not yet available in the current NVIDIA Nsight Compute instance, you are prompted for them in a dialog. Loading from a remote target is currently only available for Linux x86_64 targets and Linux and Windows hosts.</p>
</section>
<section id="metrics">
<span id="profiler-report-source-page-metrics"></span><h5>Metrics<a class="headerlink" href="#metrics" title="Permalink to this headline"></a></h5>
<p class="title sectiontitle rubric" id="metrics-correlation">Metrics Correlation</p>
<p>The page is most useful when inspecting performance information and metrics correlated with your code. Metrics are shown in columns, which can be enabled or disabled using the <em>Column Chooser</em> accessible using the column header right click menu.</p>
<figure class="align-center" id="profiler-report-pages-source-column-chooser">
<img alt="../_images/profiler-report-pages-source-column-chooser.png" src="../_images/profiler-report-pages-source-column-chooser.png" />
</figure>
<p>To not move out of view when scrolling horizontally, columns can be fixed. By default, the <em>Source</em> column is fixed to the left, enabling easy inspection of all metrics correlated to a source line. To change fixing of columns, right click the column header and select <em>Freeze</em> or <em>Unfreeze</em>, respectively.</p>
<figure class="align-center" id="profiler-report-pages-fix-column">
<img alt="../_images/profiler-report-pages-fix-column.png" src="../_images/profiler-report-pages-fix-column.png" />
</figure>
<p>The heatmap on the right-hand side of each view can be used to quickly identify locations with high metric values of the currently selected metric in the dropdown. The heatmap uses a black-body radiation color scale where black denotes the lowest mapped value and white the highest, respectively. The current scale is shown when clicking and holding the heatmap with the right mouse button.</p>
<figure class="align-center" id="profiler-report-pages-source-heatmap">
<img alt="../_images/profiler-report-pages-source-heatmap.png" src="../_images/profiler-report-pages-source-heatmap.png" />
</figure>
<p>By default, applicable metrics are shown as percentage values relative to their sum across the launch. A bar is filling from left to right to indicate the value at a specific source location relative to this metric’s maximum within the launch. The [%] and [+-] buttons can be used to switch the display from relative to absolute and from abbreviated absolute to full-precision absolute, respectively. For relative values and bars, the [circle/pie] button can be used to switch the display between relative to global (launch) and relative to local (function/file) scope. This button is disabled when the view is collapsed, as percentages are always relative to the global launch scope in this case.</p>
<figure class="align-center" id="profiler-report-pages-source-rel-abs">
<img alt="../_images/profiler-report-pages-source-rel-abs.png" src="../_images/profiler-report-pages-source-rel-abs.png" />
</figure>
<p class="title sectiontitle rubric" id="pre-defined-source-metrics">Pre-Defined Source Metrics</p>
<ul>
<li><p><strong>Live Registers</strong></p>
<p>Number of registers that need to be kept valid by the compiler. A high value indicates that many registers are required at this code location, potentially increasing the register pressure and the maximum number of registers required by the kernel.</p>
<p>The total number of registers reported as <code class="docutils literal notranslate"><span class="pre">launch__registers_per_thread</span></code> may be significantly higher than the maximum live registers. The compiler may need to allocate specific registers that can create holes in the allocation, thereby affecting <code class="docutils literal notranslate"><span class="pre">launch__registers_per_thread</span></code>, even if the maximum live registers is smaller. This may happen due to ABI restrictions, or restrictions enforced by particular hardware instructions. The compiler may not have a complete picture of which registers may be used in either callee or caller and has to obey ABI conventions, thereby allocating different registers even if some register could have theoretically been re-used.</p>
</li>
<li><p><strong>Warp Stall Sampling (All Samples)</strong><a class="footnote-reference brackets" href="#fn1" id="id6">1</a></p>
<p>The number of samples from the <a class="reference external" href="../ProfilingGuide/index.html#statistical-sampler">Statistical Sampler</a> at this program location.</p>
</li>
<li><p><strong>Warp Stall Sampling (Not-issued Samples)</strong><a class="footnote-reference brackets" href="#fn2" id="id7">2</a></p>
<p>The number of samples from the <a class="reference external" href="../ProfilingGuide/index.html#statistical-sampler">Statistical Sampler</a> at this program location on cycles the warp scheduler issued no instructions. Note that <em>(Not Issued)</em> samples may be taken on a different profiling pass than <em>(All)</em> samples mentioned above, so their values do not strictly correlate.</p>
<p>This metric is only available on devices with compute capability 7.0 or higher.</p>
</li>
<li><p><strong>Instructions Executed</strong></p>
<p>Number of times the source (instruction) was executed per individual warp, independent of the number of participating threads within each warp.</p>
</li>
<li><p><strong>Thread Instructions Executed</strong></p>
<p>Number of times the source (instruction) was executed by any thread, regardless of predicate presence or evaluation.</p>
</li>
<li><p><strong>Predicated-On Thread Instructions Executed</strong></p>
<p>Number of times the source (instruction) was executed by any active, predicated-on thread. For instructions that are executed unconditionally (i.e. without predicate), this is the number of active threads in the warp, multiplied with the respective <em>Instructions Executed</em> value.</p>
</li>
<li><p><strong>Avg. Threads Executed</strong></p>
<p>Average number of thread-level executed instructions per warp, regardless of their predicate.</p>
</li>
<li><p><strong>Avg. Predicated-On Threads Executed</strong></p>
<p>Average number of predicated-on thread-level executed instructions per warp.</p>
</li>
<li><p><strong>Divergent Branches</strong></p>
<p>Number of divergent branch targets, including fallthrough. Incremented only when there are two or more active threads with divergent targets. Divergent branches can lead to warp stalls due to resolving the branch or instruction cache misses.</p>
</li>
<li><p><strong>Information on Memory Operations</strong></p>
<table class="table-no-stripes docutils align-default">
<colgroup>
<col style="width: 13%" />
<col style="width: 18%" />
<col style="width: 70%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><strong>Label</strong></p></td>
<td><p><strong>Name</strong></p></td>
<td><p><strong>Description</strong></p></td>
</tr>
<tr class="row-even"><td><p>Address Space</p></td>
<td><p>memory_type</p></td>
<td><p>The accessed address space (global/local/shared).</p></td>
</tr>
<tr class="row-odd"><td><p>Access Operation</p></td>
<td><p>memory_access_type</p></td>
<td><p>The type of memory access (e.g. load or store).</p></td>
</tr>
<tr class="row-even"><td><p>Access Size</p></td>
<td><p>memory_access_size_type</p></td>
<td><p>The size of the memory access, in bits.</p></td>
</tr>
<tr class="row-odd"><td><p>L1 Tag Requests Global</p></td>
<td><p>memory_l1_tag_requests_global</p></td>
<td><p>Number of L1 tag requests generated by global memory instructions.</p></td>
</tr>
<tr class="row-even"><td><p>L1 Conflicts Shared N-Way</p></td>
<td><p>derived__memory_l1_conflicts_shared_nway</p></td>
<td><p>Average N-way conflict in L1 per shared memory instruction. A 1-way access has no conflicts and resolves in a single pass. Note: This is a derived metric which can not be collected directly.</p></td>
</tr>
<tr class="row-odd"><td><p>L1 Wavefronts Shared Excessive</p></td>
<td><p>derived__memory_l1_wavefronts_shared_excessive</p></td>
<td><p>Excessive number of wavefronts in L1 from shared memory instructions, because not all not predicated-off threads performed the operation. Note: This is a derived metric which can not be collected directly.</p></td>
</tr>
<tr class="row-even"><td><p>L1 Wavefronts Shared</p></td>
<td><p>memory_l1_wavefronts_shared</p></td>
<td><p>Number of wavefronts in L1 from shared memory instructions.</p></td>
</tr>
<tr class="row-odd"><td><p>L1 Wavefronts Shared Ideal</p></td>
<td><p>memory_l1_wavefronts_shared_ideal</p></td>
<td><p>Ideal number of wavefronts in L1 from shared memory instructions, assuming each not predicated-off thread performed the operation.</p></td>
</tr>
<tr class="row-even"><td><p>L2 Theoretical Sectors Global Excessive</p></td>
<td><p>derived__memory_l2_theoretical_sectors_global_excessive</p></td>
<td><p>Excessive theoretical number of sectors requested in L2 from global memory instructions, because not all not predicated-off threads performed the operation. Note: This is a derived metric which can not be collected directly.</p></td>
</tr>
<tr class="row-odd"><td><p>L2 Theoretical Sectors Global</p></td>
<td><p>memory_l2_theoretical_sectors_global</p></td>
<td><p>Theoretical number of sectors requested in L2 from global memory instructions.</p></td>
</tr>
<tr class="row-even"><td><p>L2 Theoretical Sectors Global Ideal</p></td>
<td><p>memory_l2_theoretical_sectors_global_ideal</p></td>
<td><p>Ideal number of sectors requested in L2 from global memory instructions, assuming each not predicated-off thread performed the operation.</p></td>
</tr>
<tr class="row-odd"><td><p>L2 Theoretical Sectors Local</p></td>
<td><p>memory_l2_theoretical_sectors_local</p></td>
<td><p>Theoretical number of sectors requested in L2 from local memory instructions.</p></td>
</tr>
</tbody>
</table>
<p>All <em>L1/L2 Sectors/Wavefronts/Requests</em> metrics give the number of achieved (actually required), ideal, and excessive (achieved - ideal) sectors/wavefronts/requests. <em>Ideal</em> metrics indicate the number that would be needed, given each not predicated-off thread performed the operation of given width. <em>Excessive</em> metrics indicate the required surplus over the ideal case. Reducing divergence between threads can reduce the excess amount and result in less work for the respective HW units.</p>
</li>
</ul>
<p>Several of the above metrics on memory operations were renamed in version 2021.2 as follows:</p>
<table class="table-no-stripes docutils align-default">
<colgroup>
<col style="width: 42%" />
<col style="width: 58%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><strong>Old name</strong></p></td>
<td><p><strong>New name</strong></p></td>
</tr>
<tr class="row-even"><td><p>memory_l2_sectors_global</p></td>
<td><p>memory_l2_theoretical_sectors_global</p></td>
</tr>
<tr class="row-odd"><td><p>memory_l2_sectors_global_ideal</p></td>
<td><p>memory_l2_theoretical_sectors_global_ideal</p></td>
</tr>
<tr class="row-even"><td><p>memory_l2_sectors_local</p></td>
<td><p>memory_l2_theoretical_sectors_local</p></td>
</tr>
<tr class="row-odd"><td><p>memory_l1_sectors_global</p></td>
<td><p>memory_l1_tag_requests_global</p></td>
</tr>
<tr class="row-even"><td><p>memory_l1_sectors_shared</p></td>
<td><p>memory_l1_wavefronts_shared</p></td>
</tr>
<tr class="row-odd"><td><p>memory_l1_sectors_shared_ideal</p></td>
<td><p>memory_l1_wavefronts_shared_ideal</p></td>
</tr>
</tbody>
</table>
<ul>
<li><p><strong>L2 Explicit Evict Policy Metrics</strong></p>
<p>Starting with the NVIDIA Ampere architecture the eviction policy of the L2 cache can be tuned to match the kernel’s access pattern. The eviction policy can be either set implicitly for a memory window (for more details see <a class="reference external" href="https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaAccessPolicyWindow.html">CUaccessProperty</a>) or set explicitly per executed memory instruction. If set explicitly, the desired eviction behavior for the cases of an L2 cache hit or miss are passed as input to the instruction. For more details refer to CUDA’s <a class="reference external" href="https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-eviction-priority-hints">Cache Eviction Priority Hints</a>.</p>
<table class="table-no-stripes docutils align-default">
<colgroup>
<col style="width: 8%" />
<col style="width: 13%" />
<col style="width: 79%" />
</colgroup>
<tbody>
<tr class="row-odd"><td><p><strong>Label</strong></p></td>
<td><p><strong>Name</strong></p></td>
<td><p><strong>Description</strong></p></td>
</tr>
<tr class="row-even"><td><p>L2 Explicit Evict Policies</p></td>
<td><p>smsp__inst_executed_memdesc_explicit_evict_type</p></td>
<td><p>Comma separated list of configured explicit eviction policies. As the policies can be set dynamically at runtime, this list includes all policies that were part of any executed instruction.</p></td>
</tr>
<tr class="row-odd"><td><p>L2 Explicit Hit Policy Evict First</p></td>
<td><p>smsp__inst_executed_memdesc_explicit_hitprop_evict_first</p></td>
<td><p>Number of times a memory instruction was executed by any warp which had the <code class="docutils literal notranslate"><span class="pre">evict_first</span></code> policy set in case the access leads to a cache hit in L2. Data cached with this policy will be first in the eviction priority order and will likely be evicted when cache eviction is required. This policy is suitable for streaming data.</p></td>
</tr>
<tr class="row-even"><td><p>L2 Explicit Hit Policy Evict Last</p></td>
<td><p>smsp__inst_executed_memdesc_explicit_hitprop_evict_last</p></td>
<td><p>Number of times a memory instruction was executed by any warp which had the <code class="docutils literal notranslate"><span class="pre">evict_last</span></code> policy set in case the access leads to a cache hit in L2. Data cached with this policy will be last in the eviction priority order and will likely be evicted only after other data with <code class="docutils literal notranslate"><span class="pre">evict_normal</span></code> or <code class="docutils literal notranslate"><span class="pre">evict_first</span></code> eviction policy is already evicted. This policy is suitable for data that should remain persistent in cache.</p></td>
</tr>
<tr class="row-odd"><td><p>L2 Explicit Hit Policy Evict Normal</p></td>
<td><p>smsp__inst_executed_memdesc_explicit_hitprop_evict_normal</p></td>
<td><p>Number of times a memory instruction was executed by any warp which had the <code class="docutils literal notranslate"><span class="pre">evict_normal</span></code> (default) policy set in case the access leads to a cache hit in L2.</p></td>
</tr>
<tr class="row-even"><td><p>L2 Explicit Hit Policy Evict Normal Demote</p></td>
<td><p>smsp__inst_executed_memdesc_explicit_hitprop_evict_normal_demote</p></td>
<td><p>Number of times a memory instruction was executed by any warp which had the <code class="docutils literal notranslate"><span class="pre">evict_normal_demote</span></code> policy set in case the access leads to a cache hit in L2.</p></td>
</tr>
<tr class="row-odd"><td><p>L2 Explicit Miss Policy Evict First</p></td>
<td><p>smsp__inst_executed_memdesc_explicit_missprop_evict_first</p></td>
<td><p>Number of times a memory instruction was executed by any warp which had the <code class="docutils literal notranslate"><span class="pre">evict_first</span></code> policy set in case the access leads to a cache miss in L2. Data cached with this policy will be first in the eviction priority order and will likely be evicted when cache eviction is required. This policy is suitable for streaming data.</p></td>
</tr>
<tr class="row-even"><td><p>L2 Explicit Miss Policy Evict Normal</p></td>
<td><p>smsp__inst_executed_memdesc_explicit_missprop_evict_normal</p></td>
<td><p>Number of times a memory instruction was executed by any warp which had the <code class="docutils literal notranslate"><span class="pre">evict_normal</span></code> (default) policy set in case the access leads to a cache miss in L2.</p></td>
</tr>
</tbody>
</table>
</li>
<li><p><strong>Individual Warp Stall Sampling Metrics</strong></p>
<p>All <em>stall_*</em> metrics show the information combined in <em>Warp Stall Sampling</em> individually. See <a class="reference external" href="../ProfilingGuide/index.html#statistical-sampler">Statistical Sampler</a> for their descriptions.</p>
</li>
<li><p>See the <a class="reference external" href="../CustomizationGuide/index.html#abstract">Customization Guide</a> on how to add additional metrics for this view and the <a class="reference external" href="../ProfilingGuide/index.html#metrics-reference">Metrics Reference</a> for further information on available metrics.</p></li>
</ul>
<p class="title sectiontitle rubric" id="register-dependencies">Register Dependencies</p>
<p>Dependencies between registers are displayed in the SASS view. When a register is read, all the potential addresses where it could have been written are found. The links between these lines are drawn in the view. All dependencies for registers, predicates, uniform registers and uniform predicates are shown in their respective columns.</p>
<figure class="align-center" id="profiler-report-pages-source-register-dependencies">
<img alt="../_images/profiler-report-pages-source-register-dependencies.png" src="../_images/profiler-report-pages-source-register-dependencies.png" />
</figure>
<p>The picture above shows some dependencies for a simple CUDA kernel. On the first row, which is line 9 of the SASS code, we can see <em>writes</em> on registers R2 and R3, represented by <em>filled triangles pointing to the left</em>. These registers are then read on lines 17, 20 and 23, and this is represented by <em>regular triangles pointing to the right</em>. There are also some lines where both types of triangles are on the same line, which means that a read and a write occurred for the same register.</p>
<p>Dependencies across source files and functions are not tracked.</p>
<p>The Register Dependencies Tracking feature is enabled by default, but can be disabled completely in <em>Tools &gt; Options &gt; Profile &gt; Report Source Page &gt; Enable Register Dependencies</em>.</p>
<dl class="footnote brackets">
<dt class="label" id="fn1"><span class="brackets"><a class="fn-backref" href="#id6">1</a></span></dt>
<dd><p>This metric was previously called Sampling Data (All).</p>
</dd>
<dt class="label" id="fn2"><span class="brackets"><a class="fn-backref" href="#id7">2</a></span></dt>
<dd><p>This metric was previously called Sampling Data (Not Issued).</p>
</dd>
</dl>
</section>
<section id="profiles">
<span id="profiler-report-source-page-profiles"></span><h5>Profiles<a class="headerlink" href="#profiles" title="Permalink to this headline"></a></h5>
<p>The icon next to the <em>View</em> dropdown can be used to manage <em>Source View Profiles</em>.</p>
<figure class="align-center" id="profiler-report-pages-source-profiles-button">
<img alt="../_images/profiler-report-pages-source-profiles-button.png" src="../_images/profiler-report-pages-source-profiles-button.png" />
</figure>
<p>This button opens a dialog that shows you the list of saved source view profiles. Such profiles can be created using the <em>Create</em> button in the dialog. Profiles let you store the column properties of all views in the report to a file. Such properties include column visibility, freeze state, width, order and the selected navigation metric. A saved profile can be applied to any opened report using the <em>Apply</em> button. This updates the column properties mentioned above from the selected profile in all views.</p>
<figure class="align-center" id="profiler-report-pages-source-profiles">
<img alt="../_images/profiler-report-pages-source-profiles.png" src="../_images/profiler-report-pages-source-profiles.png" />
</figure>
<p>Profiles are useful for configuring views to your preferences, or for a certain use case. Start by choosing metric columns from the <em>Column Chooser</em>. Next, configure other properties like freezing column, changing width or order and setting a heatmap metric in the <em>Navigation</em> dropdown before creating the profile. Once a profile is created, you can always use this profile on any opened report to hide all non-required columns or to restore your configured properties. Simply select the profile from the source view profiles dialog and click the <em>Apply</em> button.</p>
<p>Note that the column properties are stored separately for each <em>View</em> in the profile and when applied, only those views will be updated which are present in the selected profile. You will not see the metric columns that are not available in your report even if those were configured to be visible in the source profile you have applied.</p>
</section>
<section id="limitations">
<span id="profiler-report-source-page-limitations"></span><h5>Limitations<a class="headerlink" href="#limitations" title="Permalink to this headline"></a></h5>
<p class="rubric-h5 rubric" id="range-replay-1">Range Replay</p>
<p>When using <a class="reference external" href="../ProfilingGuide/index.html#range-replay">Range Replay</a> mode, instruction-level source metrics are not available.</p>
<p class="rubric-h5 rubric" id="graph-profiling">Graph Profiling</p>
<p>When profiling complete CUDA graphs, instruction-level source metrics are not available.</p>
</section>
</section>
<section id="comments-page">
<span id="profiler-report-comments-page"></span><h4>Comments Page<a class="headerlink" href="#comments-page" title="Permalink to this headline"></a></h4>
<p>The <em>Comments</em> page aggregates all section comments in a single view and allows the user to edit those comments on any launch instance or section, as well as on the overall report. Comments are persisted with the report. If a section comment is added, the comment icon of the respective section in the <a class="reference external" href="index.html#profiler-report-details-page">Details Page</a> will be highlighted.</p>
</section>
<section id="call-stack-nvtx-page">
<span id="profiler-report-nvtx-page"></span><h4>Call Stack / NVTX Page<a class="headerlink" href="#call-stack-nvtx-page" title="Permalink to this headline"></a></h4>
<p>The <em>CPU Call Stack</em> section of this report page shows the CPU call stack for the executing CPU thread at the time the kernel was launched. For this information to show up in the profiler report, the option to collect CPU call stacks had to be enabled in the <a class="reference external" href="index.html#connection-dialog">Connection Dialog</a> or using the corresponding NVIDIA Nsight Compute CLI command line parameter.</p>
<figure class="align-center" id="profiler-report-pages-callstack">
<img alt="../_images/profiler-report-pages-callstack.png" src="../_images/profiler-report-pages-callstack.png" />
</figure>
<p>The <em>NVTX State</em> section of this report page shows the NVTX context when the kernel was launched. All thread-specific information is with respect to the thread of the kernel’s launch API call. Note that NVTX information is only collected if the profiler is started with NVTX support enabled, either in the <a class="reference external" href="index.html#connection-dialog">Connection Dialog</a> or using the NVIDIA Nsight Compute CLI command line parameter.</p>
<figure class="align-center" id="profiler-report-pages-nvtx">
<img alt="../_images/profiler-report-pages-nvtx.png" src="../_images/profiler-report-pages-nvtx.png" />
</figure>
</section>
<section id="raw-page">
<span id="profiler-report-raw-page"></span><h4>Raw Page<a class="headerlink" href="#raw-page" title="Permalink to this headline"></a></h4>
<p>The <em>Raw</em> page shows a list of all collected metrics with their units per profiled kernel launch. It can be exported, for example, to CSV format for further analysis. The page features a filter edit to quickly find specific metrics. You can transpose the table of kernels and metrics by using the <em>Transpose</em> button.</p>
<p>If a metric has multiple instance values, the number of instances is shown after the standard value. This metric for example has ten instance values: <code class="docutils literal notranslate"><span class="pre">35.48</span> <span class="pre">{10}</span></code>. You can select in the <a class="reference external" href="index.html#options-profile">Profile</a> options dialog that all instance values should be shown individually or inspect the metric result in the <a class="reference external" href="index.html#tool-window-metric-details">Metric Details</a> tool window.</p>
</section>
</section>
<section id="metrics-and-units">
<span id="profiler-report-metrics"></span><h3><span class="section-number">3.6.3. </span>Metrics and Units<a class="headerlink" href="#metrics-and-units" title="Permalink to this headline"></a></h3>
<p>Numeric metric values are shown in various places in the report, including the header and tables and charts on most pages. NVIDIA Nsight Compute supports various ways to display those metrics and their values.</p>
<p>When available and applicable to the UI component, metrics are shown along with their unit. This is to make it apparent if a metric represents cycles, threads, bytes/s, and so on. The unit will normally be shown in rectangular brackets, e.g. <code class="docutils literal notranslate"><span class="pre">Metric</span> <span class="pre">Name</span> <span class="pre">[bytes]</span> <span class="pre">128</span></code>.</p>
<p>By default, units are scaled automatically so that metric values are shown with a reasonable order of magnitude. Units are scaled using their SI-factors, i.e. byte-based units are scaled using a factor of 1000 and the prefixes K, M, G, etc. Time-based units are also scaled using a factor of 1000, with the prefixes n, u and m. This scaling can be disabled in the <a class="reference external" href="index.html#options-profile">Profile</a> options.</p>
<p>Metrics which could not be collected are shown as <code class="docutils literal notranslate"><span class="pre">n/a</span></code> and assigned a warning icon. If the metric floating point value is out of the regular range (i.e. <code class="docutils literal notranslate"><span class="pre">nan</span></code> (Not a number) or <code class="docutils literal notranslate"><span class="pre">inf</span></code> (infinite)), they are also assigned a warning icon. The exceptions are metrics for which these values are expected and which are allow-listed internally.</p>
</section>
</section>
<section id="id8">
<h2><span class="section-number">3.7. </span>Baselines<a class="headerlink" href="#id8" title="Permalink to this headline"></a></h2>
<p>NVIDIA Nsight Compute supports diffing collected results across one or multiple reports using Baselines. Each result in any report can be promoted to a baseline. This causes metric values from all results in all reports to show the difference to the baseline. If multiple baselines are selected simultaneously, metric values are compared to the average across all current baselines. Baselines are not stored with a report and are only available as long as the same NVIDIA Nsight Compute instance is open, unless they are saved to a <code class="docutils literal notranslate"><span class="pre">ncu-bln</span></code> file from the <a class="reference external" href="index.html#tool-window-baselines">Baselines tool window</a>.</p>
<figure class="align-center" id="id9">
<img alt="../_images/baselines.png" src="../_images/baselines.png" />
<figcaption>
<p><span class="caption-text">Profiler report with one baseline</span><a class="headerlink" href="#id9" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>Select <em>Add Baseline</em> to promote the current result in focus to become a baseline. If a baseline is set, most metrics on the <a class="reference external" href="index.html#profiler-report-details-page">Details Page</a>, <a class="reference external" href="index.html#profiler-report-raw-page">Raw Page</a> and <a class="reference external" href="index.html#profiler-report-summary-page">Summary Page</a> show two values: the current value of the result in focus, and the corresponding value of the baseline or the percentage of change from the corresponding baseline value. (Note that an infinite percentage gain, <em>inf%</em>, may be displayed when the baseline value for the metric is zero, while the focus value is not.)</p>
<p>If multiple baselines are selected, each metric will show the following notation:</p>
<div class="highlight-text notranslate"><div class="highlight"><pre><span></span>&lt;focus value&gt; (&lt;difference to baselines average [%]&gt;, z=&lt;standard score&gt;@&lt;number of values&gt;)
</pre></div>
</div>
<p>The standard score is the difference between the current value and the average across all baselines, normalized by the standard deviation. If the number of metric values contributing to the standard score equals the number of results (current and all baselines), the &#64;&lt;number of values&gt; notation is omitted.</p>
<figure class="align-center" id="baselines-multiple">
<img alt="../_images/baselines-multiple.png" src="../_images/baselines-multiple.png" />
<figcaption>
<p><span class="caption-text">Profiler report with multiple baselines</span><a class="headerlink" href="#baselines-multiple" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>Double-clicking on a baseline name allows the user to edit the displayed name. Edits are committed by pressing <code class="docutils literal notranslate"><span class="pre">Enter/Return</span></code> or upon loss of focus, and abandoned by pressing <code class="docutils literal notranslate"><span class="pre">Esc</span></code>. Hovering over the baseline color icon allows the user to remove this specific baseline from the list.</p>
<p>Use the <em>Clear Baselines</em> entry from the dropdown button, the <a class="reference external" href="index.html#options-profile">Profile</a> menu, or the corresponding toolbar button to remove all baselines.</p>
<p>Baseline changes can also be made in the <a class="reference external" href="index.html#tool-window-baselines">Baselines tool window</a>.</p>
</section>
<section id="standalone-source-viewer">
<span id="cubin-viewer"></span><h2><span class="section-number">3.8. </span>Standalone Source Viewer<a class="headerlink" href="#standalone-source-viewer" title="Permalink to this headline"></a></h2>
<p>NVIDIA Nsight Compute includes a standalone source viewer for <em>cubin</em> files. This view is identical to the <a class="reference external" href="index.html#profiler-report-source-page">Source Page</a>, except that it won’t include any performance metrics.</p>
<p>Cubin files can be opened from the <em>File</em> &gt; <em>Open</em> main menu command. The SM Selection dialog will be shown before opening the standalone source view. If available, the SM version present in the file name is pre-selected. For example, if your file name is <code class="docutils literal notranslate"><span class="pre">mergeSort.sm_80.cubin</span></code> then SM 8.0 will be pre-selected in the dialog. Choose the appropriate SM version from the drop down menu if it’s not included in the file name.</p>
<figure class="align-center" id="baselines-multiple-fig">
<img alt="../_images/sm-selection-dialog.png" src="../_images/sm-selection-dialog.png" />
<figcaption>
<p><span class="caption-text">SM Selection Dialog</span><a class="headerlink" href="#baselines-multiple-fig" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>Click the <em>Ok</em> button to open the <a class="reference external" href="index.html#cubin-viewer">Standalone Source Viewer</a>.</p>
<figure class="align-center" id="standalone-cubin-viewer">
<img alt="../_images/cubin-viewer.png" src="../_images/cubin-viewer.png" />
<figcaption>
<p><span class="caption-text">Standalone Source Viewer</span><a class="headerlink" href="#standalone-cubin-viewer" title="Permalink to this image"></a></p>
</figcaption>
</figure>
</section>
<section id="source-comparison">
<h2><span class="section-number">3.9. </span>Source Comparison<a class="headerlink" href="#source-comparison" title="Permalink to this headline"></a></h2>
<p>Source comparison provides a way to see the source files of two profile results side by side. It enables you to quickly identify source differences and understand changes in metric values.</p>
<p>To compare two results side by side, add one result as a baseline, navigate to the other result, and then click the <em>Source Comparison</em> button located in the report header.</p>
<p>For example, if you want to compare kernel XYZ from report R1 with kernel XYZ from report R2, first open report R1, add the profile result for kernel XYZ as baseline, open report R2, choose kernel XYZ, and then click the Source Comparison button.</p>
<p>Source comparison is shown only against the first baseline result that was added.</p>
<figure class="align-center" id="source-comparison-from-header">
<img alt="../_images/source-comparison-from-header.png" src="../_images/source-comparison-from-header.png" />
<figcaption>
<p><span class="caption-text">Source Comparison Button</span><a class="headerlink" href="#source-comparison-from-header" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<figure class="align-center" id="source-comparison-document">
<img alt="../_images/source-comparison-document.png" src="../_images/source-comparison-document.png" />
<figcaption>
<p><span class="caption-text">Source Comparison</span><a class="headerlink" href="#source-comparison-document" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<p>Currently only high-level Source (CUDA-C) view and SASS view are supported for comparison.</p>
<p>Navigation to the previous or next difference is supported using the navigation buttons or the keyboard shortcuts <em>Ctrl + 1</em> and <em>Ctrl + 2</em>.</p>
<figure class="align-center" id="source-comparison-navigation-buttons">
<img alt="../_images/source-comparison-navigation-buttons.png" src="../_images/source-comparison-navigation-buttons.png" />
<figcaption>
<p><span class="caption-text">Source Comparison Navigation Buttons</span><a class="headerlink" href="#source-comparison-navigation-buttons" title="Permalink to this image"></a></p>
</figcaption>
</figure>
</section>
<section id="occupancy-calculator">
<h2><span class="section-number">3.10. </span>Occupancy Calculator<a class="headerlink" href="#occupancy-calculator" title="Permalink to this headline"></a></h2>
<p>NVIDIA Nsight Compute provides an <em>Occupancy Calculator</em> that allows you to compute the multiprocessor occupancy of a GPU for a given CUDA kernel. It offers feature parity to the CUDA Occupancy Calculator <a class="reference external" href="http://docs.nvidia.com/cuda/cuda-occupancy-calculator/index.html">spreadsheet</a>.</p>
<p>The Occupancy Calculator can be opened directly from a profile report or as a new activity. The occupancy calculator data can be saved to a file using <em>File &gt; Save</em>. By default, the file uses the <code class="docutils literal notranslate"><span class="pre">.ncu-occ</span></code> extension. The occupancy calculator file can be opened using <em>File &gt; Open File</em>.</p>
<ol class="arabic">
<li><p><strong>Launching from the Connection Dialog</strong></p>
<p>Select the Occupancy Calculator activity from the connection dialog. You can optionally specify an occupancy calculator data file, which is used to initialize the calculator with the data from the saved file. Click the <em>Launch</em> button to open the Occupancy Calculator.</p>
<figure class="align-center">
<img alt="../_images/occupancy-calculator-activity.png" src="../_images/occupancy-calculator-activity.png" />
</figure>
</li>
<li><p><strong>Launching from the Profiler Report</strong></p>
<p>The Occupancy Calculator can be opened from the <em>Profiler Report</em> using the calculator button located in the report header or in the header of the <em>Occupancy</em> section on the <em>Details Page</em>.</p>
<figure class="align-center" id="occupancy-calculator-from-header">
<img alt="../_images/occupancy-calculator-from-header.png" src="../_images/occupancy-calculator-from-header.png" />
<figcaption>
<p><span class="caption-text">Details page header</span><a class="headerlink" href="#occupancy-calculator-from-header" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<figure class="align-center" id="occupancy-calculator-from-section">
<img alt="../_images/occupancy-calculator-from-section.png" src="../_images/occupancy-calculator-from-section.png" />
<figcaption>
<p><span class="caption-text">Occupancy section header</span><a class="headerlink" href="#occupancy-calculator-from-section" title="Permalink to this image"></a></p>
</figcaption>
</figure>
</li>
</ol>
<p>The user interface consists of an input section as well as tables and graphs that display information about GPU occupancy. To use the calculator, change the input values in the input section, click the <em>Apply</em> button and examine the tables and graphs.</p>
<section id="tables">
<span id="occupancy-calculator-tables"></span><h3><span class="section-number">3.10.1. </span>Tables<a class="headerlink" href="#tables" title="Permalink to this headline"></a></h3>
<p>The tables show the occupancy, as well as the number of active threads, warps, and thread blocks per multiprocessor, and the maximum number of active blocks on the GPU.</p>
<figure class="align-center" id="occupancy-calculator-tables-fig">
<img alt="../_images/occupancy-calculator-tables.png" src="../_images/occupancy-calculator-tables.png" />
<figcaption>
<p><span class="caption-text">Tables</span><a class="headerlink" href="#occupancy-calculator-tables-fig" title="Permalink to this image"></a></p>
</figcaption>
</figure>
</section>
<section id="graphs">
<span id="occupancy-calculator-graphs"></span><h3><span class="section-number">3.10.2. </span>Graphs<a class="headerlink" href="#graphs" title="Permalink to this headline"></a></h3>
<p>The graphs show the occupancy for your chosen block size as a blue circle, and for all other possible block sizes as a line graph.</p>
<figure class="align-center" id="occupancy-calculator-graphs-fig">
<img alt="../_images/occupancy-calculator-graphs.png" src="../_images/occupancy-calculator-graphs.png" />
<figcaption>
<p><span class="caption-text">Graphs</span><a class="headerlink" href="#occupancy-calculator-graphs-fig" title="Permalink to this image"></a></p>
</figcaption>
</figure>
</section>
<section id="gpu-data">
<span id="occupancy-calculator-gpu-data"></span><h3><span class="section-number">3.10.3. </span>GPU Data<a class="headerlink" href="#gpu-data" title="Permalink to this headline"></a></h3>
<p>The <em>GPU Data</em> shows the properties of all supported devices.</p>
<figure class="align-center" id="occupancy-calculator-gpu-data-fig">
<img alt="../_images/occupancy-calculator-gpu-data.png" src="../_images/occupancy-calculator-gpu-data.png" />
<figcaption>
<p><span class="caption-text">GPU Data</span><a class="headerlink" href="#occupancy-calculator-gpu-data-fig" title="Permalink to this image"></a></p>
</figcaption>
</figure>
</section>
</section>
<section id="acceleration-structure-viewer">
<span id="as-viewer"></span><h2><span class="section-number">3.11. </span>Acceleration Structure Viewer<a class="headerlink" href="#acceleration-structure-viewer" title="Permalink to this headline"></a></h2>
<p>The <em>Acceleration Structure Viewer</em> allows inspection of acceleration structures built using the OptiX API. In modern ray tracing APIs like OptiX, <em>acceleration structures</em> are data structures describing the rendered scene’s geometries that will be intersected when performing ray tracing operations. More information concerning acceleration structures can be found in the <a class="reference external" href="https://raytracing-docs.nvidia.com/optix7/guide/index.html#acceleration_structures#acceleration-structures">OptiX programming guide</a>.</p>
<p>It is the responsibility of the user to set these up and pass them to the OptiX API which translates them to internal data structures that perform well on modern GPUs. The description created by the user can be very error-prone and it is sometimes hard to understand why the rendered result is not as expected. The <em>Acceleration Structure Viewer</em> is a component allowing OptiX users to inspect the acceleration structures they build before launching a ray tracing pipeline.</p>
<p>The <em>Acceleration Structure Viewer</em> is opened through a button in the <a class="reference external" href="index.html#tool-window-resources">Resources</a> window. The button will only be available when the currently viewed resource is <em>OptiX: TraversableHandles</em>. It opens the currently selected handle.</p>
<figure class="align-center" id="as-viewer-open-button">
<img alt="../_images/as-viewer-open-button.png" src="../_images/as-viewer-open-button.png" />
</figure>
<p>The viewer is multi-paned: it shows a hierarchical view of the acceleration structure on the left, a graphical view of the acceleration structure in the middle, and controls and options on the right. In the hierarchical tree view on the left of the viewer the <em>instance acceleration structures (IAS)</em>, <em>geometry acceleration structures (GAS)</em>, child instances and child geometries are shown. In addition to this, some general properties for each of them are shown, such as their primitive count, surface area and size on the device.</p>
<figure class="align-center" id="as-viewer-fig">
<img alt="../_images/as-viewer.png" src="../_images/as-viewer.png" />
</figure>
<p>In the hierarchical view on the left of the <em>Acceleration Structure Viewer</em>, the following information is displayed where applicable.</p>
<table class="table-no-stripes docutils align-default" id="id17">
<caption><span class="caption-text">Table 4. Acceleration Structure Hierarchical Columns</span><a class="headerlink" href="#id17" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 6%" />
<col style="width: 94%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Column</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>Name</p></td>
<td><p>An identifier for each row in the hierarchy. Click on the check box next to the name to show or hide the selected geometry or hierarchy. Double-click on this entry to jump to the item in the rendering view.</p></td>
</tr>
<tr class="row-odd"><td><p># Prims</p></td>
<td><p>The number of primitives that make up this acceleration structure.</p></td>
</tr>
<tr class="row-even"><td><p>Surface Area</p></td>
<td><p>A calculation of the total surface area for the AABB that bounds the particular entry.</p></td>
</tr>
<tr class="row-odd"><td><p>Size</p></td>
<td><p>The size of the output buffer on the device holding this <em>acceleration structure</em>.</p></td>
</tr>
</tbody>
</table>
<p>Performance analysis tools are accessible in the bottom left corner on the main view. These tools help identify potential performance problems that are outlined in the <a class="reference external" href="https://developer.nvidia.com/blog/best-practices-using-nvidia-rtx-ray-tracing">RTX Ray Tracing Best Practices Guide</a>. These analysis tools aim to give a broad picture of acceleration structures that may exhibit sub-optimal performance. To find the optimal solution, profiling and experimentation are recommended, but these tools may paint a better picture as to why one structure performs poorly compared to another.</p>
<table class="table-no-stripes docutils align-default" id="id18">
<caption><span class="caption-text">Table 5. Acceleration Structure Analysis Tools</span><a class="headerlink" href="#id18" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 9%" />
<col style="width: 91%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Action</p></th>
<th class="head"><p>Description</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>Instance Overlaps</p></td>
<td><p>Identifies instance AABBs that overlap with other instances in 3D. Consider merging GASes when instance world-space AABBs overlap significantly to potentially increase performance.</p></td>
</tr>
<tr class="row-odd"><td><p>Instance Heatmap</p></td>
<td><p>This allows you to set the threshold used by the AABB heatmap rendered in the visualizer.</p></td>
</tr>
</tbody>
</table>
<section id="as-viewer-nav">
<span id="id10"></span><h3><span class="section-number">3.11.1. </span>Navigation<a class="headerlink" href="#as-viewer-nav" title="Permalink to this headline"></a></h3>
<p>The <em>Acceleration Structure Viewer</em> supports multiple navigation modes. The navigation mode can be changed using the combo box in the camera controls pane, to the right of the rendering pane. The keyboard and mouse bindings for each mode are as follows:</p>
<table class="table-no-stripes docutils align-default" id="id19">
<caption><span class="caption-text">Table 6. Acceleration Structure Key Bindings</span><a class="headerlink" href="#id19" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 15%" />
<col style="width: 27%" />
<col style="width: 31%" />
<col style="width: 26%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Binding</p></th>
<th class="head"><p>Fly Camera</p></th>
<th class="head"><p>Dolly Camera</p></th>
<th class="head"><p>Orbit Camera</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><strong>WASD/Arrow Keys</strong></p></td>
<td><p>Move forward, backward, left, right</p></td>
<td><p>Move forward, backward, left, right</p></td>
<td><p>Track (Move up, down, left, right)</p></td>
</tr>
<tr class="row-odd"><td><p><strong>E/Q</strong></p></td>
<td><p>Move up/down</p></td>
<td><p>Move up/down</p></td>
<td><p>n/a</p></td>
</tr>
<tr class="row-even"><td><p><strong>Z/C</strong></p></td>
<td><p>Increase/decrease field of view</p></td>
<td><p>Increase/decrease field of view</p></td>
<td><p>Increase/decrease field of view</p></td>
</tr>
<tr class="row-odd"><td><p><strong>Shift/Ctrl</strong></p></td>
<td><p>Move faster/slower</p></td>
<td><p>Move faster/slower</p></td>
<td><p>Move faster/slower</p></td>
</tr>
<tr class="row-even"><td><p><strong>Mousewheel</strong></p></td>
<td><p>Zoom in/out</p></td>
<td><p>Zoom in/out</p></td>
<td><p>Zoom in/out</p></td>
</tr>
<tr class="row-odd"><td><p><strong>LMB + Drag</strong></p></td>
<td><p>Rotate in place</p></td>
<td><p>Rotate left/right, move forward/backward</p></td>
<td><p>Rotate around the geometry</p></td>
</tr>
<tr class="row-even"><td><p><strong>RMB + Drag</strong></p></td>
<td><p>Zoom in/out</p></td>
<td><p>Rotate in place</p></td>
<td><p>Zoom in/out</p></td>
</tr>
<tr class="row-odd"><td><p><strong>MMB + Drag</strong></p></td>
<td><p>Track (Move up, down, left, right)</p></td>
<td><p>Track (Move up, down, left, right)</p></td>
<td><p>Track (Move up, down, left, right)</p></td>
</tr>
<tr class="row-even"><td><p><strong>Alt</strong></p></td>
<td><p>Temporarily switch to Orbit Camera</p></td>
<td><p>Temporarily switch to Orbit Camera</p></td>
<td><p>n/a</p></td>
</tr>
<tr class="row-odd"><td><p><strong>F/Double Click</strong></p></td>
<td><p>Focus on the selected geometry</p></td>
<td><p>Focus on the selected geometry</p></td>
<td><p>Focus on the selected geometry</p></td>
</tr>
</tbody>
</table>
<p>Based on the coordinate system of the input geometry, you may need to change the <strong>Up Direction</strong> setting to Z-Axis or the <strong>Coordinates</strong> setting to RHS. To reset the camera to its original location, click <strong>Reset Camera</strong>.</p>
<p>There is also a selection of Camera Controls for fast and precise navigation. To save a position, use the bookmarks controls. Each node within the acceleration structure hierarchy can also be double-clicked to quickly navigate to that location.</p>
<figure class="align-center" id="as-viewer-cam">
<img alt="../_images/as-viewer-cam.png" src="../_images/as-viewer-cam.png" />
</figure>
</section>
<section id="filtering-and-highlighting">
<span id="as-viewer-filtering"></span><h3><span class="section-number">3.11.2. </span>Filtering and Highlighting<a class="headerlink" href="#filtering-and-highlighting" title="Permalink to this headline"></a></h3>
<p>The acceleration structure view supports acceleration structure filtering as well as highlighting of data matching particular characteristics. The checkboxes next to each geometry allow users to toggle the rendering of each traversable.</p>
<p>Geometry instances can also be selected by clicking on them in the main graphical view. Additionally, right clicking in the main graphical view gives options to hide or show all geometry, hide the selected geometry, or hide all but the selected geometry.</p>
<figure class="align-center" id="as-viewer-display-filter">
<img alt="../_images/as-viewer-display-filter.png" src="../_images/as-viewer-display-filter.png" />
</figure>
<p>Beyond filtering, the view also supports highlight-based identification of geometry specified with particular flags. Checking each highlight option will identify those resources matching that flag, colorizing for easy identification. Clicking an entry in this section will dim all geometry that does <strong>not</strong> meet the filter criteria, allowing items that match the filter to stand out. Selecting multiple filters requires the passing geometry to meet all selected filters (i.e., AND logic). Additionally, the heading text will be updated to reflect the number of items that meet these filter criteria.</p>
<figure class="align-center" id="as-viewer-property-filter">
<img alt="../_images/as-viewer-property-filter.png" src="../_images/as-viewer-property-filter.png" />
</figure>
</section>
<section id="rendering-options">
<span id="as-viewer-rendering-options"></span><h3><span class="section-number">3.11.3. </span>Rendering Options<a class="headerlink" href="#rendering-options" title="Permalink to this headline"></a></h3>
<p>Under the highlight controls, additional rendering options are available. These include methods to control the geometry colors and the ability to toggle the drawing of wireframes for meshes and AABBs.</p>
<figure class="align-center" id="as-viewer-rendering-options-fig">
<img alt="../_images/as-viewer-rendering-options.png" src="../_images/as-viewer-rendering-options.png" />
</figure>
</section>
<section id="exporting">
<h3><span class="section-number">3.11.4. </span>Exporting<a class="headerlink" href="#exporting" title="Permalink to this headline"></a></h3>
<p>The data displayed in the acceleration structure viewer document can be saved to file. Exporting an <em>Acceleration Structure Viewer</em> document allows for persisting the data you have collected beyond the immediate analysis session. This capability is particularly valuable for comparing different revisions of your geometry or sharing with others. Bookmarks are persisted as well.</p>
</section>
</section>
<section id="options">
<h2><span class="section-number">3.12. </span>Options<a class="headerlink" href="#options" title="Permalink to this headline"></a></h2>
<p>NVIDIA Nsight Compute options can be accessed via the main menu under <em>Tools</em> &gt; <em>Options</em>. All options are persisted on disk and available the next time NVIDIA Nsight Compute is launched. When an option is changed from its default setting, its label will become bold. You can use the <em>Restore Defaults</em> button to restore all options to their default values.</p>
<figure class="align-center" id="options-profile-fig">
<img alt="../_images/options-profile.png" src="../_images/options-profile.png" />
<figcaption>
<p><span class="caption-text">Profile options</span><a class="headerlink" href="#options-profile-fig" title="Permalink to this image"></a></p>
</figcaption>
</figure>
<section id="profile">
<span id="options-profile"></span><h3><span class="section-number">3.12.1. </span>Profile<a class="headerlink" href="#profile" title="Permalink to this headline"></a></h3>
<table class="table-no-stripes docutils align-default" id="id20">
<caption><span class="caption-text">Table 7. NVIDIA Nsight Compute Profile Options</span><a class="headerlink" href="#id20" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 12%" />
<col style="width: 60%" />
<col style="width: 28%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Name</p></th>
<th class="head"><p>Description</p></th>
<th class="head"><p>Values</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>Sections Directory</p></td>
<td><p>Directory from which to import section files and rules. Relative paths are with respect to the NVIDIA Nsight Compute installation directory.</p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>Include Sub-Directories</p></td>
<td><p>Recursively include section files and rules from sub-directories.</p></td>
<td><p>Yes (Default)/No</p></td>
</tr>
<tr class="row-even"><td><p>Apply Applicable Rules Automatically</p></td>
<td><p>Automatically apply active and applicable rules.</p></td>
<td><p>Yes (Default)/No</p></td>
</tr>
<tr class="row-odd"><td><p>Reload Rules Before Applying</p></td>
<td><p>Force a rule reload before applying the rule to ensure changes in the rule script are recognized.</p></td>
<td><p>Yes/No (Default)</p></td>
</tr>
<tr class="row-even"><td><p>Default Report Page</p></td>
<td><p>The report page to show when a report is generated or opened. <em>Auto</em> lets the tool decide the best page to show when opening a report.</p></td>
<td><ul class="simple">
<li><p>Session</p></li>
<li><p>Summary</p></li>
<li><p>Details</p></li>
<li><p>Source</p></li>
<li><p>Comments</p></li>
<li><p>Call Stack/NVTX</p></li>
<li><p>Raw</p></li>
<li><p>Auto (default)</p></li>
</ul>
</td>
</tr>
<tr class="row-odd"><td><p>Function Name Mode</p></td>
<td><p>Determines how function/kernel names are shown.</p></td>
<td><ul class="simple">
<li><p>Auto (default): each component uses its preferred mode</p></li>
<li><p>Demangled: kernel names are shown demangled with all parameters</p></li>
<li><p>Function: kernel names are shown with their demangled function name without parameters</p></li>
<li><p>Mangled: kernel names are shown with their mangled name, if applicable</p></li>
</ul>
</td>
</tr>
<tr class="row-even"><td><p>NVTX Rename Mode</p></td>
<td><p>Determines how NVTX information is used for renaming. Range replay results are always renamed when possible.</p></td>
<td><ul class="simple">
<li><p>None: no renaming</p></li>
<li><p>Kernel: kernel names are renamed using the most recent enclosing push/pop range</p></li>
<li><p>Resources (default): resources like CPU threads or CUDA contexts and streams are renamed</p></li>
<li><p>All: Kernel and Resources</p></li>
</ul>
</td>
</tr>
<tr class="row-odd"><td><p>Maximum Baseline Name Length</p></td>
<td><p>The maximum length of baseline names.</p></td>
<td><p>1..N (Default: 40)</p></td>
</tr>
<tr class="row-even"><td><p>Number of Full Baselines to Display</p></td>
<td><p>Number of baselines to display in the report header with all details in addition to the current result.</p></td>
<td><p>0..N (Default: 2)</p></td>
</tr>
<tr class="row-odd"><td><p>Auto-Convert Metric Units</p></td>
<td><p>Auto-adjust displayed metric units and values (e.g. Bytes to KBytes).</p></td>
<td><p>Yes (Default)/No</p></td>
</tr>
<tr class="row-even"><td><p>Show Instanced Metric Values</p></td>
<td><p>Show the individual values of instanced metrics in tables.</p></td>
<td><p>Yes/No (Default)</p></td>
</tr>
<tr class="row-odd"><td><p>Show Metrics As Floating Point</p></td>
<td><p>Show all numeric metrics as floating-point numbers.</p></td>
<td><p>Yes/No (Default)</p></td>
</tr>
<tr class="row-even"><td><p>Show Knowledge Base Information</p></td>
<td><p>Show information from the knowledge base in (metric) tooltips to explain terminology. Note: Nsight Compute needs to be restarted for this option to take effect.</p></td>
<td><p>Yes (Default)/No</p></td>
</tr>
<tr class="row-odd"><td><p>Metrics/Properties</p></td>
<td><p>List of metrics and properties to show on the summary page. Comma-separated list of metric entries. Each entry has the format {Label:MetricName}.</p></td>
<td></td>
</tr>
<tr class="row-even"><td><p>Delay Load ‘Source’ Page</p></td>
<td><p>Delays loading the content of the report page until the page becomes visible. Avoids processing costs and memory overhead until the report page is opened.</p></td>
<td><p>Yes/No (Default)</p></td>
</tr>
<tr class="row-odd"><td><p>Show Single File For Multi-File Sources</p></td>
<td><p>Shows a single file in each Source page view, even for multi-file sources.</p></td>
<td><p>Yes/No (Default)</p></td>
</tr>
<tr class="row-even"><td><p>Show Only Executed Functions</p></td>
<td><p>Shows only executed functions in the source page views. Disabling this can impact performance.</p></td>
<td><p>Yes (Default)/No</p></td>
</tr>
<tr class="row-odd"><td><p>Auto-Resolve Remote Source Files</p></td>
<td><p>Automatically try to resolve remote source files on the source page (e.g. via SSH) if the connection is still registered.</p></td>
<td><p>Yes/No (Default)</p></td>
</tr>
<tr class="row-even"><td><p>Enable Register Dependencies</p></td>
<td><p>Track dependencies between SASS registers/predicates and display them in the SASS view.</p></td>
<td><p>Yes (Default)/No</p></td>
</tr>
<tr class="row-odd"><td><p>Kernel Analysis Size Threshold (KB)</p></td>
<td><p>Enable SASS flow graph analysis for functions below this threshold. SASS analysis is required for Live Register and Register Dependency information. Set to -1 to enable analysis for all functions.</p></td>
<td><p>-1..N (Default: 1024)</p></td>
</tr>
<tr class="row-even"><td><p>Enable ELF Verification</p></td>
<td><p>Enable ELF (cubin) verification to run every time before SASS analysis. This should only be enabled when working with applications compiled before CUDA 11.0 or when encountering source page issues.</p></td>
<td><p>Yes/No (Default)</p></td>
</tr>
<tr class="row-odd"><td><p>API Call History</p></td>
<td><p>Number of recent API calls shown in API Stream View.</p></td>
<td><p>1..N (Default: 100)</p></td>
</tr>
</tbody>
</table>
</section>
<section id="environment">
<span id="options-environment"></span><h3><span class="section-number">3.12.2. </span>Environment<a class="headerlink" href="#environment" title="Permalink to this headline"></a></h3>
<table class="table-no-stripes docutils align-default" id="id21">
<caption><span class="caption-text">Table 8. NVIDIA Nsight Compute Environment Options</span><a class="headerlink" href="#id21" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 21%" />
<col style="width: 61%" />
<col style="width: 19%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Name</p></th>
<th class="head"><p>Description</p></th>
<th class="head"><p>Values</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>Color Theme</p></td>
<td><p>The currently selected UI color theme.</p></td>
<td><ul class="simple">
<li><p>Dark (Default)</p></li>
<li><p>Light</p></li>
</ul>
</td>
</tr>
<tr class="row-odd"><td><p>Mixed DPI Scaling</p></td>
<td><p>Disable Mixed DPI Scaling if unwanted artifacts are detected when using monitors with different DPIs.</p></td>
<td><ul class="simple">
<li><p>Auto (Default)</p></li>
<li><p>Off</p></li>
</ul>
</td>
</tr>
<tr class="row-even"><td><p>Default Document Folder</p></td>
<td><p>Directory where documents unassociated with a project will be saved.</p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>At Startup</p></td>
<td><p>What to do when NVIDIA Nsight Compute is launched.</p></td>
<td><ul class="simple">
<li><p>Show welcome page (Default)</p></li>
<li><p>Show quick launch dialog</p></li>
<li><p>Load last project</p></li>
<li><p>Show empty environment</p></li>
</ul>
</td>
</tr>
<tr class="row-even"><td><p>Show version update notifications</p></td>
<td><p>Show notifications when a new version of this product is available.</p></td>
<td><ul class="simple">
<li><p>Yes (Default)</p></li>
<li><p>No</p></li>
</ul>
</td>
</tr>
</tbody>
</table>
</section>
<section id="connection">
<span id="options-connection"></span><h3><span class="section-number">3.12.3. </span>Connection<a class="headerlink" href="#connection" title="Permalink to this headline"></a></h3>
<p>Connection properties are grouped into <em>Target Connection Properties</em> and <em>Host Connection Properties</em>.</p>
<section id="target-connection-properties">
<span id="options-connection-target"></span><h4>Target Connection Properties<a class="headerlink" href="#target-connection-properties" title="Permalink to this headline"></a></h4>
<p>The <em>Target Connection Properties</em> determine how the host connects to the target application during an <em>Interactive Profile Activity</em>. This connection is used to transfer profile information to the host during the profile session.</p>
<table class="table-no-stripes docutils align-default" id="id22">
<caption><span class="caption-text">Table 9. NVIDIA Nsight Compute Target Connection Properties</span><a class="headerlink" href="#id22" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 8%" />
<col style="width: 78%" />
<col style="width: 14%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Name</p></th>
<th class="head"><p>Description</p></th>
<th class="head"><p>Values</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>Base Port</p></td>
<td><p>Base port used to establish a connection from the host to the target application during an <em>Interactive Profile</em> activity (both local and remote).</p></td>
<td><p>1-65535 (Default: 49152)</p></td>
</tr>
<tr class="row-odd"><td><p>Maximum Ports</p></td>
<td><p>Maximum number of ports to try (starting from <em>Base Port</em>) when attempting to connect to the target application.</p></td>
<td><p>2-65534 (Default: 64)</p></td>
</tr>
</tbody>
</table>
</section>
<section id="host-connection-properties">
<span id="options-connection-target-host"></span><h4>Host Connection Properties<a class="headerlink" href="#host-connection-properties" title="Permalink to this headline"></a></h4>
<p>The <em>Host Connection Properties</em> determine how the command line profiler will connect to the host application during a <em>Profile Activity</em>. This connection is used to transfer profile information to the host during the profile session.</p>
<table class="table-no-stripes docutils align-default" id="id23">
<caption><span class="caption-text">Table 10. NVIDIA Nsight Compute Host Connection Options</span><a class="headerlink" href="#id23" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 8%" />
<col style="width: 79%" />
<col style="width: 14%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Name</p></th>
<th class="head"><p>Description</p></th>
<th class="head"><p>Values</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>Base Port</p></td>
<td><p>Base port used to establish a connection from the command line profiler to the host application during a <em>Profile</em> activity (both local and remote).</p></td>
<td><p>1-65535 (Default: 50152)</p></td>
</tr>
<tr class="row-odd"><td><p>Maximum Ports</p></td>
<td><p>Maximum number of ports to try (starting from <em>Base Port</em>) when attempting to connect to the host application.</p></td>
<td><p>1-100 (Default: 10)</p></td>
</tr>
</tbody>
</table>
</section>
</section>
<section id="source-lookup">
<span id="options-source-lookup"></span><h3><span class="section-number">3.12.4. </span>Source Lookup<a class="headerlink" href="#source-lookup" title="Permalink to this headline"></a></h3>
<table class="table-no-stripes docutils align-default" id="id24">
<caption><span class="caption-text">Table 11. NVIDIA Nsight Compute Source Lookup Options</span><a class="headerlink" href="#id24" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 6%" />
<col style="width: 91%" />
<col style="width: 4%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Name</p></th>
<th class="head"><p>Description</p></th>
<th class="head"><p>Values</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>Program Source Locations</p></td>
<td><p>Set program source search paths. These paths are used to resolve CUDA-C source files on the Source page if the respective file cannot be found in its original location. Files which cannot be found are marked with a <em>File Not Found</em> error. See the <em>Ignore File Properties</em> option for files that are found but don’t match.</p></td>
<td></td>
</tr>
<tr class="row-odd"><td><p>Ignore File Properties</p></td>
<td><p>Ignore file properties (e.g. timestamp, size) for source resolution. If this is disabled, all file properties like modification timestamp and file size are checked against the information stored by the compiler in the application during compilation. If a file with the same name exists on a source lookup path, but not all properties match, it won’t be used for resolution (and a <em>File Mismatch</em> error will be shown).</p></td>
<td><p>Yes/No (Default)</p></td>
</tr>
</tbody>
</table>
</section>
<section id="send-feedback">
<span id="options-send-feedback"></span><h3><span class="section-number">3.12.5. </span>Send Feedback<a class="headerlink" href="#send-feedback" title="Permalink to this headline"></a></h3>
<table class="table-no-stripes docutils align-default" id="id25">
<caption><span class="caption-text">Table 12. NVIDIA Nsight Compute Send Feedback Options</span><a class="headerlink" href="#id25" title="Permalink to this table"></a></caption>
<colgroup>
<col style="width: 21%" />
<col style="width: 64%" />
<col style="width: 15%" />
</colgroup>
<thead>
<tr class="row-odd"><th class="head"><p>Name</p></th>
<th class="head"><p>Description</p></th>
<th class="head"><p>Values</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>Collect Usage and Platform Data</p></td>
<td><p>Choose whether or not you wish to allow NVIDIA Nsight Compute to collect usage and platform data.</p></td>
<td><ul class="simple">
<li><p>Yes</p></li>
<li><p>No (Default)</p></li>
</ul>
</td>
</tr>
</tbody>
</table>
</section>
</section>
<section id="projects">
<h2><span class="section-number">3.13. </span>Projects<a class="headerlink" href="#projects" title="Permalink to this headline"></a></h2>
<p>NVIDIA Nsight Compute uses <em>Project Files</em> to group and organize profiling reports. At any given time, only one project can be open in NVIDIA Nsight Compute. Collected reports are automatically assigned to the current project. Reports stored on disk can be assigned to a project at any time. In addition to profiling reports, related files such as notes or source code can be associated with the project for future reference.</p>
<p>Note that only references to reports or other files are saved in the project file. Those references can become invalid, for example when associated files are deleted, moved, or not available on the current system, or when the project file itself was moved.</p>
<p>NVIDIA Nsight Compute uses the <code class="docutils literal notranslate"><span class="pre">ncu-proj</span></code> file extension for project files.</p>
<p>When no custom project is open, a <em>default project</em> is used to store e.g. the current <a class="reference external" href="index.html#connection-dialog">Connection Dialog</a> entries. To remove all information from the default project, you must close NVIDIA Nsight Compute and then delete the file from disk.</p>
<ul class="simple">
<li><p>On Windows, the file is located at <code class="docutils literal notranslate"><span class="pre">&lt;USER&gt;\AppData\Local\NVIDIA</span> <span class="pre">Corporation\NVIDIA</span> <span class="pre">Nsight</span> <span class="pre">Compute\</span></code></p></li>
<li><p>On Linux, the file is located at <code class="docutils literal notranslate"><span class="pre">&lt;USER&gt;/.local/share/NVIDIA</span> <span class="pre">Corporation/NVIDIA</span> <span class="pre">Nsight</span> <span class="pre">Compute/</span></code></p></li>
<li><p>On MacOSX, the file is located at <code class="docutils literal notranslate"><span class="pre">&lt;USER&gt;/Library/Application</span> <span class="pre">Support/NVIDIA</span> <span class="pre">Corporation/NVIDIA</span> <span class="pre">Nsight</span> <span class="pre">Compute/</span></code></p></li>
</ul>
<section id="project-dialogs">
<span id="projects-dialogs"></span><h3><span class="section-number">3.13.1. </span>Project Dialogs<a class="headerlink" href="#project-dialogs" title="Permalink to this headline"></a></h3>
<p><strong>New Project</strong></p>
<p>Creates a new project. The project must be given a name, which will also be used for the project file. You can select the location where the project file should be saved on disk. Select whether a new directory with the project name should be created in that location.</p>
</section>
<section id="project-explorer">
<span id="projects-explorer"></span><h3><span class="section-number">3.13.2. </span>Project Explorer<a class="headerlink" href="#project-explorer" title="Permalink to this headline"></a></h3>
<p>The <em>Project Explorer</em> window allows you to inspect and manage the current project. It shows the project name as well as all <em>Items</em> (profile reports and other files) associated with it. Right-click on any entry to see further actions, such as adding, removing or grouping items. Type in the <em>Search project</em> toolbar at the top to filter the currently shown entries.</p>
<figure class="align-center" id="projects-explorer-fig">
<img alt="../_images/projects-explorer.png" src="../_images/projects-explorer.png" />
<figcaption>
<p><span class="caption-text">Project Explorer</span><a class="headerlink" href="#projects-explorer-fig" title="Permalink to this image"></a></p>
</figcaption>
</figure>
</section>
</section>
<section id="visual-profiler-transition-guide">
<span id="nvvp-guide"></span><h2><span class="section-number">3.14. </span>Visual Profiler Transition Guide<a class="headerlink" href="#visual-profiler-transition-guide" title="Permalink to this headline"></a></h2>
<p>This guide provides tips for moving from Visual Profiler to NVIDIA Nsight Compute. NVIDIA Nsight Compute tries to provide as much parity as possible with Visual Profiler’s kernel profiling features, but some functionality is now covered by different tools.</p>
<section id="trace">
<span id="nvvp-trace"></span><h3><span class="section-number">3.14.1. </span>Trace<a class="headerlink" href="#trace" title="Permalink to this headline"></a></h3>
<p>NVIDIA Nsight Compute does not support tracing GPU or API activities on an accurate timeline. This functionality is covered by <a class="reference external" href="https://developer.nvidia.com/nsight-systems">NVIDIA Nsight Systems</a>. In the <a class="reference external" href="index.html#quick-start-interactive">Interactive Profile Activity</a>, the <a class="reference external" href="index.html#tool-window-api-stream">API Stream</a> tool window provides a stream of recent API calls on each thread. However, since all tracked API calls are serialized by default, it does not collect accurate timestamps.</p>
</section>
<section id="sessions">
<span id="nvvp-sessions"></span><h3><span class="section-number">3.14.2. </span>Sessions<a class="headerlink" href="#sessions" title="Permalink to this headline"></a></h3>
<p>Instead of sessions, NVIDIA Nsight Compute uses <a class="reference external" href="index.html#projects">Projects</a> to launch and gather connection details and collected reports.</p>
<ul>
<li><p><strong>Executable and Import Sessions</strong></p>
<p>Use the <a class="reference external" href="index.html#projects-explorer">Project Explorer</a> or the <a class="reference external" href="index.html#main-menu">Main Menu</a> to create a new project. Reports collected from the command line, i.e. using NVIDIA Nsight Compute CLI, can be opened directly using the main menu. In addition, you can use the Project Explorer to associate existing reports as well as any other artifacts such as executables, notes, etc., with the project. Note that those associations are only references; in other words, moving or deleting the project file on disk will not update its artifacts.</p>
<p>nvprof or command-line profiler output files, as well as Visual Profiler sessions, cannot be imported into NVIDIA Nsight Compute.</p>
</li>
</ul>
</section>
<section id="timeline">
<span id="nvvp-timeline"></span><h3><span class="section-number">3.14.3. </span>Timeline<a class="headerlink" href="#timeline" title="Permalink to this headline"></a></h3>
<p>Since trace analysis is now covered by Nsight Systems, NVIDIA Nsight Compute does not provide views of the application timeline. The <a class="reference external" href="index.html#tool-window-api-stream">API Stream</a> tool window does show a per-thread stream of the last captured CUDA API calls. However, those are serialized and do not maintain runtime concurrency or provide accurate timing information.</p>
</section>
<section id="analysis">
<span id="nvvp-analysis"></span><h3><span class="section-number">3.14.4. </span>Analysis<a class="headerlink" href="#analysis" title="Permalink to this headline"></a></h3>
<ul>
<li><p><strong>Guided Analysis</strong></p>
<p>All trace-based analysis is now covered by <a class="reference external" href="https://developer.nvidia.com/nsight-systems">NVIDIA Nsight Systems</a>. This means that NVIDIA Nsight Compute does not include analysis regarding concurrent CUDA streams or (for example) UVM events. For per-kernel analysis, NVIDIA Nsight Compute provides recommendations based on collected performance data on the <a class="reference external" href="index.html#profiler-report-details-page">Details Page</a>. These rules currently require you to collect the required metrics via their sections up front, and do not support partial on-demand profiling.</p>
<p>To use the rule-based recommendations, enable the respective rules in the <a class="reference external" href="index.html#tool-window-sections-info">Metric Selection</a>. Before profiling, enable <em>Apply Rules</em> in the <a class="reference external" href="index.html#options-profile">Profile Options</a>, or click the <em>Apply Rules</em> button in the report afterward.</p>
</li>
<li><p><strong>Unguided Analysis</strong></p>
<p>All trace-based analysis is now covered by Nsight Systems. For per-kernel analysis, Python-based rules provide analysis and recommendations. See <em>Guided Analysis</em> above for more details.</p>
</li>
<li><p><strong>PC Sampling View</strong></p>
<p>Source-correlated PC sampling information can now be viewed in the <a class="reference external" href="index.html#profiler-report-source-page">Source Page</a>. Aggregated warp states are shown on the <a class="reference external" href="index.html#profiler-report-details-page">Details Page</a> in the <em>Warp State Statistics</em> section.</p>
</li>
<li><p><strong>Memory Statistics</strong></p>
<p>Memory Statistics are located on the <a class="reference external" href="index.html#profiler-report-details-page">Details Page</a>. Enable the <em>Memory Workload Analysis</em> sections to collect the respective information.</p>
</li>
<li><p><strong>NVLink View</strong></p>
<p>NVLink topology diagram and NVLink property table are located on the <a class="reference external" href="index.html#profiler-report-details-page">Details Page</a>. Enable the <em>NVLink Topology</em> and <em>NVLink Table</em> sections to collect the respective information.</p>
<p>Refer to the <a class="reference external" href="../ReleaseNotes/index.html#known-issues">Known Issues</a> section for the limitations related to NVLink.</p>
</li>
<li><p><strong>Source-Disassembly View</strong></p>
<p>Source correlated with PTX and SASS disassembly is shown on the <a class="reference external" href="index.html#profiler-report-source-page">Source Page</a>. Which information is available depends on your application’s compilation/JIT flags.</p>
</li>
<li><p><strong>GPU Details View</strong></p>
<p>NVIDIA Nsight Compute does not automatically collect data for each executed kernel, and it does not collect any data for device-side memory copies. Summary information for all profiled kernel launches is shown on the <a class="reference external" href="index.html#profiler-report-summary-page">Summary Page</a>. Comprehensive information on all collected metrics for all profiled kernel launches is shown on the <a class="reference external" href="index.html#profiler-report-raw-page">Raw Page</a>.</p>
</li>
<li><p><strong>CPU Details View</strong></p>
<p>CPU callstack sampling is now covered by <a class="reference external" href="https://developer.nvidia.com/nsight-systems">NVIDIA Nsight Systems</a>.</p>
</li>
<li><p><strong>OpenACC Details View</strong></p>
<p>OpenACC performance analysis with NVIDIA Nsight Compute is available to a limited extent. OpenACC parallel regions are not explicitly recognized, but CUDA kernels generated by the OpenACC compiler can be profiled as regular CUDA kernels. See the <a class="reference external" href="https://developer.nvidia.com/nsight-systems">NVIDIA Nsight Systems</a> release notes to check its latest support status.</p>
</li>
<li><p><strong>OpenMP Details View</strong></p>
<p>OpenMP performance analysis is not supported by NVIDIA Nsight Compute. See the <a class="reference external" href="https://developer.nvidia.com/nsight-systems">NVIDIA Nsight Systems</a> release notes to check its latest support status.</p>
</li>
<li><p><strong>Properties View</strong></p>
<p>NVIDIA Nsight Compute does not collect CUDA API and GPU activities and their properties. Performance data for profiled kernel launches is reported (for example) on the <a class="reference external" href="index.html#profiler-report-details-page">Details Page</a>.</p>
</li>
<li><p><strong>Console View</strong></p>
<p>NVIDIA Nsight Compute does not currently collect stdout/stderr application output.</p>
</li>
<li><p><strong>Settings View</strong></p>
<p>Application launch settings are specified in the <a class="reference external" href="index.html#connection-dialog">Connection Dialog</a>. For reports collected from the UI, launch settings can be inspected on the <a class="reference external" href="index.html#profiler-report-session-page">Session Page</a> after profiling.</p>
</li>
<li><p><strong>CPU Source View</strong></p>
<p>Source for CPU-only APIs is not available. Source for profiled GPU kernel launches is shown on the <a class="reference external" href="index.html#profiler-report-source-page">Source Page</a>.</p>
</li>
</ul>
</section>
<section id="command-line-arguments">
<span id="nvvp-command-line"></span><h3><span class="section-number">3.14.5. </span>Command Line Arguments<a class="headerlink" href="#command-line-arguments" title="Permalink to this headline"></a></h3>
<p>Please execute ncu-ui with the <code class="docutils literal notranslate"><span class="pre">-h</span></code> parameter within a shell window to see the currently supported command line arguments for the NVIDIA Nsight Compute UI.</p>
<p>To open a collected profile report with ncu-ui, simply pass the path to the report file as a parameter to the shell command.</p>
</section>
</section>
<section id="visual-studio-integration-guide">
<h2><span class="section-number">3.15. </span>Visual Studio Integration Guide<a class="headerlink" href="#visual-studio-integration-guide" title="Permalink to this headline"></a></h2>
<p>This guide provides information on using NVIDIA Nsight Compute within Microsoft Visual Studio, using the <a class="reference external" href="https://developer.nvidia.com/nsight-tools-visual-studio-integration">NVIDIA Nsight Integration</a> Visual Studio extension, allowing for a seamless development workflow.</p>
<section id="visual-studio-integration-overview">
<h3><span class="section-number">3.15.1. </span>Visual Studio Integration Overview<a class="headerlink" href="#visual-studio-integration-overview" title="Permalink to this headline"></a></h3>
<p>NVIDIA Nsight Integration is a Visual Studio extension that allows you to access the power of NVIDIA Nsight Compute from within Visual Studio.</p>
<p>When NVIDIA Nsight Compute is installed along with NVIDIA Nsight Integration, NVIDIA Nsight Compute activities will appear under the NVIDIA ‘Nsight’ menu in the Visual Studio menu bar. These activities launch NVIDIA Nsight Compute with the current project settings and executable.</p>
<p>For more information about using NVIDIA Nsight Compute from within Visual Studio, please visit</p>
<ul class="simple">
<li><p><a class="reference external" href="https://developer.nvidia.com/nsight-tools-visual-studio-integration">NVIDIA Nsight Integration Overview</a></p></li>
<li><p><a class="reference external" href="https://docs.nvidia.com/nsight-vs-integration/index.html">NVIDIA Nsight Integration User Guide</a></p></li>
</ul>
<p class="rubric-h1 rubric">Notices</p>
<p class="rubric-h2 rubric">Notices</p>
<p>ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND SEPARATELY, “MATERIALS”) ARE BEING PROVIDED “AS IS.” NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS FOR A PARTICULAR PURPOSE.</p>
<p>Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the consequences of use of such information or for any infringement of patents or other rights of third parties that may result from its use. No license is granted by implication or otherwise under any patent rights of NVIDIA Corporation. Specifications mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems without express written approval of NVIDIA Corporation.</p>
<p class="rubric-h2 rubric">Trademarks</p>
<p>NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation in the U.S. and other countries. Other company and product names may be trademarks of the respective companies with which they are associated.</p>
</section>
</section>
</section>


           </div>
          </div>
          <footer>

  <hr/>

  <div role="contentinfo">
    <p>&#169; Copyright 2018-2024, NVIDIA Corporation &amp; Affiliates. All rights reserved.
      <span class="lastupdated">Last updated on Mar 06, 2024.
      </span></p>
  </div>

   

</footer>
        </div>
      </div>
    </section>
  </div>
  <script>
      jQuery(function () {
          SphinxRtdTheme.Navigation.enable(true);
      });
  </script>
 



</body>
</html>