File: index.html

package info (click to toggle)
nvidia-cuda-toolkit 11.8.0-5~deb12u1
  • links: PTS, VCS
  • area: non-free
  • in suites: bookworm
  • size: 18,338,396 kB
  • sloc: ansic: 172,472; cpp: 57,058; javascript: 21,597; python: 12,656; xml: 12,438; makefile: 2,949; sh: 2,056; perl: 352
file content (3489 lines) | stat: -rw-r--r-- 319,820 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
1920
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
1936
1937
1938
1939
1940
1941
1942
1943
1944
1945
1946
1947
1948
1949
1950
1951
1952
1953
1954
1955
1956
1957
1958
1959
1960
1961
1962
1963
1964
1965
1966
1967
1968
1969
1970
1971
1972
1973
1974
1975
1976
1977
1978
1979
1980
1981
1982
1983
1984
1985
1986
1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018
2019
2020
2021
2022
2023
2024
2025
2026
2027
2028
2029
2030
2031
2032
2033
2034
2035
2036
2037
2038
2039
2040
2041
2042
2043
2044
2045
2046
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058
2059
2060
2061
2062
2063
2064
2065
2066
2067
2068
2069
2070
2071
2072
2073
2074
2075
2076
2077
2078
2079
2080
2081
2082
2083
2084
2085
2086
2087
2088
2089
2090
2091
2092
2093
2094
2095
2096
2097
2098
2099
2100
2101
2102
2103
2104
2105
2106
2107
2108
2109
2110
2111
2112
2113
2114
2115
2116
2117
2118
2119
2120
2121
2122
2123
2124
2125
2126
2127
2128
2129
2130
2131
2132
2133
2134
2135
2136
2137
2138
2139
2140
2141
2142
2143
2144
2145
2146
2147
2148
2149
2150
2151
2152
2153
2154
2155
2156
2157
2158
2159
2160
2161
2162
2163
2164
2165
2166
2167
2168
2169
2170
2171
2172
2173
2174
2175
2176
2177
2178
2179
2180
2181
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191
2192
2193
2194
2195
2196
2197
2198
2199
2200
2201
2202
2203
2204
2205
2206
2207
2208
2209
2210
2211
2212
2213
2214
2215
2216
2217
2218
2219
2220
2221
2222
2223
2224
2225
2226
2227
2228
2229
2230
2231
2232
2233
2234
2235
2236
2237
2238
2239
2240
2241
2242
2243
2244
2245
2246
2247
2248
2249
2250
2251
2252
2253
2254
2255
2256
2257
2258
2259
2260
2261
2262
2263
2264
2265
2266
2267
2268
2269
2270
2271
2272
2273
2274
2275
2276
2277
2278
2279
2280
2281
2282
2283
2284
2285
2286
2287
2288
2289
2290
2291
2292
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
2432
2433
2434
2435
2436
2437
2438
2439
2440
2441
2442
2443
2444
2445
2446
2447
2448
2449
2450
2451
2452
2453
2454
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473
2474
2475
2476
2477
2478
2479
2480
2481
2482
2483
2484
2485
2486
2487
2488
2489
2490
2491
2492
2493
2494
2495
2496
2497
2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
2576
2577
2578
2579
2580
2581
2582
2583
2584
2585
2586
2587
2588
2589
2590
2591
2592
2593
2594
2595
2596
2597
2598
2599
2600
2601
2602
2603
2604
2605
2606
2607
2608
2609
2610
2611
2612
2613
2614
2615
2616
2617
2618
2619
2620
2621
2622
2623
2624
2625
2626
2627
2628
2629
2630
2631
2632
2633
2634
2635
2636
2637
2638
2639
2640
2641
2642
2643
2644
2645
2646
2647
2648
2649
2650
2651
2652
2653
2654
2655
2656
2657
2658
2659
2660
2661
2662
2663
2664
2665
2666
2667
2668
2669
2670
2671
2672
2673
2674
2675
2676
2677
2678
2679
2680
2681
2682
2683
2684
2685
2686
2687
2688
2689
2690
2691
2692
2693
2694
2695
2696
2697
2698
2699
2700
2701
2702
2703
2704
2705
2706
2707
2708
2709
2710
2711
2712
2713
2714
2715
2716
2717
2718
2719
2720
2721
2722
2723
2724
2725
2726
2727
2728
2729
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770
2771
2772
2773
2774
2775
2776
2777
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787
2788
2789
2790
2791
2792
2793
2794
2795
2796
2797
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
2872
2873
2874
2875
2876
2877
2878
2879
2880
2881
2882
2883
2884
2885
2886
2887
2888
2889
2890
2891
2892
2893
2894
2895
2896
2897
2898
2899
2900
2901
2902
2903
2904
2905
2906
2907
2908
2909
2910
2911
2912
2913
2914
2915
2916
2917
2918
2919
2920
2921
2922
2923
2924
2925
2926
2927
2928
2929
2930
2931
2932
2933
2934
2935
2936
2937
2938
2939
2940
2941
2942
2943
2944
2945
2946
2947
2948
2949
2950
2951
2952
2953
2954
2955
2956
2957
2958
2959
2960
2961
2962
2963
2964
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974
2975
2976
2977
2978
2979
2980
2981
2982
2983
2984
2985
2986
2987
2988
2989
2990
2991
2992
2993
2994
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
3030
3031
3032
3033
3034
3035
3036
3037
3038
3039
3040
3041
3042
3043
3044
3045
3046
3047
3048
3049
3050
3051
3052
3053
3054
3055
3056
3057
3058
3059
3060
3061
3062
3063
3064
3065
3066
3067
3068
3069
3070
3071
3072
3073
3074
3075
3076
3077
3078
3079
3080
3081
3082
3083
3084
3085
3086
3087
3088
3089
3090
3091
3092
3093
3094
3095
3096
3097
3098
3099
3100
3101
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121
3122
3123
3124
3125
3126
3127
3128
3129
3130
3131
3132
3133
3134
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152
3153
3154
3155
3156
3157
3158
3159
3160
3161
3162
3163
3164
3165
3166
3167
3168
3169
3170
3171
3172
3173
3174
3175
3176
3177
3178
3179
3180
3181
3182
3183
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209
3210
3211
3212
3213
3214
3215
3216
3217
3218
3219
3220
3221
3222
3223
3224
3225
3226
3227
3228
3229
3230
3231
3232
3233
3234
3235
3236
3237
3238
3239
3240
3241
3242
3243
3244
3245
3246
3247
3248
3249
3250
3251
3252
3253
3254
3255
3256
3257
3258
3259
3260
3261
3262
3263
3264
3265
3266
3267
3268
3269
3270
3271
3272
3273
3274
3275
3276
3277
3278
3279
3280
3281
3282
3283
3284
3285
3286
3287
3288
3289
3290
3291
3292
3293
3294
3295
3296
3297
3298
3299
3300
3301
3302
3303
3304
3305
3306
3307
3308
3309
3310
3311
3312
3313
3314
3315
3316
3317
3318
3319
3320
3321
3322
3323
3324
3325
3326
3327
3328
3329
3330
3331
3332
3333
3334
3335
3336
3337
3338
3339
3340
3341
3342
3343
3344
3345
3346
3347
3348
3349
3350
3351
3352
3353
3354
3355
3356
3357
3358
3359
3360
3361
3362
3363
3364
3365
3366
3367
3368
3369
3370
3371
3372
3373
3374
3375
3376
3377
3378
3379
3380
3381
3382
3383
3384
3385
3386
3387
3388
3389
3390
3391
3392
3393
3394
3395
3396
3397
3398
3399
3400
3401
3402
3403
3404
3405
3406
3407
3408
3409
3410
3411
3412
3413
3414
3415
3416
3417
3418
3419
3420
3421
3422
3423
3424
3425
3426
3427
3428
3429
3430
3431
3432
3433
3434
3435
3436
3437
3438
3439
3440
3441
3442
3443
3444
3445
3446
3447
3448
3449
3450
3451
3452
3453
3454
3455
3456
3457
3458
3459
3460
3461
3462
3463
3464
3465
3466
3467
3468
3469
3470
3471
3472
3473
3474
3475
3476
3477
3478
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
<!DOCTYPE html
  PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en-us" xml:lang="en-us">
   <head>
      <meta http-equiv="Content-Type" content="text/html; charset=utf-8"></meta>
      <meta http-equiv="X-UA-Compatible" content="IE=edge"></meta>
      <meta name="copyright" content="(C) Copyright 2005"></meta>
      <meta name="DC.rights.owner" content="(C) Copyright 2005"></meta>
      <meta name="DC.Type" content="concept"></meta>
      <meta name="DC.Title" content="Nsight Compute CLI"></meta>
      <meta name="abstract" content="The user manual for the NVIDIA Nsight Compute Command Line Interface."></meta>
      <meta name="description" content="The user manual for the NVIDIA Nsight Compute Command Line Interface."></meta>
      <meta name="DC.Coverage" content="Nsight Compute"></meta>
      <meta name="DC.subject" content="Nsight Compute CLI"></meta>
      <meta name="keywords" content="Nsight Compute CLI"></meta>
      <meta name="DC.Format" content="XHTML"></meta>
      <meta name="DC.Identifier" content="abstract"></meta>
      <link rel="stylesheet" type="text/css" href="../common/formatting/commonltr.css"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/site.css"></link>
      <title>Nsight Compute CLI :: Nsight Compute Documentation</title>
      <!--[if lt IE 9]>
      <script src="../common/formatting/html5shiv-printshiv.min.js"></script>
      <![endif]-->
      <script type="text/javascript" charset="utf-8" src="../common/scripts/tynt/tynt.js"></script>
      
      <script src="https://assets.adobedtm.com/5d4962a43b79/c1061d2c5e7b/launch-191c2462b890.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.ba-hashchange.min.js"></script>
      <script type="text/javascript" charset="utf-8" src="../common/formatting/jquery.scrollintoview.min.js"></script>
      <script type="text/javascript" src="../search/htmlFileList.js"></script>
      <script type="text/javascript" src="../search/htmlFileInfoList.js"></script>
      <script type="text/javascript" src="../search/nwSearchFnt.min.js"></script>
      <script type="text/javascript" src="../search/stemmers/en_stemmer.min.js"></script>
      <script type="text/javascript" src="../search/index-1.js"></script>
      <script type="text/javascript" src="../search/index-2.js"></script>
      <script type="text/javascript" src="../search/index-3.js"></script>
      <link rel="canonical" href="https://developer.nvidia.com/NsightComputeCli/index.html"></link>
      <link rel="stylesheet" type="text/css" href="../common/formatting/qwcode.highlight.css"></link>
   </head>
   <body>
      
      <header id="header"><span id="company">NVIDIA</span><span id="site-title">Nsight Compute Documentation</span><form id="search" method="get" action="search">
            <input type="text" name="search-text"></input><fieldset id="search-location">
               <legend>Search In:</legend>
               <label><input type="radio" name="search-type" value="site"></input>Entire Site</label>
               <label><input type="radio" name="search-type" value="document"></input>Just This Document</label></fieldset>
            <button type="reset">clear search</button>
            <button id="submit" type="submit">search</button></form>
      </header>
      <div id="site-content">
         <nav id="site-nav">
            <div class="category closed"><a href="../index.html" title="The root of the site.">Nsight Compute
                  v2022.3.0</a></div>
            <div class="category"><a href="index.html" title="Nsight Compute CLI">Nsight Compute CLI</a></div>
            <ul>
               <li>
                  <div class="section-link"><a href="#introduction">1.&nbsp;Introduction</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#quick-start">2.&nbsp;Quickstart</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#usage">3.&nbsp;Usage</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#modes">3.1.&nbsp;Modes</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#multi-process">3.2.&nbsp;Multi-Process Support</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#output-pages">3.3.&nbsp;Output Pages</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#profile-import">3.4.&nbsp;Profile Import</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#metrics-and-units">3.5.&nbsp;Metrics and Units</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvtx-filtering">3.6.&nbsp;NVTX Filtering</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#command-line-options">4.&nbsp;Command Line Options</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#command-line-options-general">4.1.&nbsp;General</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#command-line-options-launch">4.2.&nbsp;Launch</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#command-line-options-attach">4.3.&nbsp;Attach</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#command-line-options-profile">4.4.&nbsp;Profile</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#command-line-options-sampling">4.5.&nbsp;Sampling</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#command-line-options-file">4.6.&nbsp;File</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#command-line-options-console-output">4.7.&nbsp;Console Output</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#command-line-options-response-file">4.8.&nbsp;Response File</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#command-line-options-file-macros">4.9.&nbsp;File Macros</a></div>
                     </li>
                  </ul>
               </li>
               <li>
                  <div class="section-link"><a href="#environment-variables">5.&nbsp;Environment Variables</a></div>
               </li>
               <li>
                  <div class="section-link"><a href="#nvprof-guide">6.&nbsp;Nvprof Transition Guide</a></div>
                  <ul>
                     <li>
                        <div class="section-link"><a href="#nvprof-trace">6.1.&nbsp;Trace</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvprof-metric-collection">6.2.&nbsp;Metric Collection</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvprof-metric-comparison">6.3.&nbsp;Metric Comparison</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvprof-event-comparison">6.4.&nbsp;Event Comparison</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvprof-filtering">6.5.&nbsp;Filtering</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvprof-output">6.6.&nbsp;Output</a></div>
                     </li>
                     <li>
                        <div class="section-link"><a href="#nvprof-launch-attach">6.7.&nbsp;Launch and Attach</a></div>
                     </li>
                  </ul>
               </li>
            </ul>
         </nav>
         <div id="resize-nav"></div>
         <nav id="search-results">
            <h2>Search Results</h2>
            <ol></ol>
         </nav>
         
         <div id="contents-container">
            <div id="breadcrumbs-container">
               <div id="release-info">Nsight Compute CLI
                  (<a href="../pdf/NsightComputeCli.pdf">PDF</a>)
                  
                  -
                  
                  v2022.3.0
                  (<a href="https://developer.nvidia.com">older</a>)
                  -
                  Last updated August 24, 2022
                  -
                  <a href="mailto:devtools@nvidia.com?subject=Nsight%20Compute%20Documentation%20Feedback:%20Nsight%20Compute%20CLI">Send Feedback</a>
                  -
                  <span class="st_facebook"></span><span class="st_twitter"></span><span class="st_linkedin"></span><span class="st_reddit"></span><span class="st_slashdot"></span><span class="st_tumblr"></span><span class="st_sharethis"></span></div>
            </div>
            <article id="contents">
               <div class="topic nested0" id="abstract"><a name="abstract" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#abstract" name="abstract" shape="rect">Nsight Compute CLI</a></h2>
                  <div class="body conbody">
                     <p class="shortdesc">The user manual for the NVIDIA Nsight Compute Command Line Interface.</p>
                     <p class="p">
                        NVIDIA Nsight Compute Command Line Interface (CLI) manual.
                        Information on workflows and options for the command line, including multi-process profiling and NVTX filtering.
                        Transition guide for Nvprof.
                        
                     </p>
                  </div>
               </div>
               <div class="topic concept nested0" id="introduction"><a name="introduction" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#introduction" name="introduction" shape="rect">1.&nbsp;Introduction</a></h2>
                  <div class="body conbody">
                     <p class="p"><span class="keyword">NVIDIA Nsight Compute CLI</span> (<span class="keyword">ncu</span>) provides a non-interactive way
                        to profile applications from the command line.
                        It can print the results directly on the command line or store them in a report file.
                        It can also be used to simply launch the target application (see <a class="xref" href="index.html#command-line-options-general" shape="rect">General</a> for details)
                        and later attach with <span class="keyword">NVIDIA Nsight Compute</span> or another <span class="keyword">ncu</span> instance.
                        
                     </p>
                     <p class="p">
                        For users migrating from nvprof to <span class="keyword">NVIDIA Nsight Compute</span>, please additionally see the <a class="xref" href="index.html#nvprof-guide" shape="rect">Nvprof Transition Guide</a>
                        for comparison of features and workflows.
                        
                     </p>
                  </div>
               </div>
               <div class="topic concept nested0" id="quick-start"><a name="quick-start" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#quick-start" name="quick-start" shape="rect">2.&nbsp;Quickstart</a></h2>
                  <div class="body conbody">
                     <ol class="ol">
                        <li class="li"><strong class="ph b">Launch the target application with the command line profiler</strong><p class="p"> The command line profiler launches the target application, instruments the
                              target API, and collects profile results for the specified kernels. The CLI
                              executable is called <span class="keyword">ncu</span>. A shortcut with
                              this name is located in the base directory of the <span class="keyword">NVIDIA Nsight Compute</span> installation. The actual executable is located in
                              the folder <samp class="ph codeph">target\windows-desktop-win7-x64</samp> on Windows or
                              <samp class="ph codeph">target/linux-desktop-glibc_2_11_3-x64</samp> on Linux. By default,
                              <span class="keyword">NVIDIA Nsight Compute</span> is installed in
                              <samp class="ph codeph">/usr/local/cuda-&lt;cuda-version&gt;/NsightCompute-&lt;version&gt;</samp>
                              on Linux and in <samp class="ph codeph">C:\Program Files\NVIDIA Corporation\Nsight Compute
                                 &lt;version&gt;</samp> on Windows. 
                           </p>
                           <div class="p"> To collect the default set of data for all kernel launches in the target
                              application, launch:
                              <pre class="pre screen" xml:space="preserve">$ <span class="keyword">ncu</span> -o profile CuVectorAddMulti.exe</pre></div>
                           <div class="p"> The application runs in instrumented mode and for each kernel launch, a profile
                              result is created. The results are written by default to profile.ncu-rep.
                              Each output from the compute profiler starts with <samp class="ph codeph">==PROF==</samp>. The
                              other lines are output from the application itself. For each profiled kernel,
                              the name of the kernel function and the progress of data collection are shown. To
                              collect all requested profile information, it may be required to replay the
                              kernels multiple times. The total number of replay passes per kernel is shown
                              after profiling has completed.
                              <pre class="pre screen" xml:space="preserve">
[Vector addition of 1144477 elements]
==PROF== Connected to process 5268
Copy input data from the host memory to the CUDA device
CUDA kernel launch A with 4471 blocks of 256 threads
==PROF== Profiling "vectorAdd_A" - 0: 0%....50%....100% - 46 passes
CUDA kernel launch B with 4471 blocks of 256 threads
==PROF== Profiling "vectorAdd_B" - 1: 0%....50%....100% - 46 passes
Copy output data from the CUDA device to the host memory
Done
==PROF== Disconnected from process 5268
==PROF== Report: profile.ncu-rep
            </pre></div>
                        </li>
                        <li class="li"><strong class="ph b">Customizing data collection</strong><p class="p">
                              Options are available to specify for which kernels data should be collected.
                              <samp class="ph codeph">-c</samp> limits the number of kernel launches collected.
                              <samp class="ph codeph">-s</samp> skips the given number of kernels before data collection starts.
                              <samp class="ph codeph">-k</samp> allows you to filter the kernels by a regex match of their names.
                              <samp class="ph codeph">--kernel-id</samp> allows you to filter kernels by context, stream,
                              name and invocation, similar to nvprof.
                              
                           </p>
                           <p class="p">
                              To limit what should be collected for each kernel launch, specify the exact *.section (files) by their identifier using <samp class="ph codeph">--section</samp>.
                              Each section file defines a set of metrics to be collected, grouped logically to solve a specific performance question.
                              By default, the sections associated with the default set are collected.
                              Use <samp class="ph codeph">--list-sets</samp> to see the list of currently available sets.
                              Use <samp class="ph codeph">--list-sections</samp> to see the list of currently available sections.
                              The default search directory and location of pre-defined section files is also called <samp class="ph codeph">sections/</samp>.
                              See the <a class="xref" href="../ProfilingGuide/index.html#sets-and-sections" shape="rect">Profiling Guide</a> for more details.
                              
                           </p>
                           <p class="p">
                              Alternatively, you can collect a set of individual metrics using <samp class="ph codeph">--metrics</samp>.
                              The available metrics can be queried using <samp class="ph codeph">--query-metrics</samp>.
                              For an explanation of the naming conventions and structuring of metrics, see
                              <a class="xref" href="../ProfilingGuide/index.html#metrics-structure" shape="rect">Metrics Structure</a>.
                              
                           </p>
                           <p class="p">
                              Most metrics in <span class="keyword">NVIDIA Nsight Compute</span> are named using a base name and various suffixes, e.g. <dfn class="term">sm__throughput.avg.pct_of_peak_sustained_elapsed</dfn>.
                              The base name is <dfn class="term">sm__throughput</dfn> and the suffix is <dfn class="term">avg.pct_of_peak_sustained_elapsed</dfn>.
                              This is because most metrics follow the same structure and have the same set of suffixes.
                              You need to pass the base or full name to <span class="keyword">NVIDIA Nsight Compute</span> when selecting a metric for profiling.
                              Use <samp class="ph codeph">--query-metrics-mode suffix --metrics &lt;metrics list&gt;</samp> to see the full names for the chosen metrics.
                              
                           </p>
                           <p class="p">
                              Some additional metrics do not follow this structured naming. They are documented in the
                              <a class="xref" href="../ProfilingGuide/index.html#metrics-reference" shape="rect">Metrics Reference</a>.
                              
                           </p>
                        </li>
                        <li class="li"><strong class="ph b">Changing command line output</strong><p class="p">
                              By default, a temporary file is used to store profiling results, and data is printed to the command line.
                              To permanently store the profiler report, use <samp class="ph codeph">-o</samp> to specify the output filename.
                              
                           </p>
                           <p class="p">
                              Besides storing results in a report file, the command line profiler can print
                              results using different pages. Those pages correspond to the respective pages in the UI’s report.
                              By default, the <a class="xref" href="index.html#output-pages" shape="rect">Details page</a> is printed, if no explicit output file is specified.
                              To select a different page or print in addition to storing in an explicit file,
                              use the <samp class="ph codeph">--page=&lt;Page&gt;</samp> option. Currently, the following pages are supported: <samp class="ph codeph">details, raw, source</samp>.
                              
                           </p>
                           <p class="p">
                              Use <samp class="ph codeph">--csv</samp> to make any output comma separated and easier to process further.
                              See <a class="xref" href="index.html#command-line-options-console-output" shape="rect">Console Output</a> for further options, e.g. summary views.
                              
                           </p>
                        </li>
                        <li class="li"><strong class="ph b">Open the report in the UI</strong><p class="p">
                              The UI executable is called <span class="keyword">ncu-ui</span>. A shortcut with this name is located in the base directory
                              of the <span class="keyword">NVIDIA Nsight Compute</span> installation. The actual executable is located in the folder
                              <samp class="ph codeph">host\windows-desktop-win7-x64</samp> on Windows or <samp class="ph codeph">host/linux-desktop-glibc_2_11_3-x64</samp> on Linux.
                              In the UI window, close the <dfn class="term">Connection</dfn> dialog and open the report file through <dfn class="term">File &gt; Open</dfn>,
                              or by dragging the report file into <span class="keyword">NVIDIA Nsight Compute</span>.
                              
                           </p>
                           <p class="p">
                              You can also specify the report file as a command line parameter to the executable, i.e. as
                              <samp class="ph codeph"><span class="keyword">ncu-ui</span> &lt;MyReport.ncu-rep&gt;</samp>.
                              Alternatively, when using <span class="keyword">NVIDIA Nsight Compute CLI</span> on a platform with host support,
                              <samp class="ph codeph">--open-in-ui</samp> can be used directly with <span class="keyword">ncu</span>
                              to open a collected report in the user interface.
                              
                           </p>
                           <p class="p">
                              The report opens in a new document window.
                              For more information about the report, see the <a class="xref" href="../NsightCompute/index.html#profiler-report" shape="rect">Profiler Report</a>
                              for collecting profile information through <span class="keyword">NVIDIA Nsight Compute</span>.
                              
                           </p>
                        </li>
                     </ol>
                  </div>
               </div>
               <div class="topic concept nested0" id="usage"><a name="usage" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#usage" name="usage" shape="rect">3.&nbsp;Usage</a></h2>
                  <div class="body conbody"></div>
                  <div class="topic concept nested1" id="modes"><a name="modes" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#modes" name="modes" shape="rect">3.1.&nbsp;Modes</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           Modes change the fundamental behavior of the command line profiler.
                           Depending on which mode is chosen, different <a class="xref" href="index.html#command-line-options" shape="rect">Command Line Options</a> become available.
                           For example, <a class="xref" href="index.html#command-line-options-launch" shape="rect">Launch</a> is invalid if the <dfn class="term">Attach</dfn> mode is selected.
                           
                        </p>
                        <ul class="ul">
                           <li class="li">
                              <p class="p"><strong class="ph b">Launch-and-attach:</strong>
                                 The target application is launched on the local system with the tool's injection libraries.
                                 Depending on which profiling options are chosen, selected kernels in
                                 the application are profiled and the results printed to the console or stored in a report file.
                                 The tool exits once the target application finishes or crashes, and once all results are processed.
                                 
                              </p>
                              <p class="p">
                                 This is the default, and the only mode that supports profiling of child processes on selected platforms.
                                 
                              </p>
                           </li>
                           <li class="li">
                              <p class="p"><strong class="ph b">Launch:</strong>
                                 The target application is launched on the local system with the tool's injection libraries.
                                 As soon as the first intercepted API call is reached (commonly <samp class="ph codeph">cuInit()</samp>), all application threads are suspended.
                                 The application now expects a tool to attach for profiling.
                                 You can attach using <span class="keyword">NVIDIA Nsight Compute</span> or using the command line profiler's <dfn class="term">Attach</dfn> mode.
                                 
                              </p>
                           </li>
                           <li class="li">
                              <p class="p"><strong class="ph b">Attach:</strong>
                                 The tool tries to connect to a target application previously launched using <span class="keyword">NVIDIA Nsight Compute</span>
                                 or using the command line profiler's <dfn class="term">Launch</dfn> mode.
                                 The tool can attach to a target on the local system or using a remote connection.
                                 
                              </p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="multi-process"><a name="multi-process" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#multi-process" name="multi-process" shape="rect">3.2.&nbsp;Multi-Process Support</a></h3>
                     <div class="body conbody">
                        <p class="p"><span class="keyword">NVIDIA Nsight Compute CLI</span> supports profiling multi-process applications on the following platforms:
                           x86_64 Windows, x86_64 Linux, DRIVE OS Linux, DRIVE OS QNX, PowerPC.
                           See the <a class="xref" href="index.html#command-line-options-launch" shape="rect">Launch</a> options on how to enable this feature.
                           
                        </p>
                        <p class="p"> On x86_64 Windows, <span class="keyword">NVIDIA Nsight Compute CLI</span> supports profiling 64-bit
                           processes launched from 32-bit applications by default. On x86_64 Linux, launching from
                           32-bit applications requires you to enable the <samp class="ph codeph">support-32bit</samp> option,
                           and the required 32-bit libraries must be installed on your system. On DRIVE OS Linux, DRIVE OS QNX
                           and PowerPC, tracking of 32-bit applications is not supported. <dfn class="term">Profiling</dfn> of 32-bit
                           processes is not supported on any platform. 
                        </p>
                        <p class="p"><strong class="ph b">Profiling MPI applications is a special case of multi-process profiling.</strong><br clear="none"></br><span class="keyword">NVIDIA Nsight Compute CLI</span>  can be used to profile applications launched with the <samp class="ph codeph">mpirun</samp> command.
                           
                        </p>
                        <ul class="ul">
                           <li class="li">
                              <div class="p">
                                 To profile all ranks on a node and store all the profiling data in a single report file:
                                 <pre xml:space="preserve">
ncu --target-processes all -o &lt;report-name&gt; mpirun [mpi arguments] &lt;app&gt; [app arguments]
            </pre></div>
                           </li>
                           <li class="li">
                              <div class="p">
                                 To profile multi-node submissions, one instance of <span class="keyword">NVIDIA Nsight Compute CLI</span>  can be used per node.
                                 Ensure that you specify unique report files per rank.
                                 <pre xml:space="preserve">
mpirun [mpi arguments] ncu -o report_%q{OMPI_COMM_WORLD_RANK} &lt;app&gt; [app arguments]
           </pre></div>
                           </li>
                           <li class="li">
                              <div class="p">
                                 To profile a single rank one can use a wrapper script. The following script (called "wrap.sh") profiles rank 0 only:
                                 <pre xml:space="preserve">
#!/bin/bash
if [[ $OMPI_COMM_WORLD_RANK == 0 ]]; then
   ncu -o report_${OMPI_COMM_WORLD_RANK}  --target-processes all "$@"
else
   "$@"
fi
           </pre>
                                 and then execute:
                                 <pre xml:space="preserve">
mpirun [mpi arguments] ./wrap.sh &lt;app&gt; [app arguments]
           </pre></div>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="output-pages"><a name="output-pages" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#output-pages" name="output-pages" shape="rect">3.3.&nbsp;Output Pages</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The command line profiler supports printing results to the console using various pages.
                           Each page has an equivalent in <span class="keyword">NVIDIA Nsight Compute</span>'s <dfn class="term">Profiler Report</dfn>.
                           In the command line profiler, they are slightly adapted to fit console output.
                           To select a page, use the <samp class="ph codeph">--page</samp> option.
                           By default, the details page is used.
                           Note that if <samp class="ph codeph">--page</samp> is not used but <samp class="ph codeph">--export</samp> is, no results will be printed to the console.
                           
                        </p>
                        <ul class="ul">
                           <li class="li">
                              <p class="p"><strong class="ph b">Details:</strong>
                                 This page represents <span class="keyword">NVIDIA Nsight Compute</span>'s <dfn class="term">Details</dfn> page.
                                 For every profiled kernel launch, each collected section is printed as a three-column table,
                                 followed by any rule results applied to this section.
                                 Rule results not associated with any section are printed after the kernel’s sections.
                                 
                              </p>
                              <p class="p">
                                 The first section table column shows the metric name.
                                 If the metric was given a label in the section, it is used instead.
                                 The second column shows the metric unit, if available.
                                 The third column shows the metric value.
                                 Both metric unit and value are automatically adjusted to the most fitting order of magnitude.
                                 By default, only metrics defined in section headers are shown.
                                 This can be changed by passing the <samp class="ph codeph">--details-all</samp> option on the command line.
                                 
                              </p>
                              <p class="p">
                                 Some metrics will show multiple values, separated by ";", e.g.
                                 memory_l2_transactions_global&nbsp;Kbytes&nbsp;240; 240; 240; 240; 240.
                                 Those are instanced metrics, which have one value per represented instance.
                                 An instance can be a streaming multiprocessor, an assembly source line, etc.
                                 
                              </p>
                           </li>
                           <li class="li">
                              <p class="p"><strong class="ph b">Raw:</strong>
                                 This page represents <span class="keyword">NVIDIA Nsight Compute</span>'s <dfn class="term">Raw</dfn> page.
                                 For every profiled kernel launch, each collected metric is printed as a three-column table.
                                 Besides metrics from sections, this includes automatically collected metrics such as device
                                 attributes and kernel launch information.
                                 
                              </p>
                              <p class="p">
                                 The first column shows the metric name.
                                 The second and third columns show the metric unit and value, respectively.
                                 Both metric unit and value are automatically adjusted to the most fitting order of magnitude.
                                 No unresolved regex:, group:, or breakdown: metrics are included.
                                 
                              </p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="profile-import"><a name="profile-import" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#profile-import" name="profile-import" shape="rect">3.4.&nbsp;Profile Import</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           Using the <samp class="ph codeph">--import</samp> option, saved reports can be imported into the command line profiler.
                           When using this flag, most other options are not available, except for certain result filtering options.
                           
                           They are marked as such in the <a class="xref" href="index.html#command-line-options-profile" shape="rect">Profile options</a> table.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="metrics-and-units"><a name="metrics-and-units" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#metrics-and-units" name="metrics-and-units" shape="rect">3.5.&nbsp;Metrics and Units</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           When available and applicable, metrics are shown along with their unit.
                           This is to make it apparent if a metric represents cycles, threads, bytes/s, and so on.
                           
                        </p>
                        <p class="p">
                           By default, units are scaled automatically so that metric values are shown with a reasonable order of magnitude.
                           Units are scaled using their SI-factors, i.e. byte-based units are scaled using a factor of 1000 and the prefixes K, M, G,
                           etc.
                           Time-based units are also scaled using a factor of 1000, with the prefixes n, u and m.
                           This scaling can be changed using a command line option, see <a class="xref" href="index.html#command-line-options-console-output" shape="rect">Console Output</a> options for details.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvtx-filtering"><a name="nvtx-filtering" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvtx-filtering" name="nvtx-filtering" shape="rect">3.6.&nbsp;NVTX Filtering</a></h3>
                     <div class="body conbody">
                        <p class="p"><samp class="ph codeph">--nvtx-include &lt;configuration&gt; --nvtx-exclude
                              &lt;configuration&gt;</samp><br clear="none"></br> These options are used to profile only those
                           kernels which satisfy the conditions mentioned in the configuration. Through these
                           options, you can choose which kernel falls into a specific range or collection of
                           ranges. 
                        </p>
                        <p class="p"> You can use both options multiple times, mentioning all the
                           <samp class="ph codeph">--nvtx-include</samp> configurations followed by all
                           <samp class="ph codeph">--nvtx-exclude</samp> configurations. NVTX filtering requires the
                           <samp class="ph codeph">--nvtx</samp> option. 
                        </p>
                        <p class="p">
                           NVTX ranges are of two types: NvtxRangeStart/End and NvtxRangePush/Pop. The configuration syntax for both types is briefly
                           described below.
                           <br clear="none"></br><br clear="none"></br><br clear="none"></br></p>
                        <ul class="ul">
                           <li class="li"><strong class="ph b">Start-End Ranges</strong><div class="tablenoborder"><a name="nvtx-filtering__nvtx-filtering-start-end-table" shape="rect">
                                    <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="nvtx-filtering__nvtx-filtering-start-end-table" class="table" frame="border" border="1" rules="all">
                                    <thead class="thead" align="left">
                                       <tr class="row">
                                          <th class="entry" valign="top" width="33.33333333333333%" id="d54e631" rowspan="1" colspan="1">Quantifier</th>
                                          <th class="entry" valign="top" width="33.33333333333333%" id="d54e634" rowspan="1" colspan="1">Description</th>
                                          <th class="entry" valign="top" width="33.33333333333333%" id="d54e637" rowspan="1" colspan="1">Example</th>
                                       </tr>
                                    </thead>
                                    <tbody class="tbody">
                                       <tr class="row">
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e631" rowspan="1" colspan="1">,</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e634" rowspan="1" colspan="1">Delimiter between range
                                             names
                                          </td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e637" rowspan="1" colspan="1">
                                             <p class="p">Range A,Range B</p>
                                             <p class="p">Range B,Range A,Range C</p>
                                          </td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e631" rowspan="1" colspan="1">@</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e634" rowspan="1" colspan="1">Specify domain name. If not
                                             mentioned, assuming &lt;default domain&gt;
                                          </td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e637" rowspan="1" colspan="1">
                                             <p class="p">Domain A@Range A</p>
                                             <p class="p">Domain B@Range B,Range Z</p>
                                          </td>
                                       </tr>
                                    </tbody>
                                 </table>
                              </div><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-include "Domain A@Range A" CuNvtx.exe
                    </pre><p class="p"> The kernels wrapped inside 'Range A' of 'Domain A' in the application are
                                 profiled. 
                              </p><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-include "Range A,Range B" CuNvtx.exe
                    </pre><p class="p"> The kernels wrapped inside both ranges, 'Range A' and 'Range B' of '&lt;default
                                 domain&gt;' in the application are profiled. 
                              </p><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-include "Range A" --nvtx-include "Range B" CuNvtx.exe
                    </pre><p class="p"> The kernels wrapped inside ranges, 'Range A' or 'Range B' of '&lt;default
                                 domain&gt;' in the application are profiled. 
                              </p><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-exclude "Range A" CuNvtx.exe
                    </pre><p class="p"> All the kernels in the application are profiled except those which are wrapped
                                 inside 'Range A' of '&lt;default domain&gt;'. 
                              </p><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-include "Range B" --nvtx-exclude "Range A" CuNvtx.exe
                    </pre><p class="p"> The kernels wrapped inside only 'Range B' and not 'Range A' of '&lt;default
                                 domain&gt;' in the application are profiled.<br clear="none"></br></p>
                           </li>
                           <li class="li"><strong class="ph b">Push-Pop Ranges</strong><div class="tablenoborder"><a name="nvtx-filtering__nvtx-filtering-push-pop-table" shape="rect">
                                    <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="nvtx-filtering__nvtx-filtering-push-pop-table" class="table" frame="border" border="1" rules="all">
                                    <thead class="thead" align="left">
                                       <tr class="row">
                                          <th class="entry" valign="top" width="33.33333333333333%" id="d54e737" rowspan="1" colspan="1">Quantifier</th>
                                          <th class="entry" valign="top" width="33.33333333333333%" id="d54e740" rowspan="1" colspan="1">Description</th>
                                          <th class="entry" valign="top" width="33.33333333333333%" id="d54e743" rowspan="1" colspan="1">Example</th>
                                       </tr>
                                    </thead>
                                    <tbody class="tbody">
                                       <tr class="row">
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e737" rowspan="1" colspan="1">/</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e740" rowspan="1" colspan="1">Delimiter between range names</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e743" rowspan="1" colspan="1">
                                             <p class="p">Range A/Range B</p>
                                             <p class="p">Range A/*/Range B</p>
                                             <p class="p">Range A/</p>
                                          </td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e737" rowspan="1" colspan="1">[</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e740" rowspan="1" colspan="1">Range is at the bottom of the stack</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e743" rowspan="1" colspan="1">
                                             <p class="p">[Range A</p>
                                             <p class="p">[Range A/+/Range Z</p>
                                          </td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e737" rowspan="1" colspan="1">]</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e740" rowspan="1" colspan="1">Range is at the top of the stack</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e743" rowspan="1" colspan="1">
                                             <p class="p">Range Z]</p>
                                             <p class="p">Range C/*/Range Z]</p>
                                          </td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e737" rowspan="1" colspan="1">+</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e740" rowspan="1" colspan="1">Only one range between the two other ranges</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e743" rowspan="1" colspan="1">
                                             <p class="p">Range B/+/Range D</p>
                                          </td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e737" rowspan="1" colspan="1">*</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e740" rowspan="1" colspan="1">Zero or more range(s) between the two other ranges</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e743" rowspan="1" colspan="1">
                                             <p class="p">Range B/*/Range Z</p>
                                          </td>
                                       </tr>
                                       <tr class="row">
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e737" rowspan="1" colspan="1">@</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e740" rowspan="1" colspan="1">Specify the domain name. If not specified, &lt;default domain&gt; is assumed.</td>
                                          <td class="entry" valign="top" width="33.33333333333333%" headers="d54e743" rowspan="1" colspan="1">
                                             <p class="p">Domain A@Range A</p>
                                             <p class="p">Domain B@Range A/*/Range Z]</p>
                                          </td>
                                       </tr>
                                    </tbody>
                                 </table>
                              </div><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-include "Domain A@Range A/" CuNvtx.exe
                    </pre><p class="p">
                                 The kernels wrapped inside 'Range A' of 'Domain A' in the application are profiled.
                                 
                              </p><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-include "[Range A" CuNvtx.exe
                    </pre><p class="p">
                                 The kernels wrapped inside 'Range A' of '&lt;default domain&gt;' where 'Range A' is at the bottom of the stack in the application
                                 are profiled.
                                 
                              </p><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-include "Range A/*/Range B" CuNvtx.exe
                    </pre><p class="p">
                                 The kernels wrapped inside 'Range A' and 'Range B' of '&lt;default domain&gt;' with zero or many ranges between them in the application
                                 are profiled.
                                 
                              </p><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-exclude "Range A/*/Range B" CuNvtx.exe
                    </pre><p class="p">
                                 All the kernels in the application are profiled except those which are wrapped inside 'Range A' and 'Range B' of '&lt;default
                                 domain&gt;' with zero or many ranges between them.
                                 
                              </p><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-include "Range A/" --nvtx-exclude "Range B]" CuNvtx.exe
                    </pre><p class="p">
                                 The kernels wrapped inside only 'Range A' of '&lt;default domain&gt;' but not inside 'Range B' at the top of the stack in the application
                                 are profiled.<br clear="none"></br></p>
                           </li>
                           <li class="li"><strong class="ph b">Regular Expression Support</strong><p class="p">
                                 The configuration syntax for both the types NvtxRangeStart/End and NvtxRangePush/Pop is the same. Additionally, to use regular
                                 expressions, use the following syntax.
                                 
                              </p>
                              <ul class="ul">
                                 <li class="li">
                                    <p class="p">Provide prefix 'regex:' to treat nvtx config as regular expression.</p><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-include "<strong class="ph b">regex:</strong>Domain[A-Z]@Range[0-9]/" CuNvtx.exe
                        </pre><p class="p">
                                       The kernels wrapped inside push/pop range with matching regex 'Range[0-9]' of domain with matching regex 'Domain[A-Z]' are
                                       profiled.<br clear="none"></br></p>
                                 </li>
                                 <li class="li">
                                    <p class="p">
                                       Provide '/' as a prefix to "[" or "]" only for the range part of the config if "[" or "]" is at the start or at the end of
                                       the range part, respectively. This is needed so that NCU can distinguish if "[" or "]" is part of the regex or represents
                                       the top/bottom of the stack.
                                       
                                    </p><pre class="pre screen" xml:space="preserve">
ncu --nvtx --nvtx-include "regex:[0-9]domainA@<strong class="ph b">/</strong>[0-9]rangeA,RangeC[0-9<strong class="ph b">/</strong>]" CuNvtx.exe
                        </pre><p class="p">
                                       The kernels wrapped inside start/end ranges with matching regex '[0-9]rangeA' and 'RangeC[0-9]' of domain with matching regex
                                       '[0-9]domainA' are profiled.<br clear="none"></br></p>
                                 </li>
                                 <li class="li">
                                    <p class="p">
                                       If any quantifier is part of the domain/range name, you need to use '\\' or '\' as a prefix. For the "$" quantifier, only
                                       the '\\' prefix is valid.
                                       
                                    </p>
                                 </li>
                              </ul>
                           </li>
                           <li class="li"><strong class="ph b">Additional Information</strong><pre class="pre screen" xml:space="preserve">
--nvtx-include DomainA@RangeA,DomainB@RangeB //Not a valid config
                </pre><p class="p"> In a single NVTX configuration, multiple ranges with regard to a single domain
                                 can be specified. Mentioning ranges from different domains inside a single NVTX
                                 config is not supported. 
                              </p><pre class="pre screen" xml:space="preserve">
--nvtx-include "Range A\[i\]"
                </pre><p class="p"> Quantifiers '@' ',' '[' ']' '/' '*' '+' can be used in range names using prefix
                                 '\'. The kernels wrapped inside 'Range A[i]' of '&lt;default domain&gt;' in the
                                 application are profiled. 
                              </p><pre class="pre screen" xml:space="preserve">
--nvtx-include "Range A"  //Start/End configuration
--nvtx-include "Range A/" //Push/Pop configuration
--nvtx-include "Range A]" //Push/Pop configuration
                </pre><p class="p"> If the domain/range name contains '\', you need to provide '\\\\' in the config.</p>
                              <p class="p"> Do not use '\\\\' before any quantifier.</p>
                              <p class="p"> To include or exclude only a single range for a Push/Pop configuration without
                                 specifying the stack frame position '[' or ']', use the '/' quantifier at the end. 
                              </p><pre class="pre screen" xml:space="preserve">
--nvtx-include "Range A/*/Range B"
                </pre><p class="p"> The order in which you mention Push/Pop configurations is important. In the
                                 above example, 'Range A' should be below 'Range B' in the stack of ranges so
                                 that the kernel is profiled. 
                              </p>
                              <p class="p"> NVTX filtering honors cudaProfilerStart() and cudaProfilerStop(). There is no
                                 support for ranges with no name. 
                              </p>
                           </li>
                        </ul>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="command-line-options"><a name="command-line-options" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#command-line-options" name="command-line-options" shape="rect">4.&nbsp;Command Line Options</a></h2>
                  <div class="body conbody">
                     <p class="p">
                        For long command line options, passing a unique initial substring can be sufficient.
                        
                     </p>
                  </div>
                  <div class="topic concept nested1" id="command-line-options-general"><a name="command-line-options-general" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-line-options-general" name="command-line-options-general" shape="rect">4.1.&nbsp;General</a></h3>
                     <div class="body conbody">
                        <div class="tablenoborder"><a name="command-line-options-general__command-line-options-general-table" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="command-line-options-general__command-line-options-general-table" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 1. General Command Line Options</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1012" rowspan="1" colspan="1">Option</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1015" rowspan="1" colspan="1">Description</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1018" rowspan="1" colspan="1">Default</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1012" rowspan="1" colspan="1">h,help</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1015" rowspan="1" colspan="1">Show help message</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1018" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1012" rowspan="1" colspan="1">v,version</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1015" rowspan="1" colspan="1">Show version information</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1018" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1012" rowspan="1" colspan="1">mode</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1015" rowspan="1" colspan="1">
                                       Select the mode of interaction with the target application
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">launch-and-attach:</strong> Launch the target application and immediately attach for profiling.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">launch:</strong> Launch the target application and suspend in the first intercepted API call, wait for tool to attach.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">attach:</strong> Attach to a previously launched application to which no other tool is attached.
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1018" rowspan="1" colspan="1">launch-and-attach</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1012" rowspan="1" colspan="1">p,port</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1015" rowspan="1" colspan="1">Base port used for connecting to target applications for <samp class="ph codeph">--mode launch/attach</samp></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1018" rowspan="1" colspan="1">49152</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1012" rowspan="1" colspan="1">max-connections</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1015" rowspan="1" colspan="1">Maximum number of ports for connecting to target applications</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1018" rowspan="1" colspan="1">64</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="command-line-options-launch"><a name="command-line-options-launch" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-line-options-launch" name="command-line-options-launch" shape="rect">4.2.&nbsp;Launch</a></h3>
                     <div class="body conbody">
                        <div class="tablenoborder"><a name="command-line-options-launch__command-line-options-launch-table" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="command-line-options-launch__command-line-options-launch-table" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 2. Launch Command Line Options</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1135" rowspan="1" colspan="1">Option</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1138" rowspan="1" colspan="1">Description</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1141" rowspan="1" colspan="1">Default</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1135" rowspan="1" colspan="1">check-exit-code</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1138" rowspan="1" colspan="1">
                                       Check the application exit code and print an error if it is different than 0.
                                       If set, <samp class="ph codeph">--replay-mode application</samp> will stop after the first pass if the exit code is not 0.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1141" rowspan="1" colspan="1">yes</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1135" rowspan="1" colspan="1">injection-path-64</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1138" rowspan="1" colspan="1">
                                       Override the default path for the injection libraries.
                                       The injection libraries are used by the tools to intercept relevant APIs (like CUDA or NVTX).
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1141" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1135" rowspan="1" colspan="1">call-stack</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1138" rowspan="1" colspan="1">Enable CPU Call Stack collection.</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1141" rowspan="1" colspan="1">false</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1135" rowspan="1" colspan="1">nvtx</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1138" rowspan="1" colspan="1">Enable NVTX support for tools.</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1141" rowspan="1" colspan="1">false</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1135" rowspan="1" colspan="1">target-processes</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1138" rowspan="1" colspan="1">
                                       Select the processes you want to profile. Available modes are:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">application-only</strong>
                                             Profile only the root application process.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">all</strong>
                                             Profile the application and all its child processes.
                                             
                                          </li>
                                       </ul>
                                       
                                       
                                       This option is only available for Linux and Windows targets.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1141" rowspan="1" colspan="1">
                                       application-only
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1135" rowspan="1" colspan="1">target-processes-filter</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1138" rowspan="1" colspan="1">
                                       Set the comma separated expressions to filter which processes are profiled.
                                       
                                       <ul class="ul">
                                          <li class="li"><samp class="ph codeph">&lt;process name&gt;</samp> Set the process name to filter by. Only exactly matched processes are profiled.
                                             
                                          </li>
                                          <li class="li"><samp class="ph codeph">regex:&lt;expression&gt;</samp> Set a regular expression used to match the names of the processes to profile.
                                             On shells that recognize regular expression symbols as special characters (e.g. Linux bash),
                                             the expression needs to be escaped with quotes, e.g. <samp class="ph codeph">--target-processes-filter regex:".*Process"</samp>.
                                             
                                          </li>
                                       </ul>
                                       
                                       
                                       The executable name will be considered as process name to match.
                                       If the process name or the provided expression match, the process will be profiled.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1141" rowspan="1" colspan="1">
                                       <p class="p"><strong class="ph b">Examples</strong></p>
                                       <p class="p"><samp class="ph codeph">--target-processes-filter MatrixMul</samp> Filter all processes having executable name exactly as "MatrixMul".
                                       </p>
                                       <p class="p"><samp class="ph codeph">--target-processes-filter regex:Matrix</samp> Filter all processes that include the string "Matrix" in their executable name, e.g. "MatrixMul" and "MatrixAdd".
                                       </p>
                                       <p class="p"><samp class="ph codeph">--target-processes-filter MatrixMul,MatrixAdd</samp> Filter all processes having executable name exactly as "MatrixMul" or "MatrixAdd".
                                       </p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1135" rowspan="1" colspan="1">support-32bit</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1138" rowspan="1" colspan="1">
                                       Support profiling processes launched from 32-bit applications.
                                       This option is only available on x86_64 Linux.
                                       On Windows, tracking 32-bit applications is enabled by default.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1141" rowspan="1" colspan="1">no</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1135" rowspan="1" colspan="1">null-stdin</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1138" rowspan="1" colspan="1">
                                       Launch the application with '/dev/null' as its standard input.
                                       This avoids applications reading from standard input being stopped by
                                       <samp class="ph codeph">SIGTTIN</samp> signals and hanging when running as backgrounded processes.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1141" rowspan="1" colspan="1">false</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="command-line-options-attach"><a name="command-line-options-attach" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-line-options-attach" name="command-line-options-attach" shape="rect">4.3.&nbsp;Attach</a></h3>
                     <div class="body conbody">
                        <div class="tablenoborder"><a name="command-line-options-attach__command-line-options-attach-table" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="command-line-options-attach__command-line-options-attach-table" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 3. Attach Command Line Options</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1331" rowspan="1" colspan="1">Option</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1334" rowspan="1" colspan="1">Description</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1337" rowspan="1" colspan="1">Default</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1331" rowspan="1" colspan="1">hostname</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1334" rowspan="1" colspan="1">
                                       Set the hostname or IP address for connecting to the machine on which the target application is running.
                                       When attaching to a local target application, use 127.0.0.1.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1337" rowspan="1" colspan="1">127.0.0.1</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="command-line-options-profile"><a name="command-line-options-profile" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-line-options-profile" name="command-line-options-profile" shape="rect">4.4.&nbsp;Profile</a></h3>
                     <div class="body conbody">
                        <div class="tablenoborder"><a name="command-line-options-profile__command-line-options-profile-table" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="command-line-options-profile__command-line-options-profile-table" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 4. Profile Command Line Options</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1386" rowspan="1" colspan="1">Option</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1389" rowspan="1" colspan="1">Description</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e1392" rowspan="1" colspan="1">Default/Examples</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">devices</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       List the GPU devices to enable profiling on, separated by comma.
                                       
                                       <a class="xref" href="index.html#command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">
                                       <p class="p">All devices</p>
                                       <p class="p"><strong class="ph b">Examples</strong></p>
                                       <p class="p"><samp class="ph codeph">--devices 0,2</samp></p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">filter-mode</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Set the filtering mode for kernel launches. Available modes:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">global:</strong> Apply provided launch filters on kernel launches collectively.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">per-gpu:</strong> Apply provided launch filters on kernel launches separately on each device. Effective launch filters for this mode are <samp class="ph codeph">--launch-count</samp> and <samp class="ph codeph">--launch-skip</samp></li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">global</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">kernel-id</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       <p class="p">
                                          Set the identifier to use for matching kernels.
                                          If the kernel does not match the identifier, it will be ignored for profiling.
                                          
                                       </p>
                                       <p class="p">
                                          The identifier must be of the following format:
                                          <dfn class="term">context-id:stream-id:[name-operator:]kernel-name:invocation-nr</dfn></p>
                                       <div class="p">
                                          <ul class="ul">
                                             <li class="li"><strong class="ph b">context-id</strong> is the CUDA context ID or regular expression to match the NVTX name.
                                                
                                             </li>
                                             <li class="li"><strong class="ph b">stream-id</strong> is the CUDA stream ID or regular expression to match the NVTX name.
                                                
                                             </li>
                                             <li class="li"><strong class="ph b">name-operator</strong> is an optional operator to <dfn class="term">kernel-name</dfn>. Currently, <dfn class="term">regex</dfn> is the only supported operator.
                                                
                                             </li>
                                             <li class="li"><strong class="ph b">kernel-name</strong> is the expression to match the kernel name.
                                                By default, this is a full, literal match to what is specified by <samp class="ph codeph">--kernel-name-base</samp>.
                                                When specifying the optional <dfn class="term">regex</dfn> name operator, this is a partial regular expression match to what is specified by <samp class="ph codeph">--kernel-name-base</samp>.
                                                
                                             </li>
                                             <li class="li"><strong class="ph b">invocation-nr</strong> is the N’th invocation of this kernel function. Multiple invocations can also be specified using regular expressions.
                                                
                                             </li>
                                          </ul>
                                          
                                          
                                          If the context/stream ID is a positive number, it will be strictly matched against the CUDA context/stream ID. Otherwise it
                                          will be treated as a regular expression and matched against the context/stream name specified using the NVTX library.
                                          
                                       </div><a class="xref" href="index.html#command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">
                                       <p class="p"><strong class="ph b">Examples</strong></p>
                                       <p class="p"><samp class="ph codeph">--kernel-id ::foo:2</samp> For kernel "foo", match the second invocation.
                                       </p>
                                       <p class="p"><samp class="ph codeph">--kernel-id :::".*5|3"</samp> For all kernels, match the third invocation, and all for which the invocation number ends in "5".
                                       </p>
                                       <p class="p"><samp class="ph codeph">--kernel-id ::regex:^.*foo$:</samp> Match all kernels ending in "foo".
                                       </p>
                                       <p class="p"><samp class="ph codeph">--kernel-id ::regex:^(?!foo):</samp> Match all kernels except those starting with "foo".
                                          Note that depending on your OS and shell, you might need to quote the expression,
                                          e.g. using single quotes in Linux <dfn class="term">bash</dfn>: <samp class="ph codeph">--kernel-id ::regex:'^(?!foo)':</samp></p>
                                       <p class="p"><samp class="ph codeph">--kernel-id 1:2::7</samp> Match all seventh kernel invocations on context 1, stream 2.
                                       </p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">k,kernel-name</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Set the expression to use when matching kernel names.
                                       
                                       <ul class="ul">
                                          <li class="li"><samp class="ph codeph">&lt;kernel name&gt;</samp> Set the kernel name for an exact match.
                                             
                                          </li>
                                          <li class="li"><samp class="ph codeph">regex:&lt;expression&gt;</samp> Set the regex to use for matching the kernel name.
                                             On shells that recognize regular expression symbols as special characters (e.g. Linux bash),
                                             the expression needs to be escaped with quotes, e.g. <samp class="ph codeph">--kernel-name regex:".*Foo"</samp>.
                                             
                                          </li>
                                       </ul>
                                       
                                       
                                       If a kernel's name does not match the provided name or expression, the kernel will be ignored for profiling.
                                       <a class="xref" href="index.html#command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">
                                       <p class="p"><strong class="ph b">Examples</strong></p>
                                       <p class="p"><samp class="ph codeph">-k foo</samp> Match all kernels named exactly "foo".
                                       </p>
                                       <p class="p"><samp class="ph codeph">-k regex:foo</samp> Match all kernels that include the string "foo", e.g. "foo" and "fooBar".
                                       </p>
                                       <p class="p"><samp class="ph codeph">-k regex:"foo|bar"</samp> Match all kernels including the strings "foo" or "bar", e.g. "foo", "foobar", "_bar2".
                                       </p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">kernel-name-base</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Set the basis for <samp class="ph codeph">--kernel-name</samp>, and <samp class="ph codeph">--kernel-id</samp> kernel-name. Options are:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">function:</strong> Function name without parameters, templates etc.
                                             e.g. <samp class="ph codeph">dmatrixmul</samp></li>
                                          <li class="li"><strong class="ph b">demangled:</strong> Demangled function name, including parameters, templates, etc.
                                             e.g. <samp class="ph codeph">dmatrixmul(float*,int,int)</samp></li>
                                          <li class="li"><strong class="ph b">mangled:</strong> Mangled function name.
                                             e.g. <samp class="ph codeph">_Z10dmatrixmulPfiiS_iiS_</samp></li>
                                       </ul><a class="xref" href="index.html#command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">function</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">c,launch-count</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">Limit the number of profiled kernel launches.
                                       The count is only incremented for launches that match the kernel filters.
                                       <a class="xref" href="index.html#command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">s,launch-skip</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Set the number of kernel launches to skip before starting to profile kernels.
                                       The number takes into account only launches that match the kernel filters.
                                       <a class="xref" href="index.html#command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">0</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">launch-skip-before-match</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Set the number of kernel launches to skip before starting to profile.
                                       The count is incremented for all launches, regardless of the kernel filters.
                                       <a class="xref" href="index.html#command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">0</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">kill</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Terminate the target application once the requested <samp class="ph codeph">--launch-count</samp> has been profiled.
                                       Allowed values:
                                       
                                       <ul class="ul">
                                          <li class="li">
                                             on/off
                                             
                                          </li>
                                          <li class="li">
                                             yes/no
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">no</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">replay-mode</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Mechanism used for replaying a kernel launch multiple times to collect all requested profiling data:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">kernel:</strong> Replay individual kernel launches "transparently" during the execution of the application.
                                             See <a class="xref" href="../ProfilingGuide/index.html#kernel-replay" shape="rect">Kernel Replay</a> for more details.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">application:</strong> Relaunch the entire application multiple times. Requires deterministic program execution.
                                             See <a class="xref" href="../ProfilingGuide/index.html#application-replay" shape="rect">Application Replay</a> for more details.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">range:</strong> Replay ranges of CUDA API calls and kernel launches "transparently" during the execution of the application.
                                             Ranges must be defined using <samp class="ph codeph">cu(da)ProfilerStart/Stop</samp> API pairs or <a class="xref" href="index.html#nvtx-filtering" shape="rect">NVTX expressions</a>.
                                             See <a class="xref" href="../ProfilingGuide/index.html#range-replay" shape="rect">Range Replay</a> for more details.
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">kernel</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">app-replay-buffer</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Application replay buffer location.
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">file:</strong>
                                             Replay pass data is buffered in a temporary file. The report is created after profiling has completed.
                                             This mode is more scalable, as the amount of required memory does not scale with the number of profiled kernels.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">memory:</strong>
                                             Replay pass data is buffered in memory, and the report is created while profiling.
                                             This mode can result in better performance if the filesystem is slow,
                                             but the amount of required memory scales with the number of profiled kernels.
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">file</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">app-replay-match</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Application replay kernel matching strategy.
                                       For all options, kernels are matched on a per-process and per-device (GPU) basis.
                                       The options below configure the applied strategy in more detail.
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">name:</strong> Kernels are matched in the following order:
                                             1. (mangled) name,
                                             2. order of execution
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">grid:</strong> Kernels are matched in the following order:
                                             1. (mangled) name,
                                             2. CUDA grid/block size,
                                             3. order of execution
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">all:</strong> Kernels are matched in the following order:
                                             1. (mangled) name,
                                             2. CUDA grid/block size,
                                             3. CUDA context ID,
                                             4. CUDA stream ID,
                                             5. order of execution
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">grid</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">app-replay-mode</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Application replay kernel matching mode:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">strict:</strong> Requires all kernels to match across all replay passes.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">relaxed:</strong> Produces results only for kernels that could be matched across replay passes.
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">strict</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">range-replay-options</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Range replay options, separated by comma.
                                       The following options are supported:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">enable-greedy-sync</strong><p class="p">
                                                Insert ctx sync for applicable deferred APIs during capture.
                                                
                                             </p>
                                          </li>
                                          <li class="li"><strong class="ph b">disable-host-restore</strong><p class="p">
                                                Disable restoring device-written host allocations.
                                                
                                             </p>
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">none</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">list-sets</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       List all section sets found in the searched section folders and exit.
                                       For each set, the associated sections are shown, as well as the estimated number of metrics collected as part of this set.
                                       This number can be used as an estimate of the relative profiling overhead per kernel launch of this set.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">set</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Identifier of section set to collect.
                                       If not specified, the default set is collected.
                                       The full set of sections can be collected with <samp class="ph codeph">--set full</samp>.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">
                                       If no <samp class="ph codeph">--set</samp> option is given, the default set is collected.
                                       If not specified and <samp class="ph codeph">--section</samp> or <samp class="ph codeph">--metrics</samp> are used, no sets are collected.
                                       Use <samp class="ph codeph">--list-sets</samp> to see which set is the default.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">list-sections</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">List all sections found in the searched section folders and exit.</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">section</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Add a section identifier to collect in one of the following ways:
                                       
                                       <ul class="ul">
                                          <li class="li"><samp class="ph codeph">&lt;section identifier&gt;</samp> Set the section identifier for an exact match.
                                             
                                          </li>
                                          <li class="li"><samp class="ph codeph">regex:&lt;expression&gt;</samp> Set a regular expression that is matched against the full section identifier.
                                             For example, <samp class="ph codeph">.*Stats</samp>, matches all sections ending with 'Stats'.
                                             On shells that recognize regular expression symbols as special characters (e.g. Linux bash),
                                             the expression needs to be escaped with quotes, e.g. <samp class="ph codeph">--section "regex:.*Stats"</samp>.
                                             
                                          </li>
                                       </ul>
                                       
                                       
                                       This option is ignored when used with <samp class="ph codeph">--import</samp> and <samp class="ph codeph">--page raw</samp> or <samp class="ph codeph">--page source</samp>.
                                       <a class="xref" href="index.html#command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">
                                       If no <samp class="ph codeph">--section</samp> options are given, the sections associated with the default set are collected.
                                       If no sets are found, all sections are collected.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">section-folder</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Add a non-recursive search path for .section files.
                                       Section files in this folder will be made available to the <samp class="ph codeph">--section</samp> option.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">
                                       If no <samp class="ph codeph">--section-folder</samp> options are given, the <samp class="ph codeph">sections</samp> folder is added by default.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">section-folder-recursive</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1"> Add a recursive search path for .section
                                       files. Section files in this folder and all folders below will be made
                                       available to the <samp class="ph codeph">--section</samp> option. 
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">
                                       If no <samp class="ph codeph">--section-folder</samp> options are given, the <samp class="ph codeph">sections</samp> folder is added by default.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">list-rules</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">List all rules found in the searched section folders and exit.</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">apply-rules</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">Apply active and applicable rules to each profiling result.
                                       Use <samp class="ph codeph">--rule</samp> to limit which rules to apply.
                                       Allowed values:
                                       
                                       <ul class="ul">
                                          <li class="li">
                                             on/off
                                             
                                          </li>
                                          <li class="li">
                                             yes/no
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">yes</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">rule</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">Add a rule identifier to apply. Implies <samp class="ph codeph">--apply-rules yes</samp>.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1"> If no <samp class="ph codeph">--rule</samp> options are
                                       given, all applicable rules in the <samp class="ph codeph">sections</samp> folder are
                                       applied.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">import-source</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">If available from -lineinfo, correlated CUDA source files are permanently imported into the report.
                                       Allowed values:
                                       
                                       <ul class="ul">
                                          <li class="li">
                                             on/off
                                             
                                          </li>
                                          <li class="li">
                                             yes/no
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">no</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">list-metrics</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       List all metrics collected from active sections.
                                       If the list of active sections is restricted using the <samp class="ph codeph">--section</samp> option,
                                       only metrics from those sections will be listed.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">query-metrics</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Query available metrics for the devices on the system.
                                       Use <samp class="ph codeph">--devices</samp> and <samp class="ph codeph">--chips</samp> to filter which devices to query.
                                       Note that by default, a valid suffix needs to be appended to the listed metric names in order for them to become valid metrics.
                                       See <samp class="ph codeph">--query-metrics-mode</samp> for how to get the list of valid suffixes, or check the
                                       <a class="xref" href="../ProfilingGuide/index.html#metrics-structure" shape="rect">Kernel Profiling Guide</a>.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">query-metrics-mode</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Set the mode for querying metrics. Implies <samp class="ph codeph">--query-metrics</samp>.
                                       Available modes:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">base:</strong> Only the base names of the metrics.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">suffix:</strong> Suffix names for the base metrics. This gives the list of all metrics derived from the base metrics. Use <samp class="ph codeph">--metrics</samp> to specify the base metrics to query.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">all:</strong> Full names for all metrics. This gives the list of all base metrics and their suffix metrics.
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">base</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">metrics</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       <p class="p">
                                          Specify all metrics to be profiled, separated
                                          by comma. If no <samp class="ph codeph">--section</samp> options are given, only the
                                          temporary section containing all metrics listed using this option is
                                          collected. If <samp class="ph codeph">--section</samp> options are given in addition
                                          to <samp class="ph codeph">--metrics</samp>, all metrics from those sections and from
                                          <samp class="ph codeph">--metrics</samp> are collected.
                                          
                                       </p>
                                       <p class="p">
                                          Names passed to this option support the following prefixes:
                                          <samp class="ph codeph">regex:&lt;expression&gt;</samp> expands to all metrics that partially match the expression. Enclose the regular expression in ^...$ to force a full match.
                                          <samp class="ph codeph">group:&lt;name&gt;</samp> lists all metrics of the metric group with that name. See section files for valid group names.
                                          <samp class="ph codeph">breakdown:&lt;metric&gt;</samp> expands to the input metrics of the high-level throughput metric. If the specified metric does not support a breakdown, no
                                          metrics are added.
                                          
                                       </p>
                                       <p class="p">
                                          If a metric requires a suffix to be valid, and neither <samp class="ph codeph">regex:</samp> nor <samp class="ph codeph">group:</samp>
                                          are used, this option automatically expands the name to all available first-level sub-metrics.
                                          
                                       </p>
                                       <p class="p">
                                          When importing a report, <samp class="ph codeph">group:</samp> and <samp class="ph codeph">breakdown:</samp> are not supported.
                                          
                                       </p><a class="xref" href="index.html#command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1"></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">list-chips</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">List all supported chips that can be used with <samp class="ph codeph">--chips</samp>.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">chips</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">Specify the chips for querying metrics, separated by comma.</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">
                                       <p class="p"><strong class="ph b">Examples</strong></p>
                                       <p class="p"><samp class="ph codeph">--chips gv100,tu102</samp></p>
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">profile-from-start</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Set whether the application should be profiled from its start. Allowed values:
                                       
                                       <ul class="ul">
                                          <li class="li">
                                             on/off
                                             
                                          </li>
                                          <li class="li">
                                             yes/no
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">yes</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">disable-profiler-start-stop</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Disable profiler start/stop. When enabled, <samp class="ph codeph">cu(da)ProfilerStart/Stop</samp> API calls are ignored.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">quiet</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Suppress all profiling output.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">cache-control</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Control the behavior of the GPU caches during profiling. Allowed values:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">all:</strong> All GPU caches are flushed before each kernel replay iteration during profiling.
                                             While metric values in the execution environment of the application might be slightly different without invalidating the caches,
                                             this mode offers the most reproducible metric results across the replay passes and also across multiple runs of the target
                                             application.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">none:</strong> No GPU caches are flushed during profiling. This can improve performance and better replicates the
                                             application behavior if only a single kernel replay pass is necessary for metric collection.
                                             However, some metric results will vary depending on prior GPU work, and between replay iterations.
                                             This can lead to inconsistent and out-of-bounds metric values.
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">all</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">clock-control</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Control the behavior of the GPU clocks during profiling. Allowed values:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">base:</strong>
                                             GPC and memory clocks are locked to their respective base frequency during profiling.
                                             This has no impact on thermal throttling.
                                             Note that actual clocks might still vary, depending on the level of driver support for this feature.
                                             As an alternative, use <samp class="ph codeph">nvidia-smi</samp> to lock the clocks externally and set this
                                             option to <samp class="ph codeph">none</samp>.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">none:</strong>
                                             No GPC or memory frequencies are changed during profiling.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">reset:</strong>
                                             Reset GPC and memory clocks for all or the selected devices and exit.
                                             Use if a previous, killed execution of <span class="keyword">ncu</span>
                                             left the GPU clocks in a locked state.
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">base</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">nvtx-include</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Adds an include statement to the <a class="xref" href="index.html#nvtx-filtering" shape="rect">NVTX filter</a>, which allows selecting kernels to profile based on NVTX ranges.
                                       <a class="xref" href="index.html#command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1386" rowspan="1" colspan="1">nvtx-exclude</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1389" rowspan="1" colspan="1">
                                       Adds an exclude statement to the <a class="xref" href="index.html#nvtx-filtering" shape="rect">NVTX filter</a>, which allows selecting kernels to profile based on NVTX ranges.
                                       <a class="xref" href="index.html#command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a></td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e1392" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="command-line-options-sampling"><a name="command-line-options-sampling" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-line-options-sampling" name="command-line-options-sampling" shape="rect">4.5.&nbsp;Sampling</a></h3>
                     <div class="body conbody">
                        <div class="tablenoborder"><a name="command-line-options-sampling__command-line-options-sampling-table" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="command-line-options-sampling__command-line-options-sampling-table" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 5. Sampling Command Line Options</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2477" rowspan="1" colspan="1">Option</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2480" rowspan="1" colspan="1">Description</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2483" rowspan="1" colspan="1">Default</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2477" rowspan="1" colspan="1">sampling-interval</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2480" rowspan="1" colspan="1">
                                       Set the sampling period in the range of [0..31].
                                       The actual sampling period is 2 ^ (5 + value) cycles.
                                       If set to 'auto', the profiler tries to automatically determine a high sampling frequency without skipping samples or overflowing
                                       the output buffer.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2483" rowspan="1" colspan="1">auto</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2477" rowspan="1" colspan="1">sampling-max-passes</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2480" rowspan="1" colspan="1">
                                       Set maximum number of passes used for sampling (see the <a class="xref" href="../ProfilingGuide/index.html#overhead" shape="rect">Kernel Profiling Guide</a> for more details on profiling overhead).
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2483" rowspan="1" colspan="1">5</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2477" rowspan="1" colspan="1">sampling-buffer-size</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2480" rowspan="1" colspan="1">Set the size of the device-side allocation for samples in bytes.</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2483" rowspan="1" colspan="1">32*1024*1024</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="command-line-options-file"><a name="command-line-options-file" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-line-options-file" name="command-line-options-file" shape="rect">4.6.&nbsp;File</a></h3>
                     <div class="body conbody">
                        <div class="tablenoborder"><a name="command-line-options-file__command-line-options-file-table" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="command-line-options-file__command-line-options-file-table" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 6. File Command Line Options</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2558" rowspan="1" colspan="1">Option</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2561" rowspan="1" colspan="1">Description</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2564" rowspan="1" colspan="1">Default</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2558" rowspan="1" colspan="1">log-file</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2561" rowspan="1" colspan="1">Send all tool output to the specified file, or 
                                       one of the standard channels. The file will be overwritten. If the file doesn't exist, a new one will be created. "stdout"
                                       as the whole file name indicates the standard output channel (stdout). "stderr" as the whole file name indicates the standard error
                                       channel (stderr).
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2564" rowspan="1" colspan="1">
                                       If <samp class="ph codeph">--log-file</samp> is not set, profile results will be printed on the console.
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2558" rowspan="1" colspan="1">o,export</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2561" rowspan="1" colspan="1">Set the output file for writing the profile report.
                                       If not set, a temporary file will be used which is removed afterwards. The specified
                                       name supports macro expansion. See <a class="xref" href="index.html#command-line-options-file-macros" shape="rect">File Macros</a> for more details.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2564" rowspan="1" colspan="1">
                                       If <samp class="ph codeph">--export</samp> is set and no <samp class="ph codeph">--page</samp> option is given,
                                       no profile results will be printed on the console.
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2558" rowspan="1" colspan="1">f,force-overwrite</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2561" rowspan="1" colspan="1">Force overwriting all output files.</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2564" rowspan="1" colspan="1">By default, the profiler won't overwrite existing output files and will show an error instead.</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2558" rowspan="1" colspan="1">i,import</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2561" rowspan="1" colspan="1">Set the input file for reading the profile results.</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2564" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2558" rowspan="1" colspan="1">open-in-ui</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2561" rowspan="1" colspan="1">Open report in UI instead of showing result on terminal. (Only available on host platforms)</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2564" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2558" rowspan="1" colspan="1">section-folder-restore</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2561" rowspan="1" colspan="1">Restores stock files to the default section folder or the folder specified by an accompanying <dfn class="term">--section-folder</dfn> option. If the operation will overwrite modified files then the <dfn class="term">--force-overwrite</dfn> option is required.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2564" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="command-line-options-console-output"><a name="command-line-options-console-output" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-line-options-console-output" name="command-line-options-console-output" shape="rect">4.7.&nbsp;Console Output</a></h3>
                     <div class="body conbody">
                        <div class="tablenoborder"><a name="command-line-options-console-output__command-line-options-console-output-table" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="command-line-options-console-output__command-line-options-console-output-table" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 7. Console Output Command Line Options</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2688" rowspan="1" colspan="1">Option</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2691" rowspan="1" colspan="1">Description</th>
                                    <th class="entry" valign="top" width="33.33333333333333%" id="d54e2694" rowspan="1" colspan="1">Default</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">csv</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">Use comma-separated values as console output. Implies --print-units base by default.</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">page</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">
                                       Select the report page to print console output for. Available pages are:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">details</strong>
                                             Show results grouped as sections, include rule results.
                                             Some metrics that are collected by default (e.g. device attributes) are omitted
                                             if not specified explicitly in any section or using <samp class="ph codeph">--metrics</samp>.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">raw</strong>
                                             Show all collected metrics by kernel launch.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">source</strong>
                                             Show source. See <samp class="ph codeph">--print-source</samp> to select the source view.
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1"><strong class="ph b">details</strong>.
                                       If no <samp class="ph codeph">--page</samp> option is given and <samp class="ph codeph">--export</samp> is set,
                                       no results are printed to the console output.
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">print-source</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">
                                       Select the source view:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">sass</strong>
                                             Show SASS (assembly) instructions for each kernel launch.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">ptx</strong>
                                             Show PTX source of every cubin for which at least one kernel is profiled.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">cuda</strong>
                                             Show the entire CUDA-C source file containing the kernel code for each kernel launch.
                                             CLI shows CUDA source only if file exists on the host machine.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">cuda,sass</strong>
                                             Show SASS CUDA-C source correlation for each kernel launch.
                                             CLI shows CUDA source only if file exists on the host machine.
                                             
                                          </li>
                                       </ul>
                                       
                                       
                                       Metric correlation with source is available in <strong class="ph b">sass</strong>, and <strong class="ph b">cuda,sass</strong> source view.
                                       Metrics specified with <samp class="ph codeph">--metrics</samp> and specified section file with <samp class="ph codeph">--section</samp> are correlated.
                                       Consider restricting the number of selected metrics such that values fit into a single output row.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1">sass</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">resolve-source-file</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">
                                       Resolve CUDA source file in the <samp class="ph codeph">--page source</samp> output.
                                       Provide a comma-separated list of full file paths.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">details-all</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">Include all section metrics on details page.</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1">&nbsp;</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">print-units</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">
                                       Select the mode for scaling of metric units. Available modes are:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">auto</strong>
                                             Show all metrics automatically scaled to the most fitting order of magnitude.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">base</strong>
                                             Show all metrics in their base unit.
                                             
                                          </li>
                                       </ul>
                                       
                                       
                                       Replaces deprecated option <samp class="ph codeph">--units</samp>.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1">auto</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">print-fp</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">Show all numeric metrics in the console output as floating point numbers. Replaces deprecated option <samp class="ph codeph">--fp</samp>.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1">false</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">print-kernel-base</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">Set the basis for kernel name output. See <samp class="ph codeph">--kernel-regex-base</samp> for options. Replaces deprecated option <samp class="ph codeph">--kernel-base</samp>.
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1">demangled</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">print-metric-instances</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">
                                       Set output mode for metrics with instance values:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">none</strong>
                                             Only show GPU aggregate value.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">values</strong>
                                             Show GPU aggregate followed by all instance values.
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1">none</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">print-nvtx-rename</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">
                                       Select how NVTX should be used for renaming:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">none</strong>
                                             Don't use NVTX for renaming.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">kernel</strong>
                                             Rename kernels with the most recent enclosing NVTX push/pop range.
                                             
                                          </li>
                                       </ul>
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1">none</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">print-rule-details</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">
                                       Print additional details for rule results, such as the triggering metrics.
                                       Currently has no effect in CSV mode.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1">false</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2688" rowspan="1" colspan="1">print-summary</td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2691" rowspan="1" colspan="1">
                                       Select the summary output mode. Available modes are:
                                       
                                       <ul class="ul">
                                          <li class="li"><strong class="ph b">none</strong>
                                             No summary.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">per-gpu</strong>
                                             Summary for each GPU.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">per-kernel</strong>
                                             Summary for each kernel type.
                                             
                                          </li>
                                          <li class="li"><strong class="ph b">per-nvtx</strong>
                                             Summary for each NVTX context.
                                             
                                          </li>
                                       </ul>
                                       
                                       
                                       Replaces deprecated option <samp class="ph codeph">--summary</samp>.
                                       
                                    </td>
                                    <td class="entry" valign="top" width="33.33333333333333%" headers="d54e2694" rowspan="1" colspan="1">none</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="command-line-options-response-file"><a name="command-line-options-response-file" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-line-options-response-file" name="command-line-options-response-file" shape="rect">4.8.&nbsp;Response File</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           Response files can be specified by adding <samp class="ph codeph">@FileName</samp> to the command line.
                           The file name must immediately follow the <samp class="ph codeph">@</samp> character.
                           The content of each response file is inserted in place of the corresponding response file option.
                           
                        </p>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="command-line-options-file-macros"><a name="command-line-options-file-macros" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#command-line-options-file-macros" name="command-line-options-file-macros" shape="rect">4.9.&nbsp;File Macros</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           The file name specified with option <samp class="ph codeph">-o</samp> or <samp class="ph codeph">--export</samp>
                           supports the following macro expansions. Occurrences of these macros in the report
                           file name are replaced by the corresponding character sequence. If not specified
                           otherwise, the macros cannot be used as part of the file path.
                           
                        </p>
                        <div class="tablenoborder"><a name="command-line-options-file-macros__command-line-options-file-macro-expansions" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="command-line-options-file-macros__command-line-options-file-macro-expansions" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 8. Macro Expansions</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="20%" id="d54e3063" rowspan="1" colspan="1">Macro</th>
                                    <th class="entry" valign="top" width="80%" id="d54e3066" rowspan="1" colspan="1">Description</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e3063" rowspan="1" colspan="1">%h</td>
                                    <td class="entry" valign="top" width="80%" headers="d54e3066" rowspan="1" colspan="1">
                                       Expands to the host name of the machine on which the command line profiler is running.
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e3063" rowspan="1" colspan="1">%q{ENV_NAME}</td>
                                    <td class="entry" valign="top" width="80%" headers="d54e3066" rowspan="1" colspan="1">
                                       Expands to the content of the variable with the given name <samp class="ph codeph">ENV_NAME</samp> from the environment of the command line profiler.
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e3063" rowspan="1" colspan="1">%p</td>
                                    <td class="entry" valign="top" width="80%" headers="d54e3066" rowspan="1" colspan="1">
                                       Expands to the process ID of the command line profiler.
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e3063" rowspan="1" colspan="1">%i</td>
                                    <td class="entry" valign="top" width="80%" headers="d54e3066" rowspan="1" colspan="1">
                                       Expands to the lowest unused positive integer number that guarantees the resulting file name is not yet used.
                                       This macro can only be used once in the output file name.
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="20%" headers="d54e3063" rowspan="1" colspan="1">%%</td>
                                    <td class="entry" valign="top" width="80%" headers="d54e3066" rowspan="1" colspan="1">
                                       Expands to a single <samp class="ph codeph">%</samp> character in the output file name.
                                       This macro can be used in the file path and the file name.
                                       
                                    </td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="environment-variables"><a name="environment-variables" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#environment-variables" name="environment-variables" shape="rect">5.&nbsp;Environment Variables</a></h2>
                  <div class="body conbody">
                     <p class="p">
                        The following environment variables can be set before launching <span class="keyword">NVIDIA Nsight Compute CLI</span>, or the UI, respectively.
                        
                     </p>
                     <div class="tablenoborder"><a name="environment-variables__environment-variables-table" shape="rect">
                           <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="environment-variables__environment-variables-table" class="table" frame="border" border="1" rules="all">
                           <caption><span class="tablecap">Table 9. Environment Variables</span></caption>
                           <thead class="thead" align="left">
                              <tr class="row">
                                 <th class="entry" valign="top" width="33.33333333333333%" id="d54e3160" rowspan="1" colspan="1">Name</th>
                                 <th class="entry" valign="top" width="33.33333333333333%" id="d54e3163" rowspan="1" colspan="1">Description</th>
                                 <th class="entry" valign="top" width="33.33333333333333%" id="d54e3166" rowspan="1" colspan="1">Default/Values</th>
                              </tr>
                           </thead>
                           <tbody class="tbody">
                              <tr class="row">
                                 <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3160" rowspan="1" colspan="1">
                                    NV_COMPUTE_PROFILER_DISABLE_STOCK_FILE_DEPLOYMENT
                                    
                                 </td>
                                 <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3163" rowspan="1" colspan="1">
                                    <p class="p">
                                       Disable file deployment to the versioned <samp class="ph codeph">Sections</samp> directory, using section and rule files from the stock directory
                                       within the <span class="keyword">NVIDIA Nsight Compute</span> installation instead.
                                       
                                    </p>
                                    <p class="p">
                                       By default, the versioned directory from the user's documents folder is used to ensure that
                                       any potential user updates are taken into account.
                                       
                                    </p>
                                    <p class="p">
                                       Only supported in the <span class="keyword">NVIDIA Nsight Compute CLI</span>.
                                       
                                    </p>
                                 </td>
                                 <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3166" rowspan="1" colspan="1">
                                    <p class="p">
                                       Default: unset
                                       
                                    </p>
                                    <p class="p">
                                       Set to "1" to disable deployment.
                                       
                                    </p>
                                 </td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3160" rowspan="1" colspan="1">
                                    NV_COMPUTE_PROFILER_LOCAL_CONNECTION_OVERRIDE
                                    
                                 </td>
                                 <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3163" rowspan="1" colspan="1">
                                    <p class="p">
                                       Override the default local connection mechanism between frontend and profiled target processes.
                                       The default mechanism is platform-dependent.
                                       This should only be used if there are connection problems between frontend and target processes in a local launch.
                                       
                                    </p>
                                 </td>
                                 <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3166" rowspan="1" colspan="1">
                                    <p class="p">
                                       Default: unset (use default mechanism)
                                       
                                    </p>
                                    <p class="p">
                                       Set to "uds" to use Unix Domain Socket connections (available on POSIX platforms only).
                                       Set to "tcp" to use TCP (available on all platforms).
                                       Set to "named-pipes" to use Windows Named Pipes (available on Windows only).
                                       
                                    </p>
                                 </td>
                              </tr>
                              <tr class="row">
                                 <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3160" rowspan="1" colspan="1">
                                    NV_COMPUTE_PROFILER_DISABLE_SW_PRE_PASS
                                    
                                 </td>
                                 <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3163" rowspan="1" colspan="1">
                                    <p class="p">
                                       Disable the instruction-level software (SW) metric pre-pass.
                                       When collecting SW-patched metrics, such as <samp class="ph codeph">inst_executed</samp>,
                                       the pre-pass is used to determine which functions are executed as part of the kernel and should be patched.
                                       This requires a separate replay pass and, if only instruction-level SW metrics are to be collected, prevents single-pass data
                                       collection.
                                       Disabling the pre-pass can improve performance if memory save-and-restore is undesirable and application replay is not possible.
                                       
                                    </p>
                                 </td>
                                 <td class="entry" valign="top" width="33.33333333333333%" headers="d54e3166" rowspan="1" colspan="1">
                                    <p class="p">
                                       Default: unset (use pre-pass when applicable)
                                       
                                    </p>
                                    <p class="p">
                                       Set to "1" to disable pre-pass.
                                       
                                    </p>
                                 </td>
                              </tr>
                           </tbody>
                        </table>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="nvprof-guide"><a name="nvprof-guide" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#nvprof-guide" name="nvprof-guide" shape="rect">6.&nbsp;Nvprof Transition Guide</a></h2>
                  <div class="body conbody">
                     <p class="p">
                        This guide provides tips for moving from nvprof to <span class="keyword">NVIDIA Nsight Compute CLI</span>.
                        <span class="keyword">NVIDIA Nsight Compute CLI</span> tries to provide as much feature and usage parity as possible with nvprof,
                        but some features are now covered by different tools and some command line options have changed their name or meaning.
                        
                     </p>
                  </div>
                  <div class="topic concept nested1" id="nvprof-trace"><a name="nvprof-trace" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvprof-trace" name="nvprof-trace" shape="rect">6.1.&nbsp;Trace</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li"><strong class="ph b">GPU and API trace</strong><p class="p"><span class="keyword">NVIDIA Nsight Compute CLI</span> does not support any form of tracing GPU or API activities.
                                 This functionality is covered by <a class="xref" href="https://developer.nvidia.com/nsight-systems" target="_blank" shape="rect">NVIDIA Nsight Systems</a>.
                                 
                              </p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvprof-metric-collection"><a name="nvprof-metric-collection" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvprof-metric-collection" name="nvprof-metric-collection" shape="rect">6.2.&nbsp;Metric Collection</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li"><strong class="ph b">Finding available metrics</strong><p class="p">
                                 For nvprof, you can use <samp class="ph codeph">--query-metrics</samp> to see the list of metrics available for the current devices on your machine.
                                 You can also use <samp class="ph codeph">--devices</samp> to filter which local devices to query.
                                 For <span class="keyword">NVIDIA Nsight Compute CLI</span>, this functionality is the same.
                                 However, in addition, you can combine <samp class="ph codeph">--query-metrics</samp> with <samp class="ph codeph">--chip [chipname]</samp> to query the
                                 available metrics for any chip, not only the ones in your present CUDA devices.
                                 
                              </p>
                              <p class="p">
                                 Note that metric names have changed between nvprof and <span class="keyword">NVIDIA Nsight Compute CLI</span>
                                 and metric names also differ between chips after (and including) GV100 and those before.
                                 See <a class="xref" href="index.html#nvprof-metric-comparison" shape="rect">Metric Comparison</a> for a comparison of nvprof and <span class="keyword">NVIDIA Nsight Compute</span> metric names.
                                 
                              </p>
                              <p class="p">
                                 On Volta and newer GPUs, most metrics are named using a base name and various suffixes, e.g. <dfn class="term">sm__throughput.avg.pct_of_peak_sustained_elapsed</dfn>.
                                 The base name is <dfn class="term">sm__throughput</dfn> and the suffix is <dfn class="term">avg.pct_of_peak_sustained_elapsed</dfn>.
                                 This is because most metrics follow the same structure and have the same set of suffixes.
                                 You need to pass the full name to <span class="keyword">NVIDIA Nsight Compute</span> when selecting a metric for profiling.
                                 
                              </p>
                              <p class="p">
                                 To reduce the number of metrics shown for Volta and newer GPUs when using <samp class="ph codeph">--query-metrics</samp>, by default only the base names are shown.
                                 Use <samp class="ph codeph">--query-metrics-mode suffix --metrics &lt;metrics list&gt;</samp> to see the full names for the chosen metrics.
                                 Use <samp class="ph codeph">--query-metrics-mode all</samp> to see all metrics with their full name directly.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Selecting which metrics to collect</strong><p class="p">
                                 In both nvprof and <span class="keyword">NVIDIA Nsight Compute CLI</span>, you can specify a comma-separated list of metric names
                                 to the <samp class="ph codeph">--metrics</samp> option. While nvprof would allow you to collect either a list or all metrics,
                                 in <span class="keyword">NVIDIA Nsight Compute CLI</span> you can use regular expressions to select a more fine-granular subset of all available metrics.
                                 For example, you can use <samp class="ph codeph">--metrics "regex:.*"</samp> to collect all metrics, or <samp class="ph codeph">--metrics "regex:smsp__cycles_elapsed"</samp>
                                 to collect all "smsp__cycles_elapsed" metrics.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Selecting which events to collect</strong><p class="p">
                                 You cannot collect any events in <span class="keyword">NVIDIA Nsight Compute CLI</span>.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Selecting which section to collect</strong><p class="p">
                                 In nvprof, you can either collect individual metrics or events, or a pre-configured set (all, analysis-metrics).
                                 <span class="keyword">NVIDIA Nsight Compute CLI</span> adds the concept of a <dfn class="term">section</dfn>.
                                 A section is a file that describes which metrics to collect for which GPU architecture, or architecture range.
                                 Furthermore, it defines how those metrics will be shown in both the command line output and the user interface.
                                 This includes structuring in tables, charts, histograms etc.
                                 
                              </p>
                              <p class="p"><span class="keyword">NVIDIA Nsight Compute CLI</span> comes with a set of pre-defined sections, located in the <samp class="ph codeph">sections</samp>
                                 directory. You can inspect, modify or extend those, as well as add new ones, e.g. to easily collect recurring metric sets.
                                 Each section specifies a unique <dfn class="term">section identifier</dfn>, and there must not be two sections with the same identifier in the search path.
                                 
                              </p>
                              <p class="p">
                                 By default, the sections associated with the default section set are collected.
                                 You can select one or more individual sections using the <samp class="ph codeph">--section [section identifier]</samp> option one or more times.
                                 If no <samp class="ph codeph">--section</samp> option is given, but <samp class="ph codeph">--metrics</samp> is used, no sections will be collected.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Selecting which section set to collect</strong><p class="p">
                                 In nvprof, you can either collect individual metrics or events, or a pre-configured set (all, analysis-metrics).
                                 <span class="keyword">NVIDIA Nsight Compute CLI</span> adds the concept of <dfn class="term">section sets</dfn>.
                                 A section set defines a group of sections to collect together, in order to achieve different profiling overheads,
                                 depending on the required analysis level of detail.
                                 
                              </p>
                              <p class="p">
                                 If no other options are selected, the default section set is collected.
                                 You can select one or more sets using the <samp class="ph codeph">--set [set identifier]</samp> option one or more times.
                                 If no <samp class="ph codeph">--set</samp> option is given, but <samp class="ph codeph">--section</samp> or <samp class="ph codeph">--metrics</samp> is used,
                                 no sets will be collected.
                                 
                              </p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvprof-metric-comparison"><a name="nvprof-metric-comparison" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvprof-metric-comparison" name="nvprof-metric-comparison" shape="rect">6.3.&nbsp;Metric Comparison</a></h3>
                     <div class="body conbody">
                        <p class="p"><span class="keyword">NVIDIA Nsight Compute</span> uses two groups of metrics, depending on which GPU
                           architecture is profiled. For nvprof metrics, the following table lists the equivalent
                           metrics in <span class="keyword">NVIDIA Nsight Compute</span>, if available. For a detailed explanation
                           of the structuring of PerfWorks metrics, see <a class="xref" href="../ProfilingGuide/index.html#metrics-structure" shape="rect">Metrics Structure</a>.
                           
                        </p>
                        <p class="p">
                           Metrics starting with <dfn class="term">sm__</dfn> are collected per-SM.
                           Metrics starting with <dfn class="term">smsp__</dfn> are collected per-SM subpartition.
                           However, all corresponding nvprof events are collected per-SM, only.
                           Check the <a class="xref" href="../ProfilingGuide/index.html#metrics-guide" shape="rect">Metrics Guide</a> for more details on these terms.
                           
                        </p>
                        <div class="tablenoborder"><a name="nvprof-metric-comparison__nvprof-metric-comparison-table" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="nvprof-metric-comparison__nvprof-metric-comparison-table" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 10. Metrics Mapping Table from CUPTI to PerfWorks</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="24%" id="d54e3529" rowspan="1" colspan="1">nvprof Metric</th>
                                    <th class="entry" valign="top" width="76%" id="d54e3532" rowspan="1" colspan="1">PerfWorks Metric or Formula (&gt;= SM 7.0)</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">achieved_occupancy</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">sm__warps_active.avg.pct_of_peak_sustained_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">atomic_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_set_accesses_pipe_lsu_mem_global_op_atom.sum + l1tex__t_set_accesses_pipe_lsu_mem_global_op_red.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">atomic_transactions_per_request</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">(l1tex__t_sectors_pipe_lsu_mem_global_op_atom.sum + l1tex__t_sectors_pipe_lsu_mem_global_op_red.sum) / (l1tex__t_requests_pipe_lsu_mem_global_op_atom.sum
                                       + l1tex__t_requests_pipe_lsu_mem_global_op_red.sum)
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">branch_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_average_branch_targets_threads_uniform.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">cf_executed</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_pipe_cbu.sum + smsp__inst_executed_pipe_adu.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">cf_fu_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">cf_issued</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">double_precision_fu_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_pipe_fp64.avg.pct_of_peak_sustained_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">dram_read_bytes</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">dram__bytes_read.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">dram_read_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">dram__bytes_read.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">dram_read_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">dram__sectors_read.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">dram_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">dram__throughput.avg.pct_of_peak_sustained_elapsed</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">dram_write_bytes</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">dram__bytes_write.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">dram_write_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">dram__bytes_write.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">dram_write_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">dram__sectors_write.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">eligible_warps_per_cycle</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warps_eligible.sum.per_cycle_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_dp</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_dadd_pred_on.sum + smsp__sass_thread_inst_executed_op_dmul_pred_on.sum + smsp__sass_thread_inst_executed_op_dfma_pred_on.sum
                                       * 2
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_dp_add</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_dadd_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_dp_fma</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_dfma_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_dp_mul</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_dmul_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_hp</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_hadd_pred_on.sum + smsp__sass_thread_inst_executed_op_hmul_pred_on.sum + smsp__sass_thread_inst_executed_op_hfma_pred_on.sum
                                       * 2
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_hp_add</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_hadd_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_hp_fma</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_hfma_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_hp_mul</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_hmul_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_sp</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_fadd_pred_on.sum + smsp__sass_thread_inst_executed_op_fmul_pred_on.sum + smsp__sass_thread_inst_executed_op_ffma_pred_on.sum
                                       * 2
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_sp_add</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_fadd_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_sp_fma</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_ffma_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_sp_mul</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_fmul_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_count_sp_special</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_dp_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_ops_dadd_dmul_dfma_pred_on.avg.pct_of_peak_sustained_elapsed</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_hp_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_ops_hadd_hmul_hfma_pred_on.avg.pct_of_peak_sustained_elapsed</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">flop_sp_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_ops_fadd_fmul_ffma_pred_on.avg.pct_of_peak_sustained_elapsed</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">gld_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_average_data_bytes_per_sector_mem_global_op_ld.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">gld_requested_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">gld_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_bytes_pipe_lsu_mem_global_op_ld.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">gld_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">gld_transactions_per_request</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_ld.ratio</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">global_atomic_requests</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_requests_pipe_lsu_mem_global_op_atom.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">global_hit_rate</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">(l1tex__t_sectors_pipe_lsu_mem_global_op_ld_lookup_hit.sum +
                                       l1tex__t_sectors_pipe_lsu_mem_global_op_st_lookup_hit.sum +
                                       l1tex__t_sectors_pipe_lsu_mem_global_op_red_lookup_hit.sum +
                                       l1tex__t_sectors_pipe_lsu_mem_global_op_atom_lookup_hit.sum) /
                                       (l1tex__t_sectors_pipe_lsu_mem_global_op_ld.sum +
                                       l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum +
                                       l1tex__t_sectors_pipe_lsu_mem_global_op_red.sum +
                                       l1tex__t_sectors_pipe_lsu_mem_global_op_atom.sum)
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">global_load_requests</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_requests_pipe_lsu_mem_global_op_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">global_reduction_requests</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_requests_pipe_lsu_mem_global_op_red.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">global_store_requests</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_requests_pipe_lsu_mem_global_op_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">gst_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_average_data_bytes_per_sector_mem_global_op_st.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">gst_requested_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">gst_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_bytes_pipe_lsu_mem_global_op_st.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">gst_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_sectors_pipe_lsu_mem_global_op_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">gst_transactions_per_request</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__average_t_sectors_per_request_pipe_lsu_mem_global_op_st.ratio</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">half_precision_fu_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_pipe_fp16.avg.pct_of_peak_sustained_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_bit_convert</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_conversion_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_compute_ld_st</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_memory_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_control</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_control_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_global_atomics</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_inst_executed_op_global_atom.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_global_loads</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_global_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_global_reductions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_global_red.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_global_stores</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_global_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_local_loads</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_local_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_local_stores</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_local_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_shared_atomics</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_shared_atom.sum + smsp__inst_executed_op_shared_atom_dot_alu.sum + smsp__inst_executed_op_shared_atom_dot_cas.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_shared_loads</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_shared_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_shared_stores</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_shared_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_surface_atomics</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_surface_atom.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_surface_loads</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_surface_ld.sum + smsp__inst_executed_op_shared_atom_dot_alu.sum + smsp__inst_executed_op_shared_atom_dot_cas.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_surface_reductions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_surface_red.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_surface_stores</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_surface_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_executed_tex_ops</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_op_texture.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_fp_16</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_fp16_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_fp_32</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_fp32_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_fp_64</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_fp64_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_integer</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_integer_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_inter_thread_communication</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_inter_thread_communication_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_issued</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_issued.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_misc</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_thread_inst_executed_op_misc_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_per_warp</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__average_inst_executed_per_warp.ratio</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">inst_replay_overhead</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">ipc</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed.avg.per_cycle_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">issue_slot_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__issue_active.avg.pct_of_peak_sustained_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">issue_slots</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_issued.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">issued_ipc</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_issued.avg.per_cycle_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l1_sm_lg_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__lsu_writeback_active.avg.pct_of_peak_sustained_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_atomic_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">2 * (  lts__t_sectors_op_atom.sum.per_second
                                       + lts__t_sectors_op_red.sum.per_second
                                       )
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_atomic_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">2 * (  lts__t_sectors_op_atom.sum
                                       + lts__t_sectors_op_red.sum
                                       )
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_global_atomic_store_bytes</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_atom.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_global_load_bytes</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_local_global_store_bytes</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_local_op_st.sum + lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_global_op_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_local_load_bytes</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_bytes_equiv_l1sectormiss_pipe_lsu_mem_local_op_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_read_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">  lts__t_sectors_op_read.sum.per_second
                                       + lts__t_sectors_op_atom.sum.per_second
                                       + lts__t_sectors_op_red.sum.per_second
                                       
                                       <a class="xref" href="index.html#nvprof-metric-comparison__l2-read-reduction-footnote" shape="rect"><sup>2</sup></a></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_read_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">  lts__t_sectors_op_read.sum
                                       + lts__t_sectors_op_atom.sum
                                       + lts__t_sectors_op_red.sum
                                       <a class="xref" href="index.html#nvprof-metric-comparison__l2-read-reduction-footnote" shape="rect"><sup>2</sup></a></td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_surface_load_bytes</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_bytes_equiv_l1sectormiss_pipe_tex_mem_surface_op_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_surface_store_bytes</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_bytes_equiv_l1sectormiss_pipe_tex_mem_surface_op_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_tex_hit_rate</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sector_hit_rate.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_tex_read_hit_rate</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sector_op_read_hit_rate.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_tex_read_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_read.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_tex_read_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_read.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_tex_write_hit_rate</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sector_op_write_hit_rate.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_tex_write_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_write.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_tex_write_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_write.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sectors.avg.pct_of_peak_sustained_elapsed</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_write_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">  lts__t_sectors_op_write.sum.per_second
                                       + lts__t_sectors_op_atom.sum.per_second
                                       + lts__t_sectors_op_red.sum.per_second
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">l2_write_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">  lts__t_sectors_op_write.sum
                                       + lts__t_sectors_op_atom.sum
                                       + lts__t_sectors_op_red.sum
                                       
                                    </td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">ldst_executed</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">ldst_fu_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_pipe_lsu.avg.pct_of_peak_sustained_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">ldst_issued</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">local_hit_rate</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">local_load_requests</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_requests_pipe_lsu_mem_local_op_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">local_load_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_bytes_pipe_lsu_mem_local_op_ld.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">local_load_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_sectors_pipe_lsu_mem_local_op_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">local_load_transactions_per_request</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__average_t_sectors_per_request_pipe_lsu_mem_local_op_ld.ratio</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">local_memory_overhead</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">local_store_requests</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_requests_pipe_lsu_mem_local_op_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">local_store_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_bytes_pipe_lsu_mem_local_op_st.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">local_store_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_sectors_pipe_lsu_mem_local_op_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">local_store_transactions_per_request</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__average_t_sectors_per_request_pipe_lsu_mem_local_op_st.ratio</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_data_receive_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_data_transmission_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_overhead_data_received</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">(nvlrx__bytes_data_protocol.sum / nvlrx__bytes.sum) * 100</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_overhead_data_transmitted</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">(nvltx__bytes_data_protocol.sum / nvltx__bytes.sum) * 100</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_receive_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">nvlrx__bytes.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_total_data_received</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">nvlrx__bytes.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_total_data_transmitted</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">nvltx__bytes.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_total_nratom_data_transmitted</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_total_ratom_data_transmitted</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_total_response_data_received</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_total_write_data_transmitted</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_transmit_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">nvltx__bytes.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_user_data_received</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">nvlrx__bytes_data_user.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_user_data_transmitted</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">nvltx__bytes_data_user.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_user_nratom_data_transmitted</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_user_ratom_data_transmitted</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_user_response_data_received</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">nvlink_user_write_data_transmitted</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">pcie_total_data_received</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">pcie__read_bytes.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">pcie_total_data_transmitted</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">pcie__write_bytes.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">shared_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__sass_average_data_bytes_per_wavefront_mem_shared.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">shared_load_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">shared_load_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">shared_load_transactions_per_request</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">shared_store_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">shared_store_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">shared_store_transactions_per_request</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">shared_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__data_pipe_lsu_wavefronts_mem_shared.avg.pct_of_peak_sustained_elapsed</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">single_precision_fu_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__pipe_fma_cycles_active.avg.pct_of_peak_sustained_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">sm_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__cycles_active.avg.pct_of_peak_sustained_elapsed</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">sm_tex_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__texin_sm2tex_req_cycles_active.avg.pct_of_peak_sustained_elapsed</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">special_fu_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_pipe_xu.avg.pct_of_peak_sustained_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">stall_constant_memory_dependency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warp_issue_stalled_imc_miss_per_warp_active.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">stall_exec_dependency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warp_issue_stalled_short_scoreboard_per_warp_active.pct + smsp__warp_issue_stalled_wait_per_warp_active.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">stall_inst_fetch</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warp_issue_stalled_no_instruction_per_warp_active.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">stall_memory_dependency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warp_issue_stalled_long_scoreboard_per_warp_active.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">stall_memory_throttle</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warp_issue_stalled_drain_per_warp_active.pct + smsp__warp_issue_stalled_lg_throttle_per_warp_active.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">stall_not_selected</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warp_issue_stalled_not_selected_per_warp_active.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">stall_other</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warp_issue_stalled_dispatch_stall_per_warp_active.pct + smsp__warp_issue_stalled_misc_per_warp_active.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">stall_pipe_busy</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warp_issue_stalled_math_pipe_throttle_per_warp_active.pct + smsp__warp_issue_stalled_mio_throttle_per_warp_active.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">stall_sleeping</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warp_issue_stalled_sleeping_per_warp_active.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">stall_sync</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warp_issue_stalled_barrier_per_warp_active.pct + smsp__warp_issue_stalled_membar_per_warp_active.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">stall_texture</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__warp_issue_stalled_tex_throttle_per_warp_active.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">surface_atomic_requests</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_requests_pipe_tex_mem_surface_op_atom.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">surface_load_requests</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_requests_pipe_tex_mem_surface_op_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">surface_reduction_requests</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_requests_pipe_tex_mem_surface_op_red.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">surface_store_requests</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_requests_pipe_tex_mem_surface_op_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">sysmem_read_bytes</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sectors_aperture_sysmem_op_read.sum * 32</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">sysmem_read_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sectors_aperture_sysmem_op_read.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">sysmem_read_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sectors_aperture_sysmem_op_read.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">sysmem_read_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">sysmem_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">sysmem_write_bytes</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sectors_aperture_sysmem_op_write.sum * 32</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">sysmem_write_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sectors_aperture_sysmem_op_write.sum.per_second</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">sysmem_write_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">lts__t_sectors_aperture_sysmem_op_write.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">sysmem_write_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">tensor_precision_fu_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">sm__pipe_tensor_op_hmma_cycles_active.avg.pct_of_peak_sustained_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">tensor_precision_int_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">sm__pipe_tensor_op_imma_cycles_active.avg.pct_of_peak_sustained_active (SM 7.2+)</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">tex_cache_hit_rate</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_sector_hit_rate.pct</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">tex_cache_throughput</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">tex_cache_transactions</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__lsu_writeback_active.avg.pct_of_peak_sustained_active + l1tex__tex_writeback_active.avg.pct_of_peak_sustained_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">tex_fu_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__inst_executed_pipe_tex.avg.pct_of_peak_sustained_active</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">tex_sm_tex_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__f_tex2sm_cycles_active.avg.pct_of_peak_sustained_elapsed</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">tex_sm_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">sm__mio2rf_writeback_active.avg.pct_of_peak_sustained_elapsed</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">tex_utilization</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">texture_load_requests</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">l1tex__t_requests_pipe_tex_mem_texture.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">warp_execution_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__thread_inst_executed_per_inst_executed.ratio</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="24%" headers="d54e3529" rowspan="1" colspan="1">warp_nonpred_execution_efficiency</td>
                                    <td class="entry" valign="top" width="76%" headers="d54e3532" rowspan="1" colspan="1">smsp__thread_inst_executed_per_inst_executed.pct</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvprof-event-comparison"><a name="nvprof-event-comparison" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvprof-event-comparison" name="nvprof-event-comparison" shape="rect">6.4.&nbsp;Event Comparison</a></h3>
                     <div class="body conbody">
                        <p class="p">
                           For nvprof events, the following table lists the equivalent
                           metrics in <span class="keyword">NVIDIA Nsight Compute</span>, if available. For a detailed explanation
                           of the structuring of PerfWorks metrics, see <a class="xref" href="../ProfilingGuide/index.html#metrics-structure" shape="rect">Metrics Structure</a>.
                           
                        </p>
                        <p class="p">
                           Metrics starting with <dfn class="term">sm__</dfn> are collected per-SM.
                           Metrics starting with <dfn class="term">smsp__</dfn> are collected per-SM subpartition.
                           However, all corresponding nvprof events are collected per-SM only.
                           Check the <a class="xref" href="../ProfilingGuide/index.html#metrics-guide" shape="rect">Metrics Guide</a> for more details on these terms.
                           
                        </p>
                        <div class="tablenoborder"><a name="nvprof-event-comparison__nvprof-event-comparison-table" shape="rect">
                              <!-- --></a><table cellpadding="4" cellspacing="0" summary="" id="nvprof-event-comparison__nvprof-event-comparison-table" class="table" frame="border" border="1" rules="all">
                              <caption><span class="tablecap">Table 11. Events Mapping Table from CUPTI Events to PerfWorks Metrics for Compute Capability &gt;= 7.0</span></caption>
                              <thead class="thead" align="left">
                                 <tr class="row">
                                    <th class="entry" valign="top" width="30%" id="d54e5256" rowspan="1" colspan="1">nvprof Event</th>
                                    <th class="entry" valign="top" id="d54e5259" rowspan="1" colspan="1">PerfWorks Metric or Formula (&gt;= SM 7.0)</th>
                                 </tr>
                              </thead>
                              <tbody class="tbody">
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">active_cycles</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">sm__cycles_active.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">active_cycles_pm</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">sm__cycles_active.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">active_cycles_sys</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">sys__cycles_active.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">active_warps</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">sm__warps_active.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">active_warps_pm</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">sm__warps_active.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">atom_count</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_generic_atom_dot_alu.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">elapsed_cycles_pm</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">sm__cycles_elapsed.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">elapsed_cycles_sm</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">sm__cycles_elapsed.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">elapsed_cycles_sys</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">sys__cycles_elapsed.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">fb_subp0_read_sectors</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">dram__sectors_read.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">fb_subp1_read_sectors</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">dram__sectors_read.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">fb_subp0_write_sectors</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">dram__sectors_write.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">fb_subp1_write_sectors</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">dram__sectors_write.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">global_atom_cas</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_generic_atom_dot_cas.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">gred_count</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_global_red.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">sm__inst_executed.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fma_pipe_s0</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fma.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fma_pipe_s1</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fma.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fma_pipe_s2</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fma.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fma_pipe_s3</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fma.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fp16_pipe_s0</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fp16.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fp16_pipe_s1</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fp16.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fp16_pipe_s2</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fp16.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fp16_pipe_s3</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fp16.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fp64_pipe_s0</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fp64.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fp64_pipe_s1</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fp64.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fp64_pipe_s2</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fp64.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_executed_fp64_pipe_s3</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_pipe_fp64.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_issued1</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">sm__inst_issued.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp0_read_sector_misses</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_op_read_lookup_miss.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp1_read_sector_misses</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_op_read_lookup_miss.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp0_read_sysmem_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_aperture_sysmem_op_read.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp1_read_sysmem_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_aperture_sysmem_op_read.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp0_read_tex_hit_sectors</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_read_lookup_hit.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp1_read_tex_hit_sectors</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_read_lookup_hit.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp0_read_tex_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_read.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp1_read_tex_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_read.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp0_total_read_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_op_read.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp1_total_read_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_op_read.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp0_total_write_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_op_write.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp1_total_write_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_op_write.sum + lts__t_sectors_op_atom.sum + lts__t_sectors_op_red.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp0_write_sector_misses</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_op_write_lookup_miss.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp1_write_sector_misses</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_op_write_lookup_miss.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp0_write_sysmem_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_aperture_sysmem_op_write.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp1_write_sysmem_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_aperture_sysmem_op_write.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp0_write_tex_hit_sectors</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_write_lookup_hit.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp1_write_tex_hit_sectors</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_write_lookup_hit.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp0_write_tex_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_write.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">l2_subp1_write_tex_sector_queries</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">lts__t_sectors_srcunit_tex_op_write.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">not_predicated_off_thread_inst_executed</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__thread_inst_executed_pred_on.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">pcie_rx_active_pulse</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">pcie_tx_active_pulse</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">prof_trigger_00</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">prof_trigger_01</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">prof_trigger_02</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">prof_trigger_03</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">prof_trigger_04</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">prof_trigger_05</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">prof_trigger_06</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">prof_trigger_07</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">n/a</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">inst_issued0</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__issue_inst0.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">sm_cta_launched</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">sm__ctas_launched.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">shared_load</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_shared_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">shared_store</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_shared_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">generic_load</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_generic_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">generic_store</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_generic_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">global_load</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_global_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">global_store</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_global_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">local_load</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_local_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">local_store</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_local_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">shared_atom</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_shared_atom.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">shared_atom_cas</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__inst_executed_op_shared_atom_dot_cas.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">shared_ld_bank_conflict</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">shared_st_bank_conflict</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">shared_ld_transactions</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">shared_st_transactions</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">l1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">tensor_pipe_active_cycles_s0</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__pipe_tensor_cycles_active.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">tensor_pipe_active_cycles_s1</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__pipe_tensor_cycles_active.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">tensor_pipe_active_cycles_s2</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__pipe_tensor_cycles_active.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">tensor_pipe_active_cycles_s3</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__pipe_tensor_cycles_active.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">thread_inst_executed</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__thread_inst_executed.sum</td>
                                 </tr>
                                 <tr class="row">
                                    <td class="entry" valign="top" width="30%" headers="d54e5256" rowspan="1" colspan="1">warps_launched</td>
                                    <td class="entry" valign="top" headers="d54e5259" rowspan="1" colspan="1">smsp__warps_launched.sum</td>
                                 </tr>
                              </tbody>
                           </table>
                        </div>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvprof-filtering"><a name="nvprof-filtering" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvprof-filtering" name="nvprof-filtering" shape="rect">6.5.&nbsp;Filtering</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li"><strong class="ph b">Filtering by kernel name</strong><p class="p"> Both nvprof and <span class="keyword">NVIDIA Nsight Compute CLI</span> support filtering which
                                 kernels' data should be collected. In nvprof, the option is
                                 <samp class="ph codeph">--kernels</samp> and applies to the metric collection
                                 options that follow it. In <span class="keyword">NVIDIA Nsight Compute CLI</span>, the option is named
                                 <samp class="ph codeph">--kernel-regex</samp> and applies to the complete application
                                 execution. In other words, <span class="keyword">NVIDIA Nsight Compute CLI</span> does not
                                 currently support collecting different metrics for different kernels, unless
                                 they execute on different GPU architectures. 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Filtering by kernel ID</strong><p class="p">
                                 Nvprof allows users to specify which kernels to profile by means of a kernel ID description, passed through the same <samp class="ph codeph">--kernels</samp> option.
                                 In <span class="keyword">NVIDIA Nsight Compute CLI</span>, the syntax for this kernel ID is identical, but the option is named
                                 <samp class="ph codeph">--kernel-id</samp>.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Filtering by device</strong><p class="p"> Both nvprof and <span class="keyword">NVIDIA Nsight Compute CLI</span> use
                                 <samp class="ph codeph">--devices</samp> to select which devices to profile. In
                                 contrast to nvprof, in <span class="keyword">NVIDIA Nsight Compute CLI</span> the option applies
                                 globally, not only to the options that follow it. 
                              </p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvprof-output"><a name="nvprof-output" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvprof-output" name="nvprof-output" shape="rect">6.6.&nbsp;Output</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li"><strong class="ph b">API trace and summary</strong><p class="p"><span class="keyword">NVIDIA Nsight Compute CLI</span> does not support any form of API-usage related output.
                                 No API data is captured during profiling.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Dependency analysis</strong><p class="p"><span class="keyword">NVIDIA Nsight Compute CLI</span> does not support any dependency analysis.
                                 No API data is captured during profiling.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">GPU trace</strong><p class="p"><span class="keyword">NVIDIA Nsight Compute CLI</span> does not support any GPU trace output.
                                 Due to kernel replay during profiling, kernel executions are serialized, and start and end timestamps do
                                 not necessarily match those during application execution.
                                 In addition, no records for memory activities are recorded.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Print summary</strong><p class="p">
                                 While nvprof has several command line options to specify which summary information to print,
                                 <span class="keyword">NVIDIA Nsight Compute CLI</span> uses further arguments to the <samp class="ph codeph">--summary</samp> option.
                                 Profiling data can be summarized <samp class="ph codeph">per-gpu</samp>, <samp class="ph codeph">per-kernel</samp> or <samp class="ph codeph">per-nvtx</samp> context.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Kernel name demangling</strong><p class="p">
                                 Nvprof allows users to turn name demangling on or off using the <samp class="ph codeph">--demangling</samp> option.
                                 <span class="keyword">NVIDIA Nsight Compute CLI</span> currently always demangles kernel names in the output.
                                 In addition, the option <samp class="ph codeph">--kernel-regex-base</samp> can be used to decide which name format should be used
                                 when matching kernel names during filtering.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Pages</strong><p class="p">
                                 Nvprof has no concept of output pages; all data is shown as a list or summarized.
                                 <span class="keyword">NVIDIA Nsight Compute CLI</span> uses <dfn class="term">pages</dfn> to define how data should be structured and printed.
                                 Those correspond to the report pages used in the GUI variant.
                                 The option <samp class="ph codeph">--page</samp> can be used to select which page to show, and <samp class="ph codeph">details</samp> is selected by default.
                                 All pages also support printing in CSV format for easier post-processing, using the <samp class="ph codeph">--csv</samp> option.
                                 
                              </p>
                           </li>
                        </ul>
                     </div>
                  </div>
                  <div class="topic concept nested1" id="nvprof-launch-attach"><a name="nvprof-launch-attach" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#nvprof-launch-attach" name="nvprof-launch-attach" shape="rect">6.7.&nbsp;Launch and Attach</a></h3>
                     <div class="body conbody">
                        <ul class="ul">
                           <li class="li"><strong class="ph b">Launching a process for profiling</strong><p class="p">
                                 In nvprof, the application to profile is passed to the tool as a command line argument.
                                 The application must be a local executable.
                                 Alternatively, you can choose to use the tool in a <dfn class="term">daemon mode</dfn> and profile all applicable processes on the local machine (nvprof option <samp class="ph codeph">--profile-all-processes</samp>).
                                 In nvprof, the decision to profile the complete process tree or only the root process is made via the <samp class="ph codeph">--profile-child-processes</samp> flag.
                                 In <span class="keyword">NVIDIA Nsight Compute CLI</span>, the <samp class="ph codeph">--target-processes</samp> option is used for this.
                                 
                              </p>
                              <p class="p"><span class="keyword">NVIDIA Nsight Compute CLI</span> has several modes to determine which application to collect data for.
                                 By default, the executable passed via the command line to the tool is started, connected to, and profiled.
                                 This mode is called <samp class="ph codeph">launch-and-attach</samp>.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Launching a process for attach</strong><p class="p">
                                 In contrast to nvprof, you can choose to only launch a local executable. In this mode (<samp class="ph codeph">--mode launch</samp>),
                                 the process is started, connected to, but then suspended at the first CUDA API call.
                                 Subsequently, there is a third mode (<samp class="ph codeph">--mode attach</samp>) to attach to any process launched using the
                                 aforementioned mode. In this case, all profiling and output options would be passed to the attaching instance of
                                 <span class="keyword">NVIDIA Nsight Compute CLI</span>.
                                 
                              </p>
                           </li>
                           <li class="li"><strong class="ph b">Remote profiling</strong><p class="p"> Finally, using <samp class="ph codeph">launch</samp> and <samp class="ph codeph">attach</samp>, you can
                                 connect to a launched process on a remote machine, which could even run a
                                 different operating system than the local host. Use <samp class="ph codeph">--hostname</samp>
                                 to select which remote host to connect to. 
                              </p>
                           </li>
                        </ul>
                     </div>
                  </div>
               </div>
               <div class="topic concept nested0" id="notices-header"><a name="notices-header" shape="rect">
                     <!-- --></a><h2 class="title topictitle1"><a href="#notices-header" name="notices-header" shape="rect">Notices</a></h2>
                  <div class="topic reference nested1" id="notice"><a name="notice" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#notice" name="notice" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Notice</h3>
                           <p class="p">ALL NVIDIA DESIGN SPECIFICATIONS, REFERENCE BOARDS, FILES, DRAWINGS, DIAGNOSTICS, LISTS, AND OTHER DOCUMENTS (TOGETHER AND
                              SEPARATELY, "MATERIALS") ARE BEING PROVIDED "AS IS." NVIDIA MAKES NO WARRANTIES, EXPRESSED, IMPLIED, STATUTORY, OR OTHERWISE
                              WITH RESPECT TO THE MATERIALS, AND EXPRESSLY DISCLAIMS ALL IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTABILITY, AND FITNESS
                              FOR A PARTICULAR PURPOSE. 
                           </p>
                           <p class="p">Information furnished is believed to be accurate and reliable. However, NVIDIA Corporation assumes no responsibility for the
                              consequences of use of such information or for any infringement of patents or other rights of third parties that may result
                              from its use. No license is granted by implication or otherwise under any patent rights of NVIDIA Corporation. Specifications
                              mentioned in this publication are subject to change without notice. This publication supersedes and replaces all other information
                              previously supplied. NVIDIA Corporation products are not authorized as critical components in life support devices or systems
                              without express written approval of NVIDIA Corporation.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="trademarks"><a name="trademarks" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#trademarks" name="trademarks" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Trademarks</h3>
                           <p class="p">NVIDIA and the NVIDIA logo are trademarks or registered trademarks of NVIDIA Corporation
                              in the U.S. and other countries.  Other company and product names may be trademarks of
                              the respective companies with which they are associated.
                           </p>
                        </div>
                     </div>
                  </div>
                  <div class="topic reference nested1" id="copyright-past-to-present"><a name="copyright-past-to-present" shape="rect">
                        <!-- --></a><h3 class="title topictitle2"><a href="#copyright-past-to-present" name="copyright-past-to-present" shape="rect"></a></h3>
                     <div class="body refbody">
                        <div class="section">
                           <h3 class="title sectiontitle">Copyright</h3>
                           <p class="p">© <span class="ph">2018</span>-<span class="ph">2022</span> NVIDIA
                              Corporation and affiliates. All rights reserved.
                           </p>
                           <p class="p">This product includes software developed by the Syncro Soft SRL (http://www.sync.ro/).</p>
                        </div>
                     </div>
                  </div>
               </div>
               <div class="fn"><a id="command-line-options-profile__import-filtering-enabled-option" shape="rect"><sup>1</sup></a>  
                  This filtering option is available when using <samp class="ph codeph">--import</samp>.
                  
               </div>
               <div class="fn"><a id="nvprof-metric-comparison__l2-read-reduction-footnote" shape="rect"><sup>2</sup></a>  
                  Sector reads from reductions are added here only for compatibility with the current definition of the metric in nvprof.
                  Reductions do not cause data to be communicated from L2 back to L1.
                  
               </div>
               
               <hr id="contents-end"></hr>
               
            </article>
         </div>
      </div>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/formatting/common.min.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-write.js"></script>
      <script language="JavaScript" type="text/javascript" charset="utf-8" src="../common/scripts/google-analytics/google-analytics-tracker.js"></script>
      <script type="text/javascript">_satellite.pageBottom();</script>
      <script type="text/javascript">var switchTo5x=true;</script><script type="text/javascript" src="http://w.sharethis.com/button/buttons.js"></script><script type="text/javascript">stLight.options({publisher: "998dc202-a267-4d8e-bce9-14debadb8d92", doNotHash: false, doNotCopy: false, hashAddressBar: false});</script></body>
</html>