# File: tu10x-gfxt.config
#
# Package info:
#   nvidia-cuda-toolkit 12.4.1-3
#   links: PTS, VCS
#   area: non-free
#   in suites: forky, sid
#   size: 18,505,836 kB
#   sloc: ansic: 203,477; cpp: 64,769; python: 34,699; javascript: 22,006; xml: 13,410; makefile: 3,085; sh: 2,343; perl: 352
# File content: 556 lines | stat: -rw-r--r-- 16,963 bytes
# NOTE: Metrics representing a superset of other metrics must be declared first
# if they appear in the same group (displayRows). Otherwise different metrics will
# dominate in the group row depending on zoom level if overlay display mode is used.
# Version number of this configuration (presumably the config format/schema
# version expected by the consuming tool - confirm).
version: 3
# List of metric sets; this file defines exactly one.
metricSets:
# alias is the short identifier of the set, name its human-readable title.
- alias: tu10x-gfxt
  name: Graphics Throughput Metrics for NVIDIA TU10x (frequency >= 10kHz)
  # Chips this metric set is declared for.
  chips: [TU102, TU104, TU106]
  # NOTE(review): meaning of "narrow" is not visible in this file; confirm
  # against the consumer's documentation before changing it.
  counterWidth: narrow
  # Display rows: each entry has a row name, a display type (overlay or
  # stacked), optional description/displayHeightScale, and the names of the
  # metrics it plots. Metric names match the `name` fields under `metrics:`.
  displayRows:
  # First of two entries named <Root>: overlays the four engine-activity
  # metrics at half the default height scale. GPU Active is listed first;
  # its counter id (gpu__engine_cycles_active_gr_or_ce) suggests it covers
  # the GR and copy-engine metrics that follow.
  - name: <Root>
    type: overlay
    displayHeightScale: 0.5
    metrics:
    - GPU Active
    - Async Copy Engine Active
    - Sync Copy Engine Active
    - GR Active
  # Second entry named <Root>: overlays GPU memory and PCIe read/write
  # bandwidth.
  # NOTE(review): displayHeightScale is negative here while the other <Root>
  # entry uses 0.5 - presumably this draws the graph mirrored below the first
  # one; confirm against the consumer's schema before "fixing" the sign.
  - name: <Root>
    type: overlay
    displayHeightScale: -0.5
    metrics:
    - GPU Memory Read Bandwidth
    - GPU Memory Write Bandwidth
    - PCIe Read Bandwidth
    - PCIe Write Bandwidth
  # Dedicated row for engine activity. Same four metrics as the first <Root>
  # entry, but with the Sync copy engine listed before the Async one.
  - name: GPU Active
    description: Activity of the graphics and copy engines
    type: overlay
    metrics:
    - GPU Active
    - Sync Copy Engine Active
    - Async Copy Engine Active
    - GR Active
  # One overlay row with a throughput metric per hardware unit (13 in total),
  # drawn at twice the default height scale.
  - name: Unit Throughputs
    description: Throughput of each major hardware unit
    type: overlay
    displayHeightScale: 2.0
    metrics:
    - SM Throughput
    - RTCORE Throughput
    - VAF Throughput
    - L1 Throughput
    - L2 Throughput
    - VRAM Throughput
    - PCIe Throughput
    - PD Throughput
    - PES+VPC Throughput
    - RASTER Throughput
    - PROP Throughput
    - ZROP Throughput
    - CROP Throughput
  # Overlay of the sync and async compute-in-flight metrics. The description
  # is a plain scalar folded over two lines, so no comment may be placed
  # between them.
  - name: Compute in Flight
    description: Compute dispatches sent to the GPU queues that have been issued,
      but not yet complete
    type: overlay
    metrics:
    - Sync Compute in Flight
    - Async Compute in Flight
  # Overlay of the draw-start and dispatch-start count metrics.
  - name: Draw/Dispatch Start
    description: Draw and dispatch start times
    type: overlay
    metrics:
    - Draw Started
    - Dispatch Started
  # Stacked (not overlaid) row: warp slots split into three active shader
  # categories plus two unused categories, at twice the default height scale.
  # This row has no description field.
  - name: SM Occupancy (TPC View)
    type: stacked
    displayHeightScale: 2.0
    metrics:
    - Vertex/Tess/Geometry Warps
    - Pixel Warps
    - Compute Warps
    - Unallocated Warps in Active SMs
    - Idle SM Unused Warp Slots
  # Overlay of warp-launch stall metrics. The general PS stall metric is
  # listed before its register-limited breakdown; the CS register-limited
  # metric follows.
  - name: Warp Can't Launch
    description: Warp can't launch reasons
    type: overlay
    metrics:
    - PS Warp Can't Launch
    - PS Warp Can't Launch - Register Limited
    - CS Warp Can't Launch - Register Limited
  # Overlay of the overall SM issue rate (listed first) and the per-pipe
  # throughput metrics (ALU, FMA, SFU, FP16+Tensor).
  - name: SM Instruction Throughputs
    description: SM instruction throughput breakdown by pipe
    type: overlay
    metrics:
    - SM Issue Active
    - SM ALU Pipe Throughput
    - SM FMA Pipe Throughput
    - SM SFU Pipe Throughput
    - SM FP16+Tensor Pipe Throughput
  # Overlay of four L1 stage metrics: LSU and texture data stages, LSU
  # writeback stage, and texture filter stage.
  - name: L1 Throughputs
    description: L1 cache throughputs breakdown by stage
    type: overlay
    metrics:
    - L1 LSU Data-Stage Throughput
    - L1 Texture Data-Stage Throughput
    - L1 LSU Writeback-Stage Throughput
    - L1 Texture Filter-Stage Throughput
  # Stacked row: L2 bandwidth split by the source unit issuing the traffic
  # (CROP, L1, ZROP).
  - name: L2 Bandwidth
    description: L2 bandwidth breakdown by source unit
    type: stacked
    metrics:
    - L2 Bandwidth from CROP
    - L2 Bandwidth from L1
    - L2 Bandwidth from ZROP
  # Overlay of the aggregate L2 hit rate and the hit rate restricted to
  # requests coming from L1.
  - name: L2 Hit Rates
    description: L2 cache hit rates in aggregate and for read operations from L1
    type: overlay
    metrics:
    - L2 Hit Rate
    - L2 Hit Rate from L1
  # Stacked row: GPU memory read and write bandwidth are drawn on top of each
  # other rather than overlaid.
  - name: GPU Memory Bandwidth
    description: GPU memory read/write bandwidth
    type: stacked
    metrics:
    - GPU Memory Read Bandwidth
    - GPU Memory Write Bandwidth
  # Overlay of PCIe read and write bandwidth (unlike the GPU memory bandwidth
  # row, which is stacked).
  - name: PCIe Bandwidth
    description: PCIe read/write bandwidth
    type: overlay
    metrics:
    - PCIe Read Bandwidth
    - PCIe Write Bandwidth
  # Overlay of PCIe read/write request counts per BAR aperture. BAR1 is
  # listed first, then BAR0 and BAR2.
  - name: PCIe to BAR Requests
    description: PCIe to BAR 0/1/2 read/write requests
    type: overlay
    metrics:
    - PCIe Read Requests to BAR1
    - PCIe Write Requests to BAR1
    - PCIe Read Requests to BAR0
    - PCIe Write Requests to BAR0
    - PCIe Read Requests to BAR2
    - PCIe Write Requests to BAR2
  # Overlay of ZCULL sample counts. Input Samples is the total and Rejected
  # Samples is a subset of it (see the description), so the superset is listed
  # first: the NOTE at the top of this file requires that order for metrics
  # sharing an overlay row, otherwise which one dominates varies with zoom.
  - name: ZCULL
    description: Total/Rejected number of samples sent to ZCULL
    type: overlay
    metrics:
    - ZCULL Input Samples
    - ZCULL Rejected Samples
  # Metric definitions. Each entry has a display name (the same strings are
  # listed in the displayRows entries), a counter id, a plot color and one or
  # more subMetrics pairing a label with an id suffix. Colors are quoted
  # because an unquoted leading '#' would start a YAML comment.
  metrics:
  # Engine-activity metrics. All four expose a "Throughput %" sub-metric
  # (.avg.pct_of_peak_sustained_elapsed) and a "Workloads" sub-metric (.sum).
  - name: GPU Active
    id: FE_A.TriageA.gpu__engine_cycles_active_gr_or_ce
    color: '#33B2B2'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Workloads
      suffix: .sum
  - name: Sync Copy Engine Active
    id: FE_A.TriageA.gpu__engine_cycles_active_any_syncce
    color: '#B2B233'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Workloads
      suffix: .sum
  - name: Async Copy Engine Active
    id: FE_A.TriageA.gpu__engine_cycles_active_any_asyncce
    color: '#FFA700'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Workloads
      suffix: .sum
  # Only entry in this group with a description; it is a plain scalar folded
  # over three lines.
  - name: GR Active
    id: FE_A.TriageA.gr__cycles_active
    description: Number of cycles the graphics/compute engine is active. The graphics
      and compute engine are active if there is any work in the graphics pipe or if
      the compute pipe is processing a dispatch.
    color: '#ED6A32'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Workloads
      suffix: .sum
  # Per-unit throughput metrics, in the same order as the "Unit Throughputs"
  # display row. Each exposes a single "Throughput %" sub-metric.
  - name: SM Throughput
    id: SM.TriageA.sm__throughput
    color: '#D100EE'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # NOTE(review): this id has no "<unit>.TriageA." prefix, unlike the other
  # entries in this group - confirm that is intentional.
  - name: RTCORE Throughput
    id: rtcore__cycles_executed
    color: '#D18841'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: VAF Throughput
    id: TPC.TriageA.vaf__throughput
    color: '#D13341'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: L1 Throughput
    id: SM.TriageA.l1tex__throughput
    color: '#FFC425'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: L2 Throughput
    id: LTS.TriageA.lts__throughput
    color: '#00AEDB'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: VRAM Throughput
    id: FBPA.TriageA.dramc__throughput
    color: '#F37735'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: PCIe Throughput
    id: PCI.TriageA.pcie__throughput
    color: '#3577F3'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: PD Throughput
    id: HUB.TriageA.pda__throughput
    color: '#00B159'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: PES+VPC Throughput
    id: GPC_B.TriageA.pes__throughput
    color: '#3FB159'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: RASTER Throughput
    id: GPC_A.TriageA.raster__throughput
    color: '#6FB159'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: PROP Throughput
    id: GPC_A.TriageA.prop__throughput
    color: '#9FB159'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: ZROP Throughput
    id: ROP.TriageA.zrop__throughput
    color: '#854442'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: CROP Throughput
    id: ROP.TriageA.crop__throughput
    color: '#AFB159'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # Compute-in-flight metrics for the sync and async dispatch queues, shown
  # together in the "Compute in Flight" row. Only "Throughput %" is exposed.
  - name: Sync Compute in Flight
    id: HUB.TriageA.gr__dispatch_cycles_active_queue_sync
    color: '#FE7F9C'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: Async Compute in Flight
    id: HUB.TriageA.gr__dispatch_cycles_active_queue_async
    color: '#FFD179'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # Count metrics for the "Draw/Dispatch Start" row. They expose only a
  # "Workloads" sub-metric (.sum), with no percentage-of-peak variant.
  - name: Draw Started
    id: FE_A.TriageA.fe__draw_count
    description: Number of draw calls started
    color: '#F0F000'
    subMetrics:
    - name: Workloads
      suffix: .sum
  - name: Dispatch Started
    id: HUB.TriageA.gr__dispatch_count
    description: Number of compute dispatches/grid launches started
    color: '#00F0F0'
    subMetrics:
    - name: Workloads
      suffix: .sum
  # Warp-occupancy metrics stacked in the "SM Occupancy (TPC View)" row. All
  # five share the same three sub-metrics: "Throughput %", "Avg" (.avg) and
  # "Avg Warps per Cycle" (.avg.per_cycle_elapsed).
  # Active warps by shader category: VTG, PS, CS.
  - name: Vertex/Tess/Geometry Warps
    id: TPC.TriageA.tpc__warps_active_shader_vtg_realtime
    color: '#0057E7'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Avg
      suffix: .avg
    - name: Avg Warps per Cycle
      suffix: .avg.per_cycle_elapsed
  - name: Pixel Warps
    id: TPC.TriageA.tpc__warps_active_shader_ps_realtime
    color: '#008744'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Avg
      suffix: .avg
    - name: Avg Warps per Cycle
      suffix: .avg.per_cycle_elapsed
  - name: Compute Warps
    id: TPC.TriageA.tpc__warps_active_shader_cs_realtime
    color: '#FFA700'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Avg
      suffix: .avg
    - name: Avg Warps per Cycle
      suffix: .avg.per_cycle_elapsed
  # Inactive warp slots, split by whether the SM is active or idle; both use
  # grey tones.
  - name: Unallocated Warps in Active SMs
    id: TPC.TriageA.tpc__warps_inactive_sm_active_realtime
    color: '#696969'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Avg
      suffix: .avg
    - name: Avg Warps per Cycle
      suffix: .avg.per_cycle_elapsed
  - name: Idle SM Unused Warp Slots
    id: TPC.TriageA.tpc__warps_inactive_sm_idle_realtime
    color: '#C0C0C0'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Avg
      suffix: .avg
    - name: Avg Warps per Cycle
      suffix: .avg.per_cycle_elapsed
  # Warp-launch stall metrics for the "Warp Can't Launch" row. The general PS
  # stall metric is defined before its register-allocation breakdown. These
  # ids carry no "<unit>.TriageA." prefix.
  - name: PS Warp Can't Launch
    id: tpc__warp_launch_cycles_stalled_shader_ps
    color: '#FF6F00'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: PS Warp Can't Launch - Register Limited
    id: tpc__warp_launch_cycles_stalled_shader_ps_reason_register_allocation
    color: '#00CF00'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: CS Warp Can't Launch - Register Limited
    id: tpc__warp_launch_cycles_stalled_shader_cs_reason_register_allocation
    color: '#1111CF'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # SM issue and per-pipe metrics for the "SM Instruction Throughputs" row.
  # All five share four sub-metrics: "Throughput %", "Instructions" (.sum),
  # "Avg Inst/Clk per SM" (.avg.per_cycle_elapsed) and "Peak Inst/Clk per SM"
  # (.avg.peak_sustained).
  # Overall issue rate; defined first, ahead of the per-pipe metrics.
  - name: SM Issue Active
    id: SM.TriageA.sm__inst_executed_realtime
    description: SM instruction issue rate. Each SM can issue 4 instructions/cycle
      (1 instruction/SMSP/cycle).
    color: '#CFCF3F'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Instructions
      suffix: .sum
    - name: Avg Inst/Clk per SM
      suffix: .avg.per_cycle_elapsed
    - name: Peak Inst/Clk per SM
      suffix: .avg.peak_sustained
  - name: SM ALU Pipe Throughput
    id: SM.TriageA.sm__inst_executed_pipe_alu_realtime
    color: '#FF6F00'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Instructions
      suffix: .sum
    - name: Avg Inst/Clk per SM
      suffix: .avg.per_cycle_elapsed
    - name: Peak Inst/Clk per SM
      suffix: .avg.peak_sustained
  - name: SM FMA Pipe Throughput
    id: SM.TriageA.sm__inst_executed_pipe_fma_realtime
    color: '#00CF00'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Instructions
      suffix: .sum
    - name: Avg Inst/Clk per SM
      suffix: .avg.per_cycle_elapsed
    - name: Peak Inst/Clk per SM
      suffix: .avg.peak_sustained
  # Displayed as "SFU"; the counter id names the pipe "xu".
  - name: SM SFU Pipe Throughput
    id: SM.TriageA.sm__inst_executed_pipe_xu_realtime
    color: '#00003F'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Instructions
      suffix: .sum
    - name: Avg Inst/Clk per SM
      suffix: .avg.per_cycle_elapsed
    - name: Peak Inst/Clk per SM
      suffix: .avg.peak_sustained
  # NOTE(review): id and description refer to pipe-active cycles, yet the
  # sub-metric labels say "Instructions" / "Inst/Clk" like the instruction
  # counters above - the labels may have been copied; confirm.
  - name: SM FP16+Tensor Pipe Throughput
    id: SM.TriageA.sm__pipe_shared_cycles_active_realtime
    description: Cycles the tensor pipe or fp16x2 pipe is active
    color: '#CF003F'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Instructions
      suffix: .sum
    - name: Avg Inst/Clk per SM
      suffix: .avg.per_cycle_elapsed
    - name: Peak Inst/Clk per SM
      suffix: .avg.peak_sustained
  # L1 stage metrics for the "L1 Throughputs" row; each exposes only
  # "Throughput %".
  # NOTE(review): this id has no "_realtime" suffix, unlike the texture
  # data-stage id below - confirm that is intentional.
  - name: L1 LSU Data-Stage Throughput
    id: SM.TriageA.l1tex__data_pipe_lsu_wavefronts
    color: '#CFCF3F'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: L1 Texture Data-Stage Throughput
    id: SM.TriageA.l1tex__data_pipe_tex_wavefronts_realtime
    color: '#00003F'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: L1 LSU Writeback-Stage Throughput
    id: SM.TriageA.l1tex__lsu_writeback_active
    color: '#CF003F'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: L1 Texture Filter-Stage Throughput
    id: SM.TriageA.l1tex__f_cycles_active
    color: '#3F00CF'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # L2 bandwidth per source unit, stacked in the "L2 Bandwidth" row. The L1
  # entry uses the "srcunit_tex" counter.
  # NOTE(review): these three ids start with "TriageA." and have no leading
  # unit name, unlike the "LTS.TriageA." ids of the L2 Throughput and L2 Hit
  # Rate metrics - confirm that is intentional.
  - name: L2 Bandwidth from CROP
    id: TriageA.lts__t_sector_throughput_srcunit_crop
    color: '#AA1111'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: L2 Bandwidth from L1
    id: TriageA.lts__t_sector_throughput_srcunit_tex
    color: '#11AA11'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: L2 Bandwidth from ZROP
    id: TriageA.lts__t_sector_throughput_srcunit_zrop
    color: '#1111AA'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # L2 hit-rate metrics for the "L2 Hit Rates" row. These are ratios, so they
  # expose a "Ratio %" sub-metric with the .pct suffix instead of the
  # percentage-of-peak suffix used by throughput metrics. Both ids are
  # "op_read" counters; the second is restricted to srcunit_tex.
  - name: L2 Hit Rate
    id: LTS.TriageA.lts__average_t_sector_op_read_realtime_hit_rate
    color: '#88001B'
    subMetrics:
    - name: Ratio %
      suffix: .pct
  - name: L2 Hit Rate from L1
    id: LTS.TriageA.lts__average_t_sector_srcunit_tex_op_read_realtime_hit_rate
    color: '#FFCA18'
    subMetrics:
    - name: Ratio %
      suffix: .pct
  # GPU memory (dramc) read/write throughput. Used by the second <Root> entry
  # and by the stacked "GPU Memory Bandwidth" row. Only "Throughput %" is
  # exposed (no byte counts, unlike the PCIe bandwidth metrics).
  - name: GPU Memory Read Bandwidth
    id: FBPA.TriageA.dramc__read_throughput
    color: '#CCCC00'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: GPU Memory Write Bandwidth
    id: FBPA.TriageA.dramc__write_throughput
    color: '#CC00CC'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # PCIe byte counters. Used by the second <Root> entry and by the "PCIe
  # Bandwidth" row. "Read" is bytes received by the GPU and "Write" is bytes
  # transmitted by it (see the descriptions).
  # The "GB/s" sub-metric applies multiplier 1.0e-09 to the per-second sum,
  # presumably converting bytes/s to GB/s.
  # NOTE(review): these are the only metrics with a schedulingRule; what
  # "migOptional" means is not visible in this file - confirm before editing.
  - name: PCIe Read Bandwidth
    id: PCI.TriageA.pcie__read_bytes
    description: Number of bytes received by GPU (includes protocol)
    color: '#FF3D00'
    schedulingRule: migOptional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Bytes
      suffix: .sum
    - name: GB/s
      suffix: .sum.per_second
      multiplier: 1.0e-09
  - name: PCIe Write Bandwidth
    id: PCI.TriageA.pcie__write_bytes
    description: Number of bytes transmitted by GPU (includes protocol)
    color: '#00CFCF'
    schedulingRule: migOptional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Bytes
      suffix: .sum
    - name: GB/s
      suffix: .sum.per_second
      multiplier: 1.0e-09
  # PCIe request counters per BAR aperture for the "PCIe to BAR Requests" row.
  # Each exposes a single "Requests" sub-metric (.sum). These ids carry no
  # "<unit>.TriageA." prefix.
  # BAR1: the only pair with descriptions (reads from / writes to VRAM).
  - name: PCIe Read Requests to BAR1
    id: pcie__rx_requests_aperture_bar1_op_read
    description: CPU+Peer Reads from VRAM over PCIe
    color: '#87FFFF'
    subMetrics:
    - name: Requests
      suffix: .sum
  - name: PCIe Write Requests to BAR1
    id: pcie__rx_requests_aperture_bar1_op_write
    description: CPU+Peer Writes to VRAM over PCIe
    color: '#8700FF'
    subMetrics:
    - name: Requests
      suffix: .sum
  # BAR0
  - name: PCIe Read Requests to BAR0
    id: pcie__rx_requests_aperture_bar0_op_read
    color: '#AFFF87'
    subMetrics:
    - name: Requests
      suffix: .sum
  - name: PCIe Write Requests to BAR0
    id: pcie__rx_requests_aperture_bar0_op_write
    color: '#870087'
    subMetrics:
    - name: Requests
      suffix: .sum
  # BAR2
  - name: PCIe Read Requests to BAR2
    id: pcie__rx_requests_aperture_bar2_op_read
    color: '#AFAF00'
    subMetrics:
    - name: Requests
      suffix: .sum
  - name: PCIe Write Requests to BAR2
    id: pcie__rx_requests_aperture_bar2_op_write
    color: '#870000'
    subMetrics:
    - name: Requests
      suffix: .sum
  # ZCULL sample counters for the overlay "ZCULL" row. Both expose a
  # "Throughput %" and a "Sum" (.sum) sub-metric.
  # Input Samples counts every sample sent to ZCULL, and Rejected Samples
  # (id "..._input_samples_op_rejected_...") is a subset of it. The superset
  # is therefore declared first, as the NOTE at the top of this file requires
  # for metrics that share an overlay row.
  - name: ZCULL Input Samples
    id: GPC_A.TriageA.raster__zcull_input_samples_realtime
    color: '#CC3399'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Sum
      suffix: .sum
  - name: ZCULL Rejected Samples
    id: GPC_A.TriageA.raster__zcull_input_samples_op_rejected_realtime
    color: '#663399'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Sum
      suffix: .sum