File: tu10x.config

package info (click to toggle)
nvidia-cuda-toolkit 12.4.1-3
  • links: PTS, VCS
  • area: non-free
  • in suites: forky, sid
  • size: 18,505,836 kB
  • sloc: ansic: 203,477; cpp: 64,769; python: 34,699; javascript: 22,006; xml: 13,410; makefile: 3,085; sh: 2,343; perl: 352
file content (330 lines) | stat: -rw-r--r-- 10,852 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
# NOTE: A metric that represents a superset of other metrics must be declared
# before them if they appear in the same group (displayRows). Otherwise, which
# metric dominates the group row will vary with zoom level when the overlay
# display mode is used.
# Top-level document: a `version` number followed by a `metricSets` list. The
# content shown here defines one set, aliased `tu10x`.
# NOTE(review): `version: 3` is presumably the schema version of this config
# format — the file itself does not say; confirm against the consuming tool.
version: 3
metricSets:
- alias: tu10x
  name: General Metrics for NVIDIA TU10x (any frequency)
  # Chip names written as a flow-style list. Presumably the GPUs this set is
  # selected for — confirm.
  chips: [TU102, TU104, TU106]
  # NOTE(review): the meaning of `wide` is defined by the consumer, not by this
  # file.
  counterWidth: wide
  # displayRows lists the row/group definitions for this metric set. As written
  # here, each row has a `name` and a `metrics` list, and optionally a
  # `description`, a `type` (`overlay` and `stacked` are the only values used)
  # and a `displayHeightScale`. Every entry under a row's `metrics` matches the
  # `name` of an item in this set's `metrics:` section further down.
  # The order of entries inside an overlay row matters: see the note at the top
  # of the file about declaring superset metrics first.
  displayRows:
  # NOTE(review): the next two rows share the name `<Root>` and differ in
  # `displayHeightScale` (0.5 vs -0.5) and in their metric lists. Presumably
  # `<Root>` is a special top-level row and the sign of the scale selects a
  # direction or position — the file does not say; confirm with the consumer.
  - name: <Root>
    type: overlay
    displayHeightScale: 0.5
    metrics:
    - GR Active
  - name: <Root>
    type: overlay
    displayHeightScale: -0.5
    metrics:
    - NVLink RX Responses User Data
    - NVLink TX Responses User Data
    - PCIe RX Throughput
    - PCIe TX Throughput
  # Clock rows: one metric each, named the same as the row.
  - name: GPC Clock Frequency
    type: overlay
    metrics:
    - GPC Clock Frequency
  - name: SYS Clock Frequency
    type: overlay
    metrics:
    - SYS Clock Frequency
  # Shows the same `GR Active` metric as the first `<Root>` row.
  - name: GPU Active
    description: The graphics or compute engine is active
    type: overlay
    metrics:
    - GR Active
  - name: Compute in Flight
    description: Compute dispatches sent to the GPU queues that have been issued,
      but not yet complete
    type: overlay
    metrics:
    - Sync Compute in Flight
    - Async Compute in Flight
  - name: Draw/Dispatch Start
    description: Draw and dispatch start times
    type: overlay
    metrics:
    - Draw Started
    - Dispatch Started
  - name: SM Active
    type: overlay
    metrics:
    - SM Active
  # NOTE(review): `SM Issue` is listed before `Tensor Active / FP16 Active`,
  # presumably to satisfy the superset-first rule from the file's header note
  # — confirm before reordering.
  - name: SM Instructions
    type: overlay
    metrics:
    - SM Issue
    - Tensor Active / FP16 Active
  # First of the stacked rows; also the only row with a displayHeightScale
  # other than the two `<Root>` rows (3.0).
  - name: SM Warp Occupancy
    type: stacked
    displayHeightScale: 3.0
    metrics:
    - Vertex/Tess/Geometry Warps in Flight
    - Pixel Warps in Flight
    - Compute Warps in Flight
    - Unallocated Warps in Active SMs
  - name: DRAM Bandwidth
    type: stacked
    metrics:
    - DRAM Read Bandwidth
    - DRAM Write Bandwidth
  # The two NVLink rows stack all four RX (resp. TX) metrics: requests and
  # responses, each split into protocol data and user data.
  - name: NVLink RX Bandwidth
    type: stacked
    metrics:
    - NVLink RX Requests Protocol Data
    - NVLink RX Requests User Data
    - NVLink RX Responses Protocol Data
    - NVLink RX Responses User Data
  - name: NVLink TX Bandwidth
    type: stacked
    metrics:
    - NVLink TX Requests Protocol Data
    - NVLink TX Requests User Data
    - NVLink TX Responses Protocol Data
    - NVLink TX Responses User Data
  - name: PCIe Bandwidth
    type: overlay
    metrics:
    - PCIe RX Throughput
    - PCIe TX Throughput
  # NOTE(review): this row and the next have no `type` key, unlike every other
  # row in this list. Presumably the consumer falls back to a default for
  # single-metric rows — confirm.
  - name: PCIe Read Requests to BAR1
    metrics:
    - PCIe Read Requests to BAR1
  - name: PCIe Write Requests to BAR1
    metrics:
    - PCIe Write Requests to BAR1
  # metrics: the definitions that the displayRows entries refer to by `name`.
  # As written in this file, each entry has a `name`, an `id`, a quoted hex
  # `color` and a `subMetrics` list, and optionally a `description` and a
  # `schedulingRule`. Each sub-metric pairs a label (`name`) with a `suffix`
  # (presumably appended to `id` to select the concrete value — confirm) and
  # may add a `multiplier`.
  metrics:
  # Clock frequencies. Each is shown alone in a display row of the same name.
  - name: GPC Clock Frequency
    id: gpc__cycles_elapsed
    description: The frequency of the GPC graphics clock. In NVIDIA GPU Specs this
      is the Boost Clock and Base Clock. In nvidia-smi this is the "gr", "graphics",
      "Graphics Clock".
    color: '#8BDDB4'
    subMetrics:
    # `.avg.per_second` with multiplier 1.0e-06: presumably elapsed cycles per
    # second scaled to MHz, matching the sub-metric label — confirm.
    - name: MHz
      suffix: .avg.per_second
      multiplier: 1.0e-06
  - name: SYS Clock Frequency
    id: sys__cycles_elapsed
    description: The frequency of the SYS clock. The SYS clock is the clock used for
      the copy engines, compute and graphics command processor, and the hardware performance
      monitor streaming unit.
    color: '#118E44'
    # NOTE(review): `migOptional` is not explained in this file; presumably it
    # relates to MIG (Multi-Instance GPU) availability — confirm. The GPC clock
    # entry above carries no schedulingRule.
    schedulingRule: migOptional
    subMetrics:
    # Same sub-metric shape as the GPC clock entry.
    - name: MHz
      suffix: .avg.per_second
      multiplier: 1.0e-06
  # Engine and SM activity metrics. All but the two "Started" counters expose a
  # single `Throughput %` sub-metric with suffix
  # `.avg.pct_of_peak_sustained_elapsed`.
  # NOTE(review): some ids below carry a `<UNIT>.TriageA.` prefix (HUB, FE_A,
  # SM) while others are bare; the meaning of the prefix is not stated in this
  # file — confirm before editing ids.
  #
  # GR Active is referenced from two display rows: the first `<Root>` row and
  # `GPU Active`.
  - name: GR Active
    id: gr__cycles_active
    description: Number of cycles the graphics/compute engine is active. The graphics
      and compute engine are active if there is any work in the graphics pipe or if
      the compute pipe is processing a dispatch.
    color: '#ED6A32'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # The two "Compute in Flight" metrics have no `description` of their own; the
  # `Compute in Flight` display row that groups them has one.
  - name: Sync Compute in Flight
    id: HUB.TriageA.gr__dispatch_cycles_active_queue_sync
    color: '#FE7F9C'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: Async Compute in Flight
    id: HUB.TriageA.gr__dispatch_cycles_active_queue_async
    color: '#FFD179'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # Event counters shown in the `Draw/Dispatch Start` row: a `.sum` labelled
  # `Workloads` rather than a percentage.
  - name: Draw Started
    id: FE_A.TriageA.fe__draw_count
    description: Number of draw calls started
    color: '#F0F000'
    subMetrics:
    - name: Workloads
      suffix: .sum
  - name: Dispatch Started
    id: HUB.TriageA.gr__dispatch_count
    description: Number of compute dispatches/grid launches started
    color: '#00F0F0'
    subMetrics:
    - name: Workloads
      suffix: .sum
  - name: SM Active
    id: sm__cycles_active
    description: Total number of cycles the SM is active (at least 1 active warp)
    color: '#FFA5A5'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # SM Issue and Tensor Active / FP16 Active share the `SM Instructions` overlay
  # row, in this order.
  - name: SM Issue
    id: SM.TriageA.sm__inst_executed_realtime
    description: SM instruction issue rate. Each SM can issue 4 instructions/cycle
      (1 instruction/SMSP/cycle).
    color: '#98C4DD'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: Tensor Active / FP16 Active
    id: SM.TriageA.sm__pipe_shared_cycles_active_realtime
    description: Cycles the tensor pipe or fp16x2 pipe is active
    color: '#B3FF70'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # Warps in flight for the vertex, tessellation, geometry and mesh shader
  # stages (per the description below). This is the first entry of the stacked
  # `SM Warp Occupancy` display row.
  - name: Vertex/Tess/Geometry Warps in Flight
    id: TPC.TriageA.tpc__warps_active_shader_vtg_realtime
    description: Total number of graphics vertex, tessellation, geometry, and mesh
      shader warps in flight
    color: '#FFA5A5'
    subMetrics:
    # Three views of the same id: a percentage of peak, a plain average, and an
    # average per elapsed cycle.
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Avg
      suffix: .avg
    - name: Avg Warps per Cycle
      suffix: .avg.per_cycle_elapsed
  # Remaining members of the stacked `SM Warp Occupancy` row (the
  # Vertex/Tess/Geometry entry is defined just above). Each exposes the same
  # three sub-metrics: `Throughput %`, `Avg` and `Avg Warps per Cycle`.
  - name: Pixel Warps in Flight
    id: TPC.TriageA.tpc__warps_active_shader_ps_realtime
    description: Total number of pixel shader warps in flight
    color: '#8BDDB4'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Avg
      suffix: .avg
    - name: Avg Warps per Cycle
      suffix: .avg.per_cycle_elapsed
  - name: Compute Warps in Flight
    id: TPC.TriageA.tpc__warps_active_shader_cs_realtime
    description: Total number of compute shader warps in flight
    color: '#FFD179'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Avg
      suffix: .avg
    - name: Avg Warps per Cycle
      suffix: .avg.per_cycle_elapsed
  # Listed last in the stacked row; per its description it counts warp slots
  # still available on SMs that are active.
  - name: Unallocated Warps in Active SMs
    id: TPC.TriageA.tpc__warps_inactive_sm_active_realtime
    description: Total number of warp slots available on active SMs
    color: '#696969'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
    - name: Avg
      suffix: .avg
    - name: Avg Warps per Cycle
      suffix: .avg.per_cycle_elapsed
  # DRAM read and write throughput. Both are stacked in the `DRAM Bandwidth`
  # row and expose only a `Throughput %` sub-metric.
  - name: DRAM Read Bandwidth
    id: dramc__read_throughput
    description: The ratio of cycles the DRAM interface was active reading data to
      the elapsed cycles in the same period as a percentage
    color: '#FFA5A5'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: DRAM Write Bandwidth
    id: dramc__write_throughput
    description: The ratio of cycles the DRAM interface was active writing data to
      the elapsed cycles in the same period as a percentage
    color: '#98C4DD'
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # NVLink byte counters: four RX (`nvlrx__…`) followed by four TX (`nvltx__…`),
  # each direction split into request/response and protocol/user data. They
  # feed the stacked `NVLink RX Bandwidth` and `NVLink TX Bandwidth` rows; the
  # two `Responses User Data` metrics are also listed in the second `<Root>`
  # row.
  # All eight carry `schedulingRule: optional`.
  # NOTE(review): presumably this lets the set load on systems without NVLink —
  # the file does not say; confirm.
  - name: NVLink RX Requests Protocol Data
    id: nvlrx__bytes_packet_request_data_protocol
    description: Number of bytes of NVLRX request protocol data
    color: '#C79536'
    schedulingRule: optional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: NVLink RX Requests User Data
    id: nvlrx__bytes_packet_request_data_user
    description: Number of bytes of NVLRX request user data
    color: '#FFD179'
    schedulingRule: optional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: NVLink RX Responses Protocol Data
    id: nvlrx__bytes_packet_response_data_protocol
    description: Number of bytes of NVLRX response protocol data
    color: '#C25D5D'
    schedulingRule: optional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: NVLink RX Responses User Data
    id: nvlrx__bytes_packet_response_data_user
    description: Number of bytes of NVLRX response user data
    color: '#FFA5A5'
    schedulingRule: optional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # TX direction: same four-way split as RX above.
  - name: NVLink TX Requests Protocol Data
    id: nvltx__bytes_packet_request_data_protocol
    description: Number of bytes of NVLTX request protocol data
    color: '#9052BF'
    schedulingRule: optional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: NVLink TX Requests User Data
    id: nvltx__bytes_packet_request_data_user
    description: Number of bytes of NVLTX request user data
    color: '#D6A4FC'
    schedulingRule: optional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: NVLink TX Responses Protocol Data
    id: nvltx__bytes_packet_response_data_protocol
    description: Number of bytes of NVLTX response protocol data
    color: '#6999B5'
    schedulingRule: optional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: NVLink TX Responses User Data
    id: nvltx__bytes_packet_response_data_user
    description: Number of bytes of NVLTX response user data
    color: '#98C4DD'
    schedulingRule: optional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # PCIe metrics. RX/TX Throughput are percentages shown in the `PCIe Bandwidth`
  # row and in the second `<Root>` row; both carry `schedulingRule: migOptional`.
  # NOTE(review): `migOptional` is not explained in this file — presumably
  # related to MIG availability; confirm.
  - name: PCIe RX Throughput
    id: PCI.TriageA.pcie__read_bytes
    description: Number of bytes received by GPU (includes protocol)
    color: '#FF3D00'
    schedulingRule: migOptional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  - name: PCIe TX Throughput
    id: PCI.TriageA.pcie__write_bytes
    description: Number of bytes transmitted by GPU (includes protocol)
    color: '#00CFCF'
    schedulingRule: migOptional
    subMetrics:
    - name: Throughput %
      suffix: .avg.pct_of_peak_sustained_elapsed
  # BAR1 request counters: `.sum` labelled `Requests`, each shown alone in a row
  # of the same name. Unlike the two throughput metrics above, these have no
  # `schedulingRule` and their ids have no `PCI.TriageA.` prefix.
  - name: PCIe Read Requests to BAR1
    id: pcie__rx_requests_aperture_bar1_op_read
    description: CPU+Peer Reads from VRAM over PCIe
    color: '#87FFFF'
    subMetrics:
    - name: Requests
      suffix: .sum
  - name: PCIe Write Requests to BAR1
    id: pcie__rx_requests_aperture_bar1_op_write
    description: CPU+Peer Writes to VRAM over PCIe
    color: '#8700FF'
    subMetrics:
    - name: Requests
      suffix: .sum