File: MemoryWorkloadAnalysis.section

package info (click to toggle)
nvidia-cuda-toolkit 12.4.1-3
  • links: PTS, VCS
  • area: non-free
  • in suites: forky, sid
  • size: 18,505,836 kB
  • sloc: ansic: 203,477; cpp: 64,769; python: 34,699; javascript: 22,006; xml: 13,410; makefile: 3,085; sh: 2,343; perl: 352
file content (66 lines) | stat: -rw-r--r-- 1,752 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Top-level definition of the "Memory Workload Analysis" section:
# its identifier, the human-readable title and description, an ordering
# value, and the section sets it is a member of.
Identifier: "MemoryWorkloadAnalysis"
DisplayName: "Memory Workload Analysis"
# The description names three limiter terms in parentheses -- Mem Busy,
# Max Bandwidth and Mem Pipes Busy -- which are also used as metric labels
# in this file's Header block.
Description: "Detailed analysis of the memory resources of the GPU. Memory can become a limiting factor for the overall kernel performance when fully utilizing the involved hardware units (Mem Busy), exhausting the available communication bandwidth between those units (Max Bandwidth), or by reaching the maximum throughput of issuing memory instructions (Mem Pipes Busy)."
# NOTE(review): presumably positions this section relative to other
# sections in the report -- confirm against the section-file documentation.
Order: 30
# This section is listed in two sets: "detailed" and "full".
Sets {
  Identifier: "detailed"
}
Sets {
  Identifier: "full"
}
# Header block: eight labelled metrics for this section. Each Metrics entry
# pairs a display Label with a metric Name; some entries carry a Filter
# with MinArch/MaxArch bounds.
Header {
  # Memory throughput, taken from dram__bytes.sum.per_second.
  # The base entry is filtered to MaxArch CC_70; the two Options entries
  # carry filters for CC_75..CC_86 and for CC_89 and above.
  # NOTE(review): all three entries name the same metric, so the
  # per-architecture split looks redundant as written -- confirm whether it
  # is intentional before collapsing it.
  # NOTE(review): no filter here explicitly lists values between CC_70 and
  # CC_75, or between CC_86 and CC_89 -- verify how such architectures are
  # handled.
  Metrics {
    Label: "Memory Throughput"
    Name: "dram__bytes.sum.per_second"
    Filter {
      MaxArch: CC_70
    }
    Options {
      Name: "dram__bytes.sum.per_second"
      Filter {
        MinArch: CC_75
        MaxArch: CC_86
      }
    }
    Options {
      Name: "dram__bytes.sum.per_second"
      Filter {
        MinArch: CC_89
      }
    }
  }
  # "Mem Busy": the section Description associates this term with fully
  # utilizing the involved hardware units.
  Metrics {
    Label: "Mem Busy"
    Name: "gpu__compute_memory_access_throughput.avg.pct_of_peak_sustained_elapsed"
  }
  # L1/TEX hit rate, reported as a percentage (.pct metric).
  Metrics {
    Label: "L1/TEX Hit Rate"
    Name: "l1tex__t_sector_hit_rate.pct"
  }
  # "Max Bandwidth": the section Description associates this term with
  # exhausting the communication bandwidth between the hardware units.
  Metrics {
    Label: "Max Bandwidth"
    Name: "gpu__compute_memory_request_throughput.avg.pct_of_peak_sustained_elapsed"
  }
  # L2 hit rate, reported as a percentage (.pct metric).
  Metrics {
    Label: "L2 Hit Rate"
    Name: "lts__t_sector_hit_rate.pct"
  }
  # "Mem Pipes Busy": the section Description associates this term with
  # reaching the maximum throughput of issuing memory instructions.
  Metrics {
    Label: "Mem Pipes Busy"
    Name: "sm__memory_throughput.avg.pct_of_peak_sustained_elapsed"
  }
  # The two L2 compression metrics below are both filtered to MinArch CC_80.
  Metrics {
    Label: "L2 Compression Success Rate"
    Name: "lts__average_gcomp_input_sector_success_rate.pct"
    Filter {
      MinArch: CC_80
    }
  }
  # Reported as a ratio (.ratio metric) rather than a percentage.
  Metrics {
    Label: "L2 Compression Ratio"
    Name: "lts__average_gcomp_output_sector_compression_achieved_rate.ratio"
    Filter {
      MinArch: CC_80
    }
  }
}