# Section definition: "Memory Workload Analysis" (text-format message).
# Top-level section attributes: the internal identifier, the name shown to the
# user, the descriptive text, and the numeric ordering value.
Identifier: "MemoryWorkloadAnalysis"
DisplayName: "Memory Workload Analysis"
Description: "Detailed analysis of the memory resources of the GPU. Memory can become a limiting factor for the overall kernel performance when fully utilizing the involved hardware units (Mem Busy), exhausting the available communication bandwidth between those units (Max Bandwidth), or by reaching the maximum throughput of issuing memory instructions (Mem Pipes Busy)."
Order: 30

# Two Sets entries, naming the identifiers "detailed" and "full".
Sets {
  Identifier: "detailed"
}
Sets {
  Identifier: "full"
}

# Header: a list of Metrics entries, each pairing a display Label with a
# metric Name, optionally restricted by an architecture Filter.
Header {
  # The same metric name is listed for three architecture ranges:
  # up to CC_70, CC_75 through CC_86, and CC_89 onward.
  # NOTE(review): CC_72 and CC_87 are not covered by any of the three filters;
  # presumably intentional -- confirm before merging or widening the ranges.
  Metrics {
    Label: "Memory Throughput"
    Name: "dram__bytes.sum.per_second"
    Filter {
      MaxArch: CC_70
    }
    Options {
      Name: "dram__bytes.sum.per_second"
      Filter {
        MinArch: CC_75
        MaxArch: CC_86
      }
    }
    Options {
      Name: "dram__bytes.sum.per_second"
      Filter {
        MinArch: CC_89
      }
    }
  }

  # "Mem Busy", "Max Bandwidth" and "Mem Pipes Busy" are the three labels
  # referred to by the section Description above.
  Metrics {
    Label: "Mem Busy"
    Name: "gpu__compute_memory_access_throughput.avg.pct_of_peak_sustained_elapsed"
  }
  Metrics {
    Label: "L1/TEX Hit Rate"
    Name: "l1tex__t_sector_hit_rate.pct"
  }
  Metrics {
    Label: "Max Bandwidth"
    Name: "gpu__compute_memory_request_throughput.avg.pct_of_peak_sustained_elapsed"
  }
  Metrics {
    Label: "L2 Hit Rate"
    Name: "lts__t_sector_hit_rate.pct"
  }
  Metrics {
    Label: "Mem Pipes Busy"
    Name: "sm__memory_throughput.avg.pct_of_peak_sustained_elapsed"
  }

  # The two L2 compression entries carry a MinArch: CC_80 filter and no
  # upper bound.
  Metrics {
    Label: "L2 Compression Success Rate"
    Name: "lts__average_gcomp_input_sector_success_rate.pct"
    Filter {
      MinArch: CC_80
    }
  }
  Metrics {
    Label: "L2 Compression Ratio"
    Name: "lts__average_gcomp_output_sector_compression_achieved_rate.ratio"
    Filter {
      MinArch: CC_80
    }
  }
}
# End of the MemoryWorkloadAnalysis section definition.