# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import NvRules
from RequestedMetrics import MetricRequest, RequestedMetricsParser, Importance
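
# Metrics consumed by this rule. All requests are marked Importance.OPTIONAL so the
# rule can still run when some counters were not collected; the trailing 0 appears to
# be the fallback value used in that case (assumption based on how the values are read below).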
requested_metrics = [
    MetricRequest("smsp__sass_inst_executed_op_shared_ld.sum", None, Importance.OPTIONAL, 0),
    MetricRequest("smsp__sass_inst_executed_op_shared_st.sum", None, Importance.OPTIONAL, 0),
    MetricRequest("smsp__inst_executed_op_ldsm.sum", None, Importance.OPTIONAL, 0, False),
    MetricRequest("l1tex__data_pipe_lsu_wavefronts_mem_shared_op_ld.sum", None, Importance.OPTIONAL, 0),
    MetricRequest("l1tex__data_pipe_lsu_wavefronts_mem_shared_op_st.sum", None, Importance.OPTIONAL, 0),
    MetricRequest("l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_ld.sum", None, Importance.OPTIONAL, 0),
    MetricRequest("l1tex__data_bank_conflicts_pipe_lsu_mem_shared_op_st.sum", None, Importance.OPTIONAL, 0),
]

def get_identifier():
    return "SharedMemoryConflicts"

def get_name():
    return "Shared Memory Conflicts"

def get_description():
    return "Detection of shared memory bank conflicts."

def get_section_identifier():
    return "MemoryWorkloadAnalysis_Tables"

def get_parent_rules_identifiers():
    return ["Memory"]
def get_estimated_speedup(parent_weights, bank_conflicts_percent):
    l1tex_throughput_name = "l1tex__throughput.avg.pct_of_peak_sustained_active"
    if l1tex_throughput_name in parent_weights:
        speedup_type = NvRules.IFrontend.SpeedupType_GLOBAL
        l1tex_throughput = parent_weights[l1tex_throughput_name] / 100
        improvement_percent = bank_conflicts_percent * l1tex_throughput
    else:
        speedup_type = NvRules.IFrontend.SpeedupType_LOCAL
        improvement_percent = bank_conflicts_percent

    return speedup_type, improvement_percent
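
# Rule entry point, called by the profiler with a context handle. Compares bank
# conflicts against shared memory wavefronts for loads and stores and reports an
# optimization message when conflicts exceed the threshold.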
def apply(handle):
    ctx = NvRules.get_context(handle)
    action = ctx.range_by_idx(0).action_by_idx(0)
    fe = ctx.frontend()
    metrics = RequestedMetricsParser(handle, action).parse(requested_metrics)
    parent_weights = fe.receive_dict_from_parent("Memory")
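
    # For each access type: [l1tex metric infix, smsp instruction metric suffix].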
    shared_access_types = {
        "Shared Load": ["mem_shared_op_ld", "shared_ld"],
        "Shared Store": ["mem_shared_op_st", "shared_st"],
    }

    for access_info, metric_str in shared_access_types.items():
        requests = metrics[f"smsp__sass_inst_executed_op_{metric_str[1]}.sum"].value()
        if access_info == "Shared Load":
            # LDSM (load matrix) instructions also read from shared memory.
            requests += metrics["smsp__inst_executed_op_ldsm.sum"].value()
        if requests == 0:
            continue

        wavefronts = metrics[f"l1tex__data_pipe_lsu_wavefronts_{metric_str[0]}.sum"].value()
        bank_conflicts_metric_name = f"l1tex__data_bank_conflicts_pipe_lsu_{metric_str[0]}.sum"
        bank_conflicts = metrics[bank_conflicts_metric_name].value()
        bank_conflicts_percent = (bank_conflicts * 100.0) / wavefronts if wavefronts > 0 else 0.0

        bank_conflicts_threshold = 10.0
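        # Only flag this access type when bank conflicts account for at least 10% of its wavefronts.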
        if bank_conflicts_percent >= bank_conflicts_threshold:
            message = "The memory access pattern for {}s might not be optimal ".format(access_info.lower())
            message += "and causes on average a {:.1f}-way bank conflict ".format(wavefronts / requests)
            message += "across all {:.0f} {} requests. ".format(requests, access_info.lower())
            message += "This results in {:.0f} bank conflicts, ".format(bank_conflicts)
            message += "which represent {:.2f}% ".format(bank_conflicts_percent)
            message += "of the overall {:.0f} wavefronts for {}s.".format(wavefronts, access_info.lower())
            message += " Check the @section:SourceCounters:Source Counters@ section for uncoalesced {}s.".format(access_info.lower())
            msg_id = fe.message(NvRules.IFrontend.MsgType_MSG_OPTIMIZATION, message, "{} Bank Conflicts".format(access_info))
            speedup_type, speedup_value = get_estimated_speedup(parent_weights, bank_conflicts_percent)
            fe.speedup(msg_id, speedup_type, speedup_value)
            fe.focus_metric(
                msg_id,
                bank_conflicts_metric_name,
                bank_conflicts,
                NvRules.IFrontend.Severity_SEVERITY_HIGH,
                "Decrease bank conflicts for {}s".format(access_info.lower()),
            )

            l1tex_throughput_name = "l1tex__throughput.avg.pct_of_peak_sustained_active"
            if l1tex_throughput_name in parent_weights:
                fe.focus_metric(
                    msg_id,
                    l1tex_throughput_name,
                    parent_weights[l1tex_throughput_name],
                    NvRules.IFrontend.Severity_SEVERITY_LOW,
                    "The higher the L1/TEX cache throughput, the more severe the issue becomes",
                )