File: SpeedupWithFocusMetrics.py

import NvRules


def get_identifier():
    return "TemplateRuleSpeedup"


def get_name():
    return "Speedup Estimation Template"


def get_description():
    return "A rule template containing a speedup estimation and focus metrics."


def apply(handle):
    ctx = NvRules.get_context(handle)
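    # In NvRules, a range is one profiled range in the report and an action is a
    # single profiled kernel launch within it; this template only inspects the
    # first kernel launch of the first range.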
    action = ctx.range_by_idx(0).action_by_idx(0)
    frontend = ctx.frontend()

    # Get metrics for the speedup estimation
    compute_throughput_name = "sm__throughput.avg.pct_of_peak_sustained_elapsed"
    memory_throughput_name = (
        "gpu__compute_memory_throughput.avg.pct_of_peak_sustained_elapsed"
    )
    compute_throughput = action[compute_throughput_name].value()
    memory_throughput = action[memory_throughput_name].value()
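    # Both metrics are "Speed Of Light" throughputs, i.e. a percentage of the peak
    # sustained rate over the elapsed kernel duration, so their values lie in the
    # range [0, 100]. The lookups above assume the metrics were collected.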

    # Estimate the potential speedup; frontend.speedup() below expects the improvement in percent
    if compute_throughput > memory_throughput:
        dominated_by = "compute"
        improvement_percent = 100 - compute_throughput
    else:
        dominated_by = "memory"
        improvement_percent = 100 - memory_throughput
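    # Worked example (hypothetical numbers, not from the source): with a compute
    # throughput of 80% and a memory throughput of 60% of peak, the kernel is
    # "compute"-dominated and the estimated headroom is 100 - 80 = 20 percent.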

    # Post a message to the frontend summarizing the outcome of the rule.
    # Messages of type OPTIMIZATION with a speedup estimate will also be displayed
    # on the summary page.
    message_id = frontend.message(
        NvRules.IFrontend.MsgType_MSG_OPTIMIZATION,
        "This kernel is currently dominated by {}.".format(dominated_by),
        "Compute vs Memory",
    )

    # Attach a speedup estimate to the last message.
    # The estimated improvement refers to the kernel's overall runtime rather than
    # to a local portion of it, which we express with SpeedupType_GLOBAL. Note that
    # this rule does not request the required metrics itself, so they have to be
    # collected manually or by another (parent-scope) section.
    frontend.speedup(
        message_id, NvRules.IFrontend.SpeedupType_GLOBAL, improvement_percent
    )

    # Attach the two metrics which entered the speedup estimation to the last message.
    # These metrics serve as focus metrics or key performance indicators and should
    # be tracked when optimizing the kernel according to this rule.
    frontend.focus_metric(
        message_id,
        compute_throughput_name,
        compute_throughput,
        NvRules.IFrontend.Severity_SEVERITY_DEFAULT,
        "Increase the compute throughput",
    )
    frontend.focus_metric(
        message_id,
        memory_throughput_name,
        memory_throughput,
        NvRules.IFrontend.Severity_SEVERITY_DEFAULT,
        "Increase the memory throughput",
    )
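
# Usage sketch (not part of the original template; option names assumed from
# common ncu usage): the rule file would typically be placed alongside Nsight
# Compute's section files, e.g. in a directory passed via
# "ncu --section-folder <dir>", and the two throughput metrics above would need
# to be collected, for instance by the GPU Speed Of Light Throughput section or
# by requesting them explicitly with "--metrics".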