File: gpu_object_lifetime_tracker.h

package info (click to toggle)
halide 21.0.0-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 55,752 kB
  • sloc: cpp: 289,334; ansic: 22,751; python: 7,486; makefile: 4,299; sh: 2,508; java: 1,549; javascript: 282; pascal: 207; xml: 127; asm: 9
file content (107 lines) | stat: -rw-r--r-- 3,868 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#ifndef GPU_OBJECT_LIFETIME_H
#define GPU_OBJECT_LIFETIME_H

#include <array>
#include <stdio.h>
#include <string.h>

namespace Halide {
namespace Internal {

// Counts GPU object creations and destructions by scanning lines of debug
// output for known substrings, then checks that nothing was left alive.
//
// Usage: feed every debug line to record_gpu_debug(), then call
// validate_gpu_object_lifetime() once the work under test has finished.
class GpuObjectLifetimeTracker {
    // One kind of tracked object, identified by the substrings that mark its
    // creation and destruction in the debug output.
    struct ObjectType {
        // Substring whose presence in a line counts as one creation.
        const char *const created;
        // Substring whose presence in a line counts as one destruction.
        const char *const destroyed;
        // Global objects may be left live when the caller allows it, and the
        // number created is capped by max_globals during validation.
        bool const is_global;
        // Number of creation lines seen so far (never decremented).
        int total_created = 0;
        // Creations minus destructions seen so far; goes negative if more
        // destruction lines than creation lines were recorded.
        int live_count = 0;

        ObjectType(const char *created, const char *destroyed, bool is_global = false)
            : created(created), destroyed(destroyed), is_global(is_global) {
        }
    };

    std::array<ObjectType, 22> object_types = {{
        {"Caching compiled kernel:", "Releasing cached compilation:"},

        // OpenCL objects
        {"clCreateContext", "clReleaseContext", true},
        {"clCreateCommandQueue", "clReleaseCommandQueue", true},
        // NOTE(review): a comment here used to describe an entry covering
        // both "clCreateProgramWithSource" and "clCreateProgramWithBinary",
        // but this table has no such entry. Presumably compiled programs are
        // accounted for by the "Caching compiled kernel:" entry -- confirm.
        {"clCreateBuffer", "clReleaseMemObject"},
        {"clCreateKernel", "clReleaseKernel"},

        // CUDA objects
        {"cuCtxCreate", "cuCtxDestroy", true},
        {"cuMemAlloc", "cuMemFree"},

        // Metal objects
        {"Allocating: MTLCreateSystemDefaultDevice", "Releasing: MTLCreateSystemDefaultDevice", true},
        {"Allocating: new_command_queue", "Releasing: new_command_queue"},

        // Hexagon objects
        {"halide_remote_load_library", "halide_remote_release_library"},
        {"ion_alloc", "ion_free"},

        // Vulkan objects
        {"vk_create_context", "vk_destroy_context", true},
        {"vk_create_command_pool", "vk_destroy_command_pool"},
        {"vk_create_command_buffer", "vk_destroy_command_buffer"},
        {"vk_create_pipeline_layout", "vk_destroy_pipeline_layout"},
        {"vk_create_compute_pipeline", "vk_destroy_compute_pipeline"},
        {"vk_create_descriptor_pool", "vk_destroy_descriptor_pool"},
        {"Vulkan: Reserved memory for device region", "Vulkan: Released memory for device region"},
        {"vkCreateBuffer: Created buffer for device region", "vkDestroyBuffer: Destroyed buffer for device region"},

        // WebGPU objects
        {"wgpuCreateInstance", "wgpuInstanceRelease", true},
        {"wgpuDeviceCreateBuffer", "wgpuBufferRelease"},
        {"wgpuDeviceCreateComputePipeline", "wgpuComputePipelineRelease"},
    }};

public:
    // Parse a line of output from gpu_debug and update object counts.
    //
    // Every table entry is tested against the line. For each entry, a match
    // on the creation substring takes precedence over a match on the
    // destruction substring. A null `str` is ignored.
    void record_gpu_debug(const char *str) {
        // strstr() must not be handed a null haystack.
        if (str == nullptr) {
            return;
        }
        for (auto &o : object_types) {
            if (strstr(str, o.created)) {
                o.total_created++;
                o.live_count++;
            } else if (strstr(str, o.destroyed)) {
                o.live_count--;
            }
        }
    }

    // Check that there are no live objects remaining, and we created at least one object.
    //
    // allow_globals: when true, entries flagged is_global may have a non-zero
    //                live count without it being an error.
    // allow_none:    when true, having recorded no creations at all is not an
    //                error.
    // max_globals:   upper bound on the number of creations recorded for any
    //                single is_global entry.
    //
    // Returns 0 if every check passes. Otherwise prints a description of the
    // first failing check to stdout and returns 1.
    int validate_gpu_object_lifetime(bool allow_globals, bool allow_none, int max_globals) const {
        int total = 0;
        for (const auto &o : object_types) {
            const bool may_stay_live = allow_globals && o.is_global;
            // A non-zero count covers both leaked objects (positive) and
            // more destructions than creations (negative).
            if (o.live_count != 0 && !may_stay_live) {
                printf("Error! %d objects created by %s still live\n",
                       o.live_count, o.created);
                return 1;
            }
            if (o.is_global && o.total_created > max_globals) {
                printf("Error! %d global objects created by %s, max is %d\n",
                       o.total_created, o.created, max_globals);
                return 1;
            }

            total += o.total_created;
        }
        if (!allow_none && total == 0) {
            printf("Error! No objects created. Ensure gpu_debug is set, ");
            printf("and record_gpu_debug is called from halide_print.\n");
            return 1;
        }
        return 0;
    }
};

}  // namespace Internal
}  // namespace Halide

#endif