File: gpu_object_lifetime_tracker.h

package info (click to toggle)
halide 14.0.0-3
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 49,124 kB
  • sloc: cpp: 238,722; makefile: 4,303; python: 4,047; java: 1,575; sh: 1,384; pascal: 211; xml: 165; javascript: 43; ansic: 34
file content (92 lines) | stat: -rw-r--r-- 3,009 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#ifndef GPU_OBJECT_LIFETIME_H
#define GPU_OBJECT_LIFETIME_H

#include <array>
#include <stdio.h>
#include <string.h>

namespace Halide {
namespace Internal {

// Tracks creation/destruction of GPU-related objects by scanning lines of
// debug output for known substrings, and checks afterwards that everything
// that was created has also been destroyed.
//
// Typical use: feed every debug line to record_gpu_debug(), then call
// validate_gpu_object_lifetime() once the work under test has finished.
class GpuObjectLifetimeTracker {
    // One tracked kind of object, identified by the substrings that mark
    // its creation and destruction in the debug output.
    struct ObjectType {
        // Substring whose presence in a line counts as one creation.
        const char *const created;
        // Substring whose presence in a line counts as one destruction.
        const char *const destroyed;
        // Global objects may be left alive when the caller allows it, and
        // are subject to the max_globals limit during validation.
        bool const is_global;
        // Number of creation lines seen so far (never decremented).
        int total_created;
        // Creations minus destructions seen so far. Goes negative if more
        // destructions than creations were recorded.
        int live_count;

        ObjectType(const char *created, const char *destroyed, bool is_global = false)
            : created(created), destroyed(destroyed),
              is_global(is_global), total_created(0), live_count(0) {
        }
    };

    // Table of tracked object kinds. The array size must match the number
    // of initializers below.
    std::array<ObjectType, 11> object_types = {{
        {"Caching compiled kernel:", "Releasing cached compilation:"},

        // OpenCL objects
        {"clCreateContext", "clReleaseContext", true},
        {"clCreateCommandQueue", "clReleaseCommandQueue", true},
        // NOTE(review): no entry here matches "clCreateProgramWithSource" /
        // "clCreateProgramWithBinary"; presumably program lifetimes are
        // covered by the compiled-kernel cache messages above -- confirm.
        {"clCreateBuffer", "clReleaseMemObject"},
        {"clCreateKernel", "clReleaseKernel"},

        // CUDA objects
        {"cuCtxCreate", "cuCtxDestroy", true},
        {"cuMemAlloc", "cuMemFree"},

        // Metal objects
        {"Allocating: MTLCreateSystemDefaultDevice", "Releasing: MTLCreateSystemDefaultDevice", true},
        {"Allocating: new_command_queue", "Releasing: new_command_queue"},

        // Hexagon objects
        {"halide_remote_load_library", "halide_remote_release_library"},
        {"ion_alloc", "ion_free"},
    }};

public:
    // Parse a line of output from gpu_debug and update object counts.
    //
    // The line is tested against every table entry, so a single line can
    // update more than one object kind. Within one entry the creation
    // substring takes precedence: a line containing both substrings is
    // counted as a creation only. A null `str` is ignored.
    void record_gpu_debug(const char *str) {
        // strstr() on a null pointer is undefined behaviour; treat a missing
        // message as a line that matches nothing.
        if (str == nullptr) {
            return;
        }
        for (auto &o : object_types) {
            if (strstr(str, o.created)) {
                o.total_created++;
                o.live_count++;
            } else if (strstr(str, o.destroyed)) {
                o.live_count--;
            }
        }
    }

    // Check that there are no live objects remaining, and we created at least one object.
    //
    // allow_globals: if true, object kinds flagged as global may have a
    //                non-zero live count without causing a failure.
    // allow_none:    if true, it is not an error for no objects at all to
    //                have been created.
    // max_globals:   upper bound on the number of creations recorded for
    //                any single global object kind.
    //
    // Returns 0 on success. On the first violation found, prints a message
    // to stdout and returns -1. Does not modify the recorded counts.
    int validate_gpu_object_lifetime(bool allow_globals, bool allow_none, int max_globals) const {
        int total = 0;
        for (const auto &o : object_types) {
            // A non-zero count covers both leaked objects (positive) and
            // unmatched destructions (negative).
            if (o.live_count != 0 &&
                !(allow_globals && o.is_global)) {
                printf("Error! %d objects created by %s still live\n",
                       o.live_count, o.created);
                return -1;
            }
            if (o.is_global && o.total_created > max_globals) {
                printf("Error! %d global objects created by %s, max is %d\n",
                       o.total_created, o.created, max_globals);
                return -1;
            }

            total += o.total_created;
        }
        if (!allow_none && total == 0) {
            printf("Error! No objects created. Ensure gpu_debug is set, ");
            printf("and record_gpu_debug is called from halide_print.\n");
            return -1;
        }
        return 0;
    }
};

}  // namespace Internal
}  // namespace Halide

#endif