File: opencl_c.cpp

package info (click to toggle)
intel-compute-runtime 20.44.18297-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 34,780 kB
  • sloc: cpp: 379,729; lisp: 4,931; python: 299; sh: 196; makefile: 8
file content (136 lines) | stat: -rw-r--r-- 3,010 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl_c.h"

#include "shared/source/helpers/string.h"

namespace BuiltinKernelsSimulation {

// NOTE(review): not referenced in the code visible here; presumably consumed by
// conditional compilation elsewhere - confirm before removing.
#define SCHEDULER_EMULATION 1

// globals
// NOTE(review): gMutex is not locked by any function visible in this section of the file.
std::mutex gMutex;
// Per-dimension ids and sizes (three entries each). get_global_id(), get_local_id() and
// get_local_size() return globalID[dim], localID[dim] and localSize[dim] respectively;
// the two id getters do so only while threadIDToLocalIDmap is empty.
unsigned int globalID[3];
unsigned int localID[3];
unsigned int localSize[3];

// Maps a std::thread::id to a 32-bit id. When it is non-empty, get_global_id() returns the
// calling thread's entry and get_local_id() returns that entry modulo 24.
std::map<std::thread::id, uint32_t> threadIDToLocalIDmap;

// Barrier object entered by barrier(). It starts out null and is dereferenced there
// without a null check, so it has to be assigned before barrier() is called.
SynchronizationBarrier *pGlobalBarrier = nullptr;

// Component-wise sum of two uint4 values: each of x, y, z and w of the result is the sum
// of the matching components of `a` and `b`.
uint4 operator+(uint4 const &a, uint4 const &b) {
    return uint4(a.x + b.x,
                 a.y + b.y,
                 a.z + b.z,
                 a.w + b.w);
}

// Component-wise sum of two int4 values: each of x, y, z and w of the result is the sum
// of the matching components of `a` and `b`.
int4 operator+(int4 const &a, int4 const &b) {
    return int4(a.x + b.x,
                a.y + b.y,
                a.z + b.z,
                a.w + b.w);
}

// Returns the emulated local id.
//
// While threadIDToLocalIDmap is empty, the result is localID[dim] (the id set by the
// loop-based emulation). Otherwise the id stored for the calling thread is looked up and
// reduced modulo 24; `dim` is ignored on that path and a thread without an entry gets 0.
//
// dim: index into localID; not range-checked here.
uint get_local_id(int dim) {
    // use id from loop iteration
    if (threadIDToLocalIDmap.empty()) {
        return localID[dim];
    }

    // use thread id
    // find() rather than operator[]: operator[] default-inserts a missing key, which would
    // modify the shared map from inside a getter. The value returned for an unregistered
    // thread is still 0, exactly as the inserted default would have produced.
    const auto entry = threadIDToLocalIDmap.find(std::this_thread::get_id());
    if (entry == threadIDToLocalIDmap.end()) {
        return 0;
    }
    return entry->second % 24;
}

// Returns the emulated global id.
//
// While threadIDToLocalIDmap is empty, the result is globalID[dim] (the id set by the
// loop-based emulation). Otherwise the id stored for the calling thread is returned;
// `dim` is ignored on that path and a thread without an entry gets 0.
//
// dim: index into globalID; not range-checked here.
uint get_global_id(int dim) {
    // use id from loop iteration
    if (threadIDToLocalIDmap.empty()) {
        return globalID[dim];
    }

    // use thread id
    // find() rather than operator[]: operator[] default-inserts a missing key, which would
    // modify the shared map from inside a getter. The value returned for an unregistered
    // thread is still 0, exactly as the inserted default would have produced.
    const auto entry = threadIDToLocalIDmap.find(std::this_thread::get_id());
    if (entry == threadIDToLocalIDmap.end()) {
        return 0;
    }
    return entry->second;
}

// Returns the localSize entry for dimension `dim`. The index is not range-checked.
uint get_local_size(int dim) {
    const uint sizeForDim = localSize[dim];
    return sizeForDim;
}

// Returns NUM_OF_THREADS divided by 24. The `dim` argument is accepted but not used, so
// the result is the same for every dimension.
uint get_num_groups(int dim) {
    const uint groupCount = NUM_OF_THREADS / 24;
    return groupCount;
}

// Returns the group id for dimension `dim`, computed as get_global_id(dim) divided by 24
// (integer division).
uint get_group_id(int dim) {
    const uint globalIdForDim = get_global_id(dim);
    return globalIdForDim / 24;
}

// Emulated barrier: calls enter() on the object pointed to by pGlobalBarrier and then
// touches a volatile local that serves as a convenient breakpoint location.
//
// x: accepted but not used by this implementation.
//
// pGlobalBarrier is dereferenced without a null check, so it must point to a valid
// SynchronizationBarrier before this function is called.
void barrier(int x) {
    pGlobalBarrier->enter();

    // int LID = get_local_id(0);
    // volatile, presumably so the increment below is not optimized away and a breakpoint
    // set on it stays reachable.
    volatile int BreakPointHere = 0;

    // PUT BREAKPOINT HERE to stop after each barrier
    BreakPointHere++;
}

// Reads one pixel from `im` at `coord` and returns it as a uint4.
//
// The pixel's byte offset is ((z * height + y) * width + x) * bytesPerChannel * channels.
// Each stored channel is copied (bytesPerChannel bytes) into the next 4-byte slot of the
// result, in storage order. Slots the image does not provide keep the defaults
// (0, 0, 0, 1).
//
// im:    image descriptor; ptr, width, height, bytesPerChannel and channels are read.
// coord: x, y and z select the pixel; w is not used. Coordinates are not range-checked.
//
// At most four channels are read, and every copy is given the capacity that is actually
// left in the result, so an image descriptor with more than four channels or with
// bytesPerChannel larger than the remaining space can no longer overrun the local uint4.
// NOTE(review): this relies on memcpy_s refusing a copy whose count exceeds the
// destination size it is given - confirm against the project's implementation.
uint4 read_imageui(image *im, int4 coord) {
    uint4 color = {0, 0, 0, 1};

    uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;

    char *temp = &im->ptr[offset];
    char *colorDst = (char *)&color;

    // Distance between consecutive components inside the result.
    const size_t componentStride = 4;
    // Bytes of `color` still available starting at colorDst.
    size_t remaining = sizeof(uint4);

    for (uint i = 0; i < im->channels && i < 4u; i++) {
        memcpy_s(colorDst, remaining, temp, im->bytesPerChannel);
        temp += im->bytesPerChannel;
        colorDst += componentStride;
        remaining -= componentStride;
    }
    return color;
}

// Writes `color` into the pixel of `im` selected by `coord` and returns the written color.
//
// The pixel's byte offset is ((z * height + y) * width + x) * bytesPerChannel * channels.
// For each stored channel, the low bytesPerChannel bytes of the matching 4-byte component
// of `color` are copied into the image, in storage order.
//
// im:    image descriptor; ptr, width, height, depth, bytesPerChannel and channels are read.
// coord: x, y and z select the pixel; w is not used.
// color: components to store.
//
// Differences from the previous implementation, all on paths that were out of bounds:
//  - the return value used to be read from the memory following the written pixel, which
//    lies outside the image for the last pixel; `color` is returned instead;
//  - the capacity handed to memcpy_s now shrinks as the destination advances and is 0 when
//    the offset lies outside the image, so nothing is written past the computed image size;
//  - at most four channels are written, since `color` only has four components.
uint4 write_imageui(image *im, uint4 coord, uint4 color) {
    uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;

    char *temp = &im->ptr[offset];
    char *colorSrc = (char *)&color;

    size_t size = im->width * im->height * im->depth * im->bytesPerChannel * im->channels;
    const size_t channelBytes = im->bytesPerChannel;

    // Bytes of the image still available starting at temp; guarded so that an offset
    // beyond the image does not wrap around to a huge unsigned value.
    size_t remaining = (offset < size) ? size - offset : 0;

    for (uint i = 0; i < im->channels && i < 4u; i++) {
        if (remaining < channelBytes) {
            break; // the next channel would not fit inside the image
        }
        memcpy_s(temp, remaining, colorSrc, channelBytes);
        temp += channelBytes;
        colorSrc += 4;
        remaining -= channelBytes;
    }
    return color;
}

// Saturating conversion from uint to uchar: values that do not fit are clamped to the
// largest uchar value instead of being truncated (a plain cast would wrap, e.g. 256 -> 0).
//
// c: value to convert.
// Returns c when it fits into uchar, otherwise the maximum uchar value.
uchar convert_uchar_sat(uint c) {
    // Converting all-ones to uchar yields the largest value of that unsigned type.
    const uint maxValue = static_cast<uchar>(~0u);
    return static_cast<uchar>(c > maxValue ? maxValue : c);
}

// Saturating conversion from uint to ushort: values that do not fit are clamped to the
// largest ushort value instead of being truncated (a plain cast would wrap, e.g.
// 65536 -> 0).
//
// c: value to convert.
// Returns c when it fits into ushort, otherwise the maximum ushort value.
ushort convert_ushort_sat(uint c) {
    // Converting all-ones to ushort yields the largest value of that unsigned type.
    const uint maxValue = static_cast<ushort>(~0u);
    return static_cast<ushort>(c > maxValue ? maxValue : c);
}

} // namespace BuiltinKernelsSimulation