1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
|
/*
* Copyright (C) 2017-2020 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "opencl_c.h"
#include "shared/source/helpers/string.h"
namespace BuiltinKernelsSimulation {
#define SCHEDULER_EMULATION 1
// globals
// Mutex object; none of the functions visible in this section lock it.
std::mutex gMutex;
// Per-dimension work-item IDs and work-group size used when work-items are
// emulated by loop iteration (i.e. while threadIDToLocalIDmap is empty):
// get_global_id(), get_local_id() and get_local_size() index these by `dim`.
unsigned int globalID[3];
unsigned int localID[3];
unsigned int localSize[3];
// Maps a host thread id to a work-item id. While this map is non-empty,
// get_global_id() returns the stored value as-is and get_local_id() returns
// the stored value modulo 24, regardless of the requested dimension.
std::map<std::thread::id, uint32_t> threadIDToLocalIDmap;
// Barrier object entered by barrier(). It starts out as nullptr and barrier()
// dereferences it unconditionally, so it must be assigned before any call.
SynchronizationBarrier *pGlobalBarrier = nullptr;
// Component-wise addition of two uint4 vectors; neither operand is modified.
uint4 operator+(uint4 const &a, uint4 const &b) {
    // Start from a copy of the left operand and accumulate the right one.
    uint4 sum(a);
    sum.x += b.x;
    sum.y += b.y;
    sum.z += b.z;
    sum.w += b.w;
    return sum;
}
// Component-wise addition of two int4 vectors; neither operand is modified.
int4 operator+(int4 const &a, int4 const &b) {
    // Start from a copy of the left operand and accumulate the right one.
    int4 sum(a);
    sum.x += b.x;
    sum.y += b.y;
    sum.z += b.z;
    sum.w += b.w;
    return sum;
}
// Emulates OpenCL get_local_id().
//
// Two emulation modes are supported:
//  - loop iteration: threadIDToLocalIDmap is empty and the id for `dim` is
//    read from the localID array;
//  - one host thread per work-item: the calling thread's entry in
//    threadIDToLocalIDmap is reduced modulo 24 (the hard-coded work-group
//    size used throughout this file); `dim` is ignored in this mode.
//
// Returns 0 for a thread that has no entry in the map.
uint get_local_id(int dim) {
    // use id from loop iteration
    if (threadIDToLocalIDmap.empty()) {
        return localID[dim];
    }
    // use thread id
    // find() instead of operator[]: operator[] is a non-const, inserting
    // access, so calling it concurrently from the work-item threads is a data
    // race and silently adds entries for unregistered threads.
    const auto entry = threadIDToLocalIDmap.find(std::this_thread::get_id());
    if (entry == threadIDToLocalIDmap.end()) {
        return 0;
    }
    return entry->second % 24;
}
// Emulates OpenCL get_global_id().
//
// Two emulation modes are supported:
//  - loop iteration: threadIDToLocalIDmap is empty and the id for `dim` is
//    read from the globalID array;
//  - one host thread per work-item: the value stored for the calling thread
//    in threadIDToLocalIDmap is returned as-is; `dim` is ignored in this mode.
//
// Returns 0 for a thread that has no entry in the map.
uint get_global_id(int dim) {
    // use id from loop iteration
    if (threadIDToLocalIDmap.empty()) {
        return globalID[dim];
    }
    // use thread id
    // find() instead of operator[]: operator[] is a non-const, inserting
    // access, so calling it concurrently from the work-item threads is a data
    // race and silently adds entries for unregistered threads.
    const auto entry = threadIDToLocalIDmap.find(std::this_thread::get_id());
    if (entry == threadIDToLocalIDmap.end()) {
        return 0;
    }
    return entry->second;
}
// Emulates OpenCL get_local_size(): returns the localSize entry for `dim`.
// `dim` is used as an array index without range checking.
uint get_local_size(int dim) {
    const uint extent = localSize[dim];
    return extent;
}
// Emulates OpenCL get_num_groups(): NUM_OF_THREADS divided by the hard-coded
// group size of 24. The result is the same for every `dim`.
uint get_num_groups(int dim) {
    const uint groupCount = NUM_OF_THREADS / 24;
    return groupCount;
}
// Emulates OpenCL get_group_id(): the global id for `dim` divided by the
// hard-coded group size of 24.
uint get_group_id(int dim) {
    const uint globalIndex = get_global_id(dim);
    return globalIndex / 24;
}
// Emulates the OpenCL barrier() built-in by entering the global barrier
// object. The flags argument `x` is not used.
void barrier(int x) {
    pGlobalBarrier->enter();
    // Debugging aid only: the volatile keeps this statement from being
    // optimized away so it can host a breakpoint.
    volatile int breakpointAnchor = 0;
    // PUT BREAKPOINT HERE to stop after each barrier
    breakpointAnchor = breakpointAnchor + 1;
}
// Emulates OpenCL read_imageui(): reads one pixel from `im` at `coord`
// (x, y, z) and returns its channels widened into the components of a uint4.
//
// Each channel's bytesPerChannel bytes are copied into the low-addressed
// bytes of successive 4-byte components. Components that the image does not
// provide keep the defaults {0, 0, 0, 1}.
//
// No bounds check is made on `coord` against the image dimensions.
uint4 read_imageui(image *im, int4 coord) {
    uint4 color = {0, 0, 0, 1};
    uint offset = ((coord.z * im->height + coord.y) * im->width + coord.x) * im->bytesPerChannel * im->channels;
    const char *src = &im->ptr[offset];
    char *colorDst = reinterpret_cast<char *>(&color);

    // Each colour component occupies 4 bytes of the result.
    constexpr size_t componentStride = 4;
    // Bytes still available in `color` starting at colorDst. This must shrink
    // as colorDst advances; passing sizeof(uint4) on every iteration would
    // overstate the capacity and let memcpy_s write past the end of `color`.
    size_t dstRemaining = sizeof(uint4);

    for (uint i = 0; i < im->channels && dstRemaining >= componentStride; i++) {
        memcpy_s(colorDst, dstRemaining, src, im->bytesPerChannel);
        src += im->bytesPerChannel;
        colorDst += componentStride;
        dstRemaining -= componentStride;
    }
    return color;
}
// Emulates OpenCL write_imageui(): stores the components of `color` into the
// pixel of `im` addressed by `coord` (x, y, z), copying bytesPerChannel bytes
// from each successive 4-byte component.
//
// No bounds check is made on `coord` against the image dimensions.
uint4 write_imageui(image *im, uint4 coord, uint4 color) {
    const auto pixelIndex = (coord.z * im->height + coord.y) * im->width + coord.x;
    const uint offset = pixelIndex * im->bytesPerChannel * im->channels;

    // Total image size in bytes; bounds the copies below.
    const size_t size = im->width * im->height * im->depth * im->bytesPerChannel * im->channels;
    // NOTE(review): this capacity is measured from the start of the pixel and
    // is not reduced as `dst` advances - confirm whether that is intended.
    const size_t dstCapacity = size - offset;

    char *dst = &im->ptr[offset];
    const char *src = reinterpret_cast<const char *>(&color);
    for (uint channel = 0; channel < im->channels; ++channel) {
        memcpy_s(dst, dstCapacity, src, im->bytesPerChannel);
        dst += im->bytesPerChannel;
        src += 4; // each colour component is 4 bytes wide
    }

    // NOTE(review): `dst` now points just past the pixel that was written, so
    // the returned value is read from the bytes that follow it - verify that
    // callers do not rely on this result.
    return *reinterpret_cast<uint4 *>(dst); // NOLINT
}
// Emulates OpenCL convert_uchar_sat(uint): converts with saturation, i.e.
// values above the largest uchar (255) clamp to 255 instead of wrapping.
uchar convert_uchar_sat(uint c) {
    const uint maxValue = 0xFFu;
    return static_cast<uchar>(c > maxValue ? maxValue : c);
}
// Emulates OpenCL convert_ushort_sat(uint): converts with saturation, i.e.
// values above the largest ushort (65535) clamp to 65535 instead of wrapping.
ushort convert_ushort_sat(uint c) {
    const uint maxValue = 0xFFFFu;
    return static_cast<ushort>(c > maxValue ? maxValue : c);
}
} // namespace BuiltinKernelsSimulation
|