1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
|
#include "pocl_opencl.h"
#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define CL_HPP_TARGET_OPENCL_VERSION 120
#include <CL/opencl.hpp>
#include <cassert>
#include <iostream>
using namespace std;
const char *SOURCE = R"RAW(
__kernel void test(global uint *output, global const uint* trialValue){
/* Variable declarations */
int lid = get_local_id(0);
int i, j, k;
uint result[Y][Z];
__local uint localResult[X][Y][Z];
for (i = 0; i < Y; ++i)
for (j = 0; j < Z; ++j) {
result[i][j] = 0;
localResult[lid][i][j] = 0;
}
for (i = 0; i < Y; ++i)
for (j = 0; j < Z; ++j) {
result[i][j] += trialValue[j] * 4;
localResult[lid][i][j] += result[i][j];
}
barrier (CLK_LOCAL_MEM_FENCE);
uint sum = 0;
for (k = 0; k < X; ++k)
for (i = 0; i < Y; ++i)
for (j = 0; j < Z; ++j) {
sum += localResult[k][i][j];
}
output[lid] = sum;
}
)RAW";
bool test_invocation(unsigned x, unsigned y, unsigned z,
const std::string &arg_x, const std::string &arg_y,
const std::string &arg_z, cl::CommandQueue &queue) {
unsigned expected_sum = x * y * z * 4;
unsigned local_size = x;
assert(local_size > 0);
assert(local_size <= 256);
cl::Program program(SOURCE);
std::string options = "-cl-std=CL1.2";
options += " -DX=" + arg_x + " -DY=" + arg_y + " -DZ=" + arg_z;
program.build(options.c_str());
cl_uint *in1 = new cl_uint[z];
cl_uint *out = new cl_uint[x];
for (size_t i = 0; i < x; ++i) {
out[i] = 0;
}
for (size_t i = 0; i < z; ++i) {
in1[i] = 1;
}
cl::Buffer inbuf((cl_mem_flags)(CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR),
(z * sizeof(cl_uint)), in1);
cl::Buffer outbuf((cl_mem_flags)(CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR),
(x * sizeof(cl_uint)), out);
// This triggers compilation of dynamic WG binaries.
cl::Program::Binaries binaries{};
int err = program.getInfo<>(CL_PROGRAM_BINARIES, &binaries);
assert(err == CL_SUCCESS);
auto kernel = cl::KernelFunctor<cl::Buffer, cl::Buffer>(program, "test");
kernel(
cl::EnqueueArgs(queue, cl::NDRange(local_size), cl::NDRange(local_size)),
outbuf, inbuf);
queue.enqueueReadBuffer(outbuf, 1, 0, (x * sizeof(cl_uint)), out);
queue.finish();
bool correct = true;
for (size_t i = 0; i < x; ++i) {
if (out[i] != expected_sum)
correct = false;
}
std::cout << (correct ? "OK\n" : "FAIL\n");
delete[] in1;
delete[] out;
return correct;
}
int main(int argc, char *argv[]) {
if (argc < 4) {
std::cout << "USAGE: $0 X Y Z\n";
return EXIT_FAILURE;
}
cl::Platform platform = cl::Platform::getDefault();
cl::Device device = cl::Device::getDefault();
std::string arg_x(argv[1]);
std::string arg_y(argv[2]);
std::string arg_z(argv[3]);
unsigned x = std::stoi(argv[1]);
unsigned y = std::stoi(argv[2]);
unsigned z = std::stoi(argv[3]);
try {
cl::CommandQueue queue = cl::CommandQueue::getDefault();
if (!test_invocation(x, y, z, arg_x, arg_y, arg_z, queue))
return EXIT_FAILURE;
if (!test_invocation(y, z, x, arg_y, arg_z, arg_x, queue))
return EXIT_FAILURE;
if (!test_invocation(z, x, y, arg_z, arg_x, arg_y, queue))
return EXIT_FAILURE;
} catch (cl::Error &err) {
std::cerr << "ERROR: " << err.what() << "(" << err.err() << ")"
<< std::endl;
return EXIT_FAILURE;
}
platform.unloadCompiler();
return EXIT_SUCCESS;
}
|