/* Host-side helper that runs the "dot_product" OpenCL kernel. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pocl_opencl.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Run the "dot_product" kernel from PROGRAM and read its output into DST.
 *
 * Steps performed:
 *   1. Create two read-only device buffers initialised from SRCA and SRCB
 *      (N cl_float4 elements each) and one read-write buffer of N cl_float.
 *   2. Create the "dot_product" kernel and bind the three buffers as kernel
 *      arguments 0, 1 and 2.
 *   3. Enqueue a one-dimensional NDRange with a global size of N and a
 *      local size of 2.
 *   4. Do a blocking read of the output buffer into DST.
 *
 * SRCA and SRCB are passed through poclu_bswap_cl_float_array () before the
 * upload and once more after the read-back; DST is passed through it after
 * the read-back.
 * NOTE(review): presumably that helper converts between host and device
 * byte order in place.  On the error path the second pass over SRCA/SRCB is
 * skipped, so the caller's inputs may be left converted -- confirm whether
 * callers rely on the inputs after a failure.
 *
 * @param context    OpenCL context the buffers are created in.
 * @param device     Device handed to poclu_bswap_cl_float_array ().
 * @param cmd_queue  Queue the kernel launch and the read are enqueued on.
 * @param program    Program expected to contain the "dot_product" kernel.
 * @param n          Number of cl_float4 input pairs / cl_float outputs.
 * @param srcA       First input array, N elements (modified in place by the
 *                   byte-swap helper).
 * @param srcB       Second input array, N elements (same remark as SRCA).
 * @param dst        Output array with room for N cl_float values.
 *
 * @return The value of the last OpenCL status recorded in `err`:
 *         CL_SUCCESS when every call succeeded.  CHECK_CL_ERROR2 is expected
 *         to jump to the ERROR label on failure, in which case the failing
 *         call's error code is returned.
 */
int
exec_dot_product_kernel (cl_context context, cl_device_id device,
                         cl_command_queue cmd_queue, cl_program program,
                         int n, cl_float4 *srcA, cl_float4 *srcB,
                         cl_float *dst)
{
  cl_kernel kernel = NULL;
  cl_mem memobjs[3] = { NULL, NULL, NULL };
  size_t global_work_size[1];
  size_t local_work_size[1];
  cl_int err = CL_SUCCESS;
  size_t k;

  poclu_bswap_cl_float_array (device, (cl_float *)srcA, 4 * n);
  poclu_bswap_cl_float_array (device, (cl_float *)srcB, 4 * n);

  memobjs[0]
      = clCreateBuffer (context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                        sizeof (cl_float4) * n, srcA, &err);
  CHECK_CL_ERROR2 (err);

  memobjs[1]
      = clCreateBuffer (context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                        sizeof (cl_float4) * n, srcB, &err);
  CHECK_CL_ERROR2 (err);

  memobjs[2] = clCreateBuffer (context, CL_MEM_READ_WRITE,
                               sizeof (cl_float) * n, NULL, &err);
  CHECK_CL_ERROR2 (err);

  /* Pass &err so that a failed kernel lookup is actually reported; with a
     NULL errcode_ret the check below would only re-test the stale status
     of the previous call.  */
  kernel = clCreateKernel (program, "dot_product", &err);
  CHECK_CL_ERROR2 (err);

  err = clSetKernelArg (kernel, 0, sizeof (cl_mem), (void *)&memobjs[0]);
  CHECK_CL_ERROR2 (err);

  err = clSetKernelArg (kernel, 1, sizeof (cl_mem), (void *)&memobjs[1]);
  CHECK_CL_ERROR2 (err);

  err = clSetKernelArg (kernel, 2, sizeof (cl_mem), (void *)&memobjs[2]);
  CHECK_CL_ERROR2 (err);

  global_work_size[0] = n;
  local_work_size[0] = 2;

  err = clEnqueueNDRangeKernel (cmd_queue, kernel, 1, NULL, global_work_size,
                                local_work_size, 0, NULL, NULL);
  CHECK_CL_ERROR2 (err);

  /* Blocking read (CL_TRUE): dst is filled when the call returns.  */
  err = clEnqueueReadBuffer (cmd_queue, memobjs[2], CL_TRUE, 0,
                             n * sizeof (cl_float), dst, 0, NULL, NULL);
  CHECK_CL_ERROR2 (err);

  poclu_bswap_cl_float_array (device, (cl_float *)dst, n);
  poclu_bswap_cl_float_array (device, (cl_float *)srcA, 4 * n);
  poclu_bswap_cl_float_array (device, (cl_float *)srcB, 4 * n);

ERROR:
  /* Shared cleanup for the success and failure paths.  Handles that were
     never created are still NULL and are skipped.  */
  for (k = 0; k < sizeof (memobjs) / sizeof (memobjs[0]); ++k)
    {
      if (memobjs[k] != NULL)
        clReleaseMemObject (memobjs[k]);
    }
  if (kernel != NULL)
    clReleaseKernel (kernel);

  return err;
}
#ifdef __cplusplus
}
#endif
|