File: vecadd_exec.c

package info (click to toggle)
pocl 6.0-7
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 25,320 kB
  • sloc: lisp: 149,513; ansic: 103,778; cpp: 54,947; python: 1,513; sh: 949; ruby: 255; pascal: 226; tcl: 180; makefile: 175; java: 72; xml: 49
file content (78 lines) | stat: -rw-r--r-- 2,208 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include "poclu.h"
#include <CL/opencl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Run the "vecadd" kernel from PROGRAM over N floats and read the result
   back into DST.

   context, device, cmd_queue, program
             OpenCL objects supplied by the caller; PROGRAM must contain a
             kernel named "vecadd".
   n         number of cl_float elements in SRCA, SRCB and DST; also used as
             the one-dimensional global work size.
   wg_size   local work-group size passed to clEnqueueNDRangeKernel.
   srcA/srcB input arrays of N floats.  They are passed through
             poclu_bswap_cl_float_array before upload and again before
             returning (presumably a byte-order conversion that is its own
             inverse -- see poclu.h), so the caller gets them back unchanged
             on both the success and the error path.
   dst       output array of N floats, filled by a blocking buffer read.

   Returns CL_SUCCESS, or the OpenCL error code of the first failing call.

   NOTE: CHECK_CL_ERROR2 comes from poclu.h; it is expected to jump to the
   ERROR label below when its argument signals a failure.  */
int
exec_vecadd_kernel (cl_context context, cl_device_id device,
                    cl_command_queue cmd_queue, cl_program program, int n,
                    int wg_size,
                    cl_float *srcA, cl_float *srcB, cl_float *dst)
{
  cl_kernel kernel = NULL;
  cl_mem memobjs[3] = { NULL, NULL, NULL };
  size_t global_work_size[1];
  size_t local_work_size[1];
  cl_int err = CL_SUCCESS;
  int i;

  /* The arrays hold N scalar floats (the buffers below are created with
     sizeof (cl_float) * n), so exactly N elements are converted.  */
  poclu_bswap_cl_float_array (device, srcA, n);
  poclu_bswap_cl_float_array (device, srcB, n);

  memobjs[0]
    = clCreateBuffer (context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                      sizeof (cl_float) * n, srcA, &err);
  CHECK_CL_ERROR2 (err);

  memobjs[1]
    = clCreateBuffer (context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                      sizeof (cl_float) * n, srcB, &err);
  CHECK_CL_ERROR2 (err);

  memobjs[2] = clCreateBuffer (context, CL_MEM_READ_WRITE,
                               sizeof (cl_float) * n, NULL, &err);
  CHECK_CL_ERROR2 (err);

  /* Pass &err so that the check below tests this call and not the stale
     status of the previous one.  */
  kernel = clCreateKernel (program, "vecadd", &err);
  CHECK_CL_ERROR2 (err);

  /* Kernel arguments 0..2 are the two inputs and the output buffer.  */
  for (i = 0; i < 3; ++i)
    {
      err = clSetKernelArg (kernel, (cl_uint)i, sizeof (cl_mem), &memobjs[i]);
      CHECK_CL_ERROR2 (err);
    }

  global_work_size[0] = n;
  local_work_size[0] = wg_size;

  err = clEnqueueNDRangeKernel (cmd_queue, kernel, 1, NULL, global_work_size,
                                local_work_size, 0, NULL, NULL);
  CHECK_CL_ERROR2 (err);

  /* Blocking read (CL_TRUE): DST is complete when the call returns.  */
  err = clEnqueueReadBuffer (cmd_queue, memobjs[2], CL_TRUE, 0,
                             n * sizeof (cl_float), dst, 0, NULL, NULL);
  CHECK_CL_ERROR2 (err);

  poclu_bswap_cl_float_array (device, dst, n);

 ERROR:
  /* The inputs were converted before the first possible jump to this label,
     so converting them back here restores them on every path.  */
  poclu_bswap_cl_float_array (device, srcA, n);
  poclu_bswap_cl_float_array (device, srcB, n);

  /* Release only the objects that were actually created.  */
  for (i = 0; i < 3; ++i)
    {
      if (memobjs[i] != NULL)
        clReleaseMemObject (memobjs[i]);
    }
  if (kernel != NULL)
    clReleaseKernel (kernel);
  return err;
}

#ifdef __cplusplus
}
#endif