File: example1_exec.c

package info (click to toggle)
pocl 6.0-7
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 25,320 kB
  • sloc: lisp: 149,513; ansic: 103,778; cpp: 54,947; python: 1,513; sh: 949; ruby: 255; pascal: 226; tcl: 180; makefile: 175; java: 72; xml: 49
file content (78 lines) | stat: -rw-r--r-- 2,407 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "pocl_opencl.h"

#ifdef __cplusplus
extern "C" {
#endif

  /**
   * Run the "dot_product" kernel of PROGRAM over N elements.
   *
   * Creates two read-only device buffers initialised from srcA and srcB
   * (n cl_float4 elements each), one read-write result buffer of n cl_float
   * elements, launches the kernel as a 1-D range of n work-items with a
   * fixed work-group size of 2, and performs a blocking read of the result
   * buffer into dst.
   *
   * srcA and srcB are passed through poclu_bswap_cl_float_array before the
   * upload and again before returning (presumably an endianness conversion
   * between host and device -- confirm against poclu), so they are handed
   * back to the caller in their original form on success and on failure.
   *
   * NOTE(review): with a fixed local size of 2, n is presumably expected to
   * be a positive multiple of 2; this function does not validate it.
   *
   * @param context    OpenCL context used to create the buffers.
   * @param device     Device handed to poclu_bswap_cl_float_array.
   * @param cmd_queue  Command queue the kernel and the read are enqueued on.
   * @param program    Program containing the "dot_product" kernel.
   * @param n          Number of elements in srcA, srcB and dst.
   * @param srcA       First input array (n cl_float4 elements).
   * @param srcB       Second input array (n cl_float4 elements).
   * @param dst        Output array receiving n cl_float results.
   * @return CL_SUCCESS, or the error code of the first OpenCL call that
   *         failed.
   */
  int
  exec_dot_product_kernel (cl_context context, cl_device_id device,
                           cl_command_queue cmd_queue, cl_program program,
                           int n, cl_float4 *srcA, cl_float4 *srcB,
                           cl_float *dst)
  {
    cl_kernel kernel = NULL;
    cl_mem memobjs[3] = { NULL, NULL, NULL };
    size_t global_work_size[1];
    size_t local_work_size[1];
    cl_int err = CL_SUCCESS;
    unsigned i;

    poclu_bswap_cl_float_array (device, (cl_float *)srcA, 4 * n);
    poclu_bswap_cl_float_array (device, (cl_float *)srcB, 4 * n);

    /* CHECK_CL_ERROR2 is relied upon to jump to the ERROR label below when
       err signals a failure. */
    memobjs[0]
        = clCreateBuffer (context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                          sizeof (cl_float4) * n, srcA, &err);
    CHECK_CL_ERROR2 (err);

    memobjs[1]
        = clCreateBuffer (context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                          sizeof (cl_float4) * n, srcB, &err);
    CHECK_CL_ERROR2 (err);

    memobjs[2] = clCreateBuffer (context, CL_MEM_READ_WRITE,
                                 sizeof (cl_float) * n, NULL, &err);
    CHECK_CL_ERROR2 (err);

    /* Pass &err so that a kernel creation failure is actually reported;
       with a NULL errcode_ret the check below would test a stale value. */
    kernel = clCreateKernel (program, "dot_product", &err);
    CHECK_CL_ERROR2 (err);

    err = clSetKernelArg (kernel, 0, sizeof (cl_mem), (void *)&memobjs[0]);
    CHECK_CL_ERROR2 (err);

    err = clSetKernelArg (kernel, 1, sizeof (cl_mem), (void *)&memobjs[1]);
    CHECK_CL_ERROR2 (err);

    err = clSetKernelArg (kernel, 2, sizeof (cl_mem), (void *)&memobjs[2]);
    CHECK_CL_ERROR2 (err);

    global_work_size[0] = n;
    local_work_size[0] = 2;

    err = clEnqueueNDRangeKernel (cmd_queue, kernel, 1, NULL, global_work_size,
                                  local_work_size, 0, NULL, NULL);
    CHECK_CL_ERROR2 (err);

    /* Blocking read: dst is fully written when this call returns. */
    err = clEnqueueReadBuffer (cmd_queue, memobjs[2], CL_TRUE, 0,
                               n * sizeof (cl_float), dst, 0, NULL, NULL);
    CHECK_CL_ERROR2 (err);

    /* Only reached after a successful read, so dst holds device results. */
    poclu_bswap_cl_float_array (device, (cl_float *)dst, n);

  ERROR:
    /* The inputs were converted unconditionally at the top of the function,
       so undo that on every exit path, not just on success. */
    poclu_bswap_cl_float_array (device, (cl_float *)srcA, 4 * n);
    poclu_bswap_cl_float_array (device, (cl_float *)srcB, 4 * n);

    /* Release only the objects that were actually created; after an early
       failure the remaining handles are still NULL. */
    for (i = 0; i < sizeof (memobjs) / sizeof (memobjs[0]); ++i)
      {
        if (memobjs[i] != NULL)
          clReleaseMemObject (memobjs[i]);
      }
    if (kernel != NULL)
      clReleaseKernel (kernel);

    return err;
  }

#ifdef __cplusplus
}
#endif