File: test_issue_231.cpp

package info (click to toggle)
pocl 0.13-8
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 18,452 kB
  • ctags: 13,529
  • sloc: lisp: 113,221; cpp: 57,376; ansic: 25,021; sh: 6,150; makefile: 2,067; python: 699; pascal: 98; java: 72; xml: 49
file content (94 lines) | stat: -rw-r--r-- 2,982 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94

// Used to cause an LLVM crash with Haswell/Broadwell.
// See https://github.com/pocl/pocl/issues/231

#define CL_HPP_ENABLE_EXCEPTIONS
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_CL_1_2_DEFAULT_BUILD
#include <CL/cl2.hpp>
#include <iostream>

using namespace std;

const char *SOURCE = R"RAW(
#define local_barrier() barrier(CLK_LOCAL_MEM_FENCE);
#define WITHIN_KERNEL /* empty */
#define KERNEL __kernel
#define GLOBAL_MEM __global
#define LOCAL_MEM __local
#define LOCAL_MEM_ARG __local
#define REQD_WG_SIZE(X,Y,psc_Z) __attribute__((reqd_work_group_size(X, Y, psc_Z)))
#define psc_LID_0 get_local_id(0)
#define psc_LID_1 get_local_id(1)
#define psc_LID_2 get_local_id(2)
#define psc_GID_0 get_group_id(0)
#define psc_GID_1 get_group_id(1)
#define psc_GID_2 get_group_id(2)
#define psc_LDIM_0 get_local_size(0)
#define psc_LDIM_1 get_local_size(1)
#define psc_LDIM_2 get_local_size(2)
#define psc_GDIM_0 get_num_groups(0)
#define psc_GDIM_1 get_num_groups(1)
#define psc_GDIM_2 get_num_groups(2)
    #if __OPENCL_C_VERSION__ < 120
    #pragma OPENCL EXTENSION cl_khr_fp64: enable
    #endif
//CL//
#define psc_WG_SIZE 16
#define psc_SCAN_EXPR(a, b, across_seg_boundary) a+b
#define psc_INPUT_EXPR(i) (input_ary[i])
typedef int psc_scan_type;
typedef int psc_index_type;
// NO_SEG_BOUNDARY is the largest representable integer in psc_index_type.
// This assumption is used in code below.
#define NO_SEG_BOUNDARY 2147483647
//CL//
#define psc_K 256
KERNEL
REQD_WG_SIZE(psc_WG_SIZE, 1, 1)
void scan_scan_intervals_lev1(
    __global int *input_ary, __global int *output_ary,
    GLOBAL_MEM psc_scan_type *restrict psc_partial_scan_buffer,
    const psc_index_type N,
    const psc_index_type psc_interval_size
        , GLOBAL_MEM psc_scan_type *restrict psc_interval_results
    )
{
    // index psc_K in first dimension used for psc_carry storage
    struct psc_wrapped_scan_type
    {
        psc_scan_type psc_value;
    };
    // padded in psc_WG_SIZE to avoid bank conflicts
    LOCAL_MEM struct psc_wrapped_scan_type psc_ldata[psc_WG_SIZE];
    for(int i = 0; i < 10; ++i)
    {
        local_barrier();
        psc_scan_type psc_val = 0;
        if (psc_LID_0 >= 2)
        {
            psc_scan_type psc_tmp = psc_ldata[psc_LID_0 - 2].psc_value;
            psc_val = psc_tmp+ psc_val;
        }
        // {{{ writes to local allowed, reads from local not allowed
        psc_ldata[psc_LID_0].psc_value = psc_val;
    }
}
)RAW";

int main(int argc, char *argv[])
{
  cl::Device device = cl::Device::getDefault();
  cl::CommandQueue queue = cl::CommandQueue::getDefault();
  cl::Program program(SOURCE, true);

  auto kernel = cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl_int, cl_int, cl::Buffer>(program, "scan_scan_intervals_lev1");

  cl_int i;
  cl::Buffer buffer;
  kernel(cl::EnqueueArgs(queue, cl::NDRange(16), cl::NDRange(16)),
         buffer, buffer, buffer, i, i, buffer);

  queue.finish();
}