File: test_command_queue.cpp

package info (click to toggle)
boost1.90 1.90.0-1
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 593,120 kB
  • sloc: cpp: 4,190,908; xml: 196,648; python: 34,618; ansic: 23,145; asm: 5,468; sh: 3,774; makefile: 1,161; perl: 1,020; sql: 728; ruby: 676; yacc: 478; java: 77; lisp: 24; csh: 6
file content (352 lines) | stat: -rw-r--r-- 10,963 bytes parent folder | download | duplicates (17)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
//---------------------------------------------------------------------------//
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
//
// Distributed under the Boost Software License, Version 1.0
// See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt
//
// See http://boostorg.github.com/compute for more information.
//---------------------------------------------------------------------------//

#define BOOST_TEST_MODULE TestCommandQueue
#include <boost/test/unit_test.hpp>

#include <iostream>

#include <boost/compute/kernel.hpp>
#include <boost/compute/system.hpp>
#include <boost/compute/program.hpp>
#include <boost/compute/command_queue.hpp>
#include <boost/compute/algorithm/fill.hpp>
#include <boost/compute/container/vector.hpp>
#include <boost/compute/utility/dim.hpp>
#include <boost/compute/utility/source.hpp>
#include <boost/compute/detail/diagnostic.hpp>

#include "check_macros.hpp"
#include "context_setup.hpp"

namespace bc = boost::compute;
namespace compute = boost::compute;

BOOST_AUTO_TEST_CASE(get_context)
{
    BOOST_VERIFY(queue.get_context() == context);
    BOOST_VERIFY(queue.get_info<CL_QUEUE_CONTEXT>() == context.get());
}

BOOST_AUTO_TEST_CASE(get_device)
{
    BOOST_VERIFY(queue.get_info<CL_QUEUE_DEVICE>() == device.get());
}

BOOST_AUTO_TEST_CASE(equality_operator)
{
    compute::command_queue queue1(context, device);
    BOOST_CHECK(queue1 == queue1);

    compute::command_queue queue2 = queue1;
    BOOST_CHECK(queue1 == queue2);

    compute::command_queue queue3(context, device);
    BOOST_CHECK(queue1 != queue3);
}

BOOST_AUTO_TEST_CASE(event_profiling)
{
    bc::command_queue queue(context, device, bc::command_queue::enable_profiling);

    int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    bc::buffer buffer(context, sizeof(data));

    bc::event event =
        queue.enqueue_write_buffer_async(buffer,
                                         0,
                                         sizeof(data),
                                         static_cast<const void *>(data));
    queue.finish();

    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_queued);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_submit);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_start);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_end);
}

BOOST_AUTO_TEST_CASE(kernel_profiling)
{
    // create queue with profiling enabled
    boost::compute::command_queue queue(
        context, device, boost::compute::command_queue::enable_profiling
    );

    // input data
    int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    boost::compute::buffer buffer(context, sizeof(data));

    // copy input data to device
    queue.enqueue_write_buffer(buffer, 0, sizeof(data), data);

    // setup kernel
    const char source[] =
        "__kernel void iscal(__global int *buffer, int alpha)\n"
        "{\n"
        "    buffer[get_global_id(0)] *= alpha;\n"
        "}\n";

    boost::compute::program program =
        boost::compute::program::create_with_source(source, context);
    program.build();

    boost::compute::kernel kernel(program, "iscal");
    kernel.set_arg(0, buffer);
    kernel.set_arg(1, 2);

    // execute kernel
    size_t global_work_offset = 0;
    size_t global_work_size = 8;

    boost::compute::event event =
        queue.enqueue_nd_range_kernel(kernel,
                                      size_t(1),
                                      &global_work_offset,
                                      &global_work_size,
                                      0);

    // wait until kernel is finished
    event.wait();

    // check profiling information
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_queued);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_submit);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_start);
    event.get_profiling_info<cl_ulong>(bc::event::profiling_command_end);

    // read results back to host
    queue.enqueue_read_buffer(buffer, 0, sizeof(data), data);

    // check results
    BOOST_CHECK_EQUAL(data[0], 2);
    BOOST_CHECK_EQUAL(data[1], 4);
    BOOST_CHECK_EQUAL(data[2], 6);
    BOOST_CHECK_EQUAL(data[3], 8);
    BOOST_CHECK_EQUAL(data[4], 10);
    BOOST_CHECK_EQUAL(data[5], 12);
    BOOST_CHECK_EQUAL(data[6], 14);
    BOOST_CHECK_EQUAL(data[7], 16);
}

BOOST_AUTO_TEST_CASE(construct_from_cl_command_queue)
{
    // create cl_command_queue
    cl_command_queue cl_queue;
#ifdef BOOST_COMPUTE_CL_VERSION_2_0
    if (device.check_version(2, 0)){ // runtime check
        cl_queue =
            clCreateCommandQueueWithProperties(context, device.id(), 0, 0);
    } else
#endif // BOOST_COMPUTE_CL_VERSION_2_0
    {
        // Suppress deprecated declarations warning
        BOOST_COMPUTE_DISABLE_DEPRECATED_DECLARATIONS();
        cl_queue =
            clCreateCommandQueue(context, device.id(), 0, 0);
        BOOST_COMPUTE_ENABLE_DEPRECATED_DECLARATIONS();
    }
    BOOST_VERIFY(cl_queue);

    // create boost::compute::command_queue
    boost::compute::command_queue queue(cl_queue);

    // check queue
    BOOST_CHECK(queue.get_context() == context);
    BOOST_CHECK(cl_command_queue(queue) == cl_queue);

    // cleanup cl_command_queue
    clReleaseCommandQueue(cl_queue);
}

#ifdef BOOST_COMPUTE_CL_VERSION_1_1
BOOST_AUTO_TEST_CASE(write_buffer_rect)
{
    REQUIRES_OPENCL_VERSION(1, 1);

    // skip this test on AMD GPUs due to a buggy implementation
    // of the clEnqueueWriteBufferRect() function
    if(device.vendor() == "Advanced Micro Devices, Inc." &&
       device.type() & boost::compute::device::gpu){
        std::cerr << "skipping write_buffer_rect test on AMD GPU" << std::endl;
        return;
    }

    int data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
    boost::compute::buffer buffer(context, 8 * sizeof(int));

    // copy every other value to the buffer
    size_t buffer_origin[] = { 0, 0, 0 };
    size_t host_origin[] = { 0, 0, 0 };
    size_t region[] = { sizeof(int), sizeof(int), 1 };

    queue.enqueue_write_buffer_rect(
        buffer,
        buffer_origin,
        host_origin,
        region,
        sizeof(int),
        0,
        2 * sizeof(int),
        0,
        data
    );

    // check output values
    int output[4];
    queue.enqueue_read_buffer(buffer, 0, 4 * sizeof(int), output);
    BOOST_CHECK_EQUAL(output[0], 1);
    BOOST_CHECK_EQUAL(output[1], 3);
    BOOST_CHECK_EQUAL(output[2], 5);
    BOOST_CHECK_EQUAL(output[3], 7);
}
#endif // BOOST_COMPUTE_CL_VERSION_1_1

static bool nullary_kernel_executed = false;

static void nullary_kernel()
{
    nullary_kernel_executed = true;
}

BOOST_AUTO_TEST_CASE(native_kernel)
{
    cl_device_exec_capabilities exec_capabilities =
        device.get_info<CL_DEVICE_EXECUTION_CAPABILITIES>();
    if(!(exec_capabilities & CL_EXEC_NATIVE_KERNEL)){
        std::cerr << "skipping native_kernel test: "
                  << "device does not support CL_EXEC_NATIVE_KERNEL"
                  << std::endl;
        return;
    }

    compute::vector<int> vector(1000, context);
    compute::fill(vector.begin(), vector.end(), 42, queue);
    BOOST_CHECK_EQUAL(nullary_kernel_executed, false);
    queue.enqueue_native_kernel(&nullary_kernel);
    queue.finish();
    BOOST_CHECK_EQUAL(nullary_kernel_executed, true);
}

BOOST_AUTO_TEST_CASE(copy_with_wait_list)
{
    int data1[] = { 1, 3, 5, 7 };
    int data2[] = { 2, 4, 6, 8 };

    compute::buffer buf1(context, 4 * sizeof(int));
    compute::buffer buf2(context, 4 * sizeof(int));

    compute::event write_event1 =
        queue.enqueue_write_buffer_async(buf1, 0, buf1.size(), data1);

    compute::event write_event2 =
        queue.enqueue_write_buffer_async(buf2, 0, buf2.size(), data2);

    compute::event read_event1 =
        queue.enqueue_read_buffer_async(buf1, 0, buf1.size(), data2, write_event1);

    compute::event read_event2 =
        queue.enqueue_read_buffer_async(buf2, 0, buf2.size(), data1, write_event2);

    read_event1.wait();
    read_event2.wait();

    CHECK_HOST_RANGE_EQUAL(int, 4, data1, (2, 4, 6, 8));
    CHECK_HOST_RANGE_EQUAL(int, 4, data2, (1, 3, 5, 7));
}

#ifndef BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST
BOOST_AUTO_TEST_CASE(enqueue_kernel_with_extents)
{
    using boost::compute::dim;
    using boost::compute::uint_;

    const char source[] = BOOST_COMPUTE_STRINGIZE_SOURCE(
        __kernel void foo(__global int *output1, __global int *output2)
        {
            output1[get_global_id(0)] = get_local_id(0);
            output2[get_global_id(1)] = get_local_id(1);
        }
    );

    compute::kernel kernel =
        compute::kernel::create_with_source(source, "foo", context);

    compute::vector<uint_> output1(4, context);
    compute::vector<uint_> output2(4, context);

    kernel.set_arg(0, output1);
    kernel.set_arg(1, output2);

    queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(1, 1));

    CHECK_RANGE_EQUAL(int, 4, output1, (0, 0, 0, 0));
    CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));

    // Maximum number of work-items that can be specified in each
    // dimension of the work-group to clEnqueueNDRangeKernel.
    std::vector<size_t> max_work_item_sizes =
        device.get_info<CL_DEVICE_MAX_WORK_ITEM_SIZES>();

    if(max_work_item_sizes[0] < size_t(2)) {
        return;
    }

    queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 1));

    CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));
    CHECK_RANGE_EQUAL(int, 4, output2, (0, 0, 0, 0));

    if(max_work_item_sizes[1] < size_t(2)) {
        return;
    }

    queue.enqueue_nd_range_kernel(kernel, dim(0, 0), dim(4, 4), dim(2, 2));

    CHECK_RANGE_EQUAL(int, 4, output1, (0, 1, 0, 1));
    CHECK_RANGE_EQUAL(int, 4, output2, (0, 1, 0, 1));
}
#endif // BOOST_COMPUTE_NO_HDR_INITIALIZER_LIST

#ifdef BOOST_COMPUTE_CL_VERSION_2_1
BOOST_AUTO_TEST_CASE(get_default_device_queue)
{
    REQUIRES_OPENCL_VERSION(2, 1);

    boost::compute::command_queue default_device_queue(
        context, device,
        boost::compute::command_queue::on_device |
        boost::compute::command_queue::on_device_default |
        boost::compute::command_queue::enable_out_of_order_execution
    );
    BOOST_CHECK_NO_THROW(queue.get_info<CL_QUEUE_DEVICE_DEFAULT>());
    BOOST_CHECK_EQUAL(
        queue.get_default_device_queue(),
        default_device_queue
    );
}

BOOST_AUTO_TEST_CASE(set_as_default_device_queue)
{
    REQUIRES_OPENCL_VERSION(2, 1);

    boost::compute::command_queue new_default_device_queue(
        context, device,
        boost::compute::command_queue::on_device |
        boost::compute::command_queue::enable_out_of_order_execution
    );
    new_default_device_queue.set_as_default_device_queue();
    BOOST_CHECK_EQUAL(
         queue.get_default_device_queue(),
         new_default_device_queue
    );
}
#endif

BOOST_AUTO_TEST_SUITE_END()