1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
|
/* Barrier-inside-a-loop test kernel.

   On each of three iterations every work-item stores a value derived
   from its own flattened id and the iteration number, waits on a
   barrier, reads the slot of the next work-item (wrapping to slot 0
   for the last one), waits on a second barrier, and finally overwrites
   its own slot with the value it read.

   If the barriers are honoured, output[id] ends up holding
   neighbour_id * 1000 + 2.

   output: buffer indexed by flattened global id; it needs at least
           get_global_size (0) * get_global_size (1) * get_global_size (2)
           elements.

   NOTE(review): barrier () only synchronises the work-items of one
   work-group, so the neighbour exchange presumably relies on the host
   launching the whole range as a single work-group -- confirm against
   the host code.  */
kernel void
test_kernel (global int *output)
{
  /* Row-major linearisation of the 3-D global id.  The dimension-2
     term has to be scaled by the extents of BOTH lower dimensions;
     scaling it by get_global_size (1) alone makes distinct work-items
     collide whenever get_global_size (0) > 1 in a 3-D launch.  */
  size_t flat_id
    = (get_global_id (2) * get_global_size (1) + get_global_id (1))
        * get_global_size (0)
      + get_global_id (0);
  size_t grid_size
    = get_global_size (2) * get_global_size (1) * get_global_size (0);

  /* Slot of the next work-item, wrapping around at the end of the grid.  */
  size_t neighbour = (flat_id + 1 == grid_size) ? 0 : (flat_id + 1);

  /* The counter is volatile in the original test; presumably this keeps
     the compiler from unrolling the loop so that the barriers stay
     inside a real loop -- confirm before changing it.  */
  for (volatile int i = 0; i < 3; ++i)
    {
      output[flat_id] = flat_id * 1000 + i;

      /* All stores of this iteration must be visible before any
         work-item reads its neighbour's slot.  */
      barrier (CLK_GLOBAL_MEM_FENCE);
      int temp = output[neighbour];

      /* All neighbour reads must be finished before any slot is
         overwritten below.  */
      barrier (CLK_GLOBAL_MEM_FENCE);

      /* If the barrier was ignored, we are likely copying a zero from
         the neighbour's slot or the previous value (in case the
         iterations are executed in lock step).  */
      output[flat_id] = temp;
    }
}
|