File: loopbarriers.cl

package info (click to toggle)
pocl 7.1-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 29,768 kB
  • sloc: lisp: 151,669; ansic: 135,425; cpp: 65,801; python: 1,846; sh: 1,084; ruby: 255; pascal: 231; tcl: 180; makefile: 174; asm: 81; java: 72; xml: 49
file content (33 lines) | stat: -rw-r--r-- 1,088 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33

/* Expand every printf call in this file to nothing, so the trace
   statements in the kernel below generate no code and no output.
   Remove this definition to enable the traces.  */
#define printf(...)

/* Test kernel exercising barriers placed inside a loop.

   In each of three iterations every work-item
     1. writes flat_id * 1000 + i to its own slot of OUTPUT,
     2. waits at a barrier,
     3. reads the slot of the next work-item (the last work-item wraps
        around and reads slot 0),
     4. waits at a second barrier,
     5. stores the value it read into its own slot.

   output: buffer indexed by flat global id; it is accessed at indices
           0 .. grid_size - 1, so it must hold at least one int per
           work-item of the global grid.

   NOTE(review): barrier() synchronizes only the work-items of a single
   work-group, so reading the neighbour's slot presumably relies on the
   host launching the whole grid as one work-group -- confirm against
   the host program.  */
kernel void
test_kernel (global int *output)
{
  /* Row-major linearization of the 3D global id.  The z coordinate must
     be scaled by a whole xy-plane (size (1) * size (0)); scaling it by
     size (1) alone makes distinct work-items of a 3D grid share a slot.
     For 1D and 2D launches this is the same value as before.  */
  size_t flat_id
      = (get_global_id (2) * get_global_size (1) + get_global_id (1))
            * get_global_size (0)
        + get_global_id (0);

  size_t grid_size
      = get_global_size (2) * get_global_size (1) * get_global_size (0);

  /* The trace arguments are cast to int so that they match the %d
     conversions when the printf calls are enabled.  */
  printf ("a. flat_id %d lid %d\n", (int)flat_id, (int)get_local_id (0));

  /* NOTE(review): the volatile counter presumably keeps the compiler
     from unrolling or otherwise simplifying the loop around the
     barriers -- confirm.  */
  for (volatile int i = 0; i < 3; ++i)
    {
      output[flat_id] = flat_id * 1000 + i;
      printf ("b. flat_id %d i %d lid %d\n", (int)flat_id, i,
              (int)get_local_id (0));

      /* All work-items must have published their value before any of
         them reads its neighbour's slot.  */
      barrier (CLK_GLOBAL_MEM_FENCE);
      printf ("c. flat_id %d i %d lid %d\n", (int)flat_id, i,
              (int)get_local_id (0));

      /* Next work-item's slot; the last work-item wraps to slot 0.  */
      int temp = output[flat_id + 1 == grid_size ? 0 : (flat_id + 1)];

      /* All reads must complete before the slots are overwritten.  */
      barrier (CLK_GLOBAL_MEM_FENCE);
      /* If the barrier was ignored, we are likely copying
         a zero from the neighbour's slot or the previous
         value (in case the iterations are executed in
         lock step). */
      output[flat_id] = temp;
      printf ("d. flat_id %d i %d lid %d\n", (int)flat_id, i,
              (int)get_local_id (0));
    }
}