File: example1.cl

package info (click to toggle)
pocl 6.0-7
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 25,320 kB
  • sloc: lisp: 149,513; ansic: 103,778; cpp: 54,947; python: 1,513; sh: 949; ruby: 255; pascal: 226; tcl: 180; makefile: 175; java: 72; xml: 49
file content (41 lines) | stat: -rw-r--r-- 968 bytes parent folder | download | duplicates (7)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/* Build-time switch for the dot_product kernel below: when defined, the
   kernel uses the float4 multiply path; when undefined, it uses the
   scalar, barrier-separated path meant to exercise the autovectorizer. */
#define USE_VECTOR_DATATYPES

/**
 * Computes one 4-component dot product per work-item.
 *
 * The work-item with global id i (dimension 0) stores
 *   a[i].x * b[i].x + a[i].y * b[i].y + a[i].z * b[i].z + a[i].w * b[i].w
 * into c[i].
 *
 * @param a  first operand vectors, indexed by global id
 * @param b  second operand vectors, indexed by global id
 * @param c  output scalars, indexed by global id
 *
 * No bounds check is done on the global id.
 * NOTE(review): presumably the host enqueues exactly one work-item per
 * element of a/b/c -- confirm against the caller.
 *
 * The implementation is selected at build time by USE_VECTOR_DATATYPES.
 */
__kernel void
dot_product (__global const float4 *a,
             __global const float4 *b, __global float *c)
{
  /* get_global_id() returns size_t; keep the full width instead of
     narrowing it to a signed int before using it as an array index. */
  size_t gid = get_global_id(0);

#ifndef USE_VECTOR_DATATYPES
  /* Scalar version, used to smoke test autovectorization. It tries to
     create parallel regions with a friendly memory access pattern so
     that the autovectorizer can pick them up. */
  /* This parallel region does not vectorize with the loop vectorizer
     because it accesses vector datatypes. Perhaps it does with the
     SLP/BB vectorizer. */

  float ax = a[gid].x;
  float ay = a[gid].y;
  float az = a[gid].z;
  float aw = a[gid].w;

  float bx = b[gid].x;
  float by = b[gid].y;
  float bz = b[gid].z;
  float bw = b[gid].w;

  /* Kept on purpose: presumably this barrier is what splits the kernel
     into the two parallel regions the surrounding comments refer to. */
  barrier(CLK_LOCAL_MEM_FENCE);

  /* This parallel region should vectorize. The partial sums are
     accumulated through c[gid] on purpose; do not fold them into a
     local temporary without re-checking the vectorizer test. */
  c[gid] = ax * bx;
  c[gid] += ay * by;
  c[gid] += az * bz;
  c[gid] += aw * bw;

#else
  /* Component-wise product followed by a left-to-right horizontal sum.
     The explicit sum (rather than the dot() built-in) keeps the
     floating-point evaluation order fixed. */
  float4 prod = a[gid] * b[gid];
  c[gid] = prod.x + prod.y + prod.z + prod.w;
#endif

}