File: example1.cl

package info (click to toggle)

pocl 7.1-1

links: PTS, VCS
area: main
in suites: experimental
size: 29,768 kB
sloc: lisp: 151,669; ansic: 135,425; cpp: 65,801; python: 1,846; sh: 1,084; ruby: 255; pascal: 231; tcl: 180; makefile: 174; asm: 81; java: 72; xml: 49

file content (41 lines) | stat: -rw-r--r-- 968 bytes

parent folder | download | duplicates (8)

#define USE_VECTOR_DATATYPES

__kernel void 
dot_product (__global const float4 *a,  
	     __global const float4 *b, __global float *c) 
{ 
  int gid = get_global_id(0); 

#ifndef USE_VECTOR_DATATYPES
  /* This version is to smoke test the autovectorization.
     Tries to create parallel regions with nice memory
     access pattern etc. so it gets autovectorizer. */
  /* This parallel region does not vectorize with the
     loop vectorizer because it accesses vector datatypes.
     Perhaps with SLP/BB vectorizer.*/

  float ax = a[gid].x;
  float ay = a[gid].y; 
  float az = a[gid].z;
  float aw = a[gid].w;

  float bx = b[gid].x, 
      by = b[gid].y, 
      bz = b[gid].z, 
      bw = b[gid].w;

  barrier(CLK_LOCAL_MEM_FENCE);

  /* This parallel region should vectorize. */
  c[gid] = ax * bx;
  c[gid] += ay * by;
  c[gid] += az * bz;
  c[gid] += aw * bw;

#else
  float4 prod = a[gid] * b[gid];
  c[gid] = prod.x + prod.y + prod.z + prod.w;
#endif
 

}