File: main.c

package info (click to toggle)
cmake 4.2.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 152,336 kB
  • sloc: ansic: 403,896; cpp: 303,920; sh: 4,105; python: 3,583; yacc: 3,106; lex: 1,279; f90: 538; asm: 471; lisp: 375; cs: 270; java: 266; fortran: 239; objc: 215; perl: 213; xml: 198; makefile: 111; javascript: 83; pascal: 63; tcl: 55; php: 25; ruby: 22
file content (44 lines) | stat: -rw-r--r-- 1,020 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#include <stdio.h>
#include <stdlib.h>

/*
 * Element-wise vector addition: writes a[k] + b[k] into r[k] for every
 * index k in [0, n).
 *
 * The loop carries an OpenACC "kernels loop" directive whose present()
 * clause names r, a and b, so the caller is expected to have made those
 * arrays available to the device beforehand (for example from an enclosing
 * data region).  Without OpenACC support the pragma is ignored and the loop
 * runs as ordinary host code.
 */
void vecaddgpu(float* r, float* a, float* b, int n)
{
#pragma acc kernels loop present(r, a, b)
  for (int k = 0; k < n; k++) {
    const float sum = a[k] + b[k];
    r[k] = sum;
  }
}

/*
 * Test driver: fills two input vectors, adds them through vecaddgpu() inside
 * an OpenACC data region, recomputes the sum on the host, and compares the
 * two results element by element.
 *
 * Returns EXIT_SUCCESS when every element matches, EXIT_FAILURE when an
 * allocation fails or at least one element differs.  The raw mismatch count
 * is deliberately NOT used as the exit status: only the low 8 bits of an
 * exit status reach the parent process on POSIX systems, so a count that is
 * a multiple of 256 would be reported as success.
 */
int main(void)
{
  int n = 100000; /* vector length */
  float* a;       /* input vector 1 */
  float* b;       /* input vector 2 */
  float* r;       /* output vector */
  float* e;       /* expected output values */
  int i, errs;

  a = (float*)malloc((size_t)n * sizeof(*a));
  b = (float*)malloc((size_t)n * sizeof(*b));
  r = (float*)malloc((size_t)n * sizeof(*r));
  e = (float*)malloc((size_t)n * sizeof(*e));
  /* Bail out before touching any buffer; free(NULL) is a no-op, so the
     buffers that did get allocated can be released unconditionally. */
  if (a == NULL || b == NULL || r == NULL || e == NULL) {
    fprintf(stderr, "vecadd: failed to allocate %d-element vectors\n", n);
    free(a);
    free(b);
    free(r);
    free(e);
    return EXIT_FAILURE;
  }

  for (i = 0; i < n; ++i) {
    a[i] = (float)(i + 1);
    b[i] = (float)(1000 * i);
  }
/* compute on the GPU */
#pragma acc data copyin(a[0 : n], b[0 : n]) copyout(r[0 : n])
  {
    vecaddgpu(r, a, b, n);
  }
  /* compute on the host to compare */
  for (i = 0; i < n; ++i)
    e[i] = a[i] + b[i];
  /* compare results */
  errs = 0;
  for (i = 0; i < n; ++i) {
    if (r[i] != e[i]) {
      ++errs;
    }
  }
  if (errs != 0) {
    fprintf(stderr, "vecadd: %d of %d elements differ\n", errs, n);
  }

  free(a);
  free(b);
  free(r);
  free(e);
  return errs == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
}