File: declare-variant-5.c

package info (click to toggle)
gcc-arm-none-eabi 15%3A12.2.rel1-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 959,712 kB
  • sloc: cpp: 3,275,382; ansic: 2,061,766; ada: 840,956; f90: 208,513; makefile: 76,132; asm: 73,433; xml: 50,448; exp: 34,146; sh: 32,436; objc: 15,637; fortran: 14,012; python: 11,991; pascal: 6,787; awk: 4,779; perl: 3,054; yacc: 338; ml: 285; lex: 201; haskell: 122
file content (36 lines) | stat: -rw-r--r-- 1,361 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-additional-options "-mavx2" } */

typedef float __v4sf __attribute__((vector_size (16)));
typedef int __v4si __attribute__((vector_size (16)));
typedef float __v8sf __attribute__((vector_size (32)));
typedef int __v8si __attribute__((vector_size (32)));
__v4si f1 (__v4sf, __v4sf, float *);
__v8si f2 (__v8sf, __v8sf, float *);
__v4si f3 (__v4si, int, __v4si);

#pragma omp declare variant (f1) match (construct={parallel,for,simd(simdlen(4),notinbranch,uniform(z),aligned(z:4 * sizeof (*z)))})
#pragma omp declare variant (f2) match (construct={for,simd(uniform(z),simdlen(8),notinbranch)})
int f4 (float x, float y, float *z);

#pragma omp declare variant (f3) match (construct={simd(simdlen(4),inbranch,linear(y:1))})
int f5 (int x, int y);

void
test (int *x, float *y, float *z, float *w)
{
  #pragma omp parallel
  #pragma omp for simd aligned (w:4 * sizeof (float))
  for (int i = 0; i < 1024; i++)
    x[i] = f4 (y[i], z[i], w);
  #pragma omp parallel for simd aligned (w:4 * sizeof (float)) simdlen(4)
  for (int i = 1024; i < 2048; i++)
    x[i] = f4 (y[i], z[i], w);
  #pragma omp simd aligned (w:4 * sizeof (float))
  for (int i = 2048; i < 4096; i++)
    x[i] = f4 (y[i], z[i], w);
  #pragma omp simd
  for (int i = 4096; i < 8192; i++)
    if (x[i] > 10)
      x[i] = f5 (x[i], i);
}