File: pr97832-3.c

package info (click to toggle)
gcc-arm-none-eabi 15:14.2.rel1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,099,328 kB
  • sloc: cpp: 3,627,108; ansic: 2,571,498; ada: 834,230; f90: 235,082; makefile: 79,231; asm: 74,984; xml: 51,692; exp: 39,736; sh: 33,298; objc: 15,629; python: 15,069; fortran: 14,429; pascal: 7,003; awk: 5,070; perl: 3,106; ml: 285; lisp: 253; lex: 204; haskell: 135
file content (50 lines) | stat: -rw-r--r-- 1,854 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
/* { dg-do compile } */
/* { dg-additional-options "-Ofast" } */
/* { dg-require-effective-target vect_double } */

void foo(double* restrict y, const double* restrict x0, const double* restrict x1, int clen)
{
  int xi = clen & 2;
  double f00_re = x0[0+xi+0];
  double f10_re = x1[0+xi+0];
  double f01_re = x0[0+xi+1];
  double f11_re = x1[0+xi+1];
  double f00_im = x0[4+xi+0];
  double f10_im = x1[4+xi+0];
  double f01_im = x0[4+xi+1];
  double f11_im = x1[4+xi+1];
  int clen2 = (clen+xi) * 2;
  double* y0 = &y[0];
  double* y1 = &y[clen2];
  #pragma GCC unroll 0
  for (int c = 0; c < clen2; c += 8) {
    // y0[c] = y0[c] - x0[c]*conj(f00) - x1[c]*conj(f10);
    // y1[c] = y1[c] - x0[c]*conj(f01) - x1[c]*conj(f11);
    #pragma GCC unroll 4
    for (int k = 0; k < 4; ++k) {
      double x0_re = x0[c+0+k];
      double x0_im = x0[c+4+k];
      double y0_re = y0[c+0+k];
      double y0_im = y0[c+4+k];
      double y1_re = y1[c+0+k];
      double y1_im = y1[c+4+k];
      y0_re = y0_re - x0_re * f00_re - x0_im * f00_im;
      y0_im = y0_im + x0_re * f00_im - x0_im * f00_re;
      y1_re = y1_re - x0_re * f01_re - x0_im * f01_im;
      y1_im = y1_im + x0_re * f01_im - x0_im * f01_re;
      double x1_re = x1[c+0+k];
      double x1_im = x1[c+4+k];
      y0_re = y0_re - x1_re * f10_re - x1_im * f10_im;
      y0_im = y0_im + x1_re * f10_im - x1_im * f10_re;
      y1_re = y1_re - x1_re * f11_re - x1_im * f11_im;
      y1_im = y1_im + x1_re * f11_im - x1_im * f11_re;
      y0[c+0+k] = y0_re;
      y0[c+4+k] = y0_im;
      y1[c+0+k] = y1_re;
      y1[c+4+k] = y1_im;
    }
  }
}

/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { ! { vect_load_lanes && vect_strided8 } } } } } */
/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" { target { ! { vect_load_lanes && vect_strided8 } } } } } */