File: pr115841.c

package info (click to toggle)
gcc-arm-none-eabi 15%3A14.2.rel1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,099,328 kB
  • sloc: cpp: 3,627,108; ansic: 2,571,498; ada: 834,230; f90: 235,082; makefile: 79,231; asm: 74,984; xml: 51,692; exp: 39,736; sh: 33,298; objc: 15,629; python: 15,069; fortran: 14,429; pascal: 7,003; awk: 5,070; perl: 3,106; ml: 285; lisp: 253; lex: 204; haskell: 135
file content (42 lines) | stat: -rw-r--r-- 1,235 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
/* { dg-do compile } */
/* { dg-additional-options "-Ofast -fcommon -fvect-cost-model=dynamic --param vect-partial-vector-usage=1" } */
/* { dg-additional-options "-mavx512vl" { target avx512vl } } */

/* To trigger the bug, costing needs to determine that aligning the A170
   accesses with a prologue is profitable and that there should be a
   vectorized epilogue with a smaller vector size which re-uses the vector
   accumulator from the vectorized main loop.  The main loop is statically
   known to execute, but the epilogue loop is not.  */

/* 192-byte buffer with internal linkage.  foo passes it to jerate and
   then reads it through a float-array cast.  */
static unsigned char xl[192];
/* 192*3-byte buffer with external linkage.  It has no initializer, so it
   is a tentative definition, which is what the -fcommon option in the
   dg-additional-options line above affects.  foo passes it to jerate and
   reads it through a float-array cast at three different offsets.  */
unsigned char A170[192*3];

/* Only declared in this file; no definition is visible here.  foo calls
   it with xl and A170 before reading them.
   NOTE(review): presumably this opaque call keeps the compiler from
   assuming anything about the array contents -- confirm against the PR.  */
void jerate (unsigned char *, unsigned char *);
/* Loop kernel of this testcase.

   Calls jerate (xl, A170) and then runs a do-while loop exactly 32 times
   (i counts down from 32 until it reaches 0).  In each iteration, with
   j = 0 .. 31 and both arrays read as float:

     d = (A170[j] * xl[j]) * 6.93149983882904052734375e-1f
     sfn11s += d * A170[j + 48]
     sfn12s += A170[j + 96] * d

   N appears only as the bound of the variably modified array type used
   in the casts; it controls neither the trip count nor the indices.

   Returns sfn11s + sfn12s.

   NOTE(review): this is a reduced reproducer; the exact statement shape
   presumably matters for hitting the vectorizer path described at the
   top of the file, so avoid restructuring it -- confirm against the PR.  */
float foo (unsigned n)
{
  jerate (xl, A170);

  /* Down-counter: the do-while below executes 32 times.  */
  unsigned i = 32;
  /* 1-based index, incremented at the end of every iteration.  */
  int kr = 1;
  /* Two independent float accumulators, summed after the loop.  */
  float sfn11s = 0.f;
  float sfn12s = 0.f;
  do
    {
      /* 0-based element index derived from kr, widened to long.  */
      int krm1 = kr - 1;
      long j = krm1;
      /* The unsigned char arrays are accessed as float[n] objects; the
	 highest float index used is 31 + 96 = 127 in A170 and 31 in xl.
	 NOTE(review): in-bounds assuming 4-byte float -- confirm.  */
      float a = (*(float(*)[n])A170)[j];
      float b = (*(float(*)[n])xl)[j];
      float c = a * b;
      float d = c * 6.93149983882904052734375e-1f;
      /* Two more A170 reads, 48 and 96 float elements past j.  */
      float e = (*(float(*)[n])A170)[j+48];
      float f = (*(float(*)[n])A170)[j+96];
      float g = d * e;
      sfn11s = sfn11s + g;
      float h = f * d;
      sfn12s = sfn12s + h;
      kr++;
    }
  while (--i != 0);
  /* Combine the two accumulators into the single return value.  */
  float tem = sfn11s + sfn12s;
  return tem;
}