File: slp-21.c

package info (click to toggle)
gcc-arm-none-eabi 15%3A14.2.rel1-1
links: PTS, VCS
area: main
in suites: forky, sid, trixie
size: 1,099,328 kB
sloc: cpp: 3,627,108; ansic: 2,571,498; ada: 834,230; f90: 235,082; makefile: 79,231; asm: 74,984; xml: 51,692; exp: 39,736; sh: 33,298; objc: 15,629; python: 15,069; fortran: 14,429; pascal: 7,003; awk: 5,070; perl: 3,106; ml: 285; lisp: 253; lex: 204; haskell: 135
file content (219 lines) | stat: -rw-r--r-- 4,775 bytes
/* { dg-require-effective-target vect_int } */

#include <stdarg.h>
#include "tree-vect.h"

#define N 128 

int
main1 ()
{
  unsigned short i;
  unsigned short out[N*8], out2[N*8], b0, b1, b2, b3, b4, a0, a1, a2, a3, b5;
  unsigned short in[N*8];

  for (i = 0; i < N*8; i++)
    {
      in[i] = i;
    }

  /* Different operations in both cases - vectorization with interleaving.  */
  for (i = 0; i < N; i++)
    {
      a0 = in[i*4];
      a1 = in[i*4 + 1];
      a2 = in[i*4 + 2];
      a3 = in[i*4 + 3];

      b0 = a0 * 8;
      b1 = a1 + 7;
      b2 = a2 + 6;
      b3 = a3 * 5;
      
      b4 = a2 + 4;
      b5 = a3 + 3;

      out[i*4] = b0;
      out[i*4 + 1] = b1;
      out[i*4 + 2] = b2;
      out[i*4 + 3] = b3;

      out2[i*4] = b0;
      out2[i*4 + 1] = b1;
      out2[i*4 + 2] = b4;
      out2[i*4 + 3] = b5;
    }

  /* check results:  */
#pragma GCC novector
  for (i = 0; i < N; i++)
    {
      a0 = in[i*4];
      a1 = in[i*4 + 1];
      a2 = in[i*4 + 2];
      a3 = in[i*4 + 3];

      b0 = a0 * 8;
      b1 = a1 + 7;
      b2 = a2 + 6;
      b3 = a3 * 5;

      b4 = a2 + 4;
      b5 = a3 + 3;

      if (out[i*4] != b0 
         || out[i*4 + 1] != b1
         || out[i*4 + 2] != b2
         || out[i*4 + 3] != b3)
	abort ();

      if (out2[i*4] != b0  
         || out2[i*4 + 1] != b1
         || out2[i*4 + 2] != b4
         || out2[i*4 + 3] != b5)
        abort ();
    }

  /* Different operations in the first case - vectorization with interleaving.  */
  for (i = 0; i < N; i++)
    {
      a0 = in[i*4];
      a1 = in[i*4 + 1];
      a2 = in[i*4 + 2];
      a3 = in[i*4 + 3];

      b0 = a0 + 8;
      b1 = a1 + 7;
      b2 = a2 + 6;
      b3 = a3 * 5;

      b4 = a2 + 4;
      b5 = a3 + 3;

      out[i*4] = b0;
      out[i*4 + 1] = b1;
      out[i*4 + 2] = b2;
      out[i*4 + 3] = b3;

      out2[i*4] = b0;
      out2[i*4 + 1] = b1;
      out2[i*4 + 2] = b4;
      out2[i*4 + 3] = b5;
    }

  /* check results:  */
#pragma GCC novector
  for (i = 0; i < N; i++)
    {
      a0 = in[i*4];
      a1 = in[i*4 + 1];
      a2 = in[i*4 + 2];
      a3 = in[i*4 + 3];

      b0 = a0 + 8;
      b1 = a1 + 7;
      b2 = a2 + 6;
      b3 = a3 * 5;

      b4 = a2 + 4;
      b5 = a3 + 3;

      if (out[i*4] != b0
         || out[i*4 + 1] != b1
         || out[i*4 + 2] != b2
         || out[i*4 + 3] != b3)
        abort ();

      if (out2[i*4] != b0
         || out2[i*4 + 1] != b1
         || out2[i*4 + 2] != b4
         || out2[i*4 + 3] != b5)
        abort ();
    }


  /* Different operations in the second case - vectorization with interleaving.  */
  for (i = 0; i < N; i++) 
    { 
      a0 = in[i*4];
      a1 = in[i*4 + 1];
      a2 = in[i*4 + 2];
      a3 = in[i*4 + 3];

      b0 = a0 + 8;
      b1 = a1 + 7;
      b2 = a2 + 6;
      b3 = a3 + 5;

      b4 = a2 * 4;
      b5 = a3 + 3;

      out[i*4] = b0;
      out[i*4 + 1] = b1;
      out[i*4 + 2] = b2;
      out[i*4 + 3] = b3;

      out2[i*4] = b0;
      out2[i*4 + 1] = b1;
      out2[i*4 + 2] = b4;
      out2[i*4 + 3] = b5;
    }

  /* check results:  */
#pragma GCC novector
  for (i = 0; i < N; i++)
    {
      a0 = in[i*4];
      a1 = in[i*4 + 1];
      a2 = in[i*4 + 2];
      a3 = in[i*4 + 3];

      b0 = a0 + 8;
      b1 = a1 + 7;
      b2 = a2 + 6;
      b3 = a3 + 5;

      b4 = a2 * 4;
      b5 = a3 + 3;

      if (out[i*4] != b0
         || out[i*4 + 1] != b1
         || out[i*4 + 2] != b2
         || out[i*4 + 3] != b3)
        abort ();

      if (out2[i*4] != b0
         || out2[i*4 + 1] != b1
         || out2[i*4 + 2] != b4
         || out2[i*4 + 3] != b5)
        abort ();
    }


  return 0;
}

int main (void)
{
  check_vect ();

  main1 ();

  return 0;
}

/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect"  { target { vect_strided4 || vect_extract_even_odd } } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target  { ! { vect_strided4 || vect_extract_even_odd } } } } } */
/* Some targets can vectorize the second of the three main loops using
   hybrid SLP.  For 128-bit vectors, the required 4->3 permutations are:

   { 0, 1, 2, 4, 5, 6, 8, 9 }
   { 2, 4, 5, 6, 8, 9, 10, 12 }
   { 5, 6, 8, 9, 10, 12, 13, 14 }

   Not all vect_perm targets support that, and it's a bit too specific to have
   its own effective-target selector, so we just test targets directly.  */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { target { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target { vect_strided4 && { ! { powerpc64*-*-* s390*-*-* loongarch*-*-* } } } } } } */
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect"  { target { ! { vect_strided4 } } } } } */