File: unroll-and-jam.c

package info (click to toggle)
gcc-riscv64-unknown-elf 8.3.0.2019.08%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 680,956 kB
  • sloc: ansic: 3,237,715; cpp: 896,882; ada: 772,854; f90: 144,254; asm: 68,788; makefile: 67,456; sh: 29,743; exp: 28,045; objc: 15,273; fortran: 11,885; python: 7,369; pascal: 5,375; awk: 3,725; perl: 2,872; yacc: 316; xml: 311; ml: 285; lex: 198; haskell: 122
file content (111 lines) | stat: -rw-r--r-- 2,952 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
/* { dg-do run } */
/* { dg-options "-O3 -floop-unroll-and-jam --param unroll-jam-min-percent=0 -fdump-tree-unrolljam-details" } */
/* { dg-require-effective-target int32plus } */

#include <stdio.h>
/* Test data arrays.  They are only declared here; the sized definitions
   appear further down in this file, after the TEST instantiations.  */
extern unsigned int a[];
extern unsigned int b[];
extern unsigned int aa[][1024];
/* Running checksum: reset to 1 by init and updated by checkaa/checkb as
   checksum = checksum * 27 + sum.  */
unsigned int checksum;
/* Fold the current contents of aa[][] into the global checksum.

   Starting from 1, accumulates aa[row][col]*31 + 47 over all 16 rows and
   1024 columns (columns in the outer loop, rows in the inner loop), then
   mixes the result in as checksum = checksum * 27 + acc.  All arithmetic
   is unsigned, so wrap-around is well defined.  */
void checkaa(void)
{
  unsigned long col, row;
  unsigned acc = 1;

  for (col = 0; col < 1024; ++col)
    for (row = 0; row < 16; ++row)
      acc += 47 + 31 * aa[row][col];

  checksum = acc + 27 * checksum;
}

/* Fold the current contents of b[] into the global checksum.

   Starting from 1, accumulates b[i]*31 + 47 over all 1024 elements, then
   mixes the result in as checksum = checksum * 27 + sum.  All arithmetic
   is unsigned, so wrap-around is well defined.  */
void checkb(void)
{
  unsigned sum = 1;
  unsigned long i;
  for (i = 0; i < 1024; i++) {
      sum += b[i]*31+47;
  }
  checksum = checksum * 27 + sum;
}

/* Define two functions that contain the same two-deep loop nest around BODY,
   each followed by the statement TEST:

     NAME       - compiled with the options of the translation unit;
     NAMEnoopt  - identical source, but carrying the optimize("O1")
		  attribute.

   Both are noinline/noclone and take (n, m).  The outer loop runs i over
   [1, m) and the inner loop runs j over [1, n); BODY may refer to i and j.
   NOTE: the shape of this loop nest is what the test exercises, so keep the
   two copies textually in sync and do not restructure them.  */
#define TEST(name, body, test) \
static void __attribute__((noinline,noclone)) name (unsigned long n, unsigned long m) \
{ \
  unsigned long i, j; \
  for (i = 1; i < m; i++) { \
      for (j = 1; j < n; j++) { \
	  body; \
      } \
  } \
  test; \
} \
static void __attribute__((noinline,noclone,optimize("O1"))) name ## noopt (unsigned long n, unsigned long m) \
{ \
  unsigned long i, j; \
  for (i = 1; i < m; i++) { \
      for (j = 1; j < n; j++) { \
	  body; \
      } \
  } \
  test; \
}
/* Loop-nest instances.  Each TEST line defines fooN and fooNnoopt with the
   given loop body and the given checksum routine as the trailing statement.
   The trailing "//" tags are the test author's annotations: "ok"/"notok"
   presumably states whether unroll-and-jam is expected to be applied to
   that nest, and the number pair looks like a dependence distance for the
   body -- NOTE(review): the sign convention of the pairs is not evident
   from this file; confirm before relying on it.  */
TEST(foo1, aa[i+1][j+1]=aa[i][j] * aa[i][j] / 2, checkaa()) //ok, -1,-1
TEST(foo2, aa[i][j+1]=3*aa[i+1][j], checkaa()) //notok, 1,-1
TEST(foo3, aa[i+1][j-1]=aa[i][j] * aa[i][j] / 2, checkaa()) //notok, -1,1
TEST(foo4, aa[i][j] = aa[i-1][j+1] * aa[i-1][j+1] / 2, checkaa()) //notok, -1,1
TEST(foo5, aa[i][j] = aa[i+1][j+1] * aa[i+1][j+1] / 2, checkaa()) //ok, 1,1
TEST(foo6, aa[i][j] = aa[i+1][j] * aa[i+1][j] / 2, checkaa()) //ok, -1,0
TEST(foo7, aa[i+1][j] = aa[i][j] * aa[i][j] / 2, checkaa()) //ok, 1,0
TEST(foo9, b[j] = 3*b[j+1] + 1, checkb()) //notok, 0,-1
TEST(foo10, b[j] = 3*b[j] + 1, checkb()) //ok, 0,0

/* foo8 should work as well, but currently doesn't because the distance
   vectors we compute are too pessimistic.  We compute
     (0,1), (1,1) and (1,-1)
   and the last one causes us to lose.  It is therefore not included in the
   expected count checked by the dg-final directive at the end of the
   file.  */
TEST(foo8, b[j+1] = 3*b[j] + 1, checkb()) //ok, 0,1

/* Definitions of the arrays declared extern near the top of the file.
   a[] is filled by init but not otherwise referenced in the visible code;
   b[] and aa[][] are the data the TEST loop bodies read and write.  */
unsigned int a[1024];
unsigned int b[1024];
unsigned int aa[16][1024];
/* Reset all test data to a fixed, reproducible state.

   For every column index col in [0, 1024):
     aa[row][col] = ((row+1)*2 + col + 1) % 17   for row in [0, 16)
     a[col]       = ((col+1)*31) % 19
     b[col]       = ((col+1)*47) % 23
   and finally restart the running checksum at 1.  */
void init(void)
{
  unsigned long col, row;

  for (col = 0; col < 1024; ++col)
    {
      for (row = 0; row < 16; ++row)
	aa[row][col] = (2 * (row + 1) + col + 1) % 17;
      a[col] = (31 * (col + 1)) % 19;
      b[col] = (47 * (col + 1)) % 23;
    }
  checksum = 1;
}

/* Compare the two variants of one TEST instance.

   Prints NAME, then, starting from freshly initialised data each time,
   calls NAMEnoopt(32,8) four times and saves the resulting checksum, and
   calls NAME(32,8) four times.  Prints "ok NAME" when both checksums agree
   and "NOT ok NAME" otherwise; a mismatch is additionally latched in `fail'
   so that it reaches the exit status instead of being visible only in the
   output.  Expects `i', `checka' and `fail' to be declared by the caller.  */
#define RUN(name) \
    printf(" %s\n", #name); \
    init();for(i=0;i<4;i++)name##noopt(32,8); checka = checksum; \
    init();for(i=0;i<4;i++)name(32,8); \
    fail |= (checka != checksum); \
    printf("%sok %s\n", checka != checksum ? "NOT " : "", #name);

/* Run every TEST instance and report the overall result.

   Returns 0 when all optimised variants produced the same checksum as
   their O1 reference variants, and 1 if any of them differed, so that the
   "dg-do run" execution check actually fails on a miscompilation.  */
int main()
{
  int fail = 0;		/* Set to 1 by RUN on any checksum mismatch.  */
  int i;
  unsigned checka;	/* Checksum of the reference (noopt) run.  */
  RUN(foo1);
  RUN(foo2);
  RUN(foo3);
  RUN(foo4);
  RUN(foo5);
  RUN(foo6);
  RUN(foo7);
  RUN(foo8);
  RUN(foo9);
  RUN(foo10);
  return fail;
}

/* Five loops should be unroll-jammed (actually six, but foo8 is currently rejected; see the comment above its definition).  */
/* { dg-final { scan-tree-dump-times "applying unroll and jam" 5 "unrolljam" } } */