File: Walk.c

package info (click to toggle)
p7zip 16.02%2Bdfsg-8
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 14,148 kB
  • sloc: cpp: 167,145; ansic: 14,992; python: 1,911; asm: 1,688; sh: 1,132; makefile: 703
file content (267 lines) | stat: -rw-r--r-- 5,506 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
/* Walk.c -- memory walks
2010-10-14 : Igor Pavlov : Public domain */

#include "Walk.h"

/*
  WalkSum: returns the sum of p[0] .. p[num - 1] (unsigned arithmetic, so the
  result wraps around on overflow).

  p   - array with at least num readable elements
  num - number of elements to add; 0 gives 0

  The main loop adds 16 consecutive elements per iteration. Elements that are
  left over when num is not a multiple of 16 are added one by one, so nothing
  beyond p[num - 1] is read.
*/
unsigned MY_FAST_CALL WalkSum(const unsigned *p, unsigned num)
{
  /* Largest multiple of 16 that does not exceed num. The index stops exactly
     at this value, so "i += 16" can not wrap around for any num. */
  const unsigned numBlocks16 = num & ~15u;
  unsigned b = 0;
  unsigned i;
  for (i = 0; i < numBlocks16; i += 16)
  {
    b = b + p[i] +
      p[i + 1] +
      p[i + 2] +
      p[i + 3] +
      p[i + 4] +
      p[i + 5] +
      p[i + 6] +
      p[i + 7] +
      p[i + 8] +
      p[i + 9] +
      p[i + 10] +
      p[i + 11] +
      p[i + 12] +
      p[i + 13] +
      p[i + 14] +
      p[i + 15];
  }
  /* tail: at most 15 remaining elements */
  for (; i < num; i++)
    b += p[i];
  return b;
}

/*
  WalkSum2: strided sum over p.

  Starting at index 0, adds p[index] to the running total and advances the
  index by step. The bound "index < num" is tested only once per group of
  eight reads, so a group that starts below num is always completed, even if
  its later reads are at indexes >= num.

  Returns the accumulated total (unsigned, wraps around on overflow).
*/
unsigned MY_FAST_CALL WalkSum2(const unsigned *p, unsigned num, unsigned step)
{
  unsigned total = 0;
  unsigned pos = 0;
  while (pos < num)
  {
    /* eight strided reads per bound check */
    total += p[pos];
    pos += step;
    total += p[pos];
    pos += step;
    total += p[pos];
    pos += step;
    total += p[pos];
    pos += step;
    total += p[pos];
    pos += step;
    total += p[pos];
    pos += step;
    total += p[pos];
    pos += step;
    total += p[pos];
    pos += step;
  }
  return total;
}

/*
  WalkWrite: stores 0 into p[0] .. p[num - 1].

  p   - array with at least num writable elements
  num - number of elements to clear; 0 writes nothing

  The main loop clears 8 consecutive elements per iteration. Elements that
  are left over when num is not a multiple of 8 are cleared one by one, so
  nothing beyond p[num - 1] is written.
*/
void MY_FAST_CALL WalkWrite(unsigned *p, unsigned num)
{
  /* Largest multiple of 8 that does not exceed num. The index stops exactly
     at this value, so "i += 8" can not wrap around for any num. */
  const unsigned numBlocks8 = num & ~7u;
  unsigned i;
  for (i = 0; i < numBlocks8; i += 8)
  {
    p[i] = 0;
    p[i + 1] = 0;
    p[i + 2] = 0;
    p[i + 3] = 0;
    p[i + 4] = 0;
    p[i + 5] = 0;
    p[i + 6] = 0;
    p[i + 7] = 0;
  }
  /* tail: at most 7 remaining elements */
  for (; i < num; i++)
    p[i] = 0;
}

/*
  WalkWrite2: strided zero fill of p.

  Starting at index 0, stores 0 into p[index] and advances the index by step.
  The bound "index < num" is tested only once per group of eight stores, so a
  group that starts below num is always completed, even if its later stores
  are at indexes >= num.
*/
void MY_FAST_CALL WalkWrite2(unsigned *p, unsigned num, unsigned step)
{
  unsigned pos = 0;
  while (pos < num)
  {
    /* eight strided stores per bound check */
    p[pos] = 0;
    pos += step;
    p[pos] = 0;
    pos += step;
    p[pos] = 0;
    pos += step;
    p[pos] = 0;
    pos += step;
    p[pos] = 0;
    pos += step;
    p[pos] = 0;
    pos += step;
    p[pos] = 0;
    pos += step;
    p[pos] = 0;
    pos += step;
  }
}

/*
  Walk1Add: dependent walk in which each step adds the value stored at the
  current index to that index (a0 += p[a0]).

  p   - table that is read at the current index
  num - step budget; the loop runs (num >> 4) rounds of 16 steps each, so the
        low 4 bits of num are ignored
  a0  - starting index

  Returns the index reached after the last step.
*/
unsigned MY_FAST_CALL Walk1Add(const unsigned *p, unsigned num, unsigned a0)
{
  unsigned rounds = num >> 4;
  while (rounds != 0)
  {
    /* 16 chained steps; every read depends on the result of the previous one */
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    a0 += p[a0];
    rounds--;
  }
  return a0;
}

/*
  Walk1Nop: dependent walk a0 = p[a0 + k], repeated num times.

  k is fixed before the loop: 1 when the initial a0 is not below 0xFFFFFFFF,
  otherwise 0. The addition is still carried out on every step.

  p   - table that is read at the current index
  num - number of steps
  a0  - starting index

  Returns the value read by the last step (or a0 unchanged when num is 0).
*/
unsigned MY_FAST_CALL Walk1Nop(const unsigned *p, unsigned num, unsigned a0)
{
  unsigned bias = 1;
  unsigned left;
  if (a0 < 0xFFFFFFFF)
    bias = 0;
  for (left = num; left != 0; left--)
    a0 = p[a0 + bias];
  return a0;
}

/*
  Rn: return expression for the generated walk functions ("return R ## n ;").
  Rn expands to the sum of a0 .. a(n-1), each converted to unsigned through
  size_t. Every Rn is built from R(n-1) plus one more term.
  The expansion is not parenthesized; in this file it is only used as a
  complete return expression.
*/
#define R1      (unsigned)(size_t)a0
#define R2 R1 + (unsigned)(size_t)a1
#define R3 R2 + (unsigned)(size_t)a2
#define R4 R3 + (unsigned)(size_t)a3
#define R5 R4 + (unsigned)(size_t)a4
#define R6 R5 + (unsigned)(size_t)a5
#define R7 R6 + (unsigned)(size_t)a6
#define R8 R7 + (unsigned)(size_t)a7
#define R9 R8 + (unsigned)(size_t)a8
#define R10 R9 + (unsigned)(size_t)a9
#define R11 R10 + (unsigned)(size_t)a10
#define R12 R11 + (unsigned)(size_t)a11

/*
  An: one index-walk step for n chains.
  An expands to the n statements "ak = p[ak];" for k = 0 .. n-1: each chain
  variable is replaced by the value stored in p at its current index.
  Every An is built from A(n-1) plus one more statement. Used by WWW(n).
*/
#define A1    a0 = p[a0];
#define A2 A1 a1 = p[a1];
#define A3 A2 a2 = p[a2];
#define A4 A3 a3 = p[a3];
#define A5 A4 a4 = p[a4];
#define A6 A5 a5 = p[a5];
#define A7 A6 a6 = p[a6];
#define A8 A7 a7 = p[a7];
#define A9 A8 a8 = p[a8];
#define A10 A9 a9 = p[a9];
#define A11 A10 a10 = p[a10];
#define A12 A11 a11 = p[a11];

/*
  Single-chain index walk.
  The function header is produced by WWW_DECL(1), which is not defined in this
  file (presumably in Walk.h - confirm there); p, num and a0 are expected to
  be declared by it.
  Runs (num >> 4) iterations; each iteration performs 16 nested lookups in p,
  starting from a0, written as one expression. Returns the final a0.
*/
WWW_DECL(1)
{
  unsigned i;
  /* 16 lookups per iteration, so the low 4 bits of num are dropped */
  num >>= 4;
  for (i = 0; i < num; i++)
    a0 = p[p[p[p[p[p[p[p[p[p[p[p[p[p[p[p[a0]]]]]]]]]]]]]]]];
  return a0;
}

/*
  WWW(n): defines the index-walk function for n chains.
  The header comes from WWW_DECL(n) (not defined in this file; presumably
  Walk.h - confirm there). The generated body runs (num >> 2) iterations, each
  applying the step macro An four times, and returns Rn (the sum of the chain
  variables converted to unsigned).
*/
#define WWW(n) WWW_DECL(n) \
{ unsigned i; num >>= 2; \
  for (i = 0; i < num; i++) { \
    A ## n; A ## n; A ## n; A ## n; \
  } return R ## n ; }

/* instantiations for 2 .. 12 chains (the 1-chain variant is written by hand) */
WWW(2)
WWW(3)
WWW(4)
WWW(5)
WWW(6)
WWW(7)
WWW(8)
WWW(9)
WWW(10)
WWW(11)
WWW(12)

/*
  Dn: one pointer-walk step for n chains.
  Dn expands to the n statements "ak = *ak;" for k = 0 .. n-1: each chain
  variable is replaced by the value it points to.
  Every Dn is built from D(n-1) plus one more statement. Used by DDD(n).
*/
#define D1    a0 = *a0;
#define D2 D1 a1 = *a1;
#define D3 D2 a2 = *a2;
#define D4 D3 a3 = *a3;
#define D5 D4 a4 = *a4;
#define D6 D5 a5 = *a5;
#define D7 D6 a6 = *a6;
#define D8 D7 a7 = *a7;
#define D9 D8 a8 = *a8;
#define D10 D9 a9 = *a9;

/*
  Walk1d: single-chain pointer walk.
  P1 is a parameter-list macro that is not defined in this file; it is
  expected to declare a0 (presumably as void ** - confirm in Walk.h).
  Runs (num >> 4) iterations; each iteration follows 16 pointers in one
  expression: the innermost "*a0" plus 15 further "*(void **)" dereferences.
  Returns R1, i.e. the final a0 converted to unsigned through size_t.
*/
unsigned MY_FAST_CALL Walk1d(unsigned num, P1)
{
  unsigned i;
  /* 16 dereferences per iteration, so the low 4 bits of num are dropped */
  num >>= 4;
  for (i = 0; i < num; i++)
    a0 = *(void **)*(void **)*(void **)*(void **)
        *(void **)*(void **)*(void **)*(void **)
        *(void **)*(void **)*(void **)*(void **)
        *(void **)*(void **)*(void **)*a0;
  return R1;
}

/*
  DDD(n): defines the pointer-walk function for n chains.
  The header comes from DDD_DECL(n) (not defined in this file; presumably
  Walk.h - confirm there). The generated body runs (num >> 3) iterations, each
  applying the step macro Dn eight times, and returns Rn (the sum of the chain
  variables converted to unsigned through size_t).
*/
#define DDD(n) DDD_DECL(n) \
{ unsigned i; num >>= 3; \
  for (i = 0; i < num; i++) { \
    D ## n; D ## n; D ## n; D ## n; \
    D ## n; D ## n; D ## n; D ## n; \
  } return R ## n ; }

/* instantiations for 2 .. 10 chains (the 1-chain variant is Walk1d) */
DDD(2)
DDD(3)
DDD(4)
DDD(5)
DDD(6)
DDD(7)
DDD(8)
DDD(9)
DDD(10)

/* PPP(x, y): read p[x] into y, then store the same value back to p[x]. */
#define PPP(x, y) y = p[x]; p[x] = y;

/* Bn: PPP step from ak into zk for k = 0 .. n-1 (index in ak, value read
   lands in zk). */
#define B1     PPP(a0, z0)
#define B2  B1 PPP(a1, z1)
#define B3  B2 PPP(a2, z2)
#define B4  B3 PPP(a3, z3)
#define B5  B4 PPP(a4, z4)
#define B6  B5 PPP(a5, z5)

/* Cn: the same step in the other direction (index in zk, value read lands
   in ak). */
#define C1     PPP(z0, a0)
#define C2  C1 PPP(z1, a1)
#define C3  C2 PPP(z2, a2)
#define C4  C3 PPP(z3, a3)
#define C5  C4 PPP(z4, a4)
#define C6  C5 PPP(z5, a5)

/* Zn: comma-separated list z0 .. z(n-1); RW(n) uses it as the declarator
   list of "unsigned Z ## n;". */
#define Z1     z0
#define Z2 Z1, z1
#define Z3 Z2, z2
#define Z4 Z3, z3
#define Z5 Z4, z4
#define Z6 Z5, z5

/*
  RW(n): defines the read+write-back index-walk function for n chains.
  The header comes from RW_DECL(n) (not defined in this file; presumably
  Walk.h - confirm there). The generated body runs (num >> 3) iterations.
  Each iteration declares the temporaries z0 .. z(n-1) as unsigned and applies
  the pair "Bn; Cn" four times, i.e. eight PPP steps per chain, alternating
  between the ak and zk variables. Returns Rn (the sum of the chain variables
  converted to unsigned).
*/
#define RW(n) RW_DECL(n) \
{ unsigned i; num >>= 3; for (i = 0; i < num; i++) { \
    unsigned Z ## n; \
    B ## n; C ## n; \
    B ## n; C ## n; \
    B ## n; C ## n; \
    B ## n; C ## n; \
  } return R ## n ; }

/* instantiations for 1 .. 6 chains */
RW(1)
RW(2)
RW(3)
RW(4)
RW(5)
RW(6)
RW(6)


/* MMM(x, y): read *x into y, then store the same value back through x. */
#define MMM(x, y) y = *x; *x = y;

/* En: MMM step from ak into zk for k = 0 .. n-1 (pointer in ak, value read
   lands in zk). */
#define E1     MMM(a0, z0)
#define E2  E1 MMM(a1, z1)
#define E3  E2 MMM(a2, z2)

/* Fn: the same step in the other direction (pointer in zk, value read lands
   in ak). */
#define F1     MMM(z0, a0)
#define F2  F1 MMM(z1, a1)
#define F3  F2 MMM(z2, a2)

/* Yn: declarator list "**z0, **z1, ..."; WD(n) uses it in "void Y ## n;",
   which declares z0 .. z(n-1) as void ** temporaries. */
#define Y1     **z0
#define Y2 Y1, **z1
#define Y3 Y2, **z2

/*
  WD(n): defines the read+write-back pointer-walk function for n chains.
  The header comes from WD_DECL(n) (not defined in this file; presumably
  Walk.h - confirm there). The generated body runs (num >> 3) iterations.
  Each iteration declares the temporaries z0 .. z(n-1) as void ** and applies
  the pair "En; Fn" four times, i.e. eight MMM steps per chain, alternating
  between the ak and zk variables. Returns Rn (the sum of the chain variables
  converted to unsigned through size_t).
*/
#define WD(n) WD_DECL(n) \
{ unsigned i; num >>= 3; \
  for (i = 0; i < num; i++) { \
    void Y ## n; \
    E ## n; F ## n; \
    E ## n; F ## n; \
    E ## n; F ## n; \
    E ## n; F ## n; \
  } return R ## n ; }

/* instantiations for 1 .. 3 chains */
WD(1)
WD(2)
WD(3)