File: x86.h

package info (click to toggle)
numerix 0.22-4
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 4,380 kB
  • ctags: 4,165
  • sloc: asm: 26,210; ansic: 12,168; ml: 4,912; sh: 3,899; pascal: 414; makefile: 179
file content (482 lines) | stat: -rw-r--r-- 12,839 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
// file config/x86.h: AMD-Athlon configuration options
/*-----------------------------------------------------------------------+
 |  Copyright 2005-2006, Michel Quercia (michel.quercia@prepas.org)      |
 |                                                                       |
 |  This file is part of Numerix. Numerix is free software; you can      |
 |  redistribute it and/or modify it under the terms of the GNU Lesser   |
 |  General Public License as published by the Free Software Foundation; |
 |  either version 2.1 of the License, or (at your option) any later     |
 |  version.                                                             |
 |                                                                       |
 |  The Numerix Library is distributed in the hope that it will be       |
 |  useful, but WITHOUT ANY WARRANTY; without even the implied warranty  |
 |  of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU  |
 |  Lesser General Public License for more details.                      |
 |                                                                       |
 |  You should have received a copy of the GNU Lesser General Public     |
 |  License along with the GNU MP Library; see the file COPYING. If not, |
 |  write to the Free Software Foundation, Inc., 59 Temple Place -       |
 |  Suite 330, Boston, MA 02111-1307, USA.                               |
 +-----------------------------------------------------------------------+
 |                                                                       |
 |                  Configuration for Athlon processors                  |
 |                                                                       |
 +-----------------------------------------------------------------------*/

/* Machine word size: this configuration selects the 32-bit setting */
#define bits_32

/*
   Memory allocation strategy.
   NOTE(review): the token on the next line is a substitution placeholder,
   not C. It is presumably rewritten by the configure step into a #define
   or #undef of use_alloca - confirm against the build scripts. The file
   is not compilable until that substitution has been performed.
*/
@use_alloca@

/*
   Double-long available.
   NOTE(review): same kind of placeholder as above, presumably rewritten
   into a #define or #undef of have_long_long - confirm.
*/
@have_long_long@

                          /* +---------------------+
                             |  Debugging options  |
                             +---------------------+ */

/*
   When a "debug_xxx" symbol is defined, every call to "xxx" will be
   monitored by a special control function that checks arguments
   and results. Except when debugging Numerix, leave all the following
   symbols undefined !
*/
   
/*
   Debug switches, grouped by the family of routines they monitor.
   Every switch is explicitly undefined here, i.e. monitoring is off.
   To monitor a routine "xxx", replace the matching "#undef debug_xxx"
   by "#define debug_xxx".
*/

/* memory allocation */
#undef debug_alloc

/* multiplication and squaring */
#undef debug_mul_n2
#undef debug_karamul
#undef debug_toommul
#undef debug_mmul
#undef debug_butterfly
#undef debug_smul
#undef debug_sjoin
#undef debug_fftmul

/* division */
#undef debug_div_n2
#undef debug_burnidiv
#undef debug_moddiv
#undef debug_karpdiv

/* square root */
#undef debug_sqrt_n2
#undef debug_zimsqrt
#undef debug_modsqrt

/* p-th root */
#undef debug_root

/* modular exponentiation */
#undef debug_powmod

/* gcd */
#undef debug_gcd_n2
#undef debug_lehmer

/* primality test */
#undef debug_isprime

                /* +------------------------------------------+
                   |  Functions written in assembly language  |
                   +------------------------------------------+ */

/*
   Assembly selections. This whole section is active only when use_slong
   is defined; in the other modes none of the symbols below is touched.
   NOTE(review): each "assembly_sn_xxx" symbol presumably requests the
   assembly-language version of the routine "sn_xxx" - confirm against
   the sources that test these symbols.
*/
#ifdef use_slong

/* Use SSE2 instructions (Pentium 4 and higher)? Disabled in this file. */
#undef use_sse2

/* comparison */
#define assembly_sn_cmp

/* addition/subtraction, increment/decrement */
#define assembly_sn_add
#define assembly_sn_sub
#define assembly_sn_inc
#define assembly_sn_dec

/* multiplication and square */
#define assembly_sn_mul_1
#define assembly_sn_mul_n2
#define assembly_sn_sqr_n2
#define assembly_sn_karamul
#define assembly_sn_karasqr
#define assembly_sn_toommul
#define assembly_sn_toomsqr

/* operations modulo BASE^n + 1 */
#define assembly_sn_mmul
#define assembly_sn_msqr
#define assembly_sn_butterfly

/* operations modulo BASE^n - 1 */
#define assembly_sn_ssub
#define assembly_sn_smul
#define assembly_sn_ssqr
#define assembly_sn_sjoin3

/* shifts */
#define assembly_sn_shift_down
#define assembly_sn_shift_up

/* division */
#define assembly_sn_div_1
#define assembly_sn_mod_1
#define assembly_sn_div_n2
#define assembly_sn_burnidiv

/* square root */
#define assembly_sn_sqrt_n2

/* modular exponentiation */
#define assembly_sn_mgdiv_n2

/* gcd */
#define assembly_sn_gcd_2
#define assembly_sn_hgcd_2

#endif /* use_slong */


                             /* +--------------+
                                |  Thresholds  |
                                +--------------+ */

/*
  The constants below tell when to switch between various algorithms
  for an operation. They have been set up for a 550 MHz Athlon K7.
*/


/*
   -------------------- mul(a,la,b,lb,c)

   if 2*lb > fftmul_lim :
     if la+lb  > fftmul_lim_8 -> fft(13)
     if la+lb  > fftmul_lim_7 -> fft(12)
     if la+lb  > fftmul_lim_6 -> fft(11)
     if la+lb  > fftmul_lim_5 -> fft(10)
     if la+lb  > fftmul_lim_4 -> fft(9)
     if la+lb  > fftmul_lim_3 -> fft(8)
     if la+lb  > fftmul_lim_2 -> fft(7)
     if la+lb  > fftmul_lim_1 -> smul(24)
     else                     -> smul(12)

   if 2*lb <= fftmul_lim :
     if lb > toommul_lim      -> toommul
     if lb > karamul_lim      -> karamul
     else                     -> mul_n2

   -------------------- remdiv(a,lc,b,lb,c)

   if lc > remdiv_lim :
     if lb+1 > fftmul_lim_8 -> fft(13)
     if lb+1 > fftmul_lim_7 -> fft(12)
     if lb+1 > fftmul_lim_6 -> fft(11)
     if lb+1 > fftmul_lim_5 -> fft(10)
     if lb+1 > fftmul_lim_4 -> fft(9)
     if lb+1 > fftmul_lim_3 -> fft(8)
     if lb+1 > fftmul_lim_2 -> fft(7)
     if lb+1 > fftmul_lim_1 -> smul(24)
     else                   -> smul(12)

   if lc <= remdiv_lim :    -> toommul

   -------------------- smul(a,la,b,lb,c,n)

   if n is even and n > smul_lim_even -> smul
   if n is odd  and n > smul_lim_odd  -> smul    (assembly version)
   else                               -> toommul

   -------------------- mmul(a,b,n)
   if n > mmul_lim -> mmul
   else            -> toommul

*/

/*
   Thresholds for mul, remdiv, smul and mmul.
   One table per mode; the first of use_clong, use_dlong, use_slong that
   is defined selects its table. There is no #else branch: if none of the
   three symbols is defined, none of these thresholds is defined.
*/
#if defined(use_clong)

#define karamul_lim         31
#define toommul_lim        100
#define fftmul_lim         416
#define fftmul_lim_1      1600
#define fftmul_lim_2      7400
#define fftmul_lim_3     20000
#define fftmul_lim_4     60000
#define fftmul_lim_5    190000
#define fftmul_lim_6    450000
#define fftmul_lim_7   1800000
#define fftmul_lim_8   7500000
#define remdiv_lim          72
#define smul_lim_even       20
#define mmul_lim            23

#elif defined(use_dlong)

#define karamul_lim         25
#define toommul_lim         76
#define fftmul_lim         300
#define fftmul_lim_1      1200
#define fftmul_lim_2      6000
#define fftmul_lim_3     14000
#define fftmul_lim_4     30000
#define fftmul_lim_5     90000
#define fftmul_lim_6    300000
#define fftmul_lim_7    900000
#define fftmul_lim_8   3000000
#define remdiv_lim          72
#define smul_lim_even       12
#define mmul_lim            20

#elif defined(use_slong)

/* smul_lim_odd is defined in this branch only */
#define karamul_lim         23
#define toommul_lim         80
#define fftmul_lim         359
#define fftmul_lim_1      2000
#define fftmul_lim_2     10000
#define fftmul_lim_3     20000
#define fftmul_lim_4     60000
#define fftmul_lim_5    120000
#define fftmul_lim_6    400000
#define fftmul_lim_7   1000000
#define fftmul_lim_8   6000000
#define remdiv_lim          72
#define smul_lim_even       13
#define smul_lim_odd        20
#define mmul_lim            23

#endif

/*
   -------------------- sqr(a,la,b)

   if 2*la > fftsqr_lim :
     if 2*la > fftsqr_lim_8 -> fft(13)
     if 2*la > fftsqr_lim_7 -> fft(12)
     if 2*la > fftsqr_lim_6 -> fft(11)
     if 2*la > fftsqr_lim_5 -> fft(10)
     if 2*la > fftsqr_lim_4 -> fft(9)
     if 2*la > fftsqr_lim_3 -> fft(8)
     if 2*la > fftsqr_lim_2 -> fft(7)
     if 2*la > fftsqr_lim_1 -> ssqr(24)
     else                   -> ssqr(12)

   if 2*la <= fftsqr_lim :
     if la > toomsqr_lim    -> toomsqr
     if la > karasqr_lim    -> karasqr
     else                   -> sqr_n2

   -------------------- remsqrt(a,la,b)

   if lc > remsqrt_lim :
     if lb+1 > fftsqr_lim_8 -> fft(13)
     if lb+1 > fftsqr_lim_7 -> fft(12)
     if lb+1 > fftsqr_lim_6 -> fft(11)
     if lb+1 > fftsqr_lim_5 -> fft(10)
     if lb+1 > fftsqr_lim_4 -> fft(9)
     if lb+1 > fftsqr_lim_3 -> fft(8)
     if lb+1 > fftsqr_lim_2 -> fft(7)
     if lb+1 > fftsqr_lim_1 -> ssqr(24)
     else                   -> ssqr(12)

   if lc <= remsqrt_lim :   -> toomsqr

   -------------------- ssqr(a,la,b,n)

   if n is even and n > ssqr_lim_even -> ssqr
   if n is odd  and n > ssqr_lim_odd  -> ssqr    (assembly version)
   else                               -> toomsqr

   -------------------- msqr(a,n)
   if n > msqr_lim -> msqr
   else            -> toomsqr

*/

/*
   Thresholds for sqr, remsqrt, ssqr and msqr.
   One table per mode; the first of use_clong, use_dlong, use_slong that
   is defined selects its table. There is no #else branch: if none of the
   three symbols is defined, none of these thresholds is defined.
*/
#if defined(use_clong)

#define karasqr_lim         43
#define toomsqr_lim        100
#define fftsqr_lim         414
#define fftsqr_lim_1      1600
#define fftsqr_lim_2      6800
#define fftsqr_lim_3     20000
#define fftsqr_lim_4     60000
#define fftsqr_lim_5    190000
#define fftsqr_lim_6    450000
#define fftsqr_lim_7   1800000
#define fftsqr_lim_8   7500000
#define remsqrt_lim        144
#define ssqr_lim_even       20
#define msqr_lim            23

#elif defined(use_dlong)

#define karasqr_lim         41
#define toomsqr_lim         76
#define fftsqr_lim         366
#define fftsqr_lim_1      1200
#define fftsqr_lim_2      6000
#define fftsqr_lim_3     14000
#define fftsqr_lim_4     30000
#define fftsqr_lim_5     90000
#define fftsqr_lim_6    300000
#define fftsqr_lim_7    900000
#define fftsqr_lim_8   3000000
#define remsqrt_lim        144
#define ssqr_lim_even       17
#define msqr_lim            26

#elif defined(use_slong)

/* ssqr_lim_odd is defined in this branch only */
#define karasqr_lim         47
#define toomsqr_lim         80
#define fftsqr_lim         377
#define fftsqr_lim_1      2000
#define fftsqr_lim_2     10000
#define fftsqr_lim_3     20000
#define fftsqr_lim_4     60000
#define fftsqr_lim_5    120000
#define fftsqr_lim_6    400000
#define fftsqr_lim_7   1000000
#define fftsqr_lim_8   6000000
#define remsqrt_lim        144
#define ssqr_lim_even       15
#define ssqr_lim_odd        28
#define msqr_lim            29

#endif

/*
   -------------------- div(a,lc,b,lb,c)

   if lb > karpdiv_lim_1 and lc > 1.50*lb         -> karpdiv
   if lb > karpdiv_lim_2 and lc > karpdiv_lim_2   -> karpdiv
   if lb > moddiv_lim    and lc > div_small_c_lim -> moddiv
   if lb > burnidiv_lim  and lc > div_small_c_lim -> burnidiv
   else                                           -> div_n2

   -------------------- inv(a,la,b)

   if la > karpinv_lim -> karpinv
   else                -> moddiv

*/


/*
   Thresholds for div and inv.
   One table per mode; the first of use_clong, use_dlong, use_slong that
   is defined selects its table. If none is defined, none of these
   thresholds is defined. karpinv_lim has the same value in all three
   tables.
*/
#if defined(use_clong)

#define div_small_c_lim     15
#define burnidiv_lim        30
#define moddiv_lim         269
#define karpinv_lim        144
#define karpdiv_lim_1      285
#define karpdiv_lim_2      570

#elif defined(use_dlong)

#define div_small_c_lim     17
#define burnidiv_lim        35
#define moddiv_lim         214
#define karpinv_lim        144
#define karpdiv_lim_1     2500
#define karpdiv_lim_2     5000

#elif defined(use_slong)

#define div_small_c_lim     26
#define burnidiv_lim        50
#define moddiv_lim         224
#define karpinv_lim        144
#define karpdiv_lim_1      224
#define karpdiv_lim_2      448

#endif

/* -------------------- sqrt(a,la,b)

  if la > modsqrt_lim -> modsqrt
  if la > zimsqrt_lim -> zimsqrt
  else                -> sqrt_n2

*/

/*
   Thresholds for sqrt: zimsqrt_lim and modsqrt_lim.
   One pair per mode; the first of use_clong, use_dlong, use_slong that
   is defined selects its pair. If none is defined, neither threshold is
   defined.
*/
#if defined(use_clong)

#define zimsqrt_lim        120
#define modsqrt_lim        350

#elif defined(use_dlong)

#define zimsqrt_lim        172
#define modsqrt_lim        287

#elif defined(use_slong)

#define zimsqrt_lim        115
#define modsqrt_lim        360

#endif

/* -------------------- powmod(a,la,b,lb,c,lc,d)

  if c mod 2 = 0 -> powmod
  if lc = 1      -> powmod
  if c mod 2 = 1 -> powmod_mg with ...
    if lc <= montgomery_lim_1 -> mgdiv_n2
    if lc <= montgomery_lim_2 -> mgdiv_i, toommul, smul
    else                      -> mgdiv_i, fftmul
*/
  
/*
   Thresholds for powmod: montgomery_lim_1 and montgomery_lim_2.
   One pair per mode; the first of use_clong, use_dlong, use_slong that
   is defined selects its pair. montgomery_lim_2 is 10000 in every mode.
*/
#if defined(use_clong)

#define montgomery_lim_1 150
#define montgomery_lim_2 10000 /* > 1000, and takes too long to determine */

#elif defined(use_dlong)

#define montgomery_lim_1 80
#define montgomery_lim_2 10000 /* > 1000, and takes too long to determine */

#elif defined(use_slong)

#define montgomery_lim_1 104
#define montgomery_lim_2 10000 /* > 1000, and takes too long to determine */

#endif

/* -------------------- gcd(a,b,mode)

  mode = 0
    if min(la,lb) <= lehmer_lim_0 -> gcd_n2
    else                          -> lehmer

  mode = 1 or 2
    if min(la,lb) <= lehmer_lim_1 -> gcd_n2
    else                          -> lehmer
*/

/*
   Thresholds for gcd: lehmer_lim_0 and lehmer_lim_1.
   One pair per mode; the first of use_clong, use_dlong, use_slong that
   is defined selects its pair. If none is defined, neither threshold is
   defined.
*/
#if defined (use_clong)

#define lehmer_lim_0    600
#define lehmer_lim_1     48

#elif defined (use_dlong)

#define lehmer_lim_0   1060
#define lehmer_lim_1    300

#elif defined (use_slong)

#define lehmer_lim_0    370
#define lehmer_lim_1     48

#endif

  

/* -------------------- End of config file -------------------- */