File: mulmod.h

package info (click to toggle)
libmath-prime-util-perl 0.46-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,044 kB
  • ctags: 1,933
  • sloc: perl: 19,450; ansic: 6,379; python: 24; makefile: 11
file content (166 lines) | stat: -rw-r--r-- 4,770 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#ifndef MPU_MULMOD_H
#define MPU_MULMOD_H

#include "ptypes.h"

#if defined(__GNUC__)
  #define INLINE inline
#elif defined(_MSC_VER)
  #define INLINE __inline
#else
  #define INLINE
#endif

/* if n is smaller than this, you can multiply without overflow */
#define HALF_WORD (UVCONST(1) << (BITS_PER_WORD/2))
/* This will be true if we think mulmods are fast */
#define MULMODS_ARE_FAST 1

#if (BITS_PER_WORD == 32) && HAVE_STD_U64

  /* We have 64-bit available, but UV is 32-bit.  Do the math in 64-bit.
   * Even if it is emulated, it should be as fast or faster than us doing it.
   */
  #define addmod(a,b,n)  (UV)( ((uint64_t)(a) + (b)) % (n) )
  #define mulmod(a,b,n)  (UV)( ((uint64_t)(a) * (b)) % (n) )
  #define sqrmod(a,n)    (UV)( ((uint64_t)(a) * (a)) % (n) )

#elif defined(__GNUC__) && defined(__x86_64__)

  /* GCC on a 64-bit Intel x86, help from WraithX and Wojciech Izykowski */
  /* Beware: if (a*b)/c > 2^64, there will be an FP exception */
  static INLINE UV _mulmod(UV a, UV b, UV n) {
    UV d, dummy;                    /* d will get a*b mod c */
    asm ("mulq %3\n\t"              /* mul a*b -> rdx:rax */
         "divq %4\n\t"              /* (a*b)/c -> quot in rax remainder in rdx */
         :"=a"(dummy), "=&d"(d)     /* output */
         :"a"(a), "rm"(b), "rm"(n)  /* input */
         :"cc"                      /* mulq and divq can set conditions */
        );
    return d;
  }
  #define mulmod(a,b,n) _mulmod(a,b,n)
  #define sqrmod(a,n)   _mulmod(a,a,n)
  /* A version for _MSC_VER:
   *    __asm { mov rax, qword ptr a
   *            mul qword ptr b
   *            div qword ptr c
   *            mov qword ptr d, rdx }   */

  /* addmod from Kruppa 2010 page 67 */
  static INLINE UV _addmod(UV a, UV b, UV n) {
    UV r = a+b;
    UV t = a-n;
    asm ("add %2, %1\n\t"    /* t := t + b */
         "cmovc %1, %0\n\t"  /* if (carry) r := t */
         :"+r" (r), "+&r" (t)
         :"rm" (b)
         :"cc"
        );
    return r;
  }
  #define addmod(a,b,n) _addmod(a,b,n)

#elif BITS_PER_WORD == 64 && __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && (defined(__x86_64__) || defined(__powerpc64__))

  /* We're 64-bit, using a modern gcc, and the target has some 128-bit type.
   * The actual number of targets that have this implemented are limited. */

  #if __GNUC__ == 4 && __GNUC_MINOR__ >= 4 && __GNUC_MINOR__ < 6
    typedef unsigned int uint128_t __attribute__ ((__mode__ (TI)));
  #else
    typedef unsigned __int128 uint128_t;
  #endif

  #define mulmod(a,b,n) (UV)( ((uint128_t)(a) * (b)) % (n) )
  #define sqrmod(a,n)   (UV)( ((uint128_t)(a) * (a)) % (n) )

#else

  /* UV is the largest integral type available (that we know of). */
#undef MULMODS_ARE_FAST
#define MULMODS_ARE_FAST 0

  /* Do it by hand */
  static INLINE UV _mulmod(UV a, UV b, UV n) {
    UV r = 0;
    if (a >= n) a %= n;   /* Careful attention from the caller should make */
    if (b >= n) b %= n;   /* these unnecessary.                            */
    if ((a|b) < HALF_WORD) return (a*b) % n;
    if (a < b) { UV t = a; a = b; b = t; }
    if (n <= (UV_MAX>>1)) {
      while (b > 0) {
        if (b & 1)  { r += a;  if (r >= n) r -= n; }
        b >>= 1;
        if (b)      { a += a;  if (a >= n) a -= n; }
      }
    } else {
      while (b > 0) {
        if (b & 1)  r = ((n-r) > a) ? r+a : r+a-n;    /* r = (r + a) % n */
        b >>= 1;
        if (b)      a = ((n-a) > a) ? a+a : a+a-n;    /* a = (a + a) % n */
      }
    }
    return r;
  }

  #define mulmod(a,b,n) _mulmod(a,b,n)
  #define sqrmod(a,n)   _mulmod(a,a,n)

#endif

#ifndef addmod
  static INLINE UV addmod(UV a, UV b, UV n) {
    return ((n-a) > b) ?  a+b  :  a+b-n;
  }
#endif

static INLINE UV submod(UV a, UV b, UV n) {
  UV t = n-b;  /* Evaluate as UV, then hand to addmod */
  return addmod(a, t, n);
}

/* a^2 + c mod n */
#define sqraddmod(a, c, n)     addmod(sqrmod(a,n),   c, n)
/* a*b + c mod n */
#define muladdmod(a, b, c, n)  addmod(mulmod(a,b,n), c, n)
/* a*b - c mod n */
#define mulsubmod(a, b, c, n)  submod(mulmod(a,b,n), c, n)

/* a^k mod n */
#ifndef HALF_WORD
  static INLINE UV powmod(UV a, UV k, UV n) {
    UV t = 1;
    if (a >= n) a %= n;
    while (k) {
      if (k & 1) t = mulmod(t, a, n);
      k >>= 1;
      if (k)     a = sqrmod(a, n);
    }
    return t;
  }
#else
  static INLINE UV powmod(UV a, UV k, UV n) {
    UV t = 1;
    if (a >= n) a %= n;
    if (n < HALF_WORD) {
      while (k) {
        if (k & 1) t = (t*a)%n;
        k >>= 1;
        if (k)     a = (a*a)%n;
      }
    } else {
      while (k) {
        if (k & 1) t = mulmod(t, a, n);
        k >>= 1;
        if (k)     a = sqrmod(a, n);
      }
    }
    return t;
  }
#endif

/* a^k + c mod n */
#define powaddmod(a, k, c, n)  addmod(powmod(a,k,n),c,n)

#endif