File: mulredc1.asm

package info (click to toggle)
gmp-ecm 7.0.4%2Bds-5
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster
  • size: 4,728 kB
  • sloc: asm: 36,431; ansic: 34,057; xml: 885; python: 799; sh: 698; makefile: 348
file content (54 lines) | stat: -rw-r--r-- 1,579 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#
#  mp_limb_t mulredc1(mp_limb_t * z, const mp_limb_t x, const mp_limb_t y,
#                 const mp_limb_t m, mp_limb_t inv_m)
#
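#  Compute z := x*y mod m, in Montgomery representation, where x, y < m
#  and m is n limbs wide (in this file n = 1: x, y and m are single limbs
#  passed by value).  inv_m is the least significant limb of the negated
#  inverse of m modulo 2^(n*GMP_LIMB_BITS), i.e. inv_m * m == -1 modulo
#  2^GMP_LIMB_BITS, so that adding u*m cancels the low limb of x*y.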
#
#  The result might be unreduced (larger than m) but becomes reduced
#  after subtracting m. The calling function should take care of that.
#
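#  The general n-limb algorithm uses temporary space on the stack for the
#  unreduced product.  Therefore, it can not be used for large integers
#  (anyway, the algorithm is quadratic).  This one-limb version keeps the
#  unreduced product in two scratch registers and needs no stack space.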
#
#  WARNING: z is only n limbs but since it might be unreduced, there
#  could be a carry that does not fit in z. This carry is returned.


# Pull in the m4 configuration.  The macros TEXT, GLOBL, TYPE and GSYM_PREFIX
# used below are not defined in this file; presumably config.m4 provides
# them (TODO confirm).
include(`config.m4')
# Judging by the macro names: switch to the code section and declare
# mulredc1 (with the platform symbol prefix) as a global symbol of type
# function.  Verify against the macro definitions.
	TEXT
	GLOBL GSYM_PREFIX`'mulredc1
	TYPE(GSYM_PREFIX`'mulredc1,`function')

# Give names to the five C arguments and to two scratch registers, depending
# on the calling convention.  If WINDOWS64_ABI is defined the Microsoft x64
# argument registers are used and the fifth argument inv_m is read from the
# stack at 0x28(%rsp): 8 bytes of return address plus the 32 bytes of shadow
# space reserved by the caller.  The routine never changes %rsp, so this
# offset stays valid throughout.  Otherwise the System V AMD64 argument
# registers are used.
# Note that with WINDOWS64_ABI the scratch register TMP1 is %r8, the same
# register as Y.  This is safe because Y is read only once, by the first
# instruction of the routine, before TMP1 is written.
# All registers named here are caller-saved in the respective ABI, so
# nothing has to be preserved.
ifdef(`WINDOWS64_ABI',
# stack: inv_m, %r9: m, %r8: y, %rdx: x, %rcx: *z
`define(`INV_M', `0x28(%rsp)')
define(`M', `%r9')
define(`Y', `%r8')
define(`X', `%rdx')
define(`Z', `%rcx')
define(`TMP2', `%r10')
define(`TMP1', `%r8')',
# %r8: inv_m, %rcx: m, %rdx: y, %rsi : x, %rdi : *z
`define(`INV_M', `%r8')
define(`M', `%rcx')
define(`Y', `%rdx')
define(`X', `%rsi')
define(`Z', `%rdi')
define(`TMP2', `%r10')
define(`TMP1', `%r9')')

# mulredc1: one-limb Montgomery multiplication step.
#   In:   Z = pointer to the result limb, X and Y = the two factors,
#         M = modulus, INV_M = per-modulus constant (register or stack
#         slot behind each name is chosen by the macro definitions above).
#   Out:  (Z)  = high limb of x*y + u*m, where u = low limb of
#                lo(x*y) * inv_m
#         %rax = carry out of that two-limb sum (0 or 1)
#   Uses: %rax, %rdx, TMP1, TMP2 and the flags.  Nothing is written to the
#         stack and no other register is modified.
GSYM_PREFIX`'mulredc1:
	movq	Y, %rax         # only read of Y (TMP1 may alias it later)
	mulq	X               # rdx:rax = x*y, full two-limb product
	movq	%rdx, TMP2      # TMP2 = high limb of x*y
	movq	%rax, TMP1      # TMP1 = low limb of x*y; rax still holds it
	mulq	INV_M           # compute u = low limb of lo(x*y)*inv_m; high half in rdx is discarded
	mulq	M               # compute u*m in rdx:rax
	addq	TMP1, %rax      # low limbs cancel, so rax is 0 now; CF = carry into the high limb (carry is important)
	adcq	TMP2, %rdx      # rdx = hi(u*m) + hi(x*y) + CF; CF = carry out of the result limb
	movq	%rdx, (Z)       # store the result limb; mov leaves CF untouched
	adcq	$0, %rax        # rax was 0, so rax = final carry = return value
	ret