File: addconst_amd64.s

package info (click to toggle)
golang-gonum-v1-gonum 0.15.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 18,792 kB
  • sloc: asm: 6,252; fortran: 5,271; sh: 377; ruby: 211; makefile: 98
file content (53 lines) | stat: -rw-r--r-- 1,530 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
// Copyright ©2016 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !noasm,!gccgo,!safe

#include "textflag.h"

// func AddConst(alpha float64, x []float64)
//
// AddConst adds alpha to every element of x in place, i.e.
//
//	for i := range x {
//		x[i] += alpha
//	}
//
// Arguments are read from the frame: alpha at +0(FP), the data pointer of x
// at +8(FP) and len(x) at +16(FP). Nothing is returned.
//
// Register roles:
//	SI = &x[0]
//	AX = i, index of the next element to process
//	CX = remaining iterations of the 8x-unrolled loop
//	BX = len(x) % 8, remaining iterations of the scalar tail loop
//	X4 = { alpha, alpha }
//	X0-X3 = working vectors
TEXT ·AddConst(SB), NOSPLIT, $0
	MOVQ   x_base+8(FP), SI // SI = &x[0]
	MOVQ   x_len+16(FP), CX // CX = len(x)
	TESTQ  CX, CX           // if len(x) == 0 { return }
	JZ     ac_end
	MOVSD  alpha+0(FP), X4  // X4 = { alpha, alpha }
	SHUFPD $0, X4, X4
	XORQ   AX, AX           // i = 0
	MOVQ   CX, BX
	ANDQ   $7, BX           // BX = len(x) % 8
	SHRQ   $3, CX           // CX = floor( len(x) / 8 )
	JZ     ac_tail          // if CX == 0 { goto ac_tail }; BX == len(x) > 0 here

ac_loop: // Loop unrolled 8x   do {
	MOVUPS (SI)(AX*8), X0   // X_k = x[i+2k : i+2k+2]  (unaligned loads)
	MOVUPS 16(SI)(AX*8), X1
	MOVUPS 32(SI)(AX*8), X2
	MOVUPS 48(SI)(AX*8), X3
	ADDPD  X4, X0           // X_k += { alpha, alpha }
	ADDPD  X4, X1
	ADDPD  X4, X2
	ADDPD  X4, X3
	MOVUPS X0, (SI)(AX*8)   // x[i+2k : i+2k+2] = X_k
	MOVUPS X1, 16(SI)(AX*8)
	MOVUPS X2, 32(SI)(AX*8)
	MOVUPS X3, 48(SI)(AX*8)
	ADDQ   $8, AX           // i += 8
	DECQ   CX               // DECQ/JNZ instead of the slow legacy LOOP instruction
	JNZ    ac_loop          // } while --CX > 0
	TESTQ  BX, BX           // if BX == 0 { return }
	JZ     ac_end

ac_tail: // Scalar tail, BX > 0 on entry   do {
	MOVSD (SI)(AX*8), X0 // X0 = x[i]
	ADDSD X4, X0         // X0 += alpha
	MOVSD X0, (SI)(AX*8) // x[i] = X0
	INCQ  AX             // ++i
	DECQ  BX
	JNZ   ac_tail        // } while --BX > 0

ac_end:
	RET