File: asm_vecSqrt_sse.s

package info (click to toggle)
golang-github-gorgonia-vecf64 0.9.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 252 kB
  • sloc: asm: 601; sh: 15; makefile: 2
file content (43 lines) | stat: -rw-r--r-- 681 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
// +build sse
// +build amd64
// +build !fastmath

/*
Sqrt takes a []float32 and square roots every element in the slice.
*/
#include "textflag.h"

// func Sqrt(a []float64)
TEXT ·Sqrt(SB), NOSPLIT, $0
	MOVQ a_data+0(FP), SI
	MOVQ SI, CX
	MOVQ a_len+8(FP), AX  // len(a) into AX - +8, because first 8 is pointer, second 8 is length, third 8 is cap

	SUBQ $2, AX
	JL   remainder

loop:
	SQRTPD (SI), X0
	MOVUPD X0, (SI)

	// we processed 2 elements. Each element is 8 bytes. So jump 16 ahead
	ADDQ $16, SI

	SUBQ $2, AX
	JGE  loop

remainder:
	ADDQ $2, AX
	JE   done

remainder1:
	MOVSD  (SI), X0
	SQRTSD X0, X0
	MOVSD  X0, (SI)

	ADDQ $8, SI
	DECQ AX
	JNE  remainder1

done:
	RET