File: asm.go

package info (click to toggle)
golang-github-mmcloughlin-avo 0.5.0-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 15,024 kB
  • sloc: xml: 71,029; asm: 14,862; sh: 194; makefile: 21; ansic: 11
file content (79 lines) | stat: -rw-r--r-- 2,012 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
//go:build ignore
// +build ignore

package main

import (
	"strconv"

	. "github.com/mmcloughlin/avo/build"
	. "github.com/mmcloughlin/avo/operand"
	. "github.com/mmcloughlin/avo/reg"
)

// The goal of this test is to confirm correct liveness analysis of zeroing mode
// when masking in AVX-512. In merge masking, some of the bits of the output
// register will be preserved, so the register is live coming into the
// instruction. Zeroing mode removes any input dependency.
//
// This synthetic test sets up a situation where we allocate multiple temporary
// registers. Allocation is only feasible if the liveness pass correctly
// identifies that they are not all live at once.

// main builds the "Zeroing" assembly function with avo and then calls
// Generate. The emitted routine accumulates numTerms summands into a single
// ZMM register and stores the result through the out pointer.
func main() {
	const numTerms = 32

	TEXT("Zeroing", NOSPLIT, "func(out *[8]uint64)")
	doc := "Zeroing computes the sum 1+2+...+" + strconv.Itoa(numTerms) + " in 8 lanes of 512-bit register."
	Doc(doc)

	dst := Load(Param("out"), GP64())

	// The accumulator starts out cleared by XORing it with itself.
	Comment("Initialize sum.")
	acc := ZMM()
	VPXORD(acc, acc, acc)

	// Create one virtual register per summand and fill each with a junk
	// pattern up front.
	//
	// If the masked writes further down were treated as merge-masking (or
	// zeroing-masking were handled incorrectly), every one of these registers
	// would be considered live from here on, and there are more of them than
	// the allocator could ever satisfy.
	Comment("Initialize summand registers.")
	junk := GP64()
	MOVQ(U64(0x9e77d78aacb8cbcc), junk)

	terms := make([]VecVirtual, numTerms)
	for idx := range terms {
		terms[idx] = ZMM()
		VPBROADCASTQ(junk, terms[idx])
	}

	// Mask register with every bit set.
	Comment("Prepare mask register.")
	mask := K()
	KXNORW(mask, mask, mask)

	// Vector holding the value 1 in each lane, used as the per-step increment.
	Comment("Prepare constant registers.")
	unit := GP64()
	MOVQ(U64(1), unit)
	units := ZMM()
	VPBROADCASTQ(unit, units)

	// All-zero vector that seeds the chain of summands.
	origin := ZMM()
	VPXORD(origin, origin, origin)

	// Each summand is the previous one plus the increment, written with the
	// zeroing-masked add, and is then folded into the accumulator.
	prev := origin
	for idx, term := range terms {
		Commentf("Summand %d.", idx+1)
		VPADDD_Z(prev, units, mask, term)
		VPADDD(acc, term, acc)
		prev = term
	}

	Comment("Write result to output pointer.")
	VMOVDQU64(acc, Mem{Base: dst})

	RET()

	Generate()
}