File: fp-atomics-gfx940.ll

package info (click to toggle)
llvm-toolchain-20 1%3A20.1.6-1~exp1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,111,304 kB
  • sloc: cpp: 7,438,677; ansic: 1,393,822; asm: 1,012,926; python: 241,650; f90: 86,635; objc: 75,479; lisp: 42,144; pascal: 17,286; sh: 10,027; ml: 5,082; perl: 4,730; awk: 3,523; makefile: 3,349; javascript: 2,251; xml: 892; fortran: 672
file content (132 lines) | stat: -rw-r--r-- 5,598 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 < %s | FileCheck %s -check-prefix=GFX940

define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat(ptr %ptr) {
; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NEXT:    buffer_wbl2 sc0 sc1
; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_inv sc0 sc1
; GFX940-NEXT:    s_endpgm
  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
  ret void
}

define amdgpu_kernel void @flat_atomic_fadd_f32_noret_pat_ieee(ptr %ptr) #0 {
; GFX940-LABEL: flat_atomic_fadd_f32_noret_pat_ieee:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NEXT:    v_mov_b64_e32 v[0:1], s[0:1]
; GFX940-NEXT:    buffer_wbl2 sc0 sc1
; GFX940-NEXT:    flat_atomic_add_f32 v[0:1], v2 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_inv sc0 sc1
; GFX940-NEXT:    s_endpgm
  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
  ret void
}

define float @flat_atomic_fadd_f32_rtn_pat(ptr %ptr, float %data) {
; GFX940-LABEL: flat_atomic_fadd_f32_rtn_pat:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 4.0
; GFX940-NEXT:    buffer_wbl2 sc0 sc1
; GFX940-NEXT:    flat_atomic_add_f32 v0, v[0:1], v2 sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_inv sc0 sc1
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %ret = atomicrmw fadd ptr %ptr, float 4.0 seq_cst, !amdgpu.no.remote.memory !0
  ret float %ret
}

define <2 x half> @local_atomic_fadd_ret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
; GFX940-LABEL: local_atomic_fadd_ret_v2f16_offset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ds_pk_add_rtn_f16 v0, v0, v1 offset:65532
; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
  %result = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
  ret <2 x half> %result
}

define void @local_atomic_fadd_noret_v2f16_offset(ptr addrspace(3) %ptr, <2 x half> %val) {
; GFX940-LABEL: local_atomic_fadd_noret_v2f16_offset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ds_pk_add_f16 v0, v1 offset:65532
; GFX940-NEXT:    s_waitcnt lgkmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr <2 x half>, ptr addrspace(3) %ptr, i32 16383
  %unused = atomicrmw fadd ptr addrspace(3) %gep, <2 x half> %val seq_cst
  ret void
}

define <2 x half> @global_atomic_fadd_ret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
; GFX940-LABEL: global_atomic_fadd_ret_v2f16_agent_offset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_wbl2 sc1
; GFX940-NEXT:    global_atomic_pk_add_f16 v0, v[0:1], v2, off offset:1024 sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    buffer_inv sc1
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
  %result = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
  ret <2 x half> %result
}

define void @global_atomic_fadd_noret_v2f16_agent_offset(ptr addrspace(1) %ptr, <2 x half> %val) {
; GFX940-LABEL: global_atomic_fadd_noret_v2f16_agent_offset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_wbl2 sc1
; GFX940-NEXT:    global_atomic_pk_add_f16 v[0:1], v2, off offset:1024
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    buffer_inv sc1
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr <2 x half>, ptr addrspace(1) %ptr, i32 256
  %unused = atomicrmw fadd ptr addrspace(1) %gep, <2 x half> %val syncscope("agent") seq_cst
  ret void
}

define <2 x half> @flat_atomic_fadd_ret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
; GFX940-LABEL: flat_atomic_fadd_ret_v2f16_agent_offset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_wbl2 sc1
; GFX940-NEXT:    flat_atomic_pk_add_f16 v0, v[0:1], v2 offset:1024 sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_inv sc1
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr <2 x half>, ptr %ptr, i32 256
  %result = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
  ret <2 x half> %result
}

define void @flat_atomic_fadd_noret_v2f16_agent_offset(ptr %ptr, <2 x half> %val) {
; GFX940-LABEL: flat_atomic_fadd_noret_v2f16_agent_offset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_wbl2 sc1
; GFX940-NEXT:    flat_atomic_pk_add_f16 v[0:1], v2 offset:1024
; GFX940-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_inv sc1
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %gep = getelementptr <2 x half>, ptr %ptr, i32 256
  %unused = atomicrmw fadd ptr %gep, <2 x half> %val syncscope("agent") seq_cst
  ret void
}

attributes #0 = { "denormal-fp-math-f32"="ieee,ieee" }

!0 = !{}