File: fsqrt.ll

package info (click to toggle)
llvm-toolchain-17 1%3A17.0.6-22
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,799,624 kB
  • sloc: cpp: 6,428,607; ansic: 1,383,196; asm: 793,408; python: 223,504; objc: 75,364; f90: 60,502; lisp: 33,869; pascal: 15,282; sh: 9,684; perl: 7,453; ml: 4,937; awk: 3,523; makefile: 2,889; javascript: 2,149; xml: 888; fortran: 619; cs: 573
file content (153 lines) | stat: -rw-r--r-- 5,699 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s


; Run both without (v_safe_*) and with (v_unsafe_*) unsafe-fp-math to make
; sure nothing tries to turn sqrt(x) into 1 / rsqrt(x).

; FUNC-LABEL: {{^}}v_safe_fsqrt_f32:
; GCN: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
; Loads a single float from %in, computes its square root through the
; llvm.sqrt.f32 intrinsic and writes the result to %out.
; Uses attribute group #1, i.e. "unsafe-fp-math"="false".
define amdgpu_kernel void @v_safe_fsqrt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
  %x = load float, ptr addrspace(1) %in
  %root = call float @llvm.sqrt.f32(float %x)
  store float %root, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}v_unsafe_fsqrt_f32:
; GCN: v_sqrt_f32_e32 {{v[0-9]+, v[0-9]+}}
; Same load / sqrt / store sequence as the safe variant, but compiled with
; attribute group #2, i.e. "unsafe-fp-math"="true".
define amdgpu_kernel void @v_unsafe_fsqrt_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #2 {
  %x = load float, ptr addrspace(1) %in
  %root = call float @llvm.sqrt.f32(float %x)
  store float %root, ptr addrspace(1) %out
  ret void
}


; FUNC-LABEL: {{^}}s_sqrt_f32:
; GCN: v_sqrt_f32_e32

; R600: RECIPSQRT_IEEE * T{{[0-9]\.[XYZW]}}, KC0[2].Z
; R600: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, PS
; Scalar case: the operand arrives directly as the kernel argument %in
; (no load), is passed to llvm.sqrt.f32, and the result is stored to %out.
define amdgpu_kernel void @s_sqrt_f32(ptr addrspace(1) %out, float %in) #1 {
entry:
  %root = call float @llvm.sqrt.f32(float %in)
  store float %root, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}s_sqrt_v2f32:
; GCN: v_sqrt_f32_e32
; GCN: v_sqrt_f32_e32

; R600-DAG: RECIPSQRT_IEEE * T{{[0-9]\.[XYZW]}}, KC0[2].W
; R600-DAG: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, PS
; R600-DAG: RECIPSQRT_IEEE * T{{[0-9]\.[XYZW]}}, KC0[3].X
; R600-DAG: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}
; Two-element vector case: applies llvm.sqrt.v2f32 to the kernel argument
; %in and stores the <2 x float> result to %out.
define amdgpu_kernel void @s_sqrt_v2f32(ptr addrspace(1) %out, <2 x float> %in) #1 {
entry:
  %roots = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
  store <2 x float> %roots, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}s_sqrt_v4f32:
; GCN: v_sqrt_f32_e32
; GCN: v_sqrt_f32_e32
; GCN: v_sqrt_f32_e32
; GCN: v_sqrt_f32_e32

; R600-DAG: RECIPSQRT_IEEE * T{{[0-9]\.[XYZW]}}, KC0[3].Y
; R600-DAG: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, PS
; R600-DAG: RECIPSQRT_IEEE * T{{[0-9]\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}
; R600-DAG: RECIPSQRT_IEEE * T{{[0-9]\.[XYZW]}}, KC0[3].W
; R600-DAG: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}
; R600-DAG: RECIPSQRT_IEEE * T{{[0-9]\.[XYZW]}}, KC0[4].X
; R600-DAG: RECIP_IEEE * T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}
; Four-element vector case: applies llvm.sqrt.v4f32 to the kernel argument
; %in and stores the <4 x float> result to %out.
define amdgpu_kernel void @s_sqrt_v4f32(ptr addrspace(1) %out, <4 x float> %in) #1 {
entry:
  %roots = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %in)
  store <4 x float> %roots, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}elim_redun_check_neg0:
; GCN: v_sqrt_f32_e32
; GCN-NOT: v_cndmask
; sqrt(%in) guarded by an explicit "%in < -0.0 ? NaN : sqrt" select.
; The GCN checks for this kernel expect the sqrt instruction and no
; v_cndmask, i.e. the guard is expected to be eliminated as redundant.
define amdgpu_kernel void @elim_redun_check_neg0(ptr addrspace(1) %out, float %in) #1 {
entry:
  %root = call float @llvm.sqrt.f32(float %in)
  ; Ordered less-than against negative zero.
  %is_neg = fcmp olt float %in, -0.000000e+00
  ; 0x7FF8000000000000 is the NaN constant substituted for negative inputs.
  %guarded = select i1 %is_neg, float 0x7FF8000000000000, float %root
  store float %guarded, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}elim_redun_check_pos0:
; GCN: v_sqrt_f32_e32
; GCN-NOT: v_cndmask
; Same redundant-guard pattern as elim_redun_check_neg0, except that the
; comparison constant is positive zero instead of negative zero.
; The GCN checks expect the sqrt instruction and no v_cndmask.
define amdgpu_kernel void @elim_redun_check_pos0(ptr addrspace(1) %out, float %in) #1 {
entry:
  %root = call float @llvm.sqrt.f32(float %in)
  ; Ordered less-than against positive zero.
  %is_neg = fcmp olt float %in, 0.000000e+00
  ; 0x7FF8000000000000 is the NaN constant substituted for negative inputs.
  %guarded = select i1 %is_neg, float 0x7FF8000000000000, float %root
  store float %guarded, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}elim_redun_check_ult:
; GCN: v_sqrt_f32_e32
; GCN-NOT: v_cndmask
; Redundant-guard pattern using the unordered predicate: the select picks
; NaN when %in is less than -0.0 or the comparison is unordered.
; The GCN checks expect the sqrt instruction and no v_cndmask.
define amdgpu_kernel void @elim_redun_check_ult(ptr addrspace(1) %out, float %in) #1 {
entry:
  %root = call float @llvm.sqrt.f32(float %in)
  ; "ult" = unordered or less-than, compared against negative zero.
  %neg_or_unord = fcmp ult float %in, -0.000000e+00
  %guarded = select i1 %neg_or_unord, float 0x7FF8000000000000, float %root
  store float %guarded, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}elim_redun_check_v2:
; GCN: v_sqrt_f32_e32
; GCN: v_sqrt_f32_e32
; GCN-NOT: v_cndmask
; Vector (<2 x float>) form of the redundant-guard pattern with the ordered
; less-than predicate; each lane selects NaN when its input is below -0.0.
; The GCN checks expect two sqrt instructions and no v_cndmask.
define amdgpu_kernel void @elim_redun_check_v2(ptr addrspace(1) %out, <2 x float> %in) #1 {
entry:
  %roots = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
  ; Lane-wise ordered less-than against a splat of negative zero.
  %is_neg = fcmp olt <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
  %guarded = select <2 x i1> %is_neg, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %roots
  store <2 x float> %guarded, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}elim_redun_check_v2_ult:
; GCN: v_sqrt_f32_e32
; GCN: v_sqrt_f32_e32
; GCN-NOT: v_cndmask
; Vector (<2 x float>) form of the redundant-guard pattern with the
; unordered-or-less-than predicate.
; The GCN checks expect two sqrt instructions and no v_cndmask.
define amdgpu_kernel void @elim_redun_check_v2_ult(ptr addrspace(1) %out, <2 x float> %in) #1 {
entry:
  %roots = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %in)
  ; Lane-wise "ult" (unordered or less-than) against a splat of negative zero.
  %neg_or_unord = fcmp ult <2 x float> %in, <float -0.000000e+00, float -0.000000e+00>
  %guarded = select <2 x i1> %neg_or_unord, <2 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000>, <2 x float> %roots
  store <2 x float> %guarded, ptr addrspace(1) %out
  ret void
}

; FUNC-LABEL: {{^}}recip_sqrt:
; R600: RECIPSQRT_IEEE
; R600-NOT: RECIP_IEEE
; Computes 1.0 / sqrt(%src) with a "fast" fdiv and stores it to %out.
; Only the R600 run has checks for this kernel; they expect a single
; reciprocal-square-root instruction with no separate reciprocal.
define amdgpu_kernel void @recip_sqrt(ptr addrspace(1) %out, float %src) nounwind {
  %root = call float @llvm.sqrt.f32(float %src)
  ; The fast-math flag on the division is what permits the combined form.
  %rsq = fdiv fast float 1.0, %root
  store float %rsq, ptr addrspace(1) %out, align 4
  ret void
}

; Square-root intrinsics used by the kernels in this file (scalar, 2-wide and
; 4-wide float variants). All share attribute group #0.
declare float @llvm.sqrt.f32(float) #0
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0

; #0 - attributes of the intrinsic declarations.
attributes #0 = { nounwind readnone }
; #1 - kernels built with unsafe fp math disabled.
attributes #1 = { nounwind "unsafe-fp-math"="false" }
; #2 - kernels built with unsafe fp math enabled (v_unsafe_fsqrt_f32).
attributes #2 = { nounwind "unsafe-fp-math"="true" }