File: divergent-target-test.ll

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 1,998,492 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (83 lines) | stat: -rw-r--r-- 1,886 bytes parent folder | download | duplicates (10)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
; REQUIRES: amdgpu-registered-target && x86-registered-target
; RUN: opt < %s -mtriple=amdgcn -passes=jump-threading -S | FileCheck %s -check-prefixes=CHECK,DIVERGENT
; RUN: opt < %s -mtriple=x86_64 -passes=jump-threading -S | FileCheck %s -check-prefixes=CHECK,UNIFORM

; Verify that on a target without branch divergence (x86_64) the usual jump
; threading optimization is performed, while on a target with branch
; divergence (amdgcn) it is not, so the IR is left unchanged.

; External callees used by the test functions in this file; only their
; declarations are given here. @f1 and @f2 produce the i32 values that are
; returned, and @f3 is a void call placed in the T2 blocks.
declare i32 @f1()
declare i32 @f2()
declare void @f3()

; @test: a diamond on %cond whose merge block immediately branches again on a
; phi (%A) that is true when Merge is reached from T1 and false when it is
; reached from F1, so each predecessor of Merge already determines whether T2
; or F2 runs next.
;
; Expectations, per the RUN lines at the top of the file:
;   - amdgcn run (DIVERGENT prefix): the names T1, F1, Merge, T2 and F2 must
;     all still appear in the output, in that order.
;   - x86_64 run (UNIFORM prefix): T1, F1 and Merge must not appear between
;     the function name and the T2 label; T2 then contains the @f1 and @f3
;     calls and returns %v1, and F2 contains the @f2 call and returns %v2.
define i32 @test(i1 %cond) {
; CHECK: test
	br i1 %cond, label %T1, label %F1

; True arm of the first branch: produces %v1.
; DIVERGENT:   T1
; UNIFORM-NOT: T1
T1:
	%v1 = call i32 @f1()
	br label %Merge
; False arm of the first branch: produces %v2.
; DIVERGENT:   F1
; UNIFORM-NOT: F1
F1:
	%v2 = call i32 @f2()
	br label %Merge
; Join point: %A records which arm was taken, %B carries that arm's result.
; DIVERGENT:   Merge
; UNIFORM-NOT: Merge
Merge:
	%A = phi i1 [true, %T1], [false, %F1]
	%B = phi i32 [%v1, %T1], [%v2, %F1]
	br i1 %A, label %T2, label %F2

; Reached only when %A is true, i.e. when Merge was entered from T1.
; DIVERGENT:   T2
T2:
; UNIFORM: T2:
; UNIFORM: %v1 = call i32 @f1()
; UNIFORM: call void @f3()
; UNIFORM: ret i32 %v1
	call void @f3()
	ret i32 %B
; Reached only when %A is false, i.e. when Merge was entered from F1.
; DIVERGENT:   F2
F2:
; UNIFORM: F2:
; UNIFORM: %v2 = call i32 @f2()
; UNIFORM: ret i32 %v2
	ret i32 %B
}

; Check that the divergence check is skipped if there can't be divergence in
; the function.
; @requires_single_lane_exec: same control flow as @test (a diamond on %cond
; followed by a second branch on the phi %A), but the function carries
; attribute group #0, defined at the end of this file.
;
; All directives here use the plain CHECK prefix, which both RUN lines enable,
; so the amdgcn and x86_64 runs must produce the same shape: T1, F1 and Merge
; do not appear between the function name and the T2 label; T2 contains the
; @f1 and @f3 calls and returns %v1, and F2 contains the @f2 call and returns
; %v2.
define i32 @requires_single_lane_exec(i1 %cond) #0 {
; CHECK: requires_single_lane_exec
	br i1 %cond, label %T1, label %F1

; True arm of the first branch: produces %v1.
; CHECK-NOT: T1
T1:
	%v1 = call i32 @f1()
	br label %Merge
; False arm of the first branch: produces %v2.
; CHECK-NOT: F1
F1:
	%v2 = call i32 @f2()
	br label %Merge
; Join point: %A records which arm was taken, %B carries that arm's result.
; CHECK-NOT: Merge
Merge:
	%A = phi i1 [true, %T1], [false, %F1]
	%B = phi i32 [%v1, %T1], [%v2, %F1]
	br i1 %A, label %T2, label %F2

; Reached only when %A is true, i.e. when Merge was entered from T1.
T2:
; CHECK: T2:
; CHECK: %v1 = call i32 @f1()
; CHECK: call void @f3()
; CHECK: ret i32 %v1
	call void @f3()
	ret i32 %B
; Reached only when %A is false, i.e. when Merge was entered from F1.
F2:
; CHECK: F2:
; CHECK: %v2 = call i32 @f2()
; CHECK: ret i32 %v2
	ret i32 %B
}

; Attribute group applied to @requires_single_lane_exec.
; NOTE(review): the "1,1" value presumably pins the flat work-group size to a
; single work-item, which would be why that function is treated as unable to
; diverge -- confirm against the AMDGPU backend documentation.
attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }