File: cf.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+avx512f -verify-machineinstrs | FileCheck %s
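;; Tests for lowering llvm.masked.load/llvm.masked.store to the
;; conditionally-faulting CFCMOV instructions enabled by -mattr=+cf.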

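;; A <1 x i1> mask derived from an icmp becomes EFLAGS (via TEST), and each
;; masked load/store is selected to a CFCMOV load or store predicated on the
;; corresponding condition code; the inverted mask simply flips the condition.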
define void @basic(i32 %a, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: basic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    cfcmovel (%rsi), %eax
; CHECK-NEXT:    cfcmovel %eax, (%rdx)
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    cfcmovneq %rax, (%rdx)
; CHECK-NEXT:    movw $2, %ax
; CHECK-NEXT:    cfcmovnew %ax, (%rcx)
; CHECK-NEXT:    retq
entry:
  %cond = icmp eq i32 %a, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %1 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i32> poison)
  call void @llvm.masked.store.v1i32.p0(<1 x i32> %1, ptr %p, i32 4, <1 x i1> %0)
  %2 = xor i1 %cond, true
  %3 = bitcast i1 %2 to <1 x i1>
  call void @llvm.masked.store.v1i64.p0(<1 x i64> <i64 1>, ptr %p, i32 8, <1 x i1> %3)
  call void @llvm.masked.store.v1i16.p0(<1 x i16> <i16 2>, ptr %q, i32 8, <1 x i1> %3)
  ret void
}

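;; A zero passthru matches the zeroing behaviour of the two-operand CFCMOV load
;; form, so no extra register or select is needed when the condition is false.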
define i16 @cload_passthru_zero(i16 %a, ptr %b) {
; CHECK-LABEL: cload_passthru_zero:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testw %di, %di
; CHECK-NEXT:    cfcmovew (%rsi), %ax
; CHECK-NEXT:    retq
entry:
  %cond = icmp eq i16 %a, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i16> <i16 0>)
  %2 = bitcast <1 x i16> %1 to i16
  ret i16 %2
}

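;; A non-zero passthru requires the three-operand CFCMOV form, which writes the
;; loaded value when the condition holds and the passthru register (%rdi here)
;; otherwise.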
define i64 @cload_passthru_not_zero(i64 %a, ptr %b) {
; CHECK-LABEL: cload_passthru_not_zero:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    cfcmoveq (%rsi), %rdi, %rax
; CHECK-NEXT:    retq
entry:
  %cond = icmp eq i64 %a, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %va = bitcast i64 %a to <1 x i64>
  %1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i64> %va)
  %2 = bitcast <1 x i64> %1 to i64
  ret i64 %2
}

;; CFCMOV can use the flags produced by SUB directly.
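;; The icmp sge against zero folds into the flags already set by the SUB, so no
;; separate CMP is emitted: cfcmovns reads SF straight from the subq, and the
;; SUB result is still available for the final ADD, shortening the dependency
;; chain.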
define i64 @reduced_data_dependency(i64 %a, i64 %b, ptr %c) {
; CHECK-LABEL: reduced_data_dependency:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rcx
; CHECK-NEXT:    subq %rsi, %rcx
; CHECK-NEXT:    cfcmovnsq (%rdx), %rdi, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
entry:
  %sub = sub i64 %a, %b
  %cond = icmp sge i64 %sub, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %va = bitcast i64 %a to <1 x i64>
  %1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %c, i32 4, <1 x i1> %0, <1 x i64> %va)
  %2 = bitcast <1 x i64> %1 to i64
  %3 = add i64 %2, %sub
  ret i64 %3
}

;; There is no need to optimize the generated assembly for cond_false/cond_true
;; because the middle end should never emit such IR. The IR is included here
;; only to check that feeding a constant mask to the backend is legal.
define i16 @cond_false(ptr %b) {
; CHECK-LABEL: cond_false:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    negb %al
; CHECK-NEXT:    cfcmovnew (%rdi), %ax
; CHECK-NEXT:    retq
entry:
  %0 = bitcast i1 false to <1 x i1>
  %1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i16> <i16 0>)
  %2 = bitcast <1 x i16> %1 to i16
  ret i16 %2
}

define i64 @cond_true(ptr %b) {
; CHECK-LABEL: cond_true:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movb $1, %al
; CHECK-NEXT:    negb %al
; CHECK-NEXT:    cfcmovneq (%rdi), %rax
; CHECK-NEXT:    retq
entry:
  %0 = bitcast i1 true to <1 x i1>
  %1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i64> <i64 0>)
  %2 = bitcast <1 x i64> %1 to i64
  ret i64 %2
}

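;; Masks wider than one element are not candidates for CFCMOV; they take the
;; AVX-512 masked load/store path instead. This test only checks that such IR
;; compiles without crashing.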
define void @no_crash(ptr %p, <4 x i1> %cond1, <4 x i1> %cond2) {
; CHECK-LABEL: no_crash:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k0
; CHECK-NEXT:    kshiftlw $12, %k0, %k0
; CHECK-NEXT:    kshiftrw $12, %k0, %k1
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0
; CHECK-NEXT:    kshiftlw $12, %k0, %k0
; CHECK-NEXT:    kshiftrw $12, %k0, %k2
; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k2} {z}
; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %0 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr %p, i32 8, <4 x i1> %cond1, <4 x i64> poison)
  call void @llvm.masked.store.v4i64.p0(<4 x i64> %0, ptr %p, i32 8, <4 x i1> %cond2)
  ret void
}