; RUN: opt -mtriple=nvptx64-nvidia-cuda -passes=load-store-vectorizer -S -o - %s | FileCheck %s
; Positive case: four adjacent <2 x half> accesses that are expected to be
; combined into one <8 x half> load and one <8 x half> store.
;
; %rd0 is declared 16-byte aligned. The four accesses sit at half-element
; offsets 0, 2, 4 and 6 from %rd0, so together they span 8 consecutive halves.
; Each group does the same thing: load a pair, compare it against zero with
; fcmp ogt, select the loaded lane where the compare is true and zero
; otherwise, then store the result back to the address it was loaded from.
;
; The FileCheck directives at the end of the body expect a single <8 x half>
; load whose result is split back into the four original pairs by
; shufflevector (lanes 0-1, 2-3, 4-5, 6-7), followed by an <8 x half> store.
define void @ldg_f16(ptr nocapture align 16 %rd0) {
  ; Pair 0: halves [0, 1]; the only access that states the full 16-byte alignment.
  %load1 = load <2 x half>, ptr %rd0, align 16
  %p1 = fcmp ogt <2 x half> %load1, zeroinitializer
  %s1 = select <2 x i1> %p1, <2 x half> %load1, <2 x half> zeroinitializer
  store <2 x half> %s1, ptr %rd0, align 16
  ; Pair 1: halves [2, 3].
  %in2 = getelementptr half, ptr %rd0, i64 2
  %load2 = load <2 x half>, ptr %in2, align 4
  %p2 = fcmp ogt <2 x half> %load2, zeroinitializer
  %s2 = select <2 x i1> %p2, <2 x half> %load2, <2 x half> zeroinitializer
  store <2 x half> %s2, ptr %in2, align 4
  ; Pair 2: halves [4, 5].
  %in3 = getelementptr half, ptr %rd0, i64 4
  %load3 = load <2 x half>, ptr %in3, align 4
  %p3 = fcmp ogt <2 x half> %load3, zeroinitializer
  %s3 = select <2 x i1> %p3, <2 x half> %load3, <2 x half> zeroinitializer
  store <2 x half> %s3, ptr %in3, align 4
  ; Pair 3: halves [6, 7].
  %in4 = getelementptr half, ptr %rd0, i64 6
  %load4 = load <2 x half>, ptr %in4, align 4
  %p4 = fcmp ogt <2 x half> %load4, zeroinitializer
  %s4 = select <2 x i1> %p4, <2 x half> %load4, <2 x half> zeroinitializer
  store <2 x half> %s4, ptr %in4, align 4
  ret void
; Expected output: one wide load, four two-lane extracts, one wide store.
; CHECK-LABEL: @ldg_f16
; CHECK: %[[LD:.*]] = load <8 x half>, ptr
; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 0, i32 1>
; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 2, i32 3>
; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 4, i32 5>
; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 6, i32 7>
; CHECK: store <8 x half>
}
; Negative case: the same load / compare-with-zero / select / store pattern as
; a vectorizable sequence, but using <3 x half> values at half-element offsets
; 0, 3, 6 and 9 from %rd0.
;
; The FileCheck directives at the end of the body require that no
; shufflevector appears in the output for this function. Judging by the
; function name, the intent is that vectors with a non-power-of-two element
; count are left alone by the pass.
;
; NOTE(review): %rd0 is declared 16-byte aligned, so %in2 (3 halves in) and
; %in4 (9 halves in) are at byte offsets 6 and 18, which are not multiples of
; 4, yet their accesses state align 4. Confirm whether this over-stated
; alignment is intentional for the test.
define void @no_nonpow2_vector(ptr nocapture align 16 %rd0) {
  ; Group 0: halves [0, 2].
  %load1 = load <3 x half>, ptr %rd0, align 4
  %p1 = fcmp ogt <3 x half> %load1, zeroinitializer
  %s1 = select <3 x i1> %p1, <3 x half> %load1, <3 x half> zeroinitializer
  store <3 x half> %s1, ptr %rd0, align 4
  ; Group 1: halves [3, 5].
  %in2 = getelementptr half, ptr %rd0, i64 3
  %load2 = load <3 x half>, ptr %in2, align 4
  %p2 = fcmp ogt <3 x half> %load2, zeroinitializer
  %s2 = select <3 x i1> %p2, <3 x half> %load2, <3 x half> zeroinitializer
  store <3 x half> %s2, ptr %in2, align 4
  ; Group 2: halves [6, 8].
  %in3 = getelementptr half, ptr %rd0, i64 6
  %load3 = load <3 x half>, ptr %in3, align 4
  %p3 = fcmp ogt <3 x half> %load3, zeroinitializer
  %s3 = select <3 x i1> %p3, <3 x half> %load3, <3 x half> zeroinitializer
  store <3 x half> %s3, ptr %in3, align 4
  ; Group 3: halves [9, 11].
  %in4 = getelementptr half, ptr %rd0, i64 9
  %load4 = load <3 x half>, ptr %in4, align 4
  %p4 = fcmp ogt <3 x half> %load4, zeroinitializer
  %s4 = select <3 x i1> %p4, <3 x half> %load4, <3 x half> zeroinitializer
  store <3 x half> %s4, ptr %in4, align 4
  ret void
; Expected output: no lane extraction, i.e. no wide load was formed and split.
; CHECK-LABEL: @no_nonpow2_vector
; CHECK-NOT: shufflevector
}
; Negative case: four adjacent <2 x ptr> accesses at pointer-element offsets
; 0, 2, 4 and 6 from %rd0.
;
; Each group loads a pair of pointers, compares it against zeroinitializer
; with icmp ne, selects the loaded lane where the compare is true and
; zeroinitializer otherwise, then stores the result back to the address it was
; loaded from.
;
; The FileCheck directives at the end of the body require that no
; shufflevector appears in the output for this function. Judging by the
; function name, the intent is that vectors of pointers are not combined into
; a wider vector access by the pass.
define void @no_pointer_vector(ptr nocapture align 16 %rd0) {
  ; Group 0: pointers [0, 1].
  %load1 = load <2 x ptr>, ptr %rd0, align 4
  %p1 = icmp ne <2 x ptr> %load1, zeroinitializer
  %s1 = select <2 x i1> %p1, <2 x ptr> %load1, <2 x ptr> zeroinitializer
  store <2 x ptr> %s1, ptr %rd0, align 4
  ; Group 1: pointers [2, 3].
  %in2 = getelementptr ptr, ptr %rd0, i64 2
  %load2 = load <2 x ptr>, ptr %in2, align 4
  %p2 = icmp ne <2 x ptr> %load2, zeroinitializer
  %s2 = select <2 x i1> %p2, <2 x ptr> %load2, <2 x ptr> zeroinitializer
  store <2 x ptr> %s2, ptr %in2, align 4
  ; Group 2: pointers [4, 5].
  %in3 = getelementptr ptr, ptr %rd0, i64 4
  %load3 = load <2 x ptr>, ptr %in3, align 4
  %p3 = icmp ne <2 x ptr> %load3, zeroinitializer
  %s3 = select <2 x i1> %p3, <2 x ptr> %load3, <2 x ptr> zeroinitializer
  store <2 x ptr> %s3, ptr %in3, align 4
  ; Group 3: pointers [6, 7].
  %in4 = getelementptr ptr, ptr %rd0, i64 6
  %load4 = load <2 x ptr>, ptr %in4, align 4
  %p4 = icmp ne <2 x ptr> %load4, zeroinitializer
  %s4 = select <2 x i1> %p4, <2 x ptr> %load4, <2 x ptr> zeroinitializer
  store <2 x ptr> %s4, ptr %in4, align 4
  ret void
; Expected output: no lane extraction, i.e. no wide load was formed and split.
; CHECK-LABEL: @no_pointer_vector
; CHECK-NOT: shufflevector
}
 |