1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
|
; REQUIRES: asserts
; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_UNMASKED
; RUN: opt -S -passes=loop-vectorize -force-vector-width=8 -force-vector-interleave=1 -enable-interleaved-mem-accesses -enable-masked-interleaved-mem-accesses -debug-only=loop-vectorize,vectorutils -disable-output < %s 2>&1 | FileCheck %s -check-prefix=STRIDED_MASKED
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
; We test here that the loop-vectorizer forms interleave-groups from
; predicated memory accesses only if they are both in the same (predicated)
; block (first scenario below).
; If the accesses are not in the same predicated block, an interleave-group
; is not formed (scenarios 2,3 below).
; Scenario 1: Check the case where it is legal to create masked interleave-
; groups. Altogether two groups are created (one for loads and one for stores)
; when masked-interleaved-accesses are enabled. When masked-interleaved-accesses
; are disabled we do not create any interleave-group.
;
; void masked_strided1(const unsigned char* restrict p,
; unsigned char* restrict q,
; unsigned char guard) {
; for(ix=0; ix < 1024; ++ix) {
; if (ix > guard) {
; char left = p[2*ix];
; char right = p[2*ix + 1];
; char max = max(left, right);
; q[2*ix] = max;
; q[2*ix+1] = 0 - max;
; }
; }
;}
; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided1'
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
; STRIDED_MASKED: LV: Checking a loop in 'masked_strided1'
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 %{{.*}}, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Inserted: store i8 %{{.*}}, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: into the interleave group with store i8 %{{.*}}, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: %{{.*}} = load i8, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Inserted: %{{.*}} = load i8, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: into the interleave group with %{{.*}} = load i8, ptr %{{.*}}, align 1
; Scenario 2: Check the case where it is illegal to create a masked interleave-
; group because the second access is predicated, and the first isn't.
; We therefore create a separate interleave-group with gaps for each of the
; stores (if masked-interleaved-accesses are enabled).
; If masked-interleaved-accesses is not enabled we create only one interleave
; group of stores (for the non-predicated store) and it is later invalidated
; due to gaps.
;
; void masked_strided2(const unsigned char* restrict p,
; unsigned char* restrict q,
; unsigned char guard) {
; for(ix=0; ix < 1024; ++ix) {
; q[2*ix] = 1;
; if (ix > guard) {
; q[2*ix+1] = 2;
; }
; }
;}
; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided2'
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
; STRIDED_UNMASKED-NEXT: LV: Creating an interleave group with: store i8 1, ptr %{{.*}}, align 1
; STRIDED_UNMASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps.
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
; STRIDED_MASKED: LV: Checking a loop in 'masked_strided2'
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 2, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 1, ptr %{{.*}}, align 1
; STRIDED_MASKED-NOT: LV: Invalidate candidate interleaved store group due to gaps.
; Scenario 3: Check the case where it is illegal to create a masked interleave-
; group because the two accesses are in separate predicated blocks.
; We therefore create a separate interleave-group with gaps for each of the accesses.
; If masked-interleaved-accesses is not enabled we don't create any interleave
; group because all accesses are predicated.
;
; void masked_strided3(const unsigned char* restrict p,
; unsigned char* restrict q,
; unsigned char guard1,
; unsigned char guard2) {
; for(ix=0; ix < 1024; ++ix) {
; if (ix > guard1) {
; q[2*ix] = 1;
; }
; if (ix > guard2) {
; q[2*ix+1] = 2;
; }
; }
;}
; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided3'
; STRIDED_UNMASKED: LV: Analyzing interleaved accesses...
; STRIDED_UNMASKED-NOT: LV: Creating an interleave group
; STRIDED_MASKED: LV: Checking a loop in 'masked_strided3'
; STRIDED_MASKED: LV: Analyzing interleaved accesses...
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 2, ptr %{{.*}}, align 1
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 1, ptr %{{.*}}, align 1
; STRIDED_MASKED-NOT: LV: Invalidate candidate interleaved store group due to gaps.
; ModuleID = 'test.c'
; Module-level target description: little-endian layout with 32-bit pointers
; (p:32:32) for an i386 Linux triple.
source_filename = "test.c"
target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-gnu"
; IR for scenario 1: a 1024-iteration loop in which both byte loads from %p and
; both byte stores to %q live in the same predicated block (%if.then), guarded
; by ix > guard.
define dso_local void @masked_strided1(ptr noalias nocapture readonly %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
entry:
; Widen the i8 guard to i32 so it can be compared with the i32 induction variable.
%conv = zext i8 %guard to i32
br label %for.body
for.body:
; Induction variable ix: 0 on entry, %inc when arriving from %for.inc.
%ix.024 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; Predicate for the whole body: unsigned ix > guard.
%cmp1 = icmp ugt i32 %ix.024, %conv
br i1 %cmp1, label %if.then, label %for.inc
if.then:
; %mul = 2*ix, the even element index.
%mul = shl nuw nsw i32 %ix.024, 1
%arrayidx = getelementptr inbounds i8, ptr %p, i32 %mul
; left = p[2*ix]
%0 = load i8, ptr %arrayidx, align 1
; %add = 2*ix + 1, the odd element index; bit 0 of %mul is clear after the
; shift, so the disjoint `or` with 1 acts as an add.
%add = or disjoint i32 %mul, 1
%arrayidx4 = getelementptr inbounds i8, ptr %p, i32 %add
; right = p[2*ix + 1]
%1 = load i8, ptr %arrayidx4, align 1
; Signed max(left, right): pick %1 when %0 < %1, otherwise %0.
%cmp.i = icmp slt i8 %0, %1
%spec.select.i = select i1 %cmp.i, i8 %1, i8 %0
%arrayidx6 = getelementptr inbounds i8, ptr %q, i32 %mul
; q[2*ix] = max
store i8 %spec.select.i, ptr %arrayidx6, align 1
; q[2*ix + 1] = 0 - max
%sub = sub i8 0, %spec.select.i
%arrayidx11 = getelementptr inbounds i8, ptr %q, i32 %add
store i8 %sub, ptr %arrayidx11, align 1
br label %for.inc
for.inc:
; Advance ix and leave the loop once it reaches 1024.
%inc = add nuw nsw i32 %ix.024, 1
%exitcond = icmp eq i32 %inc, 1024
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; IR for scenario 2: a 1024-iteration loop with two byte stores to %q at
; adjacent indices, where the store of 1 to q[2*ix] is unconditional (in
; %for.body) and the store of 2 to q[2*ix + 1] is predicated (in %if.then).
; %p is unused (readnone).
define dso_local void @masked_strided2(ptr noalias nocapture readnone %p, ptr noalias nocapture %q, i8 zeroext %guard) local_unnamed_addr #0 {
entry:
; Widen the i8 guard to i32 so it can be compared with the i32 induction variable.
%conv = zext i8 %guard to i32
br label %for.body
for.body:
; Induction variable ix: 0 on entry, %inc when arriving from %for.inc.
%ix.012 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; %mul = 2*ix, the even element index.
%mul = shl nuw nsw i32 %ix.012, 1
%arrayidx = getelementptr inbounds i8, ptr %q, i32 %mul
; Unpredicated store: q[2*ix] = 1 executes on every iteration.
store i8 1, ptr %arrayidx, align 1
; Predicate for the second store only: unsigned ix > guard.
%cmp1 = icmp ugt i32 %ix.012, %conv
br i1 %cmp1, label %if.then, label %for.inc
if.then:
; %add = 2*ix + 1, the odd element index; bit 0 of %mul is clear after the
; shift, so the disjoint `or` with 1 acts as an add.
%add = or disjoint i32 %mul, 1
%arrayidx3 = getelementptr inbounds i8, ptr %q, i32 %add
; Predicated store: q[2*ix + 1] = 2.
store i8 2, ptr %arrayidx3, align 1
br label %for.inc
for.inc:
; Advance ix and leave the loop once it reaches 1024.
%inc = add nuw nsw i32 %ix.012, 1
%exitcond = icmp eq i32 %inc, 1024
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; IR for scenario 3: a 1024-iteration loop with two byte stores to %q at
; adjacent indices, each in its own predicated block: %if.then (ix > guard1)
; stores 1 to q[2*ix], and %if.then6 (ix > guard2) stores 2 to q[2*ix + 1].
; %p is unused (readnone).
define dso_local void @masked_strided3(ptr noalias nocapture readnone %p, ptr noalias nocapture %q, i8 zeroext %guard1, i8 zeroext %guard2) local_unnamed_addr #0 {
entry:
; Widen both i8 guards to i32 so they can be compared with the i32 induction variable.
%conv = zext i8 %guard1 to i32
%conv3 = zext i8 %guard2 to i32
br label %for.body
for.body:
; Induction variable ix: 0 on entry, %inc when arriving from %for.inc.
%ix.018 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
; %mul = 2*ix, the even element index; computed here and used by both
; predicated blocks.
%mul = shl nuw nsw i32 %ix.018, 1
; First predicate: unsigned ix > guard1.
%cmp1 = icmp ugt i32 %ix.018, %conv
br i1 %cmp1, label %if.then, label %if.end
if.then:
%arrayidx = getelementptr inbounds i8, ptr %q, i32 %mul
; q[2*ix] = 1, under the first predicate only.
store i8 1, ptr %arrayidx, align 1
br label %if.end
if.end:
; Second predicate, independent of the first: unsigned ix > guard2.
%cmp4 = icmp ugt i32 %ix.018, %conv3
br i1 %cmp4, label %if.then6, label %for.inc
if.then6:
; %add = 2*ix + 1, the odd element index; bit 0 of %mul is clear after the
; shift, so the disjoint `or` with 1 acts as an add.
%add = or disjoint i32 %mul, 1
%arrayidx7 = getelementptr inbounds i8, ptr %q, i32 %add
; q[2*ix + 1] = 2, under the second predicate only.
store i8 2, ptr %arrayidx7, align 1
br label %for.inc
for.inc:
; Advance ix and leave the loop once it reaches 1024.
%inc = add nuw nsw i32 %ix.018, 1
%exitcond = icmp eq i32 %inc, 1024
br i1 %exitcond, label %for.end, label %for.body
for.end:
ret void
}
; Attribute group #0, referenced by all three functions above: sets the
; target-features string to fxsr, mmx, sse, sse2 and x87.
attributes #0 = { "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" }
|