commit 98592fcc61307968f7df1362771534595a1e1c21
Author: Keno Fischer <keno@juliacomputing.com>
Date:   Wed Jul 25 19:29:02 2018 -0400

    [SCEV] Don't expand Wrap predicate using inttoptr in ni addrspaces
    
    Summary:
    In non-integral address spaces, we're not allowed to introduce inttoptr/ptrtoint
    intrinsics. Instead, we need to expand any pointer arithmetic as geps on the
    base pointer. Luckily this is a common task for SCEV, so all we have to do here
    is hook up the corresponding helper function and add test case.
    
    Fixes PR38290
    
    Reviewers: reames, sanjoy
    
    Subscribers: javed.absar, llvm-commits
    
    Differential Revision: https://reviews.llvm.org/D49832

diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 7f76f057216..f441a3647fb 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -2157,8 +2157,9 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
   const SCEV *Step = AR->getStepRecurrence(SE);
   const SCEV *Start = AR->getStart();
 
+  Type *ARTy = AR->getType();
   unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
-  unsigned DstBits = SE.getTypeSizeInBits(AR->getType());
+  unsigned DstBits = SE.getTypeSizeInBits(ARTy);
 
   // The expression {Start,+,Step} has nusw/nssw if
   //   Step < 0, Start - |Step| * Backedge <= Start
@@ -2170,11 +2171,12 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
   Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);
 
   IntegerType *Ty =
-      IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(AR->getType()));
+      IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
+  Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
 
   Value *StepValue = expandCodeFor(Step, Ty, Loc);
   Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
-  Value *StartValue = expandCodeFor(Start, Ty, Loc);
+  Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);
 
   ConstantInt *Zero =
       ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
@@ -2197,8 +2199,21 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
   // Compute:
   //   Start + |Step| * Backedge < Start
   //   Start - |Step| * Backedge > Start
-  Value *Add = Builder.CreateAdd(StartValue, MulV);
-  Value *Sub = Builder.CreateSub(StartValue, MulV);
+  Value *Add = nullptr, *Sub = nullptr;
+  if (ARExpandTy->isPointerTy()) {
+    PointerType *ARPtrTy = cast<PointerType>(ARExpandTy);
+    const SCEV *MulS = SE.getSCEV(MulV);
+    const SCEV *const StepArray[2] = {MulS, SE.getNegativeSCEV(MulS)};
+    Add = Builder.CreateBitCast(
+        expandAddToGEP(&StepArray[0], &StepArray[1], ARPtrTy, Ty, StartValue),
+        ARPtrTy);
+    Sub = Builder.CreateBitCast(
+        expandAddToGEP(&StepArray[1], &StepArray[2], ARPtrTy, Ty, StartValue),
+        ARPtrTy);
+  } else {
+    Add = Builder.CreateAdd(StartValue, MulV);
+    Sub = Builder.CreateSub(StartValue, MulV);
+  }
 
   Value *EndCompareGT = Builder.CreateICmp(
       Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
new file mode 100644
index 00000000000..ddcf5e1a195
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-ni.ll
@@ -0,0 +1,73 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s -check-prefix=LV
+
+; NB: addrspaces 10-13 are non-integral
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
+
+; This matches the test case from PR38290
+; Check that we expand the SCEV predicate check using GEP, rather
+; than ptrtoint.
+
+%jl_value_t = type opaque
+%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
+
+declare i64 @julia_steprange_last_4949()
+
+define void @"japi1_align!_9477"(%jl_value_t addrspace(10)**) #0 {
+; LV-LAVEL: L26.lver.check
+; LV: [[OFMul:%[^ ]*]]  = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[Step:%[^ ]*]])
+; LV-NEXT: [[OFMulResult:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 0
+; LV-NEXT: [[OFMulOverflow:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul]], 1
+; LV-NEXT: [[PosGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base:%[^ ]*]], i64 [[Step]]
+; LV-NEXT: [[NegGEP:%[^ ]*]] = getelementptr i32, i32 addrspace(13)* [[Base]], i64 [[NegStep:%[^ ]*]]
+; LV-NEXT: icmp ugt i32 addrspace(13)* [[NegGEP]], [[Base]]
+; LV-NEXT: icmp ult i32 addrspace(13)* [[PosGEP]], [[Base]]
+; LV-NOT: inttoptr
+; LV-NOT: ptrtoint
+top:
+  %1 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8, !nonnull !1, !dereferenceable !2, !align !3
+  %2 = load i32, i32* inttoptr (i64 12 to i32*), align 4, !tbaa !4
+  %3 = sub i32 0, %2
+  %4 = call i64 @julia_steprange_last_4949()
+  %5 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*
+  %6 = bitcast %jl_value_t addrspace(11)* %5 to %jl_value_t addrspace(10)* addrspace(11)*
+  %7 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(11)* %6, align 8, !tbaa !4, !nonnull !1, !dereferenceable !9, !align !2
+  %8 = addrspacecast %jl_value_t addrspace(10)* %7 to %jl_value_t addrspace(11)*
+  %9 = bitcast %jl_value_t addrspace(11)* %8 to i32 addrspace(13)* addrspace(11)*
+  %10 = load i32 addrspace(13)*, i32 addrspace(13)* addrspace(11)* %9, align 8, !tbaa !10, !nonnull !1
+  %11 = sext i32 %3 to i64
+  br label %L26
+
+L26:                                              ; preds = %L26, %top
+  %value_phi3 = phi i64 [ 0, %top ], [ %12, %L26 ]
+  %12 = add i64 %value_phi3, -1
+  %13 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %12
+  %14 = load i32, i32 addrspace(13)* %13, align 4, !tbaa !13
+  %15 = add i64 %12, %11
+  %16 = getelementptr inbounds i32, i32 addrspace(13)* %10, i64 %15
+  store i32 %14, i32 addrspace(13)* %16, align 4, !tbaa !13
+  %17 = icmp eq i64 %value_phi3, %4
+  br i1 %17, label %L45, label %L26
+
+L45:                                              ; preds = %L26
+  ret void
+}
+
+attributes #0 = { "thunk" }
+
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 1, !"Debug Info Version", i32 3}
+!1 = !{}
+!2 = !{i64 16}
+!3 = !{i64 8}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"jtbaa_mutab", !6, i64 0}
+!6 = !{!"jtbaa_value", !7, i64 0}
+!7 = !{!"jtbaa_data", !8, i64 0}
+!8 = !{!"jtbaa"}
+!9 = !{i64 40}
+!10 = !{!11, !11, i64 0}
+!11 = !{!"jtbaa_arrayptr", !12, i64 0}
+!12 = !{!"jtbaa_array", !8, i64 0}
+!13 = !{!14, !14, i64 0}
+!14 = !{!"jtbaa_arraybuf", !7, i64 0}
diff --git a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
index a7e5bce7445..fa6fccecbf1 100644
--- a/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
+++ b/test/Analysis/LoopAccessAnalysis/wrapping-pointer-versioning.ll
@@ -58,10 +58,10 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 ; LV-NEXT: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE]])
 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
 ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]
 
@@ -233,10 +233,10 @@ for.end:                                          ; preds = %for.body
 ; LV: [[OFMul1:%[^ ]*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 4, i64 [[BE:%[^ ]*]])
 ; LV-NEXT: [[OFMulResult1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 0
 ; LV-NEXT: [[OFMulOverflow1:%[^ ]*]] = extractvalue { i64, i1 } [[OFMul1]], 1
-; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 %a2, [[OFMulResult1]]
-; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], %a2
-; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], %a2
+; LV-NEXT: [[AddEnd1:%[^ ]*]] = add i64 [[A0:%[^ ]*]], [[OFMulResult1]]
+; LV-NEXT: [[SubEnd1:%[^ ]*]] = sub i64 [[A0]], [[OFMulResult1]]
+; LV-NEXT: [[CmpNeg1:%[^ ]*]] = icmp ugt i64 [[SubEnd1]], [[A0]]
+; LV-NEXT: [[CmpPos1:%[^ ]*]] = icmp ult i64 [[AddEnd1]], [[A0]]
 ; LV-NEXT: [[Cmp:%[^ ]*]] = select i1 false, i1 [[CmpNeg1]], i1 [[CmpPos1]]
 ; LV-NEXT: [[PredCheck1:%[^ ]*]] = or i1 [[Cmp]], [[OFMulOverflow1]]