File: swp-loop-carried-order-dep6.mir

package info (click to toggle)
llvm-toolchain-21 1%3A21.1.6-2
links: PTS, VCS
area: main
in suites: forky
size: 2,245,044 kB
sloc: cpp: 7,619,726; ansic: 1,434,018; asm: 1,058,748; python: 252,740; f90: 94,671; objc: 70,685; lisp: 42,813; pascal: 18,401; sh: 8,601; ml: 5,111; perl: 4,720; makefile: 3,666; awk: 3,523; javascript: 2,409; xml: 892; fortran: 770
file content (154 lines) | stat: -rw-r--r-- 5,734 bytes
parent folder | download | duplicates (3)
# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 -pipeliner-experimental-cg=true | FileCheck %s
# REQUIRES: asserts

# Test that loop-carried memory dependencies are computed correctly when the
# loop contains barrier instructions (here, the volatile accesses to `x`).
# The original code is as follows.
# 
# ```
# volatile int x = 0;
# void f(int * restrict a, int * restrict b, int * restrict c, int n) {
#   for (int i = 0; i < n; i++) {
#     a[i] *= c[i];
#     b[i] *= c[i];
#     x += i;
#     a[i + 1] *= i;
#     x += i;
#     b[i + 1] *= i;
#   }
# }
# ```
#
# FIXME: The following loop-carried dependencies are currently missed, so the
# expected output below lists no edges.
# Loop carried edges from SU(16)
#   Order
#     SU(6)
#     SU(8)
#     SU(10)
#     SU(11)
# Loop carried edges from SU(17)
#   Order
#     SU(10)
#     SU(11)
# Loop carried edges from SU(19)
#   Order
#     SU(10)
#     SU(11)

# CHECK:      ===== Loop Carried Edges Begin =====
# CHECK-NEXT: ===== Loop Carried Edges End =====

--- |
  @x = dso_local global i32 0, align 4

  ; IR counterpart of the C function f shown in the header comment. The
  ; machine memory operands in the MIR body refer to these IR values by name
  ; (%ir.a, %ir.b, %ir.lsr.iv31, %ir.cgep38, ...).
  define dso_local void @f(ptr noalias nocapture noundef %a, ptr noalias nocapture noundef %b, ptr noalias nocapture noundef readonly %c, i32 noundef %n) {
  entry:
    ; Enter the loop only when n > 0.
    %cmp26 = icmp sgt i32 %n, 0
    br i1 %cmp26, label %for.body.preheader, label %for.cond.cleanup

  for.body.preheader:
    ; Preload a[0] and b[0]; inside the loop the current a[i] / b[i] values
    ; are carried in the PHIs %1 / %0 instead of being reloaded.
    %.pre = load i32, ptr %a, align 4, !tbaa !5
    %.pre28 = load i32, ptr %b, align 4, !tbaa !5
    ; b + 4 bytes and a + 4 bytes, i.e. &b[1] and &a[1] for i32 elements.
    %cgep = getelementptr i8, ptr %b, i32 4
    %cgep37 = getelementptr i8, ptr %a, i32 4
    br label %for.body

  for.cond.cleanup:
    ret void

  for.body:
    ; Pointer induction variables, each advanced by 4 bytes per iteration:
    ; %lsr.iv35 starts at c, %lsr.iv31 at a + 4 and %lsr.iv at b + 4.
    %lsr.iv35 = phi ptr [ %c, %for.body.preheader ], [ %cgep42, %for.body ]
    %lsr.iv31 = phi ptr [ %cgep37, %for.body.preheader ], [ %cgep41, %for.body ]
    %lsr.iv = phi ptr [ %cgep, %for.body.preheader ], [ %cgep40, %for.body ]
    ; %0 / %1 hold the values stored through %lsr.iv / %lsr.iv31 by the
    ; previous iteration (%mul11 / %mul7), or the preloaded values on entry.
    %0 = phi i32 [ %mul11, %for.body ], [ %.pre28, %for.body.preheader ]
    %1 = phi i32 [ %mul7, %for.body ], [ %.pre, %for.body.preheader ]
    ; Loop counter i, starting at 0.
    %i.027 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
    ; a[i] *= c[i]: the product is stored at %lsr.iv31 - 4 bytes.
    %2 = load i32, ptr %lsr.iv35, align 4, !tbaa !5
    %mul = mul nsw i32 %1, %2
    %cgep38 = getelementptr i8, ptr %lsr.iv31, i32 -4
    store i32 %mul, ptr %cgep38, align 4, !tbaa !5
    ; b[i] *= c[i]: the product is stored at %lsr.iv - 4 bytes.
    %mul4 = mul nsw i32 %0, %2
    %cgep39 = getelementptr i8, ptr %lsr.iv, i32 -4
    store i32 %mul4, ptr %cgep39, align 4, !tbaa !5
    ; First x += i: volatile load / add / volatile store on @x.
    %3 = load volatile i32, ptr @x, align 4, !tbaa !5
    %4 = add i32 %i.027, %3
    store volatile i32 %4, ptr @x, align 4, !tbaa !5
    %add5 = add nuw nsw i32 %i.027, 1
    ; a[i + 1] *= i: load, multiply and store back through %lsr.iv31.
    %5 = load i32, ptr %lsr.iv31, align 4, !tbaa !5
    %mul7 = mul nsw i32 %5, %i.027
    store i32 %mul7, ptr %lsr.iv31, align 4, !tbaa !5
    ; Second x += i: volatile load / add / volatile store on @x.
    %6 = load volatile i32, ptr @x, align 4, !tbaa !5
    %7 = add i32 %i.027, %6
    store volatile i32 %7, ptr @x, align 4, !tbaa !5
    ; b[i + 1] *= i: load, multiply and store back through %lsr.iv.
    %8 = load i32, ptr %lsr.iv, align 4, !tbaa !5
    %mul11 = mul nsw i32 %8, %i.027
    store i32 %mul11, ptr %lsr.iv, align 4, !tbaa !5
    ; Exit once i + 1 == n; otherwise advance the three pointers by 4 bytes.
    %exitcond.not = icmp eq i32 %n, %add5
    %cgep40 = getelementptr i8, ptr %lsr.iv, i32 4
    %cgep41 = getelementptr i8, ptr %lsr.iv31, i32 4
    %cgep42 = getelementptr i8, ptr %lsr.iv35, i32 4
    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
  }

  !5 = !{!6, !6, i64 0}
  !6 = !{!"int", !7, i64 0}
  !7 = !{!"omnipotent char", !8, i64 0}
  !8 = !{!"Simple C/C++ TBAA"}

...
---
name:            f
tracksRegLiveness: true
body:             |
  bb.0.entry:
    successors: %bb.1, %bb.2
    liveins: $r0, $r1, $r2, $r3
  
    ; Copy the four live-in argument registers into virtual registers.
    ; %16 / %17 are the base addresses of the loads from %ir.a / %ir.b,
    ; %18 seeds the pointer PHI used for the load from %ir.lsr.iv35, and
    ; %19 is compared against 0 and later used as the loop count.
    %19:intregs = COPY $r3
    %18:intregs = COPY $r2
    %17:intregs = COPY $r1
    %16:intregs = COPY $r0
    ; Matches the IR entry block: go to the return block unless %19 > 0.
    %20:predregs = C2_cmpgti %19, 0
    J2_jumpf %20, %bb.2, implicit-def dead $pc
    J2_jump %bb.1, implicit-def dead $pc
  
  bb.1.for.body.preheader:
    ; Load a[0] into %0 and b[0] into %1. Each load also defines a second
    ; register (%3, %2) that seeds the a / b pointer PHIs in the loop; given
    ; the immediate 4 this is presumably the post-incremented address
    ; (&a[1], &b[1]), matching %cgep37 / %cgep in the IR.
    %0:intregs, %3:intregs = L2_loadri_pi %16, 4 :: (load (s32) from %ir.a, !tbaa !5)
    %1:intregs, %2:intregs = L2_loadri_pi %17, 4 :: (load (s32) from %ir.b, !tbaa !5)
    ; Initial value 0 for the loop counter PHI %9.
    %22:intregs = A2_tfrsi 0
    ; pc-relative address of @x, used as the base of both volatile memops.
    %26:intregs = C4_addipc target-flags(hexagon-pcrel) @x
    ; Loop setup targeting bb.3 with count %30, a copy of %19.
    %30:intregs = COPY %19
    J2_loop0r %bb.3, %30, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
    J2_jump %bb.3, implicit-def dead $pc
  
  bb.2.for.cond.cleanup:
    PS_jmpret $r31, implicit-def dead $pc
  
  bb.3.for.body:
    successors: %bb.2, %bb.3
  
    ; Loop-carried values:
    ;   %4  pointer for the load from %ir.lsr.iv35 (starts at %18)
    ;   %5  pointer for the a accesses (starts at %3), advanced by 4 below
    ;   %6  pointer for the b accesses (starts at %2), advanced by 4 below
    ;   %7  value stored via %6 in the previous iteration (b[0] on entry)
    ;   %8  value stored via %5 in the previous iteration (a[0] on entry)
    ;   %9  loop counter i (starts at 0)
    ; NOTE(review): the SU(n) numbers quoted below assume the scheduling DAG
    ; numbers these six PHIs as SU(0)-SU(5) and then the instructions in
    ; order; confirm against the pipeliner debug output.
    %4:intregs = PHI %18, %bb.1, %15, %bb.3
    %5:intregs = PHI %3, %bb.1, %14, %bb.3
    %6:intregs = PHI %2, %bb.1, %13, %bb.3
    %7:intregs = PHI %1, %bb.1, %12, %bb.3
    %8:intregs = PHI %0, %bb.1, %11, %bb.3
    %9:intregs = PHI %22, %bb.1, %10, %bb.3
    ; Load c[i]; %15 is the updated pointer fed back into the PHI %4.
    ; Presumably SU(6).
    %23:intregs, %15:intregs = L2_loadri_pi %4, 4 :: (load (s32) from %ir.lsr.iv35, !tbaa !5)
    ; a[i] *= c[i], stored at %5 - 4. The store is presumably SU(8).
    %24:intregs = nsw M2_mpyi %8, %23
    S2_storeri_io %5, -4, killed %24 :: (store (s32) into %ir.cgep38, !tbaa !5)
    ; b[i] *= c[i], stored at %6 - 4. The store is presumably SU(10).
    %25:intregs = nsw M2_mpyi %7, %23
    S2_storeri_io %6, -4, killed %25 :: (store (s32) into %ir.cgep39, !tbaa !5)
    ; First x += i: one instruction carrying both a volatile load and a
    ; volatile store memory operand on @x. Presumably SU(11).
    L4_add_memopw_io %26, 0, %9 :: (volatile store (s32) into @x, !tbaa !5), (volatile dereferenceable load (s32) from @x, !tbaa !5)
    ; i + 1, fed back into the PHI %9.
    %10:intregs = nuw nsw A2_addi %9, 1
    ; a[i + 1] *= i: load from %5, multiply by %9, store back to %5.
    %27:intregs = L2_loadri_io %5, 0 :: (load (s32) from %ir.lsr.iv31, !tbaa !5)
    %11:intregs = nsw M2_mpyi killed %27, %9
    S2_storeri_io %5, 0, %11 :: (store (s32) into %ir.lsr.iv31, !tbaa !5)
    ; Second x += i, same form as the first. Presumably SU(16).
    L4_add_memopw_io %26, 0, %9 :: (volatile store (s32) into @x, !tbaa !5), (volatile dereferenceable load (s32) from @x, !tbaa !5)
    ; b[i + 1] *= i: load from %6, multiply by %9, store back to %6.
    ; The load and store are presumably SU(17) and SU(19).
    %28:intregs = L2_loadri_io %6, 0 :: (load (s32) from %ir.lsr.iv, !tbaa !5)
    %12:intregs = nsw M2_mpyi killed %28, %9
    S2_storeri_io %6, 0, %12 :: (store (s32) into %ir.lsr.iv, !tbaa !5)
    ; Advance the b and a pointers by 4 for the next iteration.
    %13:intregs = A2_addi %6, 4
    %14:intregs = A2_addi %5, 4
    ; Loop back-edge to bb.3, then fall out to the return block.
    ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
    J2_jump %bb.2, implicit-def $pc
...