File: swp-loop-carried-order-dep2.mir

package info (click to toggle)

llvm-toolchain-21 1%3A21.1.0-1

links: PTS, VCS
area: main
in suites: sid
size: 2,235,796 kB
sloc: cpp: 7,617,614; ansic: 1,433,901; asm: 1,058,726; python: 252,096; f90: 94,671; objc: 70,753; lisp: 42,813; pascal: 18,401; sh: 10,032; ml: 5,111; perl: 4,720; awk: 3,523; makefile: 3,401; javascript: 2,272; xml: 892; fortran: 770

file content (103 lines) | stat: -rw-r--r-- 3,441 bytes

parent folder | download | duplicates (3)

# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner %s -o /dev/null 2>&1 -pipeliner-experimental-cg=true | FileCheck %s
# REQUIRES: asserts

# Test that loop-carried memory dependencies are added correctly.
# The original code is as follows.
#
# ```
# void f(int *a, int n) {
#   for (int i = 1; i < n; i++) {
#     a[i] += a[i];
#     a[i-1] += i;
#   }
# }
# ```
# 
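# The store to a[i-1] in iteration i+1 writes the same element that iteration i loads and stores as a[i].
# Loop-carried dependencies therefore exist from the load/store of a[i] to the store of a[i-1], but not vice versa.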

# CHECK:      ===== Loop Carried Edges Begin =====
# CHECK-NEXT:   Loop carried edges from SU(3)
# CHECK-NEXT:     Order
# CHECK-NEXT:       SU(7)
# CHECK-NEXT:   Loop carried edges from SU(5)
# CHECK-NEXT:     Order
# CHECK-NEXT:       SU(7)
# CHECK-NEXT: ===== Loop Carried Edges End =====

--- |
  ; IR counterpart of the machine function further down: f(a, n) from the C
  ; snippet in the file header. The loop contains a single load; the old value
  ; of a[i-1] is not reloaded but carried in the %store_forwarded phi.
  define dso_local void @f(ptr nocapture noundef %a, i32 noundef %n) local_unnamed_addr {
  entry:
    ; The loop starts at i = 1, so it is entered only when n > 1.
    %cmp11 = icmp sgt i32 %n, 1
    br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup

  for.body.preheader:
    ; a[0] seeds %store_forwarded; %cgep (%a + 4 bytes) is the initial %lsr.iv.
    %load_initial = load i32, ptr %a, align 4
    %cgep = getelementptr i8, ptr %a, i32 4
    br label %for.body

  for.cond.cleanup:
    ret void

  for.body:
    ; %lsr.iv          - address accessed as a[i] in this iteration
    ; %store_forwarded - value last stored through %lsr.iv (a[0] on entry),
    ;                    i.e. the current contents of a[i-1]
    ; %i.012           - induction variable i, starting at 1
    %lsr.iv = phi ptr [ %cgep, %for.body.preheader ], [ %cgep16, %for.body ]
    %store_forwarded = phi i32 [ %load_initial, %for.body.preheader ], [ %add, %for.body ]
    %i.012 = phi i32 [ 1, %for.body.preheader ], [ %inc, %for.body ]
    ; a[i] += a[i], expressed as a left shift by one.
    %0 = load i32, ptr %lsr.iv, align 4, !tbaa !5
    %add = shl nsw i32 %0, 1
    store i32 %add, ptr %lsr.iv, align 4, !tbaa !5
    ; a[i-1] += i, using the forwarded value and the address %lsr.iv - 4.
    %1 = add i32 %store_forwarded, %i.012
    %cgep15 = getelementptr i8, ptr %lsr.iv, i32 -4
    store i32 %1, ptr %cgep15, align 4, !tbaa !5
    ; Advance i and the pointer; leave the loop once i + 1 == n.
    %inc = add nuw nsw i32 %i.012, 1
    %exitcond.not = icmp eq i32 %n, %inc
    %cgep16 = getelementptr i8, ptr %lsr.iv, i32 4
    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
  }

  ; TBAA metadata. !5 is the access tag attached to the loop's load and to both
  ; stores; it names the scalar type !6 ("int"), whose parent chain is
  ; !7 ("omnipotent char") and then the root !8.
  !5 = !{!6, !6, i64 0}
  !6 = !{!"int", !7, i64 0}
  !7 = !{!"omnipotent char", !8, i64 0}
  !8 = !{!"Simple C/C++ TBAA"}

...
---
# Machine-level form of @f. bb.3 is the self-looping block whose memory
# operations (one load, two stores) the expected output above refers to.
name:            f
tracksRegLiveness: true
body:             |
  bb.0.entry:
    successors: %bb.1, %bb.2
    liveins: $r0, $r1
  
    ; %8 holds the pointer a (it is the base of the %ir.a load in bb.1) and %9
    ; holds n. As in the IR entry block, control goes to %bb.2 unless n > 1.
    %9:intregs = COPY $r1
    %8:intregs = COPY $r0
    %10:predregs = C2_cmpgti %9, 1
    J2_jumpf %10, %bb.2, implicit-def dead $pc
    J2_jump %bb.1, implicit-def dead $pc
  
  bb.1.for.body.preheader:
    ; %0 receives the value loaded from a[0] and %1 the pointer advanced by 4;
    ; they are the incoming values of the %3 and %2 PHIs in bb.3. %12 is the
    ; initial i, and %17 = n - 1 is the count operand given to J2_loop0r.
    %0:intregs, %1:intregs = L2_loadri_pi %8, 4 :: (load (s32) from %ir.a)
    %12:intregs = A2_tfrsi 1
    %16:intregs = A2_addi %9, -1
    %17:intregs = COPY %16
    J2_loop0r %bb.3, %17, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
    J2_jump %bb.3, implicit-def dead $pc
  
  bb.2.for.cond.cleanup:
    PS_jmpret $r31, implicit-def dead $pc
  
  bb.3.for.body (machine-block-address-taken):
    successors: %bb.2(0x04000000), %bb.3(0x7c000000)
  
    ; PHIs: %2 = address of a[i], %3 = value forwarded for a[i-1], %4 = i.
    %2:intregs = PHI %1, %bb.1, %7, %bb.3
    %3:intregs = PHI %0, %bb.1, %5, %bb.3
    %4:intregs = PHI %12, %bb.1, %6, %bb.3
    ; Load of a[i]. Counting the three PHIs first, this is SU(3) in the
    ; expected output (assumes SUs are numbered in instruction order).
    %13:intregs = L2_loadri_io %2, 0 :: (load (s32) from %ir.lsr.iv, !tbaa !5)
    %5:intregs = nsw S2_asl_i_r killed %13, 1
    ; Store to a[i]: SU(5) under the same numbering.
    S2_storeri_io %2, 0, %5 :: (store (s32) into %ir.lsr.iv, !tbaa !5)
    %14:intregs = A2_add %3, %4
    ; Store to a[i-1] (offset -4 from %2): SU(7), the destination of both
    ; expected loop-carried order edges.
    S2_storeri_io %2, -4, killed %14 :: (store (s32) into %ir.cgep15, !tbaa !5)
    ; Updated i and pointer, consumed by the PHIs on the next iteration.
    %6:intregs = nuw nsw A2_addi %4, 1
    %7:intregs = A2_addi %2, 4
    ENDLOOP0 %bb.3, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
    J2_jump %bb.2, implicit-def $pc
...