File: simd-shift-in-loop.ll

package info (click to toggle)
llvm-toolchain-19 1%3A19.1.7-3~deb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm-proposed-updates
  • size: 1,998,492 kB
  • sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573
file content (104 lines) | stat: -rw-r--r-- 3,371 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s

; Test that SIMD shifts can be lowered correctly even when the shift
; values are exported from outside blocks.

target triple = "wasm32-unknown-unknown"

; @shl_loop: the shift amount %shift is splatted into a <16 x i8> vector once,
; in %entry, and that vector is then used by a vector shl inside the %body loop.
; The expected output below passes the scalar argument (local 1) directly as
; the shift operand of i8x16.shl.
;   %a     - pointer the first 16-byte vector is loaded from
;   %shift - scalar shift amount, broadcast to every lane
;   %count - value the incremented counter is compared against
define void @shl_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_loop:
; CHECK:         .functype shl_loop (i32, i32, i32) -> ()
; CHECK-NEXT:  # %bb.0: # %entry
; CHECK-NEXT:  .LBB0_1: # %body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    loop # label0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0:p2align=0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shl
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.set 0
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.tee 2
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    br_if 0 # 0: up to label0
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    end_loop
; CHECK-NEXT:    # fallthrough-return
entry:
 ; Build the splat outside the loop: insert %shift into lane 0, then use an
 ; all-zero shuffle mask so every result lane reads lane 0.
 %t1 = insertelement <16 x i8> undef, i8 %shift, i32 0
 %vshift = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer
 br label %body
body:
 ; %out starts at %a and becomes %b (%out + 16 bytes) on each back edge.
 %out = phi ptr [%a, %entry], [%b, %body]
 ; %i starts at 0 and is incremented by 1 per iteration.
 %i = phi i32 [0, %entry], [%next, %body]
 ; Load 16 bytes from %out and shift each lane by the splat from %entry.
 %v = load <16 x i8>, ptr %out, align 1
 %r = shl <16 x i8> %v, %vshift
 ; The result is stored 16 bytes past the address that was loaded from.
 %b = getelementptr inbounds i8, ptr %out, i32 16
 store <16 x i8> %r, ptr %b
 %next = add i32 %i, 1
 ; NOTE(review): the back edge to %body is taken when %next == %count and the
 ; loop exits otherwise. This looks inverted for a counted loop, but the
 ; expected output above encodes it, so confirm before changing.
 %i.cmp = icmp eq i32 %next, %count
 br i1 %i.cmp, label %body, label %exit
exit:
 ret void
}

; Test that SIMD shifts can be lowered correctly when the shift value
; is a phi inside the loop body.

; @shl_phi_loop: the scalar shift amount is a loop-carried phi (%t1), and the
; splat vector is rebuilt from it inside %body on every iteration. The expected
; output below passes the scalar (local 1) directly as the shift operand of
; i8x16.shl and then updates local 1 with an i32.and against 1.
;   %a     - pointer the first 16-byte vector is loaded from
;   %shift - initial scalar shift amount
;   %count - value the incremented counter is compared against
define void @shl_phi_loop(ptr %a, i8 %shift, i32 %count) {
; CHECK-LABEL: shl_phi_loop:
; CHECK:         .functype shl_phi_loop (i32, i32, i32) -> ()
; CHECK-NEXT:  # %bb.0: # %entry
; CHECK-NEXT:  .LBB1_1: # %body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    loop # label1:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    v128.load 0:p2align=0
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i8x16.shl
; CHECK-NEXT:    v128.store 16
; CHECK-NEXT:    local.get 1
; CHECK-NEXT:    i32.const 1
; CHECK-NEXT:    i32.and
; CHECK-NEXT:    local.set 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.set 0
; CHECK-NEXT:    local.get 2
; CHECK-NEXT:    i32.const -1
; CHECK-NEXT:    i32.add
; CHECK-NEXT:    local.tee 2
; CHECK-NEXT:    i32.eqz
; CHECK-NEXT:    br_if 0 # 0: up to label1
; CHECK-NEXT:  # %bb.2: # %exit
; CHECK-NEXT:    end_loop
; CHECK-NEXT:    # fallthrough-return
entry:
 br label %body
body:
 ; %out starts at %a and becomes %b (%out + 16 bytes) on each back edge.
 %out = phi ptr [%a, %entry], [%b, %body]
 ; %i starts at 0 and is incremented by 1 per iteration.
 %i = phi i32 [0, %entry], [%next, %body]
 ; The shift amount is %shift on the first iteration and %sand afterwards.
 %t1 = phi i8 [%shift, %entry], [%sand, %body]
 ; Splat %t1: insert it into lane 0, then use an all-zero shuffle mask so every
 ; result lane reads lane 0.
 %t2 = insertelement <16 x i8> undef, i8 %t1, i32 0
 %vshift = shufflevector <16 x i8> %t2, <16 x i8> undef, <16 x i32> zeroinitializer
 ; Load 16 bytes from %out and shift each lane by the splatted amount.
 %v = load <16 x i8>, ptr %out, align 1
 %r = shl <16 x i8> %v, %vshift
 ; The result is stored 16 bytes past the address that was loaded from.
 %b = getelementptr inbounds i8, ptr %out, i32 16
 store <16 x i8> %r, ptr %b
 ; Next iteration's shift amount: the current one masked with 1.
 %sand = and i8 %t1, 1
 %next = add i32 %i, 1
 ; NOTE(review): the back edge to %body is taken when %next == %count and the
 ; loop exits otherwise. This looks inverted for a counted loop, but the
 ; expected output above encodes it, so confirm before changing.
 %i.cmp = icmp eq i32 %next, %count
 br i1 %i.cmp, label %body, label %exit
exit:
 ret void
}