File: indirect-addressing-si-noopt.ll

package info (click to toggle)
llvm-toolchain-6.0 1%3A6.0.1-10
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 598,080 kB
  • sloc: cpp: 3,046,253; ansic: 595,057; asm: 271,965; python: 128,926; objc: 106,554; sh: 21,906; lisp: 10,191; pascal: 6,094; ml: 5,544; perl: 5,265; makefile: 2,227; cs: 2,027; xml: 686; php: 212; csh: 117
file content (64 lines) | stat: -rw-r--r-- 2,027 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
; RUN: llc -O0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s

; FIXME: Merge into indirect-addressing-si.ll

; Make sure that TwoAddressInstructions keeps src0 as subregister sub0
; of the tied implicit use and def of the super register.

; CHECK-LABEL: {{^}}insert_wo_offset:
; CHECK: s_load_dword [[IN:s[0-9]+]]
; CHECK: s_mov_b32 m0, [[IN]]
; CHECK: v_movreld_b32_e32 v[[ELT0:[0-9]+]]
; CHECK-NEXT: buffer_store_dwordx4 v{{\[}}[[ELT0]]:
define amdgpu_kernel void @insert_wo_offset(<4 x float> addrspace(1)* %out, i32 %in) {
entry:
  %ins = insertelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, float 5.0, i32 %in
  store <4 x float> %ins, <4 x float> addrspace(1)* %out
  ret void
}

; Make sure we don't hit use of undefined register errors when expanding an
; extract with undef index.

; CHECK-LABEL: {{^}}extract_adjacent_blocks:
; CHECK: s_load_dword [[ARG:s[0-9]+]]
; CHECK: s_cmp_lg_u32
; CHECK: s_cbranch_scc1 [[BB4:BB[0-9]+_[0-9]+]]

; CHECK: buffer_load_dwordx4
; CHECK: s_mov_b32 m0,
; CHECK: v_movrels_b32_e32

; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]

; CHECK: [[BB4]]:
; CHECK: buffer_load_dwordx4
; CHECK: s_mov_b32 m0,
; CHECK: v_movrels_b32_e32

; CHECK: [[ENDBB]]:
; CHECK: buffer_store_dword
; CHECK: s_endpgm

define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) #0 {
bb:
  %tmp = icmp eq i32 %arg, 0
  br i1 %tmp, label %bb1, label %bb4

bb1:
  %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp3 = extractelement <4 x float> %tmp2, i32 undef
  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out
  br label %bb7

bb4:
  %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
  %tmp6 = extractelement <4 x float> %tmp5, i32 undef
  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out
  br label %bb7

bb7:
  %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ]
  store volatile float %tmp8, float addrspace(1)* undef
  ret void
}