File: triton_regression_no_waterfall.ll

package info (click to toggle)
llvm-toolchain-21 1%3A21.1.6-2
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 2,245,044 kB
  • sloc: cpp: 7,619,726; ansic: 1,434,018; asm: 1,058,748; python: 252,740; f90: 94,671; objc: 70,685; lisp: 42,813; pascal: 18,401; sh: 8,601; ml: 5,111; perl: 4,720; makefile: 3,666; awk: 3,523; javascript: 2,409; xml: 892; fortran: 770
file content (40 lines) | stat: -rw-r--r-- 1,515 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -o - %s | FileCheck --check-prefix=GCN %s

; Kernel that derives every element of a <4 x i32> buffer-store operand from
; the result of a float-to-unsigned conversion, then stores %arg1 through it.
;
; The assertions below expect the conversion (v_cvt_u32_f32, a vector
; instruction) to be followed by a single v_readfirstlane_b32 into a scalar
; register, with the shift/or/and/shift chain carried out by scalar
; instructions and the store consuming s[4:7] directly.
; NOTE(review): judging by the file and function names, the regression being
; guarded is that no waterfall loop is emitted for this operand -- confirm
; against the originating change.
define amdgpu_kernel void @test_should_convert_to_v_readfirstlane_b32(float %fval, i32 %arg1, i32 %arg2) {
; GCN-LABEL: test_should_convert_to_v_readfirstlane_b32:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x0
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_cvt_u32_f32_e32 v0, s0
; GCN-NEXT:    s_nop 0
; GCN-NEXT:    v_readfirstlane_b32 s0, v0
; GCN-NEXT:    s_lshl_b32 s0, s0, 16
; GCN-NEXT:    s_or_b32 s5, s0, s1
; GCN-NEXT:    s_and_b32 s6, s5, s2
; GCN-NEXT:    s_lshr_b32 s4, s6, 2
; GCN-NEXT:    s_mov_b32 s7, s4
; GCN-NEXT:    v_mov_b32_e32 v0, s1
; GCN-NEXT:    buffer_store_dword v0, off, s[4:7], 0 nt
; GCN-NEXT:    s_endpgm
entry:
  ; Convert the float kernel argument to an unsigned 32-bit integer.
  %conv = fptoui float %fval to i32

  ; Integer chain fed by the converted value:
  ;   %shl = %conv << 16
  ;   %or  = %shl | %arg1
  ;   %and = %or & %arg2
  ;   %shr = %and >> 2   (logical shift)
  %shl = shl i32 %conv, 16
  %or = or i32 %shl, %arg1
  %and = and i32 %or, %arg2
  %shr = lshr i32 %and, 2

  ; Assemble the <4 x i32> operand element by element:
  ;   { %shr, %or, %and, %shr }  -- %shr is used for both lanes 0 and 3.
  %sgpr128_0 = insertelement <4 x i32> poison, i32 %shr, i32 0
  %sgpr128_1 = insertelement <4 x i32> %sgpr128_0, i32 %or, i32 1
  %sgpr128_2 = insertelement <4 x i32> %sgpr128_1, i32 %and, i32 2
  %sgpr128_3 = insertelement <4 x i32> %sgpr128_2, i32 %shr, i32 3

  ; Store %arg1 with the vector built above as the second operand; the next
  ; two i32 operands are both 0.
  ; NOTE(review): the trailing 2 is presumably the cache-policy operand that
  ; produces the `nt` modifier on the expected buffer_store_dword -- confirm
  ; against the AMDGPU intrinsic documentation.
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 %arg1, <4 x i32> %sgpr128_3, i32 0, i32 0, i32 2)

  ret void
}

; Declaration of the buffer-store intrinsic called by the kernel in this file:
; one i32, one <4 x i32>, then three more i32 operands; returns void.
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #0

; Attribute group #0 (attached to the intrinsic declaration): nounwind.
attributes #0 = { nounwind }