File: complex-index.ll

package info (click to toggle)
llvm-toolchain-7 1%3A7.0.1-8
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 733,456 kB
  • sloc: cpp: 3,776,651; ansic: 633,271; asm: 350,301; python: 142,716; objc: 107,612; sh: 22,626; lisp: 11,056; perl: 7,999; pascal: 6,742; ml: 5,537; awk: 3,536; makefile: 2,557; cs: 2,027; xml: 841; ruby: 156
file content (49 lines) | stat: -rw-r--r-- 2,024 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s

; External function taking an i32 and returning an i64; no body in this file.
; The name is the Itanium C++ mangling of get_local_id(unsigned int).
declare i64 @_Z12get_local_idj(i32)

; Same signature as above; the name is the Itanium C++ mangling of
; get_group_id(unsigned int).
declare i64 @_Z12get_group_idj(i32)

; LLVM intrinsic: per the LangRef it computes (a * b) + c on doubles, and the
; multiply and add may or may not be fused into one operation.
declare double @llvm.fmuladd.f64(double, double, double)

; Kernel that reads two floats from %c, applies the same floating-point
; sequence to each, and writes each result back to the address it was loaded
; from. The two element indices are %add10 and (%add10 | 1).
;
; The FileCheck directives below require the opt output for this function to
; contain a <2 x float> load followed by a <2 x float> store.
; CHECK-LABEL: @factorizedVsNonfactorizedAccess(
; CHECK: load <2 x float>
; CHECK: store <2 x float>
define amdgpu_kernel void @factorizedVsNonfactorizedAccess(float addrspace(1)* nocapture %c) {
entry:
  ; Inputs to the index computation: two external calls, each with argument 0.
  %call = tail call i64 @_Z12get_local_idj(i32 0)
  %call1 = tail call i64 @_Z12get_group_idj(i32 0)
  ; %div (= %call >> 4) is consumed further down by %mul6.
  %div = lshr i64 %call, 4
  ; Low part of the index:
  ;   %add = ((%call1 >> 3) << 7) | ((%call << 3) & 120)
  ; %mul only has bits 7 and above set and %mul3 only bits 3..6, so the two
  ; operands share no set bits and %add always has bits 0..2 clear.
  %div2 = lshr i64 %call1, 3
  %mul = shl i64 %div2, 7
  %rem = shl i64 %call, 3
  %mul3 = and i64 %rem, 120
  %add = or i64 %mul, %mul3
  ; High part of the index:
  ;   %add7 = ((%call1 << 7) & 896) + ((%call >> 4) << 3)
  ; which is then shifted left by 10, leaving bits 0..9 of %mul9 clear.
  %rem4 = shl i64 %call1, 7
  %mul5 = and i64 %rem4, 896
  %mul6 = shl nuw nsw i64 %div, 3
  %add7 = add nuw i64 %mul5, %mul6
  %mul9 = shl i64 %add7, 10
  ; Final element index. Both addends have bit 0 clear (see above), so bit 0
  ; of %add10 is always zero.
  %add10 = add i64 %mul9, %add
  ; First element: load the float, widen to double, multiply by a constant,
  ; round-trip through float, feed it as the addend of
  ; fmuladd(constant, 0.0, value), narrow to float and store it back through
  ; the same pointer.
  %arrayidx = getelementptr inbounds float, float addrspace(1)* %c, i64 %add10
  %load1 = load float, float addrspace(1)* %arrayidx, align 4
  %conv = fpext float %load1 to double
  %mul11 = fmul double %conv, 0x3FEAB481D8F35506
  %conv12 = fptrunc double %mul11 to float
  %conv18 = fpext float %conv12 to double
  %storeval1 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv18)
  %cstoreval1 = fptrunc double %storeval1 to float
  store float %cstoreval1, float addrspace(1)* %arrayidx, align 4

  ; Second element: because bit 0 of %add10 is zero, `or 1` yields
  ; %add10 + 1, i.e. the float immediately after %arrayidx. The index is
  ; expressed with `or` rather than `add`. The same load / compute / store
  ; sequence as above is then applied to it.
  %add23 = or i64 %add10, 1
  %arrayidx24 = getelementptr inbounds float, float addrspace(1)* %c, i64 %add23
  %load2 = load float, float addrspace(1)* %arrayidx24, align 4
  %conv25 = fpext float %load2 to double
  %mul26 = fmul double %conv25, 0x3FEAB481D8F35506
  %conv27 = fptrunc double %mul26 to float
  %conv34 = fpext float %conv27 to double
  %storeval2 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv34)
  %cstoreval2 = fptrunc double %storeval2 to float
  store float %cstoreval2, float addrspace(1)* %arrayidx24, align 4
  ret void
}