File: intel_fpga_lsu_optimized.ll

package info (click to toggle)
spirv-llvm-translator-14 14.0.11-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 15,640 kB
  • sloc: cpp: 47,664; lisp: 3,704; sh: 153; python: 43; makefile: 33
file content (144 lines) | stat: -rw-r--r-- 10,506 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
; LLVM IR generated by Intel SYCL Clang compiler (https://github.com/intel/llvm)
; SYCL source code can be found below:

; #include <CL/sycl.hpp>
; #include <CL/sycl/intel/fpga_extensions.hpp>
;
; int main() {
;   cl::sycl::queue Queue{cl::sycl::intel::fpga_emulator_selector{}};
;
;   {
;     cl::sycl::buffer<int, 1> output_buffer(output_data, 1);
;     cl::sycl::buffer<int, 1> input_buffer(input_data, 1);
;
;     Queue.submit([&](cl::sycl::handler &cgh) {
;       auto output_accessor =
;           output_buffer.get_access<cl::sycl::access::mode::write>(cgh);
;       auto input_accessor =
;           input_buffer.get_access<cl::sycl::access::mode::read>(cgh);
;
;       cgh.single_task<class kernel>([=] {
;         auto input_ptr = input_accessor.get_pointer();
;         auto output_ptr = output_accessor.get_pointer();
;
;         using PrefetchingLSU =
;             cl::sycl::intel::lsu<cl::sycl::intel::prefetch<true>,
;                                  cl::sycl::intel::statically_coalesce<false>>;
;
;         using BurstCoalescedLSU =
;             cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>,
;                                  cl::sycl::intel::statically_coalesce<false>>;
;
;         using CachingLSU =
;             cl::sycl::intel::lsu<cl::sycl::intel::burst_coalesce<true>,
;                                  cl::sycl::intel::cache<1024>,
;                                  cl::sycl::intel::statically_coalesce<false>>;
;
;         using PipelinedLSU = cl::sycl::intel::lsu<>;
;
;         int X = PrefetchingLSU::load(input_ptr); // int X = input_ptr[0]
;         int Y = CachingLSU::load(input_ptr + 1); // int Y = input_ptr[1]
;
;         BurstCoalescedLSU::store(output_ptr, X); // output_ptr[0] = X
;         PipelinedLSU::store(output_ptr + 1, Y);  // output_ptr[1] = Y
;       });
;     });
;   }
;
;   return 0;
; }

; Check that translation of optimized IR doesn't crash:
; RUN: llvm-as %s -o %t.bc
; RUN: llvm-spirv %t.bc --spirv-ext=+SPV_INTEL_fpga_memory_accesses -o %t.spv

; Check that reverse translation restore ptr.annotations correctly:
; RUN: llvm-spirv -r %t.spv -o %t.rev.bc
; RUN: llvm-dis < %t.rev.bc | FileCheck %s --check-prefix=CHECK-LLVM

target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"

%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range" = type { %"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" }
%"class._ZTSN2cl4sycl6detail5arrayILi1EEE.cl::sycl::detail::array" = type { [1 x i64] }

$"_ZTSZZ8test_lsuN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6kernel" = comdat any

@.str = private unnamed_addr constant [26 x i8] c"{params:12}{cache-size:0}\00", section "llvm.metadata"
@.str.1 = private unnamed_addr constant [14 x i8] c"<invalid loc>\00", section "llvm.metadata"
@.str.2 = private unnamed_addr constant [28 x i8] c"{params:7}{cache-size:1024}\00", section "llvm.metadata"
@.str.3 = private unnamed_addr constant [25 x i8] c"{params:5}{cache-size:0}\00", section "llvm.metadata"
@.str.4 = private unnamed_addr constant [25 x i8] c"{params:0}{cache-size:0}\00", section "llvm.metadata"

; CHECK-LLVM: [[PTR_i27_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:12}
; CHECK-LLVM: [[PTR_i15_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:7}{cache-size:1024}
; CHECK-LLVM: [[PTR_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:5}
; CHECK-LLVM: [[PTR_i_i_ANNOT_STR:@[a-z0-9_.]]] = {{.*}}{params:0}{cache-size:0}

; Function Attrs: norecurse
define weak_odr dso_local spir_kernel void @"_ZTSZZ8test_lsuN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6kernel"(i32 addrspace(1)* %_arg_, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_1, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_2, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_3, i32 addrspace(1)* %_arg_4, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_6, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_7, %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* byval(%"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range") align 8 %_arg_8) local_unnamed_addr #0 comdat !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_buffer_location !8 {
entry:
  %0 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_3, i64 0, i32 0, i32 0, i64 0
  %1 = load i64, i64* %0, align 8
  %add.ptr.i27 = getelementptr inbounds i32, i32 addrspace(1)* %_arg_, i64 %1
  %2 = getelementptr inbounds %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range", %"class._ZTSN2cl4sycl5rangeILi1EEE.cl::sycl::range"* %_arg_8, i64 0, i32 0, i32 0, i64 0
  %3 = load i64, i64* %2, align 8
  %add.ptr.i = getelementptr inbounds i32, i32 addrspace(1)* %_arg_4, i64 %3
  %4 = addrspacecast i32 addrspace(1)* %add.ptr.i27 to i32 addrspace(4)*
  %5 = tail call dereferenceable(4) i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %4, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0, i8* null) #2
  %6 = load i32, i32 addrspace(4)* %5, align 4, !tbaa !9
  ; CHECK-LLVM: [[PTR_i27:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
  ; CHECK-LLVM: [[PTR_i:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
  ; CHECK-LLVM: [[PTR_i27_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i27]] to i32 addrspace(4)*
  ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i27_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i27_ANNOT_STR]]
  ; CHECK-LLVM: [[PTR_RESULT_LOAD:[%0-9a-z.]+]] = load i32, i32 addrspace(4)* [[PTR_ANNOT_CALL]]
  %add.ptr.i15.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i27, i64 1
  %7 = addrspacecast i32 addrspace(1)* %add.ptr.i15.i to i32 addrspace(4)*
  %8 = tail call dereferenceable(4) i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %7, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0, i8* null) #2
  %9 = load i32, i32 addrspace(4)* %8, align 4, !tbaa !9
  ; CHECK-LLVM: [[PTR_i15_i:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
  ; CHECK-LLVM: [[PTR_i15_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i15_i]] to i32 addrspace(4)*
  ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i15_i_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i15_i_ANNOT_STR]]
  ; CHECK-LLVM: [[PTR_RESULT_LOAD_1:[%0-9a-z.]+]] = load i32, i32 addrspace(4)* [[PTR_ANNOT_CALL]]
  %10 = addrspacecast i32 addrspace(1)* %add.ptr.i to i32 addrspace(4)*
  %11 = tail call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %10, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0, i8* null) #2
  store i32 %6, i32 addrspace(4)* %11, align 4, !tbaa !9
  ; CHECK-LLVM: [[PTR_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i]] to i32 addrspace(4)*
  ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i_ANNOT_STR]]
  ; CHECK-LLVM: store i32 [[PTR_RESULT_LOAD]], i32 addrspace(4)* [[PTR_ANNOT_CALL]]
  %add.ptr.i.i = getelementptr inbounds i32, i32 addrspace(1)* %add.ptr.i, i64 1
  %12 = addrspacecast i32 addrspace(1)* %add.ptr.i.i to i32 addrspace(4)*
  %13 = tail call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* %12, i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.4, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 0, i8* null) #2
  store i32 %9, i32 addrspace(4)* %13, align 4, !tbaa !9
  ; CHECK-LLVM: [[PTR_i_i:[%0-9a-z.]+]] = getelementptr inbounds i32, i32 addrspace(1)* {{[%0-9a-z._]+}}, i64 {{[%0-9a-z.]+}}
  ; CHECK-LLVM: [[PTR_i_i_AS_CAST:[%0-9a-z.]+]] = addrspacecast i32 addrspace(1)* [[PTR_i_i]] to i32 addrspace(4)*
  ; CHECK-LLVM: [[PTR_ANNOT_CALL:[%0-9a-z.]+]] = call i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)* [[PTR_i_i_AS_CAST]], i8* getelementptr inbounds ({{.*}} [[PTR_i_i_ANNOT_STR]]
  ; CHECK-LLVM: store i32 [[PTR_RESULT_LOAD_1]], i32 addrspace(4)* [[PTR_ANNOT_CALL]]
  ret void
}

; Function Attrs: nounwind willreturn
declare i32 addrspace(4)* @llvm.ptr.annotation.p4i32(i32 addrspace(4)*, i8*, i8*, i32, i8*) #1

attributes #0 = { norecurse "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "sycl-module-id"="fpga_lsu.cpp" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind willreturn }
attributes #2 = { nounwind readnone }

!opencl.spir.version = !{!0}
!spirv.Source = !{!1}
!llvm.ident = !{!2}
!llvm.module.flags = !{!3}

!0 = !{i32 1, i32 2}
!1 = !{i32 4, i32 100000}
!2 = !{!"clang version 12.0.0"}
!3 = !{i32 1, !"wchar_size", i32 4}
!4 = !{i32 1, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0}
!5 = !{!"none", !"none", !"none", !"none", !"none", !"none", !"none", !"none"}
!6 = !{!"int*", !"cl::sycl::range<1>", !"cl::sycl::range<1>", !"cl::sycl::id<1>", !"int*", !"cl::sycl::range<1>", !"cl::sycl::range<1>", !"cl::sycl::id<1>"}
!7 = !{!"", !"", !"", !"", !"", !"", !"", !""}
!8 = !{i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1}
!9 = !{!10, !10, i64 0}
!10 = !{!"int", !11, i64 0}
!11 = !{!"omnipotent char", !12, i64 0}
!12 = !{!"Simple C++ TBAA"}