// RUN: mlir-opt %s -split-input-file -canonicalize="test-convergence" | FileCheck %s
// A bounds-checked load at constant index 4 reads one element past the end of
// a 4-element buffer. The expected output no longer uses the load result and
// returns a floating-point zero constant instead.
// CHECK-LABEL: func @known_oob_load
func.func @known_oob_load(%buffer: memref<4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %oob_idx = arith.constant 4 : i32
  %value = amdgpu.raw_buffer_load {boundsCheck = true} %buffer[%oob_idx] : memref<4xf32>, i32 -> f32
  func.return %value : f32
}
// -----
// Two-dimensional variant: the leading index is the constant 4 on a 4x4
// buffer (valid leading indices are 0..3) and the trailing index is 0. The
// expected output returns a zero constant in place of the loaded value.
// CHECK-LABEL: func @known_oob_load_2d
func.func @known_oob_load_2d(%buffer: memref<4x4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %outer = arith.constant 4 : i32
  %inner = arith.constant 0 : i32
  %value = amdgpu.raw_buffer_load {boundsCheck = true} %buffer[%outer, %inner] : memref<4x4xf32>, i32, i32 -> f32
  func.return %value : f32
}
// -----
// Two-dimensional variant where only the trailing index is large: indices
// (0, 16) on a 4x4 buffer, which holds 16 elements in total. The expected
// output returns a zero constant in place of the loaded value.
// NOTE(review): presumably the fold fires because the linearized position
// (16) is past the last element - confirm against the canonicalization
// pattern.
// CHECK-LABEL: func @known_oob_load_2d_on_last
func.func @known_oob_load_2d_on_last(%buffer: memref<4x4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %outer = arith.constant 0 : i32
  %inner = arith.constant 16 : i32
  %value = amdgpu.raw_buffer_load {boundsCheck = true} %buffer[%outer, %inner] : memref<4x4xf32>, i32, i32 -> f32
  func.return %value : f32
}
// -----
// The dynamic index operand is the constant 0, which on its own is inside the
// 4-element buffer; the op additionally carries the attribute
// indexOffset = 4. The expected output returns a zero constant, so the
// attribute has to be accounted for when deciding the access is out of bounds.
// CHECK-LABEL: func @known_oob_load_index
func.func @known_oob_load_index(%buffer: memref<4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %base_idx = arith.constant 0 : i32
  %value = amdgpu.raw_buffer_load {boundsCheck = true, indexOffset = 4 : i32} %buffer[%base_idx] : memref<4xf32>, i32 -> f32
  func.return %value : f32
}
// -----
// The same constant 2 is used both as the index and as the sgprOffset
// operand. Index 2 alone is inside the 4-element buffer, yet the expected
// output returns a zero constant, so a constant sgprOffset has to contribute
// to the out-of-bounds decision.
// CHECK-LABEL: func @known_oob_load_sgproffset
func.func @known_oob_load_sgproffset(%buffer: memref<4xf32>) -> f32 {
  // CHECK: %[[zero:.*]] = arith.constant 0.000000e+00 : f32
  // CHECK: return %[[zero]]
  %two = arith.constant 2 : i32
  %value = amdgpu.raw_buffer_load {boundsCheck = true} %buffer[%two] sgprOffset %two : memref<4xf32>, i32 -> f32
  func.return %value : f32
}
// -----
// Negative test: the index is a function argument (not a constant) while the
// sgprOffset is the constant 4. The expected output still contains the load
// and returns its result.
// CHECK-LABEL: func @unknown_load
func.func @unknown_load(%buffer: memref<4xf32>, %dyn_idx: i32) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %const_offset = arith.constant 4 : i32
  %value = amdgpu.raw_buffer_load {boundsCheck = true} %buffer[%dyn_idx] sgprOffset %const_offset : memref<4xf32>, i32 -> f32
  func.return %value : f32
}
// -----
// Negative test: the index is the constant 4 but the sgprOffset is a function
// argument (not a constant). The expected output still contains the load and
// returns its result.
// CHECK-LABEL: func @unknown_load_sgproffset
func.func @unknown_load_sgproffset(%buffer: memref<4xf32>, %dyn_offset: i32) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %const_idx = arith.constant 4 : i32
  %value = amdgpu.raw_buffer_load {boundsCheck = true} %buffer[%const_idx] sgprOffset %dyn_offset : memref<4xf32>, i32 -> f32
  func.return %value : f32
}
// -----
// Negative test: the buffer type is memref<?xf32>, so its extent is not known
// statically. (Despite the function name, this memref is ranked; only its
// single dimension is dynamic.) The expected output still contains the load
// and returns its result.
// CHECK-LABEL: func @unranked
func.func @unranked(%buffer: memref<?xf32>) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %const_idx = arith.constant 4 : i32
  %value = amdgpu.raw_buffer_load {boundsCheck = true} %buffer[%const_idx] : memref<?xf32>, i32 -> f32
  func.return %value : f32
}
// -----
// Negative test: same constant index 4 on a 4-element buffer as the basic
// out-of-bounds case, but with boundsCheck = false. The expected output still
// contains the load and returns its result.
// CHECK-LABEL: func @no_oob_check
func.func @no_oob_check(%buffer: memref<4xf32>) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %const_idx = arith.constant 4 : i32
  %value = amdgpu.raw_buffer_load {boundsCheck = false} %buffer[%const_idx] : memref<4xf32>, i32 -> f32
  func.return %value : f32
}
// -----
// Negative test: indices (0, 15) on a 4x4 buffer. The trailing index exceeds
// the extent of its own dimension (4), but the expected output still contains
// the load and returns its result.
// NOTE(review): presumably the access is kept because the linearized position
// (15) is the last of the 16 elements - confirm against the canonicalization
// pattern.
// CHECK-LABEL: func @in_bounds_overall
func.func @in_bounds_overall(%buffer: memref<4x4xf32>) -> f32 {
  // CHECK: %[[loaded:.*]] = amdgpu.raw_buffer_load
  // CHECK: return %[[loaded]]
  %outer = arith.constant 0 : i32
  %inner = arith.constant 15 : i32
  %value = amdgpu.raw_buffer_load {boundsCheck = true} %buffer[%outer, %inner] : memref<4x4xf32>, i32, i32 -> f32
  func.return %value : f32
}
// -----
// A bounds-checked store at constant index 4 into a 4-element buffer. The
// expected output contains no raw buffer store at all.
// CHECK-LABEL: func @dead_store
func.func @dead_store(%buffer: memref<4xf32>, %data: f32) {
  // CHECK-NOT: amdgpu.raw_buffer_store
  %oob_idx = arith.constant 4 : i32
  amdgpu.raw_buffer_store {boundsCheck = true} %data -> %buffer[%oob_idx] : f32 -> memref<4xf32>, i32
  func.return
}
// -----
// A bounds-checked atomic floating-point add at constant index 4 into a
// 4-element buffer. The expected output contains no raw buffer atomic fadd at
// all.
// CHECK-LABEL: func @dead_atomic_add
func.func @dead_atomic_add(%buffer: memref<4xf32>, %addend: f32) {
  // CHECK-NOT: amdgpu.raw_buffer_atomic_fadd
  %oob_idx = arith.constant 4 : i32
  amdgpu.raw_buffer_atomic_fadd {boundsCheck = true} %addend -> %buffer[%oob_idx] : f32 -> memref<4xf32>, i32
  func.return
}