1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
|
// RUN: mlir-opt -split-input-file --test-linalg-transform-patterns="test-generalize-tensor-pack" %s | FileCheck %s
func.func @simple_KCRS_to_KCRSsr(%arg0: tensor<1x1x32x8xf32>, %arg1: tensor<1x1x1x1x8x32xf32>) -> tensor<1x1x1x1x8x32xf32> {
%0 = tensor.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x32x8xf32> -> tensor<1x1x1x1x8x32xf32>
return %0 : tensor<1x1x1x1x8x32xf32>
}
// CHECK-LABEL: func.func @simple_KCRS_to_KCRSsr
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK: %[[TILE:.+]] = tensor.extract_slice %[[SRC]][0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x32xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[TILE]] : tensor<32x8xf32>)
// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x32xf32>)
// CHECK-SAME: permutation = [1, 0]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0, 0, 0] [1, 1, 1, 1, 8, 32] [1, 1, 1, 1, 1, 1]
// CHECK: return %[[INSERT]]
// -----
func.func @simple_pad_and_pack(%input: tensor<5x1xf32>, %output: tensor<1x1x8x2xf32>, %pad: f32) -> tensor<1x1x8x2xf32> {
%0 = tensor.pack %input padding_value(%pad : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %output : tensor<5x1xf32> -> tensor<1x1x8x2xf32>
return %0 : tensor<1x1x8x2xf32>
}
// CHECK-LABEL: func.func @simple_pad_and_pack
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]]
// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index
// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index
// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index
// CHECK: %[[PAD:.+]] = tensor.pad %[[SRC]] low[%[[C0]], %[[C0]]] high[%[[C3]], %[[C1]]]
// CHECK: tensor.yield %[[PAD_VAL]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[PAD]] : tensor<8x2xf32>)
// CHECK-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>)
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 8, 2] [1, 1, 1, 1]
// CHECK: return %[[INSERT]]
// -----
func.func @simple_NC_to_CNnc(%arg0: tensor<32x8xf32>, %arg1: tensor<1x1x32x8xf32>) -> tensor<1x1x32x8xf32>{
%0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<32x8xf32> -> tensor<1x1x32x8xf32>
return %0 : tensor<1x1x32x8xf32>
}
// CHECK-LABEL: func.func @simple_NC_to_CNnc
// CHECK-SAME: %[[SRC:[a-zA-Z0-9]+]]
// CHECK-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK: %[[TRANSP:.+]] = linalg.transpose
// CHECK-SAME: ins(%[[SRC]] : tensor<32x8xf32>)
// CHECK-SAME: outs(%[[EMPTY]] : tensor<32x8xf32>)
// CHECK-SAME: permutation = [0, 1]
// CHECK: %[[INSERT:.+]] = tensor.insert_slice %[[TRANSP]] into %[[DEST]]
// CHECK-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1]
// CHECK: return %[[INSERT]]
// -----
// RUN: mlir-opt -split-input-file --test-transform-dialect-interpreter --canonicalize --test-linalg-transform-patterns="test-generalize-tensor-pack" %s | FileCheck %s --check-prefix=CHECK-TRANS
func.func @KCRS_to_KCRSsr(%arg0: tensor<1x1x128x64xf32>, %arg1: tensor<1x1x4x8x8x32xf32>) -> tensor<1x1x4x8x8x32xf32> {
%0 = tensor.pack %arg0 inner_dims_pos = [3, 2] inner_tiles = [8, 32] into %arg1 : tensor<1x1x128x64xf32> -> tensor<1x1x4x8x8x32xf32>
return %0 : tensor<1x1x4x8x8x32xf32>
}
// CHECK-TRANS-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)>
// CHECK-TRANS-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -32 + 128, 32)>
// CHECK-TRANS-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 * 8)>
// CHECK-TRANS-DAG: #[[MAP3:.+]] = affine_map<(d0) -> (d0 * -8 + 64, 8)>
// CHECK-TRANS: func.func @KCRS_to_KCRSsr
// CHECK-TRANS-SAME: %[[SRC:[a-zA-Z0-9]+]]
// CHECK-TRANS-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK-TRANS: %{{.+}} = scf.for %[[R:[a-zA-Z0-9]+]] =
// CHECK-TRANS: %{{.+}} = scf.for %[[S:[a-zA-Z0-9]+]] =
// CHECK-TRANS: %[[IN_R:.+]] = affine.apply #[[MAP0]](%[[R]])
// CHECK-TRANS: %[[IN_R_SZ:.+]] = affine.min #[[MAP1]](%[[R]])
// CHECK-TRANS: %[[IN_S:.+]] = affine.apply #[[MAP2]](%[[S]])
// CHECK-TRANS: %[[IN_S_SZ:.+]] = affine.min #[[MAP3]](%[[S]])
// CHECK-TRANS: %[[SRC_SLICE:.+]] = tensor.extract_slice %[[SRC]]
// CHECK-TRANS-SAME: [0, 0, %[[IN_R]], %[[IN_S]]] [1, 1, %[[IN_R_SZ]], %[[IN_S_SZ]]] [1, 1, 1, 1]
// CHECK-TRANS: %[[TILE:.+]] = tensor.extract_slice %[[SRC_SLICE]]
// CHECK-TRANS-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<1x1x?x?xf32> to tensor<32x8xf32>
// CHECK-TRANS: %[[EMPTY:.+]] = tensor.empty() : tensor<8x32xf32>
// CHECK-TRANS: %[[TRANSP:.+]] = linalg.transpose
// CHECK-TRANS-SAME: ins(%[[TILE]]
// CHECK-TRANS-SAME: outs(%[[EMPTY]]
// CHECK-TRANS-SAME: permutation = [1, 0]
// CHECK-TRANS: %{{.+}} = tensor.insert_slice %[[TRANSP]] into %{{.+}}
transform.sequence failures(propagate) {
^bb0(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["tensor.pack"]} in %arg1
%1, %loops:4 = transform.structured.tile_to_scf_for %0 [1, 1, 1, 1]
}
// -----
func.func @pad_and_pack(%arg0: tensor<13x15xf32>, %arg1: tensor<2x8x8x2xf32>, %arg2: f32) -> tensor<2x8x8x2xf32> {
%0 = tensor.pack %arg0 padding_value(%arg2 : f32) inner_dims_pos = [0, 1] inner_tiles = [8, 2] into %arg1 : tensor<13x15xf32> -> tensor<2x8x8x2xf32>
return %0 : tensor<2x8x8x2xf32>
}
// CHECK-TRANS: func.func @pad_and_pack
// CHECK-TRANS-SAME: %[[SRC:[a-zA-Z0-9]+]]
// CHECK-TRANS-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK-TRANS-SAME: %[[PAD_VAL:[a-zA-Z0-9]+]]
// CHECK-TRANS: scf.for
// CHECK-TRANS: scf.for
// CHECK-TRANS: %[[SRC_SLICE]] = tensor.extract_slice %[[SRC]]
// CHECK-TRANS: %[[PAD:.+]] = tensor.pad %[[SRC_SLICE]]
// CHECK-TRANS: tensor.yield %[[PAD_VAL]]
// CHECK-TRANS: } : tensor<?x?xf32> to tensor<8x2xf32>
// CHECK-TRANS: %[[EMPTY:.+]] = tensor.empty() : tensor<8x2xf32>
// CHECK-TRANS: %[[TRANSP:.+]] = linalg.transpose
// CHECK-TRANS-SAME: ins(%[[PAD]] : tensor<8x2xf32>)
// CHECK-TRANS-SAME: outs(%[[EMPTY]] : tensor<8x2xf32>)
// CHECK-TRANS-SAME: permutation = [0, 1]
// CHECK-TRANS: %{{.+}} = tensor.insert_slice %[[TRANSP]] into %{{.+}}
transform.sequence failures(propagate) {
^bb0(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["tensor.pack"]} in %arg1
%1, %loops:2 = transform.structured.tile_to_scf_for %0 [1, 1]
}
// -----
func.func @KC_to_CKkc(%arg0: tensor<128x256xf32>, %arg1: tensor<32x4x32x8xf32>) -> tensor<32x4x32x8xf32> {
%0 = tensor.pack %arg0 outer_dims_perm = [1, 0] inner_dims_pos = [0, 1] inner_tiles = [32, 8] into %arg1 : tensor<128x256xf32> -> tensor<32x4x32x8xf32>
return %0 : tensor<32x4x32x8xf32>
}
// CHECK-TRANS-DAG: #[[MAP0:.+]] = affine_map<(d0) -> (d0 * 32)>
// CHECK-TRANS-DAG: #[[MAP1:.+]] = affine_map<(d0) -> (d0 * -32 + 128, 32)>
// CHECK-TRANS-DAG: #[[MAP2:.+]] = affine_map<(d0) -> (d0 * 8)>
// CHECK-TRANS-DAG: #[[MAP3:.+]] = affine_map<(d0) -> (d0 * -8 + 256, 8)>
// CHECK-TRANS: func.func @KC_to_CKkc
// CHECK-TRANS-SAME: %[[SRC:[a-zA-Z0-9]+]]
// CHECK-TRANS-SAME: %[[DEST:[a-zA-Z0-9]+]]
// CHECK-TRANS: %{{.+}} = scf.for %[[C:[a-zA-Z0-9]+]] =
// CHECK-TRANS: %{{.+}} = scf.for %[[K:[a-zA-Z0-9]+]] =
// CHECK-TRANS-DAG: %[[IN_K:.+]] = affine.apply #[[MAP0]](%[[K]])
// CHECK-TRANS-DAG: %[[IN_K_SZ:.+]] = affine.min #[[MAP1]](%[[K]])
// CHECK-TRANS-DAG: %[[IN_C:.+]] = affine.apply #[[MAP2]](%[[C]])
// CHECK-TRANS-DAG: %[[IN_C_SZ:.+]] = affine.min #[[MAP3]](%[[C]])
// CHECK-TRANS: %[[SRC_SLICE:.+]] = tensor.extract_slice %[[SRC]]
// CHECK-TRANS-SAME: [%[[IN_K]], %[[IN_C]]] [%[[IN_K_SZ]], %[[IN_C_SZ]]] [1, 1]
// CHECK-TRANS: %[[TILE:.+]] = tensor.extract_slice %[[SRC_SLICE]]
// CHECK-TRANS-SAME: [0, 0] [32, 8] [1, 1] : tensor<?x?xf32> to tensor<32x8xf32>
// CHECK-TRANS: %[[EMPTY:.+]] = tensor.empty() : tensor<32x8xf32>
// CHECK-TRANS: %[[TRANSP:.+]] = linalg.transpose
// CHECK-TRANS-SAME: ins(%[[TILE]]
// CHECK-TRANS-SAME: outs(%[[EMPTY]]
// CHECK-TRANS-SAME: permutation = [0, 1]
// CHECK-TRANS: %[[SUB_ITER:.+]] = tensor.insert_slice %[[TRANSP]] into %{{[a-zA-Z0-9]+}}
// CHECK-TRANS-SAME: [0, 0, 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<32x8xf32> into tensor<1x1x32x8xf32>
// CHECK-TRANS: %{{.+}} = tensor.insert_slice %[[SUB_ITER]] into %{{[a-zA-Z0-9]+}}
// CHECK-TRANS-SAME: [%[[C]], %[[K]], 0, 0] [1, 1, 32, 8] [1, 1, 1, 1] : tensor<1x1x32x8xf32> into tensor<32x4x32x8xf32>
transform.sequence failures(propagate) {
^bb0(%arg1: !pdl.operation):
%0 = transform.structured.match ops{["tensor.pack"]} in %arg1
%1, %loops:2 = transform.structured.tile_to_scf_for %0 [1, 1]
}
|