File: fusion-push-reshape.mlir

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (126 lines) | stat: -rw-r--r-- 6,282 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// RUN: mlir-opt %s -test-linalg-elementwise-fusion-patterns=fuse-with-reshape-by-collapsing -split-input-file | FileCheck %s

// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1) -> (d1)>

// CHECK-LABEL: func @reshape
// CHECK-SAME: (%[[A:.*]]: tensor<?x16xf32>, %[[B:.*]]: tensor<16xf32>, %[[INIT:.*]]: tensor<?x112x16xf32>)
//      CHECK: %[[RI:.*]] = tensor.collapse_shape %[[INIT]] {{\[}}[0, 1], [2]] : tensor<?x112x16xf32> into tensor<?x16xf32>
//      CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP3]], #[[$MAP2]]],
// CHECK-SAME: iterator_types = ["parallel", "parallel"]}
// CHECK-SAME: ins(%[[A]], %[[B]] : tensor<?x16xf32>, tensor<16xf32>) outs(%[[RI]] : tensor<?x16xf32>)
//      CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[}}[0, 1], [2]] : tensor<?x16xf32> into tensor<?x112x16xf32>
//      CHECK: return %[[RR]] : tensor<?x112x16xf32>
// Elementwise subtraction whose first input is produced by an expand_shape of
// %A. %B is indexed by d2 only, so it is reused across d0 and d1.
func.func @reshape(%A: tensor<?x16xf32>, %B: tensor<16xf32>, %init: tensor<?x112x16xf32>) -> tensor<?x112x16xf32> {
  // Producer reshape: dimension 0 of %A is split into result dimensions 0 and 1.
  %expanded = tensor.expand_shape %A [[0, 1], [2]] : tensor<?x16xf32> into tensor<?x112x16xf32>
  %result = linalg.generic {
      indexing_maps = [
        // Expanded %A: identity access.
        affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
        // %B: accessed through d2 only.
        affine_map<(d0, d1, d2) -> (d2)>,
        // Output: identity access.
        affine_map<(d0, d1, d2) -> (d0, d1, d2)>
      ],
      iterator_types = ["parallel", "parallel", "parallel"]
    }
    ins(%expanded, %B : tensor<?x112x16xf32>, tensor<16xf32>)
    outs(%init : tensor<?x112x16xf32>) {
  // The third block argument is the current output element; it is not read.
  ^bb0(%lhs: f32, %rhs: f32, %out_elem: f32):
    %diff = arith.subf %lhs, %rhs : f32
    linalg.yield %diff : f32
  } -> tensor<?x112x16xf32>
  return %result : tensor<?x112x16xf32>
}

// -----

// CHECK-DAG: #[[$MAP2:.*]] = affine_map<(d0, d1) -> (d0, d1)>
// CHECK-DAG: #[[$MAP3:.*]] = affine_map<(d0, d1) -> (d1)>

// CHECK-LABEL: func @reshape_multiple
// CHECK-SAME: (%[[A:.*]]: tensor<12544x16xf32>, %[[B:.*]]: tensor<12544x16xf32>, %[[C:.*]]: tensor<16xf32>)
//      CHECK: %[[I:.*]] = tensor.empty() : tensor<112x112x16xf32>
//      CHECK: %[[RI:.*]] = tensor.collapse_shape %[[I]] {{\[}}[0, 1], [2]] : tensor<112x112x16xf32> into tensor<12544x16xf32>
//      CHECK: %[[R:.*]] = linalg.generic {indexing_maps = [#[[$MAP2]], #[[$MAP2]], #[[$MAP3]], #[[$MAP2]]],
// CHECK-SAME: iterator_types = ["parallel", "parallel"]}
// CHECK-SAME: ins(%[[A]], %[[B]], %[[C]] : tensor<12544x16xf32>, tensor<12544x16xf32>, tensor<16xf32>) outs(%[[RI]] : tensor<12544x16xf32>)
//      CHECK: %[[RR:.*]] = tensor.expand_shape %[[R]] {{\[}}[0, 1], [2]] : tensor<12544x16xf32> into tensor<112x112x16xf32>
//      CHECK: return %[[RR]] : tensor<112x112x16xf32>
// Computes (%A - %B) * %C elementwise, where both %A and %B reach the generic
// op through identical expand_shape ops and %C is indexed by d2 only.
func.func @reshape_multiple(
    %A: tensor<12544x16xf32>,
    %B: tensor<12544x16xf32>,
    %C: tensor<16xf32>) -> tensor<112x112x16xf32> {
  // Both producers split dimension 0 (12544) into 112 x 112.
  %a_expanded = tensor.expand_shape %A [[0, 1], [2]] : tensor<12544x16xf32> into tensor<112x112x16xf32>
  %b_expanded = tensor.expand_shape %B [[0, 1], [2]] : tensor<12544x16xf32> into tensor<112x112x16xf32>
  // Destination tensor for the generic op.
  %dest = tensor.empty() : tensor<112x112x16xf32>
  %result = linalg.generic {
      indexing_maps = [
        // Expanded %A: identity access.
        affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
        // Expanded %B: identity access.
        affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
        // %C: accessed through d2 only.
        affine_map<(d0, d1, d2) -> (d2)>,
        // Output: identity access.
        affine_map<(d0, d1, d2) -> (d0, d1, d2)>
      ],
      iterator_types = ["parallel", "parallel", "parallel"]
    }
    ins(%a_expanded, %b_expanded, %C
        : tensor<112x112x16xf32>, tensor<112x112x16xf32>, tensor<16xf32>)
    outs(%dest : tensor<112x112x16xf32>) {
  // The last block argument is the current output element; it is not read.
  ^bb0(%a: f32, %b: f32, %c: f32, %out_elem: f32):
    %diff = arith.subf %a, %b : f32
    %scaled = arith.mulf %diff, %c : f32
    linalg.yield %scaled : f32
  } -> tensor<112x112x16xf32>
  return %result : tensor<112x112x16xf32>
}

// -----

// Negative test: the second source is indexed by d1 only (it is broadcast
// along d0 and d2), so the d0 and d1 dimensions cannot be merged.
// CHECK-LABEL: func @reshape_negative
// CHECK: tensor.expand_shape {{.*}} : tensor<12544x16xf32> into tensor<112x112x16xf32>
// CHECK: linalg.generic
// CHECK: } -> tensor<112x112x16xf32>
// Elementwise subtraction where %B is accessed through d1, one of the two
// dimensions created by the expand_shape of %A.
func.func @reshape_negative(%A: tensor<12544x16xf32>, %B: tensor<112xf32>) -> tensor<112x112x16xf32> {
  // Producer reshape: dimension 0 (12544) is split into 112 x 112.
  %expanded = tensor.expand_shape %A [[0, 1], [2]] : tensor<12544x16xf32> into tensor<112x112x16xf32>
  // Destination tensor for the generic op.
  %dest = tensor.empty() : tensor<112x112x16xf32>
  %result = linalg.generic {
      indexing_maps = [
        // Expanded %A: identity access.
        affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
        // %B: accessed through d1 only.
        affine_map<(d0, d1, d2) -> (d1)>,
        // Output: identity access.
        affine_map<(d0, d1, d2) -> (d0, d1, d2)>
      ],
      iterator_types = ["parallel", "parallel", "parallel"]
    }
    ins(%expanded, %B : tensor<112x112x16xf32>, tensor<112xf32>)
    outs(%dest : tensor<112x112x16xf32>) {
  // The third block argument is the current output element; it is not read.
  ^bb0(%lhs: f32, %rhs: f32, %out_elem: f32):
    %diff = arith.subf %lhs, %rhs : f32
    linalg.yield %diff : f32
  } -> tensor<112x112x16xf32>
  return %result : tensor<112x112x16xf32>
}

// -----

// Generic op whose reshaped input has element type i32 while the other inputs
// and the result have element type f32.
func.func @type_correctness(
    %arg0 : tensor<6x5xi32>,
    %arg1 : tensor<5xf32>,
    %arg2 : tensor<5xf32>) -> tensor<2x3x5xf32> {
  // Scalar constants captured by the generic op's region.
  %c_one = arith.constant 1.000000e+00 : f32
  %c_seven = arith.constant 7.000000e+00 : f32
  %c_tiny = arith.constant 1.1920929E-7 : f32
  // Producer reshape: dimension 0 (6) is split into 2 x 3; element type stays i32.
  %expanded = tensor.expand_shape %arg0 [[0, 1], [2]] : tensor<6x5xi32> into tensor<2x3x5xi32>
  // Destination tensor for the generic op (f32 elements).
  %dest = tensor.empty() : tensor<2x3x5xf32>
  %result = linalg.generic {
      indexing_maps = [
        // Expanded %arg0: identity access.
        affine_map<(d0, d1, d2) -> (d0, d1, d2)>,
        // %arg1: accessed through d2 only.
        affine_map<(d0, d1, d2) -> (d2)>,
        // %arg2: accessed through d2 only.
        affine_map<(d0, d1, d2) -> (d2)>,
        // Output: identity access.
        affine_map<(d0, d1, d2) -> (d0, d1, d2)>
      ],
      iterator_types = ["parallel", "parallel", "parallel"]
    }
    ins(%expanded, %arg1, %arg2
        : tensor<2x3x5xi32>, tensor<5xf32>, tensor<5xf32>)
    outs(%dest : tensor<2x3x5xf32>) {
  // The last block argument is the current output element; it is not read.
  ^bb0(%in_i32: i32, %in_a: f32, %in_b: f32, %out_elem: f32):
    // Convert the integer element to f32 before the floating-point arithmetic.
    %as_float = arith.sitofp %in_i32 : i32 to f32
    %shifted = arith.addf %in_a, %c_tiny : f32
    %quot = arith.divf %c_seven, %shifted : f32
    %recip = arith.divf %c_one, %quot : f32
    %prod = arith.mulf %as_float, %recip : f32
    %sum = arith.addf %prod, %in_b : f32
    linalg.yield %sum : f32
  } -> tensor<2x3x5xf32>
  return %result : tensor<2x3x5xf32>
}
// CHECK-LABEL: func @type_correctness
//       CHECK:   %[[OP:.+]] = linalg.generic
//  CHECK-SAME:   ins(%{{.+}}, %{{.+}}, %{{.+}} : tensor<6x5xi32>, tensor<5xf32>, tensor<5xf32>)
//  CHECK-SAME:   outs(%{{.+}} : tensor<6x5xf32>)
//       CHECK:   tensor.expand_shape %[[OP]]
//  CHECK-SAME:   tensor<6x5xf32> into tensor<2x3x5xf32>