// File: tiled-loops.mlir
//
// Package info:
//   llvm-toolchain-13 1:13.0.1-11
//   - links: PTS, VCS
//   - area: main
//   - in suites: bookworm
//   - size: 1,418,840 kB
//   - sloc: cpp: 5,290,826; ansic: 996,570; asm: 544,593; python: 188,212; objc: 72,027; lisp: 30,291; f90: 25,395; sh: 24,898; javascript: 9,780; pascal: 9,398; perl: 7,484; ml: 5,432; awk: 3,523; makefile: 2,913; xml: 953; cs: 573; fortran: 539
// File content: 79 lines | stat: -rw-r--r-- 3,144 bytes
// RUN: mlir-opt %s -convert-linalg-tiled-loops-to-scf | FileCheck %s


// Results (24, 192 - d0). In @tiled_loop this map is applied through
// affine.min to the first induction variable of the tiled loop.
#map0 = affine_map<(d0) -> (24, -d0 + 192)>
// Maps (d0, d1) with symbol s0 to d0 * 192 + s0 + d1. Used as the layout
// attribute on every memref.subview result type in @tiled_loop.
#map1 = affine_map<(d0, d1)[s0] -> (d0 * 192 + s0 + d1)>
// Results (16, 192 - d0). In @tiled_loop this map is applied through
// affine.min to the second induction variable of the tiled loop.
#map2 = affine_map<(d0) -> (16, -d0 + 192)>

// Test input: a two-dimensional linalg.tiled_loop over [0, 192) x [0, 192)
// with steps 24 and 16. The body takes subviews of %A, %B and %C, then runs
// linalg.fill and linalg.matmul on them.
func @tiled_loop(%A: memref<192x192xf32>,
                 %B: memref<192x192xf32>,
                 %C: memref<192x192xf32>) {
  // The order of these constants is deliberate: the FileCheck patterns that
  // follow this function match the index constants in this sequence.
  %zero = constant 0.000000e+00 : f32
  %step_i = constant 24 : index
  %step_j = constant 16 : index
  %lb = constant 0 : index
  %ub = constant 192 : index

  linalg.tiled_loop (%iv_i, %iv_j) = (%lb, %lb) to (%ub, %ub)
      step (%step_i, %step_j)
      ins (%lhs = %A: memref<192x192xf32>, %rhs = %B: memref<192x192xf32>)
      outs (%acc = %C: memref<192x192xf32>) {
    // Rows [%iv_i, %iv_i + %size_i) of the left operand, all 192 columns.
    %size_i = affine.min #map0(%iv_i)
    %lhs_tile = memref.subview %lhs[%iv_i, 0] [%size_i, 192] [1, 1]
      : memref<192x192xf32> to memref<?x192xf32, #map1>

    // Columns [%iv_j, %iv_j + %size_j) of the right operand, all 192 rows.
    %size_j = affine.min #map2(%iv_j)
    %rhs_tile = memref.subview %rhs[0, %iv_j] [192, %size_j] [1, 1]
      : memref<192x192xf32> to memref<192x?xf32, #map1>

    // Output tile of size %size_i x %size_j at offset (%iv_i, %iv_j).
    %acc_tile = memref.subview %acc[%iv_i, %iv_j] [%size_i, %size_j] [1, 1]
      : memref<192x192xf32> to memref<?x?xf32, #map1>

    // Fill the output tile with %zero before the matmul uses it as its output.
    linalg.fill(%zero, %acc_tile) : f32, memref<?x?xf32, #map1>
    linalg.matmul ins(%lhs_tile, %rhs_tile : memref<?x192xf32, #map1>,
                                             memref<192x?xf32, #map1>)
                  outs(%acc_tile : memref<?x?xf32, #map1>)
    linalg.yield
  }
  return
}

// CHECK-LABEL: @tiled_loop
// CHECK-SAME:  %[[A:.*]]: memref<192x192xf32>, %[[B:.*]]: memref<192x192xf32>,
// CHECK-SAME:  %[[C:.*]]: memref<192x192xf32>) {
// CHECK:       %[[C24:.*]] = constant 24 : index
// CHECK:       %[[C16:.*]] = constant 16 : index
// CHECK:       %[[C0:.*]] = constant 0 : index
// CHECK:       %[[C192:.*]] = constant 192 : index
// CHECK:       scf.for %[[I:.*]] = %[[C0]] to %[[C192]] step %[[C24]] {
// CHECK:         scf.for %[[J:.*]] = %[[C0]] to %[[C192]] step %[[C16]] {
// CHECK:           %[[A_sub:.*]] = memref.subview %[[A]][%[[I]]
// CHECK:           %[[B_sub:.*]] = memref.subview %[[B]][0, %[[J]]]
// CHECK:           %[[C_sub:.*]] = memref.subview %[[C]][%[[I]]
// CHECK:           linalg.fill
// CHECK:           linalg.matmul


// Test input: a linalg.tiled_loop whose two dimensions are both tagged as
// "reduction" iterators. The loop bounds and steps match @tiled_loop; the
// body contains only a linalg.fill on the block argument bound to %A.
func @tiled_loop_reduction(%A: memref<192x192xf32>,
                           %B: memref<192x192xf32>,
                           %C: memref<f32>) {
  // The order of these constants is deliberate: the FileCheck patterns that
  // follow this function match the index constants in this sequence.
  %step_i = constant 24 : index
  %step_j = constant 16 : index
  %lb = constant 0 : index
  %ub = constant 192 : index
  %zero = constant 0.000000e+00 : f32

  linalg.tiled_loop (%iv_i, %iv_j) = (%lb, %lb) to (%ub, %ub)
      step (%step_i, %step_j)
      ins (%lhs = %A: memref<192x192xf32>, %rhs = %B: memref<192x192xf32>)
      outs (%acc = %C: memref<f32>)
      iterators["reduction", "reduction"] {
    // Neither %rhs nor %acc is used in the body.
    linalg.fill(%zero, %lhs) : f32, memref<192x192xf32>
    linalg.yield
  }
  return
}

// CHECK-LABEL: @tiled_loop_reduction
// CHECK:       %[[C24:.*]] = constant 24 : index
// CHECK:       %[[C16:.*]] = constant 16 : index
// CHECK:       %[[C0:.*]] = constant 0 : index
// CHECK:       %[[C192:.*]] = constant 192 : index
// CHECK:       scf.for %{{.*}} = %[[C0]] to %[[C192]] step %[[C24]]
// CHECK:         scf.for %{{.*}} = %[[C0]] to %[[C192]] step %[[C16]]
// CHECK:           linalg.fill