File: promotion.mlir

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (122 lines) | stat: -rw-r--r-- 5,082 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
// RUN: mlir-opt -allow-unregistered-dialect -pass-pipeline='builtin.module(gpu.module(gpu.func(test-gpu-memory-promotion)))' -split-input-file %s | FileCheck %s

// Promotion of a 2-D memref argument: the pass must add a workgroup
// attribution of the same shape, copy the data in with a loop nest mapped to
// the 3-D thread grid, rewrite the use, and copy the data back out.
// NOTE(review): the name @memref3d presumably refers to the 3-D copy loop
// nest (three scf.for levels below), not the memref rank — the memref is 2-D.
gpu.module @foo {

  // Verify that the attribution was indeed introduced
  // CHECK-LABEL: @memref3d
  // CHECK-SAME: (%[[arg:.*]]: memref<5x4xf32>
  // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<5x4xf32, #gpu.address_space<workgroup>>)
  gpu.func @memref3d(%arg0: memref<5x4xf32> {gpu.test_promote_workgroup}) kernel {
    // Verify that loop bounds are emitted, the order does not matter.
    // CHECK-DAG: %[[c1:.*]] = arith.constant 1
    // CHECK-DAG: %[[c4:.*]] = arith.constant 4
    // CHECK-DAG: %[[c5:.*]] = arith.constant 5
    // CHECK-DAG: %[[tx:.*]] = gpu.thread_id x
    // CHECK-DAG: %[[ty:.*]] = gpu.thread_id y
    // CHECK-DAG: %[[tz:.*]] = gpu.thread_id z
    // CHECK-DAG: %[[bdx:.*]] = gpu.block_dim x
    // CHECK-DAG: %[[bdy:.*]] = gpu.block_dim y
    // CHECK-DAG: %[[bdz:.*]] = gpu.block_dim z

    // Verify that loops for the copy are emitted. We only check the number of
    // loops here since their bounds are produced by mapLoopToProcessorIds,
    // tested separately.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =

    // Verify that the copy is emitted and uses only the last two loops.
    // CHECK:       %[[v:.*]] = memref.load %[[arg]][%[[i1]], %[[i2]]]
    // CHECK:       store %[[v]], %[[promoted]][%[[i1]], %[[i2]]]

    // Verify that the use has been rewritten.
    // CHECK: "use"(%[[promoted]]) : (memref<5x4xf32, #gpu.address_space<workgroup>>)
    "use"(%arg0) : (memref<5x4xf32>) -> ()


    // Verify that loops for the copy are emitted. We only check the number of
    // loops here since their bounds are produced by mapLoopToProcessorIds,
    // tested separately.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =

    // Verify that the copy is emitted and uses only the last two loops.
    // CHECK:       %[[v:.*]] = memref.load %[[promoted]][%[[i1]], %[[i2]]]
    // CHECK:       store %[[v]], %[[arg]][%[[i1]], %[[i2]]]
    gpu.return
  }
}

// -----

// Promotion of a 5-D memref argument: since the rank exceeds the 3-D thread
// grid, the copy loop nest has five levels and the load/store use all five
// induction variables.
gpu.module @foo {

  // Verify that the attribution was indeed introduced
  // CHECK-LABEL: @memref5d
  // CHECK-SAME: (%[[arg:.*]]: memref<8x7x6x5x4xf32>
  // CHECK-SAME: workgroup(%[[promoted:.*]] : memref<8x7x6x5x4xf32, #gpu.address_space<workgroup>>)
  gpu.func @memref5d(%arg0: memref<8x7x6x5x4xf32> {gpu.test_promote_workgroup}) kernel {
    // Verify that loop bounds are emitted, the order does not matter.
    // CHECK-DAG: %[[c0:.*]] = arith.constant 0
    // CHECK-DAG: %[[c1:.*]] = arith.constant 1
    // CHECK-DAG: %[[c4:.*]] = arith.constant 4
    // CHECK-DAG: %[[c5:.*]] = arith.constant 5
    // CHECK-DAG: %[[c6:.*]] = arith.constant 6
    // CHECK-DAG: %[[c7:.*]] = arith.constant 7
    // CHECK-DAG: %[[c8:.*]] = arith.constant 8
    // CHECK-DAG: %[[tx:.*]] = gpu.thread_id x
    // CHECK-DAG: %[[ty:.*]] = gpu.thread_id y
    // CHECK-DAG: %[[tz:.*]] = gpu.thread_id z
    // CHECK-DAG: %[[bdx:.*]] = gpu.block_dim x
    // CHECK-DAG: %[[bdy:.*]] = gpu.block_dim y
    // CHECK-DAG: %[[bdz:.*]] = gpu.block_dim z

    // Verify that loops for the copy are emitted.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =
    // CHECK:       scf.for %[[i3:.*]] =
    // CHECK:         scf.for %[[i4:.*]] =

    // Verify that the copy is emitted.
    // CHECK:           %[[v:.*]] = memref.load %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    // CHECK:           store %[[v]], %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]

    // Verify that the use has been rewritten.
    // CHECK: "use"(%[[promoted]]) : (memref<8x7x6x5x4xf32, #gpu.address_space<workgroup>>)
    "use"(%arg0) : (memref<8x7x6x5x4xf32>) -> ()

    // Verify that loops for the copy-back are emitted.
    // CHECK: scf.for %[[i0:.*]] =
    // CHECK:   scf.for %[[i1:.*]] =
    // CHECK:     scf.for %[[i2:.*]] =
    // CHECK:       scf.for %[[i3:.*]] =
    // CHECK:         scf.for %[[i4:.*]] =

    // Verify that the copy is emitted.
    // CHECK:           %[[v:.*]] = memref.load %[[promoted]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    // CHECK:           store %[[v]], %[[arg]][%[[i0]], %[[i1]], %[[i2]], %[[i3]], %[[i4]]]
    gpu.return
  }
}

// -----

// Attribution insertion into a function that already has workgroup and
// private attributions: the CHECK-SAME lines below verify the pre-existing
// attributions are kept and the promoted buffer is appended to the workgroup
// list.
gpu.module @foo {

  // Check that attribution insertion works fine.
  // CHECK-LABEL: @insert
  // CHECK-SAME: (%{{.*}}: memref<4xf32>
  // CHECK-SAME: workgroup(%{{.*}}: memref<1x1xf64, #gpu.address_space<workgroup>>
  // CHECK-SAME: %[[wg2:.*]] : memref<4xf32, #gpu.address_space<workgroup>>)
  // CHECK-SAME: private(%{{.*}}: memref<1x1xi64, 5>)
  gpu.func @insert(%arg0: memref<4xf32> {gpu.test_promote_workgroup})
      workgroup(%arg1: memref<1x1xf64, #gpu.address_space<workgroup>>)
      private(%arg2: memref<1x1xi64, 5>)
      kernel {
    // CHECK: "use"(%[[wg2]])
    "use"(%arg0) : (memref<4xf32>) -> ()
    gpu.return
  }
}