File: parallel-loop-specialization.mlir

package info (click to toggle)
llvm-toolchain-13 1%3A13.0.1-6~deb11u1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,418,812 kB
  • sloc: cpp: 5,290,827; ansic: 996,570; asm: 544,593; python: 188,212; objc: 72,027; lisp: 30,291; f90: 25,395; sh: 24,900; javascript: 9,780; pascal: 9,398; perl: 7,484; ml: 5,432; awk: 3,523; makefile: 2,892; xml: 953; cs: 573; fortran: 539
file content (46 lines) | stat: -rw-r--r-- 2,489 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// RUN: mlir-opt %s -parallel-loop-specialization -split-input-file | FileCheck %s

// Zero-dimension, two-symbol maps. Each yields two results: a constant
// (1024 or 64) and the difference of the two symbols, s0 - s1. They are used
// as the operand maps of the affine.min ops that compute the loop upper
// bounds in @parallel_loop.
#map0 = affine_map<()[s0, s1] -> (1024, s0 - s1)>
#map1 = affine_map<()[s0, s1] -> (64, s0 - s1)>

// Test input: a 2-D scf.parallel loop that stores %B[i, j] + %C[i, j] into
// %result[i, j]. Both upper bounds come from affine.min ops whose maps have
// a constant result (1024 and 64), alongside a difference of the dynamic
// extent of %A and an outer index.
func @parallel_loop(
    %outer_i0: index, %outer_i1: index,
    %A: memref<?x?xf32>, %B: memref<?x?xf32>,
    %C: memref<?x?xf32>, %result: memref<?x?xf32>) {
  %zero = constant 0 : index
  %one = constant 1 : index
  // Dynamic extents of %A along dimensions 0 and 1.
  %dim0 = memref.dim %A, %zero : memref<?x?xf32>
  %dim1 = memref.dim %A, %one : memref<?x?xf32>
  // Upper bounds: min(1024, dim0 - outer_i0) and min(64, dim1 - outer_i1).
  %ub0 = affine.min #map0()[%dim0, %outer_i0]
  %ub1 = affine.min #map1()[%dim1, %outer_i1]
  scf.parallel (%iv0, %iv1) = (%zero, %zero) to (%ub0, %ub1) step (%one, %one) {
    %lhs = memref.load %B[%iv0, %iv1] : memref<?x?xf32>
    %rhs = memref.load %C[%iv0, %iv1] : memref<?x?xf32>
    %sum = addf %lhs, %rhs : f32
    memref.store %sum, %result[%iv0, %iv1] : memref<?x?xf32>
  }
  return
}

// Expected output for @parallel_loop after the pass runs. The original
// constants, dim ops and affine.min ops are kept; the pass then emits the
// constants 1024 and 64, compares each dynamic upper bound against its
// constant, and guards two copies of the loop with the conjunction of the
// two comparisons.
// CHECK-LABEL:   func @parallel_loop(
// CHECK-SAME:                        [[VAL_0:%.*]]: index, [[VAL_1:%.*]]: index, [[VAL_2:%.*]]: memref<?x?xf32>, [[VAL_3:%.*]]: memref<?x?xf32>, [[VAL_4:%.*]]: memref<?x?xf32>, [[VAL_5:%.*]]: memref<?x?xf32>) {
// CHECK:           [[VAL_6:%.*]] = constant 0 : index
// CHECK:           [[VAL_7:%.*]] = constant 1 : index
// CHECK:           [[VAL_8:%.*]] = memref.dim [[VAL_2]], [[VAL_6]] : memref<?x?xf32>
// CHECK:           [[VAL_9:%.*]] = memref.dim [[VAL_2]], [[VAL_7]] : memref<?x?xf32>
// CHECK:           [[VAL_10:%.*]] = affine.min #map0(){{\[}}[[VAL_8]], [[VAL_0]]]
// CHECK:           [[VAL_11:%.*]] = affine.min #map1(){{\[}}[[VAL_9]], [[VAL_1]]]
// Guard condition: both dynamic bounds equal their constant counterparts.
// CHECK:           [[VAL_12:%.*]] = constant 1024 : index
// CHECK:           [[VAL_13:%.*]] = cmpi eq, [[VAL_10]], [[VAL_12]] : index
// CHECK:           [[VAL_14:%.*]] = constant 64 : index
// CHECK:           [[VAL_15:%.*]] = cmpi eq, [[VAL_11]], [[VAL_14]] : index
// CHECK:           [[VAL_16:%.*]] = and [[VAL_13]], [[VAL_15]] : i1
// Then-branch: loop copy whose upper bounds are the constants 1024 and 64.
// CHECK:           scf.if [[VAL_16]] {
// CHECK:             scf.parallel ([[VAL_17:%.*]], [[VAL_18:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_12]], [[VAL_14]]) step ([[VAL_7]], [[VAL_7]]) {
// CHECK:               memref.store
// CHECK:             }
// Else-branch: loop copy that keeps the dynamic affine.min upper bounds.
// CHECK:           } else {
// CHECK:             scf.parallel ([[VAL_22:%.*]], [[VAL_23:%.*]]) = ([[VAL_6]], [[VAL_6]]) to ([[VAL_10]], [[VAL_11]]) step ([[VAL_7]], [[VAL_7]]) {
// CHECK:               memref.store
// CHECK:             }
// CHECK:           }
// CHECK:           return
// CHECK:         }