File: use-too-many-tiles.mlir

package info (click to toggle)
llvm-toolchain-20 1%3A20.1.8-1~exp1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 2,111,388 kB
  • sloc: cpp: 7,438,767; ansic: 1,393,871; asm: 1,012,926; python: 241,728; f90: 86,635; objc: 75,411; lisp: 42,144; pascal: 17,286; sh: 10,027; ml: 5,082; perl: 4,730; awk: 3,523; makefile: 3,349; javascript: 2,251; xml: 892; fortran: 672
file content (69 lines) | stat: -rw-r--r-- 3,474 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// RUN: mlir-opt %s \
// RUN:   -test-lower-to-arm-sme -test-lower-to-llvm -verify-diagnostics | \
// RUN: %mcr_aarch64_cmd \
// RUN:   -e=main -entry-point-result=void \
// RUN:   -march=aarch64 -mattr="+sve,+sme" \
// RUN:   -shared-libs=%native_mlir_runner_utils,%native_mlir_c_runner_utils,%native_arm_sme_abi_shlib | \
// RUN: FileCheck %s

/// This function uses too many tiles! There's only two i16 tiles (ZA0.H and
/// ZA1.H), but this function uses five i16 tiles! Very expensive spills/reloads
/// will be inserted to emulate the extra three tiles. Note: This is only done
/// to avoid the compiler erroring out but is expected to have very poor
/// performance (hence the warning).
func.func @use_too_many_tiles(%a: memref<?x?xi16>, %b:  memref<?x?xi16>, %c: memref<?x?xi16>) {
  %c0 = arith.constant 0 : index
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  %tile_a = arith.constant dense<0> : vector<[8]x[8]xi16>
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  %tile_b = arith.constant dense<1> : vector<[8]x[8]xi16>
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  %tile_c = arm_sme.tile_load %a[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
  %tile_d = arm_sme.tile_load %b[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>
  %tile_e = arm_sme.tile_load %c[%c0, %c0] : memref<?x?xi16>, vector<[8]x[8]xi16>

  // CHECK-LABEL: tile_a:
  // CHECK-COUNT-8: ( 0, 0, 0, 0, 0, 0, 0, 0
  vector.print str "tile_a:\n"
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  vector.print %tile_a : vector<[8]x[8]xi16>
  // CHECK-LABEL: tile_b:
  // CHECK-COUNT-8: ( 1, 1, 1, 1, 1, 1, 1, 1
  vector.print str "tile_b:\n"
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  vector.print %tile_b : vector<[8]x[8]xi16>
  // CHECK-LABEL: tile_c:
  // CHECK-COUNT-8: ( 2, 2, 2, 2, 2, 2, 2, 2
  vector.print str "tile_c:\n"
  // expected-warning @below {{failed to allocate SME virtual tile to operation, tile value will go through memory, expect degraded performance}}
  vector.print %tile_c : vector<[8]x[8]xi16>
  // CHECK-LABEL: tile_d:
  // CHECK-COUNT-8: ( 3, 3, 3, 3, 3, 3, 3, 3
  vector.print str "tile_d:\n"
  vector.print %tile_d : vector<[8]x[8]xi16>
  // CHECK-LABEL: tile_e:
  // CHECK-COUNT-8: ( 4, 4, 4, 4, 4, 4, 4, 4
  vector.print str "tile_e:\n"
  vector.print %tile_e : vector<[8]x[8]xi16>
  return
}

func.func @main() {
  %c16 = arith.constant 16 : index
  %svl_h = arm_sme.streaming_vl <half>

  %c2 = arith.constant 2 : i16
  %c3 = arith.constant 3 : i16
  %c4 = arith.constant 4 : i16

  %memA = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>
  %memB = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>
  %memC = memref.alloca(%svl_h, %svl_h) : memref<?x?xi16>

  linalg.fill ins(%c2 : i16) outs(%memA : memref<?x?xi16>)
  linalg.fill ins(%c3 : i16) outs(%memB : memref<?x?xi16>)
  linalg.fill ins(%c4 : i16) outs(%memC : memref<?x?xi16>)

  func.call @use_too_many_tiles(%memA, %memB, %memC) : (memref<?x?xi16>, memref<?x?xi16>, memref<?x?xi16>) -> ()
  return
}