File: test-ssve.mlir

package info (click to toggle)
swiftlang 6.0.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,519,992 kB
  • sloc: cpp: 9,107,863; ansic: 2,040,022; asm: 1,135,751; python: 296,500; objc: 82,456; f90: 60,502; lisp: 34,951; pascal: 19,946; sh: 18,133; perl: 7,482; ml: 4,937; javascript: 4,117; makefile: 3,840; awk: 3,535; xml: 914; fortran: 619; cs: 573; ruby: 573
file content (65 lines) | stat: -rw-r--r-- 2,342 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
// RUN: mlir-opt %s -test-lower-to-llvm | \
// RUN: mlir-translate -mlir-to-llvmir | \
// RUN: %lli_aarch64_cmd --march=aarch64 --mattr="+sve,+sme" \
// RUN:      -force-streaming-compatible-sve \
// RUN:      --entry-function=entry \
// RUN:      --dlopen=%mlir_native_utils_lib_dir/libmlir_c_runner_utils%shlibext | \
// RUN: FileCheck %s

// NOTE: To run this test, your CPU must support SME.

// Vector-length-agnostic copy of %size i64 elements from %src to %dst,
// executed in streaming mode (see the `arm_streaming` function attribute).
//
// Every iteration moves one vector<[2]xi64>, i.e. 2 * vscale elements. There
// is no masked tail, so %size is expected to be a multiple of that stride.
func.func @streaming_kernel_copy(%src : memref<?xi64>, %dst : memref<?xi64>, %size : index) attributes {arm_streaming} {
  %begin = arith.constant 0 : index
  %min_lanes = arith.constant 2 : index
  // Number of i64 lanes in one scalable vector at runtime.
  %vs = vector.vscale
  %stride = arith.muli %min_lanes, %vs : index
  scf.for %offset = %begin to %size step %stride {
    %chunk = vector.load %src[%offset] : memref<?xi64>, vector<[2]xi64>
    vector.store %chunk, %dst[%offset] : memref<?xi64>, vector<[2]xi64>
  }
  return
}

// Test driver: fills a 32-element buffer with the values 1..32, copies it into
// a second buffer through @streaming_kernel_copy, prints the copy four
// elements per line for verification, and returns 0.
func.func @entry() -> i32 {
  %pad = arith.constant 0: i64
  %exit_code = arith.constant 0: i32
  %first = arith.constant 0: index
  %row_width = arith.constant 4: index
  %count = arith.constant 32: index

  // Allocate source and destination buffers, then populate the source.
  %src_buf = memref.alloc() : memref<32xi64>
  %dst_buf = memref.alloc() : memref<32xi64>
  %init = arith.constant dense<[ 1,  2,  3,  4,  5,  6,  7,  8,
                                 9, 10, 11, 12, 13, 14, 15, 16,
                                17, 18, 19, 20, 21, 22, 23, 24,
                                25, 26, 27, 28, 29, 30, 31, 32]> : vector<32xi64>
  vector.transfer_write %init, %src_buf[%first] : vector<32xi64>, memref<32xi64>

  // Cast to dynamically shaped memrefs to match the kernel signature, then
  // run the copy over all 32 elements.
  %src_dyn = memref.cast %src_buf : memref<32xi64> to memref<?xi64>
  %dst_dyn = memref.cast %dst_buf : memref<32xi64> to memref<?xi64>
  call @streaming_kernel_copy(%src_dyn, %dst_dyn, %count) : (memref<?xi64>, memref<?xi64>, index) -> ()

  // Print and verify.
  //
  // CHECK:      ( 1, 2, 3, 4 )
  // CHECK-NEXT: ( 5, 6, 7, 8 )
  // CHECK-NEXT: ( 9, 10, 11, 12 )
  // CHECK-NEXT: ( 13, 14, 15, 16 )
  // CHECK-NEXT: ( 17, 18, 19, 20 )
  // CHECK-NEXT: ( 21, 22, 23, 24 )
  // CHECK-NEXT: ( 25, 26, 27, 28 )
  // CHECK-NEXT: ( 29, 30, 31, 32 )
  scf.for %row = %first to %count step %row_width {
    // %pad is the padding value required by transfer_read.
    %line = vector.transfer_read %dst_buf[%row], %pad : memref<32xi64>, vector<4xi64>
    vector.print %line : vector<4xi64>
  }

  // Free both buffers.
  memref.dealloc %src_buf : memref<32xi64>
  memref.dealloc %dst_buf : memref<32xi64>

  return %exit_code : i32
}