File: lower-launch-func-to-gpu-runtime-calls.mlir

package info (click to toggle)

llvm-toolchain-19 1%3A19.1.7-3~deb12u1

links: PTS, VCS
area: main
in suites: bookworm
size: 1,998,492 kB
sloc: cpp: 6,951,680; ansic: 1,486,157; asm: 913,598; python: 232,024; f90: 80,126; objc: 75,281; lisp: 37,276; pascal: 16,990; sh: 10,009; ml: 5,058; perl: 4,724; awk: 3,523; makefile: 3,167; javascript: 2,504; xml: 892; fortran: 664; cs: 573

file content (101 lines) | stat: -rw-r--r-- 3,764 bytes

parent folder | download | duplicates (10)

// RUN: mlir-opt %s --gpu-to-llvm -split-input-file | FileCheck %s

module attributes {gpu.container_module} {
  // CHECK: gpu.module
  gpu.module @kernel_module [#nvvm.target] {
    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
        %arg5: i64) attributes {gpu.kernel} {
      llvm.return
    }
  }

  func.func @foo(%buffer: memref<?xf32>) {
  // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
  // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
  // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
    %c8 = arith.constant 8 : index
    %c32 = arith.constant 32 : i32
    %c256 = arith.constant 256 : i32

  // CHECK: gpu.launch_func @kernel_module::@kernel
  // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
  // CHECK: dynamic_shared_memory_size [[C256]]
  // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
    gpu.launch_func @kernel_module::@kernel
        blocks in (%c8, %c8, %c8)
        threads in (%c8, %c8, %c8)
        dynamic_shared_memory_size %c256
        args(%c32 : i32, %buffer : memref<?xf32>)
    return
  }
}


// -----

module attributes {gpu.container_module} {
  // CHECK: gpu.module
  gpu.module @kernel_module [#nvvm.target] {
    llvm.func @kernel(%arg0: i32, %arg1: !llvm.ptr,
        %arg2: !llvm.ptr, %arg3: i64, %arg4: i64,
        %arg5: i64) attributes {gpu.kernel} {
      llvm.return
    }
  }

  func.func @foo(%buffer: memref<?xf32>) {
  // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
  // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
  // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
  // CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64
    %c8 = arith.constant 8 : index    
    %c32 = arith.constant 32 : i32
    %c256 = arith.constant 256 : i32
    %c2 = arith.constant 2 : index

  // CHECK: gpu.launch_func @kernel_module::@kernel
  // CHECK: clusters in ([[C2]], [[C2]], [[C2]])
  // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
  // CHECK: dynamic_shared_memory_size [[C256]]
  // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
    gpu.launch_func @kernel_module::@kernel
        clusters in (%c2, %c2, %c2)
        blocks in (%c8, %c8, %c8)
        threads in (%c8, %c8, %c8)
        dynamic_shared_memory_size %c256
        args(%c32 : i32, %buffer : memref<?xf32>)
    return
  }
}

// -----

module attributes {gpu.container_module} {
  // CHECK: gpu.binary
  gpu.binary @kernel_module [#gpu.object<#rocdl.target, "blob">]

  func.func @foo(%buffer: memref<?xf32>) {
  // CHECK: [[C8:%.*]] = llvm.mlir.constant(8 : index) : i64
  // CHECK: [[C32:%.*]] = llvm.mlir.constant(32 : i32) : i32
  // CHECK: [[C256:%.*]] = llvm.mlir.constant(256 : i32) : i32
  // CHECK: [[C2:%.*]] = llvm.mlir.constant(2 : index) : i64
    %c8 = arith.constant 8 : index    
    %c32 = arith.constant 32 : i32
    %c256 = arith.constant 256 : i32
    %c2 = arith.constant 2 : index

  // CHECK: gpu.launch_func @kernel_module::@kernel
  // CHECK: clusters in ([[C2]], [[C2]], [[C2]])
  // CHECK: blocks in ([[C8]], [[C8]], [[C8]]) threads in ([[C8]], [[C8]], [[C8]]) : i64
  // CHECK: dynamic_shared_memory_size [[C256]]
  // CHECK: args([[C32]] : i32, %{{.*}} : !llvm.ptr, %{{.*}} : !llvm.ptr, %{{.*}} : i64, %{{.*}} : i64, %{{.*}} : i64)
    gpu.launch_func @kernel_module::@kernel
        clusters in (%c2, %c2, %c2)
        blocks in (%c8, %c8, %c8)
        threads in (%c8, %c8, %c8)
        dynamic_shared_memory_size %c256
        args(%c32 : i32, %buffer : memref<?xf32>)
    return
  }
}