// RUN: mlir-opt %s --gpu-to-llvm | FileCheck %s
module attributes {gpu.container_module} {
// Sparse matrix-vector multiplication (SpMV) on a COO matrix: each gpu
// sparse op below must lower to the corresponding mgpu* runtime wrapper
// call, in exactly this order on a single stream.
// CHECK-LABEL: func @matvec
// CHECK: llvm.call @mgpuStreamCreate
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuCreateCoo
// CHECK: llvm.call @mgpuCreateDnVec
// CHECK: llvm.call @mgpuSpMVBufferSize
// CHECK: llvm.call @mgpuSpMV
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuDestroyDnVec
// CHECK: llvm.call @mgpuStreamSynchronize
// CHECK: llvm.call @mgpuStreamDestroy
func.func @matvec(%arg0: index) {
// Every op consumes the previous op's async token, so the lowering maps
// the whole chain onto one stream.
%token0 = gpu.wait async
%mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%spmat, %token4 = gpu.create_coo async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnvec, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0 : index into memref<?xf64>
%bufferSz, %token6 = gpu.spmv_buffer_size async [%token5] %spmat, %dnvec, %dnvec into f64
%token7 = gpu.spmv async [%token6] %spmat, %dnvec, %dnvec, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_tensor async [%token8] %dnvec
// Final blocking wait lowers to stream synchronize + destroy.
gpu.wait [%token9]
return
}
// Sparse matrix-matrix multiplication (SpMM) on a CSR matrix: same
// structure as @matvec but with a dense matrix handle and the SpMM
// wrapper calls.
// CHECK-LABEL: func @matmul
// CHECK: llvm.call @mgpuStreamCreate
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuCreateCsr
// CHECK: llvm.call @mgpuCreateDnMat
// CHECK: llvm.call @mgpuSpMMBufferSize
// CHECK: llvm.call @mgpuSpMM
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuDestroyDnMat
// CHECK: llvm.call @mgpuStreamSynchronize
// CHECK: llvm.call @mgpuStreamDestroy
func.func @matmul(%arg0: index) {
// All ops are chained through async tokens onto a single stream.
%token0 = gpu.wait async
%mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%spmat, %token4 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
// Two trailing dimensions make this a dense matrix rather than a vector.
%dnmat, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0, %arg0 : index, index into memref<?xf64>
%bufferSz, %token6 = gpu.spmm_buffer_size async [%token5] %spmat, %dnmat, %dnmat : index into f64
%token7 = gpu.spmm async [%token6] %spmat, %dnmat, %dnmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
gpu.wait [%token9]
return
}
// Sparse-times-sparse multiplication (SpGEMM) with three CSR matrices:
// exercises the two-phase work-estimation/compute protocol plus the
// descriptor create/copy/destroy wrapper calls.
// CHECK-LABEL: func @spgemm
// CHECK: llvm.call @mgpuStreamCreate
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuCreateCsr
// CHECK: llvm.call @mgpuCreateCsr
// CHECK: llvm.call @mgpuCreateCsr
// CHECK: llvm.call @mgpuSpGEMMCreateDescr
// CHECK: llvm.call @mgpuSpGEMMWorkEstimation
// CHECK: llvm.call @mgpuSpGEMMCompute
// CHECK: llvm.call @mgpuSpMatGetSize
// CHECK: llvm.call @mgpuSetCsrPointers
// CHECK: llvm.call @mgpuSpGEMMCopy
// CHECK: llvm.call @mgpuSpGEMMDestroyDescr
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuStreamSynchronize
// CHECK: llvm.call @mgpuStreamDestroy
func.func @spgemm(%arg0: index) {
%token0 = gpu.wait async
%mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf32>
// A, B, and C share the same buffers here; only the runtime-call
// sequence matters for this test, not the numerics.
%spmatA, %token3 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
%spmatB, %token4 = gpu.create_csr async [%token3] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
%spmatC, %token5 = gpu.create_csr async [%token4] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
%spgemmDesc, %token6 = gpu.spgemm_create_descr async [%token5]
%alloc = memref.alloc() : memref<0xi8> // nullptr
%c0 = arith.constant 0 : index
// Phase 1: query the buffer size (zero-sized buffer stands in for null).
%bufferSz1, %token7 = gpu.spgemm_work_estimation_or_compute async
[%token6]{WORK_ESTIMATION}
%spmatA, %spmatB, %spmatC,
%spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
// Phase 2: perform the actual computation.
%bufferSz2, %token8 = gpu.spgemm_work_estimation_or_compute async
[%token7]{COMPUTE}
%spmatA, %spmatB, %spmatC,
%spgemmDesc, %c0, %alloc: f32 into memref<0xi8>
%rows, %cols, %nnz, %token9 = gpu.spmat_get_size async [%token8] %spmatC
// Chain on %token9 so the size query completes before the CSR pointers
// of C are installed (the original reused %token8, leaving %token9 dead
// and breaking the dependency ordering).
%token10 = gpu.set_csr_pointers async [%token9] %spmatC, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf32>
%token11 = gpu.spgemm_copy async [%token10] %spmatA, %spmatB, %spmatC, %spgemmDesc: f32
%token12 = gpu.spgemm_destroy_descr async [%token11] %spgemmDesc
%token13 = gpu.destroy_sp_mat async [%token12] %spmatA
%token14 = gpu.destroy_sp_mat async [%token13] %spmatB
%token15 = gpu.destroy_sp_mat async [%token14] %spmatC
gpu.wait [%token15]
return
}
// Sampled dense-dense matrix multiplication (SDDMM): dense operands
// first, sparse CSR result last, lowering to the SDDMM wrapper calls.
// CHECK-LABEL: func @sddmm
// CHECK: llvm.call @mgpuStreamCreate
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuCreateCsr
// CHECK: llvm.call @mgpuCreateDnMat
// CHECK: llvm.call @mgpuSDDMMBufferSize
// CHECK: llvm.call @mgpuSDDMM
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuDestroyDnMat
// CHECK: llvm.call @mgpuStreamSynchronize
// CHECK: llvm.call @mgpuStreamDestroy
func.func @sddmm(%arg0: index) {
%token0 = gpu.wait async
%mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%spmat, %token4 = gpu.create_csr async [%token2] %arg0, %arg0, %arg0, %mem1, %mem1, %mem2 : memref<?xindex>, memref<?xindex>, memref<?xf64>
%dnmat, %token5 = gpu.create_dn_tensor async [%token4] %mem2, %arg0, %arg0 : index, index into memref<?xf64>
// Note the operand order: (dense, dense, sparse) — the sparse matrix is
// the output sample pattern, unlike SpMM.
%bufferSz, %token6 = gpu.sddmm_buffer_size async [%token5] %dnmat, %dnmat, %spmat into f64
%token7 = gpu.sddmm async [%token6] %dnmat, %dnmat, %spmat, %mem2 : memref<?xf64> into f64
%token8 = gpu.destroy_sp_mat async [%token7] %spmat
%token9 = gpu.destroy_dn_tensor async [%token8] %dnmat
gpu.wait [%token9]
return
}
// CSC and BSR creation: checks only that the two remaining sparse-matrix
// formats lower to their mgpuCreate* / mgpuDestroySpMat wrapper calls.
// CHECK-LABEL: func @csc_and_bsr
// CHECK: llvm.call @mgpuStreamCreate
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuMemAlloc
// CHECK: llvm.call @mgpuCreateCsc
// CHECK: llvm.call @mgpuCreateBsr
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuDestroySpMat
// CHECK: llvm.call @mgpuStreamSynchronize
// CHECK: llvm.call @mgpuStreamDestroy
func.func @csc_and_bsr(%arg0: index) {
%token0 = gpu.wait async
%mem1, %token1 = gpu.alloc async [%token0] (%arg0) : memref<?xindex>
%mem2, %token2 = gpu.alloc async [%token1] (%arg0) : memref<?xf64>
%csc, %token3 = gpu.create_csc async [%token2]
%arg0, %arg0, %arg0, %mem1, %mem1, %mem2
: memref<?xindex>, memref<?xindex>, memref<?xf64>
// BSR takes two extra index operands (block dimensions) compared to CSC.
%bsr, %token4 = gpu.create_bsr async [%token3]
%arg0, %arg0, %arg0, %arg0, %arg0, %mem1, %mem1, %mem2
: memref<?xindex>, memref<?xindex>, memref<?xf64>
%token5 = gpu.destroy_sp_mat async [%token4] %csc
%token6 = gpu.destroy_sp_mat async [%token5] %bsr
gpu.wait [%token6]
return
}
}