1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
|
// RUN: mlir-opt --convert-nvvm-to-llvm --split-input-file -verify-diagnostics %s
!mat64f32 = !llvm.struct<(f32, f32, f32, f32, f32, f32, f32)>
func.func @wgmma_f32_f16_f16(%descA : i64, %descB : i64) -> !mat64f32{
%result = llvm.mlir.undef : !mat64f32
// expected-error @+1 {{'nvvm.wgmma.mma_async' op results 64, however output struct has 7 elements}}
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
#nvvm.shape<m = 64, n = 128, k = 16>,
D [<f32>, <zero>],
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
: !mat64f32 -> !mat64f32
return %res : !mat64f32
}
// -----
func.func @wgmma_f32_satfinite(%descA : i64, %descB : i64) {
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
// expected-error @+1 {{`satfinite` can be only used with s32 accumulator, however the current accumulator is f32}}
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
#nvvm.shape<m = 64, n = 16, k = 16>,
D [<f32>, <zero>, <satfinite>],
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
: !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
return
}
// -----
func.func @wgmma_f32_m32(%descA : i64, %descB : i64) {
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
// expected-error @+1 {{shape 'm' must be 64}}
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
#nvvm.shape<m = 32, n = 16, k = 16>,
D [<f32>, <zero>],
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
: !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
return
}
// -----
func.func @wgmma_f32_m32(%descA : i64, %descB : i64) {
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, i32, f32, f32, f32, f32, f32)>
// expected-error @+1 {{op all elements in struct must be same type but there is 'i32'}}
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
#nvvm.shape<m = 64, n = 16, k = 16>,
D [<f32>, <zero>],
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
: !llvm.struct<(f32, f32, i32, f32, f32, f32, f32, f32)>
-> !llvm.struct<(f32, f32, i32, f32, f32, f32, f32, f32)>
return
}
// -----
func.func @wgmma_f32_m32(%descA : i64, %descB : i64) {
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
// expected-error @+1 {{op shape 'k' must be 16 for input type f16}}
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
#nvvm.shape<m = 64, n = 16, k = 3>,
D [<f32>, <zero>],
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
: !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
return
}
// -----
func.func @wgmma_transpose(%descA : i64, %descB : i64) {
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
// expected-error @+1 {{op given layouts layout_a = col and layout_b = col for input types tf32 and tf32 requires transpose. However, this is only supported for: f16 and bf16}}
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
#nvvm.shape<m = 64, n = 16, k = 8>,
D [<f32>, <zero>],
A [<tf32>, #nvvm.wgmma_scale_in<neg>, <col>],
B [<tf32>, #nvvm.wgmma_scale_in<neg>, <col>]
: !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
return
}
// -----
func.func @wgmma_transpose(%descA : i64, %descB : i64) {
%result = llvm.mlir.undef : !llvm.struct<(f16, f16, f16, f16)>
// expected-error @+1 {{'nvvm.wgmma.mma_async' op f16 += tf32 * tf32, it is not supported.}}
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
#nvvm.shape<m = 64, n = 16, k = 8>,
D [<f16>, <zero>],
A [<tf32>, #nvvm.wgmma_scale_in<neg>, <col>],
B [<tf32>, #nvvm.wgmma_scale_in<neg>, <col>]
:!llvm.struct<(f16, f16, f16, f16)>
-> !llvm.struct<(f16, f16, f16, f16)>
return
}
// -----
func.func @wgmma_f32_m32(%descA : i64, %descB : i64) {
%result = llvm.mlir.undef : !llvm.struct<(i32, i32, i32, i32)>
// expected-error @+1 {{input struct and result struct must be the same type}}
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
#nvvm.shape<m = 64, n = 8, k = 16>,
D [<f16>, <zero>],
A [<f16>, #nvvm.wgmma_scale_in<neg>, <col>],
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
: !llvm.struct<(i32, i32, i32, i32)>
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
return
}
// -----
func.func @wgmma_f32_m32(%descA : i64, %descB : i64) {
%result = llvm.mlir.undef : !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
// expected-error @+1 {{op f32 += bf16 * f16, it is not supported}}
%res = nvvm.wgmma.mma_async %descA, %descB, %result,
#nvvm.shape<m = 64, n = 8, k = 16>,
D [<f32>, <zero>],
A [<bf16>, #nvvm.wgmma_scale_in<neg>, <col>],
B [<f16>, #nvvm.wgmma_scale_in<neg>, <col>]
: !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
-> !llvm.struct<(f32, f32, f32, f32, f32, f32, f32, f32)>
return
}
// -----
func.func @set_max_register() {
// expected-error @+1 {{new register size must be in between 24 to 256}}
nvvm.setmaxregister decrease 8
func.return
}
// -----
func.func @set_max_register() {
// expected-error @+1 {{new register size must be multiple of 8}}
nvvm.setmaxregister decrease 51
func.return
}
// -----
func.func @fence_proxy() {
// expected-error @+1 {{op only async_shared fence can have space attribute}}
nvvm.fence.proxy { kind = #nvvm.proxy_kind<async>, space = #nvvm.shared_space<cluster>}
func.return
}
// -----
func.func @fence_proxy() {
// expected-error @+1 {{op async_shared fence requires space attribute}}
nvvm.fence.proxy { kind = #nvvm.proxy_kind<async.shared>}
func.return
}
|