File: nvgpu.py

package info (click to toggle)
swiftlang 6.1.3-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,791,604 kB
  • sloc: cpp: 9,901,740; ansic: 2,201,431; asm: 1,091,827; python: 308,252; objc: 82,166; f90: 80,126; lisp: 38,358; pascal: 25,559; sh: 20,429; ml: 5,058; perl: 4,745; makefile: 4,484; awk: 3,535; javascript: 3,018; xml: 918; fortran: 664; cs: 573; ruby: 396
file content (43 lines) | stat: -rw-r--r-- 1,526 bytes parent folder | download | duplicates (9)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# RUN: %PYTHON %s | FileCheck %s
# This is just a smoke test that the dialect is functional.

from mlir.ir import *
from mlir.dialects import nvgpu, arith, memref


def constructAndPrintInModule(f):
    """Decorator that runs ``f`` inside a fresh MLIR module and prints the result.

    A banner with the function's name is printed first. Then, under a new
    ``Context`` and an unknown ``Location``, an empty module is created, ``f``
    is called with the insertion point set to the module body, and the module
    is printed to stdout.

    Returns ``f`` unchanged, so the decorated name still refers to the
    original function.
    """
    test_name = f.__name__
    print("\nTEST:", test_name)
    with Context():
        with Location.unknown():
            test_module = Module.create()
            # Whatever f() builds is inserted into the body of test_module.
            with InsertionPoint(test_module.body):
                f()
            print(test_module)
    return f


# CHECK-LABEL: testTypes
@constructAndPrintInModule
def testTypes():
    """Build an nvgpu tensor-map descriptor type and print it.

    The descriptor wraps a 128x64 f16 memref whose memory space is the parsed
    attribute "3". The printed type is matched by the FileCheck directive
    inside this function.
    """
    element_type = F16Type.get()
    memory_space = Attribute.parse("3")
    tensor_type = MemRefType.get((128, 64), element_type, memory_space=memory_space)
    # Positional arguments: tensor type, swizzle, L2 promotion, OOB fill, interleave.
    descriptor_args = (
        tensor_type,
        nvgpu.TensorMapSwizzleKind.SWIZZLE_128B,
        nvgpu.TensorMapL2PromoKind.L2PROMO_256B,
        nvgpu.TensorMapOOBKind.OOB_NAN,
        nvgpu.TensorMapInterleaveKind.INTERLEAVE_NONE,
    )
    # CHECK: !nvgpu.tensormap.descriptor<tensor = memref<128x64xf16, 3>, swizzle = swizzle_128b, l2promo = l2promo_256b, oob = nan, interleave = none>
    print(nvgpu.TensorMapDescriptorType.get(*descriptor_args))


# CHECK-LABEL: testSmoke
@constructAndPrintInModule
def testSmoke():
    """Create an index constant, a memref alloc, and an nvgpu.ldmatrix op.

    The ops are created in that order, and the ldmatrix op uses the same
    constant for both indices. The decorator prints the enclosing module,
    which is where the FileCheck directive below is matched.
    """
    f32 = F32Type.get()
    index_42 = arith.ConstantOp(value=42, result=IndexType.get())
    buffer_type = MemRefType.get((10, 10), f32, memory_space=Attribute.parse("3"))
    tile_type = VectorType.get((4, 1), f32)
    buffer = memref.AllocOp(buffer_type, [], [])
    # CHECK: %0 = nvgpu.ldmatrix %alloc[%c42, %c42] {numTiles = 4 : i32, transpose = false} : memref<10x10xf32, 3> -> vector<4x1xf32>
    nvgpu.LdMatrixOp(tile_type, buffer, [index_42] * 2, False, 4)