File: command_encoder_heapless_addressing.inl

package info (click to toggle)
intel-compute-runtime 26.05.37020.3-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 83,596 kB
  • sloc: cpp: 976,037; lisp: 2,096; sh: 704; makefile: 162
file content (83 lines) | stat: -rw-r--r-- 4,101 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
/*
 * Copyright (C) 2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_container/command_encoder.h"

#include "implicit_args.h"

namespace NEO {

// Patches GPU addresses into the walker inline data for heapless instantiations.
// Nothing is written when heaplessModeEnabled is false or when args.makeCommandView is set.
//   inlineDataPtr    - start of the inline data buffer that receives the patched values
//   args             - dispatch arguments; supplies the kernel descriptor and the patching flags
//   container        - provides the indirect object heap
//   offsetThreadData - added to the indirect object heap GPU base to form the indirect data address
//   scratchPtr       - value written at the scratch pointer offset, only when
//                      args.immediateScratchAddressPatching is set
template <typename Family>
template <bool heaplessModeEnabled>
void EncodeDispatchKernel<Family>::programInlineDataHeapless(uint8_t *inlineDataPtr, EncodeDispatchKernelArgs &args, CommandContainer &container, uint64_t offsetThreadData, uint64_t scratchPtr) {
    if constexpr (!heaplessModeEnabled) {
        return;
    } else {
        if (args.makeCommandView) {
            return;
        }

        const auto &descriptor = args.dispatchInterface->getKernelDescriptor();
        const auto &implicitArgsMapping = descriptor.payloadMappings.implicitArgs;
        auto indirectObjectHeap = container.getIndirectHeap(HeapType::indirectObject);
        auto threadDataGpuAddress = indirectObjectHeap->getHeapGpuBase() + offsetThreadData;
        uint32_t inlineSize = DefaultWalkerType::getInlineDataSize();

        // Writes `value` at the offset described by `pointerArg`, provided the argument is
        // defined and has a valid offset. The destination size handed to memcpy_s is the
        // number of inline data bytes left past that offset, clamped to zero.
        auto writePointer = [inlineDataPtr, inlineSize](const auto &pointerArg, const auto &value) {
            if (!isDefined(pointerArg.pointerSize) || !isValidOffset(pointerArg.offset)) {
                return;
            }
            uint32_t bytesAvailable = std::max(0, static_cast<int32_t>(inlineSize - pointerArg.offset));
            memcpy_s(inlineDataPtr + pointerArg.offset, bytesAvailable, &value, pointerArg.pointerSize);
        };

        writePointer(implicitArgsMapping.indirectDataPointerAddress, threadDataGpuAddress);

        if (args.immediateScratchAddressPatching) {
            writePointer(implicitArgsMapping.scratchPointerAddress, scratchPtr);
        }
    }
}

// Produces the scratch address used for immediate patching.
// Returns 0 for non-heapless instantiations and when args.immediateScratchAddressPatching
// is not set. Otherwise returns whatever setScratchAddress leaves in the local address after
// being given the kernel's two per-thread scratch sizes, the selected state heap and the
// immediate command list CSR.
template <typename Family>
template <bool isHeapless>
uint64_t EncodeDispatchKernel<Family>::getScratchAddressForImmediatePatching(CommandContainer &container, EncodeDispatchKernelArgs &args) {
    uint64_t patchedAddress = 0u;

    if constexpr (isHeapless) {
        if (!args.immediateScratchAddressPatching) {
            return patchedAddress;
        }

        const auto &descriptor = args.dispatchInterface->getKernelDescriptor();
        auto slot0ScratchSize = descriptor.kernelAttributes.perThreadScratchSize[0];
        auto slot1ScratchSize = descriptor.kernelAttributes.perThreadScratchSize[1];
        auto immediateCsr = container.getImmediateCmdListCsr();

        // Heap selection order: the CSR's global stateless heap when its allocation exists,
        // then the heap passed in args, and finally the container's surface state heap.
        NEO::IndirectHeap *stateHeap = nullptr;
        if (immediateCsr->getGlobalStatelessHeapAllocation() == nullptr) {
            stateHeap = args.surfaceStateHeap;
            if (!stateHeap) {
                stateHeap = container.getIndirectHeap(HeapType::surfaceState);
            }
        } else {
            stateHeap = immediateCsr->getGlobalStatelessHeap();
        }

        EncodeDispatchKernel<Family>::template setScratchAddress<isHeapless>(patchedAddress, slot0ScratchSize, slot1ScratchSize, stateHeap, *immediateCsr);
    }

    return patchedAddress;
}

// Stores scratchAddress in implicitArgs through setScratchBufferPtr.
// The store happens only for heapless instantiations and only when
// scratchPtrPatchingRequired is true; in every other case implicitArgs is left untouched.
template <typename Family>
template <bool isHeapless>
void EncodeDispatchKernel<Family>::patchScratchAddressInImplicitArgs(ImplicitArgs &implicitArgs, uint64_t scratchAddress, bool scratchPtrPatchingRequired) {
    if constexpr (!isHeapless) {
        return;
    } else {
        if (!scratchPtrPatchingRequired) {
            return;
        }
        implicitArgs.setScratchBufferPtr(scratchAddress);
    }
}

// Copies the bytes of borderColorState into samplerState, starting at byte offset 16 of the
// sampler state.
//   samplerState     - sampler state that receives the border color bytes
//   borderColorState - border color state whose bytes are copied
template <typename Family>
void EncodeStates<Family>::adjustSamplerStateBorderColor(SAMPLER_STATE &samplerState, const SAMPLER_BORDER_COLOR_STATE &borderColorState) {
    constexpr auto borderColorOffsetInSamplerState = 16u;

    // Number of sampler state bytes located at and after the border color offset, or zero when
    // the structure ends before that offset. This is the real destination capacity; passing the
    // source size instead would make the memcpy_s size check always succeed.
    constexpr auto borderColorCapacityInSamplerState = sizeof(SAMPLER_STATE) > borderColorOffsetInSamplerState
                                                           ? sizeof(SAMPLER_STATE) - borderColorOffsetInSamplerState
                                                           : 0u;

    void *borderColorStateInSamplerState = reinterpret_cast<void *>(reinterpret_cast<uint8_t *>(&samplerState) + borderColorOffsetInSamplerState);
    // NOTE(review): relies on memcpy_s refusing the copy when the destination size is smaller
    // than the byte count, so an oversized border color state cannot be written past the end of
    // samplerState - confirm against the project's memcpy_s helper.
    memcpy_s(borderColorStateInSamplerState, borderColorCapacityInSamplerState, &borderColorState, sizeof(SAMPLER_BORDER_COLOR_STATE));
}

} // namespace NEO