File: experimental_command_buffer.inl

package info (click to toggle)
intel-compute-runtime 22.43.24595.41-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 57,740 kB
  • sloc: cpp: 631,142; lisp: 3,515; sh: 470; makefile: 76; python: 21
file content (110 lines) | stat: -rw-r--r-- 4,357 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
/*
 * Copyright (C) 2018-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/experimental_command_buffer.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/memory_manager/graphics_allocation.h"

namespace NEO {

template <typename GfxFamily>
void ExperimentalCommandBuffer::injectBufferStart(LinearStream &parentStream, size_t cmdBufferOffset) {
    using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
    auto pCmd = parentStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
    auto commandStreamReceiverHw = static_cast<CommandStreamReceiverHw<GfxFamily> *>(commandStreamReceiver);
    commandStreamReceiverHw->addBatchBufferStart(pCmd, currentStream->getGraphicsAllocation()->getGpuAddress() + cmdBufferOffset, true);
}

template <typename GfxFamily>
size_t ExperimentalCommandBuffer::getRequiredInjectionSize() noexcept {
    using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
    return sizeof(MI_BATCH_BUFFER_START);
}

template <typename GfxFamily>
size_t ExperimentalCommandBuffer::programExperimentalCommandBuffer() {
    using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;

    getCS(getTotalExperimentalSize<GfxFamily>());

    size_t returnOffset = currentStream->getUsed();

    //begin timestamp
    addTimeStampPipeControl<GfxFamily>();

    addExperimentalCommands<GfxFamily>();

    //end timestamp
    addTimeStampPipeControl<GfxFamily>();

    //end
    auto pCmd = currentStream->getSpaceForCmd<MI_BATCH_BUFFER_END>();
    *pCmd = GfxFamily::cmdInitBatchBufferEnd;

    return returnOffset;
}

template <typename GfxFamily>
size_t ExperimentalCommandBuffer::getTotalExperimentalSize() noexcept {
    using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END;

    size_t size = sizeof(MI_BATCH_BUFFER_END) + getTimeStampPipeControlSize<GfxFamily>() + getExperimentalCommandsSize<GfxFamily>();
    return size;
}

template <typename GfxFamily>
size_t ExperimentalCommandBuffer::getTimeStampPipeControlSize() noexcept {
    // Two P_C for timestamps
    return 2 * MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(
                   *commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo(), false);
}

template <typename GfxFamily>
void ExperimentalCommandBuffer::addTimeStampPipeControl() {
    uint64_t timeStampAddress = timestamps->getGpuAddress() + timestampsOffset;
    PipeControlArgs args;
    MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
        *currentStream,
        PostSyncMode::Timestamp,
        timeStampAddress,
        0llu,
        *commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo(),
        args);

    //moving to next chunk
    timestampsOffset += sizeof(uint64_t);

    DEBUG_BREAK_IF(timestamps->getUnderlyingBufferSize() < timestampsOffset);
}

template <typename GfxFamily>
void ExperimentalCommandBuffer::addExperimentalCommands() {
    using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;

    uint32_t *semaphoreData = reinterpret_cast<uint32_t *>(ptrOffset(experimentalAllocation->getUnderlyingBuffer(), experimentalAllocationOffset));
    *semaphoreData = 1;
    uint64_t gpuAddr = experimentalAllocation->getGpuAddress() + experimentalAllocationOffset;

    auto semaphoreCmdSpace = currentStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
    auto semaphoreCmd = GfxFamily::cmdInitMiSemaphoreWait;
    semaphoreCmd.setCompareOperation(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD);
    semaphoreCmd.setSemaphoreDataDword(*semaphoreData);
    semaphoreCmd.setSemaphoreGraphicsAddress(gpuAddr);
    *semaphoreCmdSpace = semaphoreCmd;
}

template <typename GfxFamily>
size_t ExperimentalCommandBuffer::getExperimentalCommandsSize() noexcept {
    using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
    return sizeof(MI_SEMAPHORE_WAIT);
}

} // namespace NEO