File: per_thread_data.cpp

package info (click to toggle)
intel-compute-runtime 20.44.18297-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 34,780 kB
  • sloc: cpp: 379,729; lisp: 4,931; python: 299; sh: 196; makefile: 8
file content (52 lines) | stat: -rw-r--r-- 2,166 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
/*
 * Copyright (C) 2017-2020 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#include "opencl/source/helpers/per_thread_data.h"

#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/helpers/debug_helpers.h"

#include <array>

namespace NEO {

size_t PerThreadDataHelper::sendPerThreadData(
    LinearStream &indirectHeap,
    uint32_t simd,
    uint32_t grfSize,
    uint32_t numChannels,
    const size_t localWorkSizes[3],
    const std::array<uint8_t, 3> &workgroupWalkOrder,
    bool hasKernelOnlyImages) {
    auto offsetPerThreadData = indirectHeap.getUsed();
    if (numChannels) {
        auto localWorkSize = localWorkSizes[0] * localWorkSizes[1] * localWorkSizes[2];
        auto sizePerThreadDataTotal = getPerThreadDataSizeTotal(simd, grfSize, numChannels, localWorkSize);
        auto pDest = indirectHeap.getSpace(sizePerThreadDataTotal);

        // Generate local IDs
        DEBUG_BREAK_IF(numChannels != 3);
        generateLocalIDs(pDest, static_cast<uint16_t>(simd),
                         std::array<uint16_t, 3>{{static_cast<uint16_t>(localWorkSizes[0]),
                                                  static_cast<uint16_t>(localWorkSizes[1]),
                                                  static_cast<uint16_t>(localWorkSizes[2])}},
                         std::array<uint8_t, 3>{{workgroupWalkOrder[0], workgroupWalkOrder[1], workgroupWalkOrder[2]}},
                         hasKernelOnlyImages, grfSize);
    }
    return offsetPerThreadData;
}

uint32_t PerThreadDataHelper::getThreadPayloadSize(const iOpenCL::SPatchThreadPayload &threadPayload, uint32_t simd, uint32_t grfSize) {
    uint32_t multiplier = static_cast<uint32_t>(getGRFsPerThread(simd, grfSize));
    uint32_t threadPayloadSize = 0;
    threadPayloadSize = getNumLocalIdChannels(threadPayload) * multiplier * grfSize;
    threadPayloadSize += (threadPayload.HeaderPresent) ? grfSize : 0;
    threadPayloadSize += (threadPayload.LocalIDFlattenedPresent) ? (grfSize * multiplier) : 0;
    threadPayloadSize += (threadPayload.UnusedPerThreadConstantPresent) ? grfSize : 0;
    return threadPayloadSize;
}
} // namespace NEO