File: local_id_gen.h

package info (click to toggle)
intel-compute-runtime 25.35.35096.9-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 79,324 kB
  • sloc: cpp: 926,243; lisp: 3,433; sh: 715; makefile: 162; python: 21
file content (74 lines) | stat: -rw-r--r-- 2,916 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/*
 * Copyright (C) 2018-2024 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/helpers/simd_helper.h"

#include <array>
#include <cstddef>
#include <cstdint>

namespace NEO {
struct RootDeviceEnvironment;
inline uint32_t getNumGrfsPerLocalIdCoordinate(uint32_t simd, uint32_t grfSize) {
    return (simd == 32 && grfSize == 32) ? 2 : 1;
}

inline uint32_t getThreadsPerWG(uint32_t simd, uint32_t lws) {
    if (isSimd1(simd)) {
        return lws;
    }
    auto result = lws + simd - 1;

    // Original logic:
    // result = (lws + simd - 1) / simd;
    // This sequence is meant to avoid an CPU DIV instruction.
    result >>= simd == 32
                   ? 5
               : simd == 16
                   ? 4
                   : 3; // for SIMD 8

    return result;
}

inline uint32_t getPerThreadSizeLocalIDs(uint32_t simd, uint32_t grfSize, uint32_t numChannels = 3) {
    if (isSimd1(simd)) {
        return grfSize;
    }
    auto numGRFSPerLocalIdCoord = getNumGrfsPerLocalIdCoordinate(simd, grfSize);
    uint32_t returnSize = numGRFSPerLocalIdCoord * grfSize * numChannels;
    return returnSize;
}

struct LocalIDHelper {
    static void (*generateSimd8)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);
    static void (*generateSimd16)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);
    static void (*generateSimd32)(void *buffer, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup, const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);

    static LocalIDHelper initializer;

  private:
    LocalIDHelper();
};

extern const uint16_t initialLocalID[];

template <typename Vec, int simd>
void generateLocalIDsSimd(void *b, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t threadsPerWorkGroup,
                          const std::array<uint8_t, 3> &dimensionsOrder, bool chooseMaxRowSize);

void generateLocalIDs(void *buffer, uint16_t simd, const std::array<uint16_t, 3> &localWorkgroupSize,
                      const std::array<uint8_t, 3> &dimensionsOrder, bool isImageOnlyKernel, uint32_t grfSize, uint32_t grfCount, const RootDeviceEnvironment &rootDeviceEnvironment);
void generateLocalIDsWithLayoutForImages(void *b, const std::array<uint16_t, 3> &localWorkgroupSize, uint16_t simd);

bool isCompatibleWithLayoutForImages(const std::array<uint16_t, 3> &localWorkgroupSize, const std::array<uint8_t, 3> &dimensionsOrder, uint16_t simd);

void generateLocalIDsForSimdOne(void *b, const std::array<uint16_t, 3> &localWorkgroupSize,
                                const std::array<uint8_t, 3> &dimensionsOrder, uint32_t grfSize);
} // namespace NEO