File: implicit_scaling.h

package info (click to toggle)
intel-compute-runtime 25.35.35096.9-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 79,324 kB
  • sloc: cpp: 926,243; lisp: 3,433; sh: 715; makefile: 162; python: 21
file content (118 lines) | stat: -rw-r--r-- 4,341 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/*
 * Copyright (C) 2021-2025 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
 */

#pragma once

#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/device_bitfield.h"
#include "shared/source/helpers/vec.h"

// Forward declaration only: this header names WalkerPartitionArgs solely as a
// const reference parameter, so the defining header does not need to be
// included here.
namespace WalkerPartition {
struct WalkerPartitionArgs;
}

namespace NEO {
// Forward declarations of NEO types. Within this header Device, LinearStream,
// PipeControlArgs and RootDeviceEnvironment are used only through pointers or
// references, so their full definitions are not required here.
// NOTE(review): HardwareInfo is not referenced by any other declaration in
// this header - confirm whether includers rely on it before removing.
struct HardwareInfo;
class Device;
class LinearStream;
struct PipeControlArgs;
struct RootDeviceEnvironment;

namespace ImplicitScaling {
// Declaration only (extern); the definition and its initial value live in
// another translation unit.
// NOTE(review): presumably a global switch telling whether the API layer
// supports implicit scaling - confirm at the definition.
extern bool apiSupport;
} // namespace ImplicitScaling

// Stateless collection of static boolean queries related to implicit scaling.
// Only declarations are provided here; the definitions (and therefore the
// exact decision logic) are located elsewhere.
struct ImplicitScalingHelper {
    // Takes the set of devices and a caller-supplied precondition flag.
    // NOTE(review): how `preCondition` is combined with the device bitfield is
    // not visible from this header - confirm in the definition.
    static bool isImplicitScalingEnabled(const DeviceBitfield &devices, bool preCondition);
    static bool isSemaphoreProgrammingRequired();
    // NOTE(review): the parameter name suggests the argument is the value used
    // when nothing overrides it - confirm in the definition.
    static bool isCrossTileAtomicRequired(bool defaultCrossTileRequirement);
    static bool isSynchronizeBeforeExecutionRequired();
    static bool isAtomicsUsedForSelfCleanup();
    // Decision is based on the walker partition arguments and the API-level
    // self-cleanup request passed by the caller.
    static bool isSelfCleanupRequired(const WalkerPartition::WalkerPartitionArgs &args, bool apiSelfCleanup);
    static bool isWparidRegisterInitializationRequired();
    // NOTE(review): as with isCrossTileAtomicRequired, the argument looks like
    // a default that may be overridden - confirm in the definition.
    static bool isPipeControlStallRequired(bool defaultEmitPipeControl);
    static bool pipeControlBeforeCleanupAtomicSyncRequired();
};

// Argument bundle taken by non-const reference by
// ImplicitScalingDispatch::dispatchCommands. Every member carries a default
// initializer (0 / nullptr / RequiredPartitionDim::none / false), so a
// default-constructed instance has no indeterminate fields.
struct ImplicitScalingDispatchCommandArgs {
    // NOTE(review): presumably the GPU virtual address of the work partition
    // allocation - confirm against the caller that fills it in.
    uint64_t workPartitionAllocationGpuVa = 0;
    // Raw pointer to a const Device; nullptr by default.
    // NOTE(review): presumably non-owning - confirm lifetime with the caller.
    const NEO::Device *device = nullptr;
    // Pointer to a void* slot; nullptr by default.
    // NOTE(review): the name suggests an optional output through which the
    // address of the programmed walker command is returned - confirm in
    // dispatchCommands.
    void **outWalkerPtr = nullptr;

    RequiredPartitionDim requiredPartitionDim = RequiredPartitionDim::none;
    uint32_t partitionCount = 0;
    uint32_t workgroupSize = 0;
    uint32_t threadGroupCount = 0;
    uint32_t maxWgCountPerTile = 0;

    // Boolean switches; all are off by default.
    bool useSecondaryBatchBuffer = false;
    bool apiSelfCleanup = false;
    bool dcFlush = false;
    bool forceExecutionOnSingleTile = false;
    bool blockDispatchToCommandBuffer = false;
    bool isRequiredDispatchWorkGroupOrder = false;
};

// Static interface, parameterized by graphics family, for sizing and emitting
// implicit-scaling related commands into a LinearStream. Only declarations
// are present in this header; the definitions are provided elsewhere.
template <typename GfxFamily>
struct ImplicitScalingDispatch {
    // Walker command type that the family designates as its default.
    using DefaultWalkerType = typename GfxFamily::DefaultWalkerType;

    // Size query for a walker dispatch; templated on the walker command type.
    // NOTE(review): presumably returns the number of bytes dispatchCommands
    // would consume from the command stream for the same inputs - confirm in
    // the definition.
    template <typename WalkerType>
    static size_t getSize(bool apiSelfCleanup,
                          bool preferStaticPartitioning,
                          const DeviceBitfield &devices,
                          const Vec3<size_t> &groupStart,
                          const Vec3<size_t> &groupCount);

    // Walker dispatch entry point. `walkerCmd` and `dispatchCommandArgs` are
    // taken by non-const reference, so the callee is able to modify both.
    template <typename WalkerType>
    static void dispatchCommands(LinearStream &commandStream,
                                 WalkerType &walkerCmd,
                                 const DeviceBitfield &devices,
                                 ImplicitScalingDispatchCommandArgs &dispatchCommandArgs);

    // Returns a mutable bool reference, so callers can read and write through it.
    // NOTE(review): presumably refers to the private pipeControlStallRequired
    // member below - confirm in the definition.
    static bool &getPipeControlStallRequired();

    // Barrier: size query followed by the matching dispatch routine.
    static size_t getBarrierSize(const RootDeviceEnvironment &rootDeviceEnvironment,
                                 bool apiSelfCleanup,
                                 bool usePostSync);
    static void dispatchBarrierCommands(LinearStream &commandStream,
                                        const DeviceBitfield &devices,
                                        PipeControlArgs &flushArgs,
                                        const RootDeviceEnvironment &rootDeviceEnvironment,
                                        uint64_t gpuAddress,
                                        uint64_t immediateData,
                                        bool apiSelfCleanup,
                                        bool useSecondaryBatchBuffer);

    // Register configuration: size query followed by the matching dispatch
    // routine. `isBcs` is supplied by the caller.
    // NOTE(review): presumably selects blitter (BCS) engine programming -
    // confirm in the definition.
    static size_t getRegisterConfigurationSize();
    static void dispatchRegisterConfiguration(LinearStream &commandStream,
                                              uint64_t workPartitionSurfaceAddress,
                                              uint32_t addressOffset,
                                              bool isBcs);

    // Offset register: size query followed by the matching dispatch routine.
    static size_t getOffsetRegisterSize();
    static void dispatchOffsetRegister(LinearStream &commandStream,
                                       uint32_t addressOffset, bool isBcs);

    // NOTE(review): units and meaning of these post-sync offsets are not
    // visible from this header - confirm in the per-family definitions.
    static uint32_t getImmediateWritePostSyncOffset();
    static uint32_t getTimeStampPostSyncOffset();

    static bool platformSupportsImplicitScaling(const RootDeviceEnvironment &rootDeviceEnvironment);

  private:
    // Declaration of a non-inline static data member: one instance per
    // GfxFamily instantiation, which needs an out-of-class definition elsewhere.
    static bool pipeControlStallRequired;
};

// Compile-time constants exposed through an unnamed enum. The primary
// template does not use GfxFamily.
// NOTE(review): presumably templated so that individual families can
// specialize the values, and the values look like MMIO register offsets
// (WPARID and address-offset registers for CCS) - confirm against the
// hardware documentation and any specializations.
template <typename GfxFamily>
struct PartitionRegisters {
    enum {
        wparidCCSOffset = 0x221C,
        addressOffsetCCSOffset = 0x23B4
    };
};

} // namespace NEO