File: RayTracingInterface.cpp

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (303 lines) | stat: -rw-r--r-- 11,594 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
/*========================== begin_copyright_notice ============================

Copyright (C) 2018-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

//===----------------------------------------------------------------------===//
///
/// The main pipeline of raytracing passes.  This will be run by all APIs
/// (currently DX and Vulkan).  The idea is to not put anything too API
/// specific here if possible.
///
//===----------------------------------------------------------------------===//

#include "IGC/common/StringMacros.hpp"
#include "AdaptorCommon/RayTracing/RayTracingInterface.h"
#include "AdaptorCommon/RayTracing/RayTracingPasses.hpp"
#include "AdaptorCommon/RayTracing/RTBuilder.h"
#include "AdaptorCommon/RayTracing/RayTracingAddressSpaceAliasAnalysis.h"
#include "AdaptorCommon/AddImplicitArgs.hpp"
#include "AdaptorCommon/ProcessFuncAttributes.h"
#include "Compiler/CISACodeGen/CodeSinking.hpp"
#include "Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryUsageAnalysis.hpp"
#include "Compiler/Optimizer/OpenCLPasses/BreakConstantExpr/BreakConstantExpr.hpp"
#include "Compiler/Optimizer/OpenCLPasses/OpenCLPrintf/OpenCLPrintfAnalysis.hpp"
#include "Compiler/Optimizer/OpenCLPasses/OpenCLPrintf/OpenCLPrintfResolution.hpp"
#include "Compiler/Optimizer/OpenCLPasses/Atomics/ResolveOCLAtomics.hpp"
#include "IGC/common/LLVMUtils.h"
#include "common/LLVMWarningsPush.hpp"
#include <llvm/CodeGen/Passes.h>
#include <llvm/IR/Verifier.h>
#include <llvm/Transforms/IPO.h>
#include <llvm/Transforms/IPO/AlwaysInliner.h>
#include <llvm/Transforms/Scalar.h>
#include <llvm/Analysis/AliasAnalysis.h>
#include "common/LLVMWarningsPop.hpp"

using namespace llvm;
using namespace IGC;

// Reserves the indirect address spaces and surface state offsets used for
// stateful accesses to the raytracing memory regions and records them in the
// module metadata (rtInfo).  This has to happen before any raytracing pass
// runs so that the slots are already allocated when those passes need them.
static void setupRegionBTIs(CodeGenContext* pContext)
{
    // Nothing to set up unless the driver supports stateful raytracing
    // accesses.
    const bool StatefulSupported =
        pContext->m_DriverInfo.supportsRaytracingStatefulAccesses();
    if (!StatefulSupported)
        return;

    // Going bindless through the surface state heap relies on LSC messages.
    // Query LSC support with the SIMD size when it is known and with
    // SIMDMode::UNKNOWN otherwise.
    const auto KnownSIMD = pContext->knownSIMDSize();
    const SIMDMode Mode = KnownSIMD ? *KnownSIMD : SIMDMode::UNKNOWN;
    if (!pContext->platform.LSCEnabled(Mode))
        return;

    // Encodes a new address space on every call, each one consuming a fresh
    // unique indirect index.  The buffer type is UAV when the
    // DisableRTBindlessAccess flag is set and SSH_BINDLESS otherwise.
    auto makeAddrspace = [&]()
    {
        BufferType Kind = IGC::SSH_BINDLESS;
        if (IGC_IS_FLAG_ENABLED(DisableRTBindlessAccess))
            Kind = IGC::UAV;

        // The UndefValue itself carries no meaning; it is only there so the
        // address space gets encoded as indirect.
        return EncodeAS4GFXResource(
            *UndefValue::get(Type::getInt32Ty(*pContext->getLLVMContext())),
            Kind,
            pContext->getUniqueIndirectIdx());
    };

    auto& Regions = pContext->getModuleMetaData()->rtInfo;

    // Burn one index up front in case that index is already in use.
    pContext->getUniqueIndirectIdx();

    // Surface state offsets are handed out sequentially, starting at zero,
    // in the order in which the enabled regions are visited below.
    uint32_t NextSlot = 0;

    if (pContext->type != ShaderType::RAYTRACING_SHADER)
    {
        // Shaders other than raytracing shaders only get the sync RTStack
        // region, controlled by its own flag.
        if (IGC_IS_FLAG_DISABLED(DisableStatefulRTSyncStackAccess4nonRTShader))
        {
            Regions.RTSyncStackAddrspace = makeAddrspace();
            Regions.RTSyncStackSurfaceStateOffset = NextSlot++;
        }
        return;
    }

    // Raytracing shaders: every region can be disabled individually.
    if (IGC_IS_FLAG_DISABLED(DisableStatefulRTStackAccess))
    {
        Regions.RTAsyncStackAddrspace = makeAddrspace();
        Regions.RTAsyncStackSurfaceStateOffset = NextSlot++;
    }

    if (IGC_IS_FLAG_DISABLED(DisableStatefulSWHotZoneAccess))
    {
        Regions.SWHotZoneAddrspace = makeAddrspace();
        Regions.SWHotZoneSurfaceStateOffset = NextSlot++;
    }

    if (IGC_IS_FLAG_DISABLED(DisableStatefulSWStackAccess))
    {
        Regions.SWStackAddrspace = makeAddrspace();
        Regions.SWStackSurfaceStateOffset = NextSlot++;
    }

    if (IGC_IS_FLAG_DISABLED(DisableStatefulRTSyncStackAccess4RTShader))
    {
        Regions.RTSyncStackAddrspace = makeAddrspace();
        Regions.RTSyncStackSurfaceStateOffset = NextSlot++;
    }
}

namespace IGC {

// Builds and runs the raytracing lowering pipeline on the module owned by
// 'pContext', then dumps the resulting IR ("AfterRTLowering").
//
// The passes are registered in a fixed order; several of them are only added
// depending on debug flags, driver capabilities, or whether the context
// reports synchronous DispatchRays.  After the pipeline has run, the module
// is checked with the LLVM verifier inside an IGC_ASSERT.
void RayTracingLowering(RayDispatchShaderContext* pContext)
{
    // Allocate the address spaces / surface state offsets for the raytracing
    // memory regions before any raytracing pass gets to run.
    setupRegionBTIs(pContext);

    IGCPassManager mpm(pContext, "RayTracingLowering");

    // Both timers are started here and stopped together right after the
    // module verification below, so they cover the same span.
    // NOTE(review): confirm that TIME_RT_SETUP and TIME_RT_PASSES are meant
    // to measure the same region.
    COMPILER_TIME_START(pContext, TIME_RT_SETUP);
    COMPILER_TIME_START(pContext, TIME_RT_PASSES);

    // If we call RayTracingLowering() without calling a Unify* layer first
    // which can happen if we've passed in an empty module to compile the
    // callStackHandler, this exposes a bug in TailCallElimination as it has
    // not listed all of its dependencies. The fact that we typically call
    // instcombine in UnifyIRDXIL() hides this issue.
    if (pContext->getModule()->empty())
        mpm.add(createAAResultsWrapperPass());

    // Wrapper passes that carry the codegen context and the metadata into
    // the pass manager.
    mpm.add(new CodeGenContextWrapper(pContext));
    mpm.add(new MetaDataUtilsWrapper(pContext->getMetaDataUtils(), pContext->getModuleMetaData()));


    // DCE away shaders that are not to be exported.
    mpm.add(createGlobalDCEPass());
    mpm.add(createTraceRayInlinePrepPass());
    // Optional latency scheduling for TraceRayInline, followed by a CFG
    // cleanup; only added when EnableRQHideLatency is set.
    if (IGC_IS_FLAG_ENABLED(EnableRQHideLatency)) {
        mpm.add(createTraceRayInlineLatencySchedulerPass());
        mpm.add(createCFGSimplificationPass());
    }

    // Decompose TraceRayInline related intrinsics first before continuation
    // splitting.  This may allow us to do less spill/fills to the RTStack.
    // Will investigate further as we have more workloads.
    mpm.add(CreateTraceRayInlineLoweringPass());

    // Eliminate any obvious dead stores from rayquery
    mpm.add(createDeadStoreEliminationPass());

    // convert to global pointers first since all downstream passes will be
    // doing operations to the RTStack which will involve A64 stateless
    // operations.  If we enable stateful SWStacks, then this will convert
    // to the address space associated with the SWStack.
    // This pass is not needed when doing sync DispatchRays since there's no SWStack
    if (!pContext->doSyncDispatchRays())
        mpm.add(createPrivateToGlobalPointerPass());

    // Inject BTD Stack ID release calls before every return in raygen shaders.
    mpm.add(CreateStackIDRetirement());
    if (IGC_IS_FLAG_DISABLED(DisableEarlyRemat))
    {
        // greedily remat obviously profitable values to avoid spilling.
        // Some will be CSE'd back together later on.
        mpm.add(createEarlyRematPass());
    }
    // Emit GlobalBufferPointer and LocalBufferPointer intrinsics.
    mpm.add(CreateBindlessKernelArgLoweringPass());
    // We run this before splitting but, currently, any-hit and intersection
    // shaders will not have continuations.  It could be moved lower as long
    // as it runs before intrinsic lowering.
    mpm.add(createLowerIntersectionAnyHitPass());
    // Dead payload store elimination, unless turned off via DisableDPSE.
    if (IGC_IS_FLAG_DISABLED(DisableDPSE))
    {
        mpm.add(createDeadPayloadStoreEliminationPass());
    }
    if (IGC_IS_FLAG_DISABLED(DisablePreSplitOpts))
    {
        // Sink values closer to uses.  In some cases, this will sink across
        // TraceRay() calls which will eliminate some spills.
        mpm.add(new CodeSinking(true));
    }
    // This must be run before SplitAsync for correctness.
    mpm.add(createSplitPreparePass());
    if (IGC_IS_FLAG_DISABLED(DisablePreSplitOpts))
    {
        // coalesce duplicate values to prevent spilling the same value
        // multiple times.
        mpm.add(createRayInfoCSEPass());
    }

    // After this pass, all shaders with TraceRay() or CallShader() calls will
    // be split into continuations at those call sites.
    // It is skipped when doing sync DispatchRays.
    if (!pContext->doSyncDispatchRays())
        mpm.add(createSplitAsyncPass());

    if (IGC_IS_FLAG_DISABLED(DisablePromoteToScratch) &&
        pContext->m_DriverInfo.supportsRTScratchSpace())
    {
        // Prior to intrinsic lowering, allocas that we can prove don't
        // escape the shader can be allocated in scratch space rather than
        // burning space on the RTStack.
        mpm.add(createPromoteToScratchPass());
    }
    // Last cleanup of duplicated nomem intrinsics prior to intrinsic lowering.
    mpm.add(createEarlyCSEPass());
    // Late remat is only added when neither DisableLateRemat nor
    // DisableCompactifySpills is set.
    if (IGC_IS_FLAG_DISABLED(DisableLateRemat) &&
        IGC_IS_FLAG_DISABLED(DisableCompactifySpills))
    {
        // Do more complex cases of rematerialization and rearrangement of
        // spills/fills that were not handled earlier.
        mpm.add(createLateRematPass());
    }

    // Only added for sync DispatchRays, i.e. in the configuration where the
    // SplitAsync pass above was skipped.
    if (pContext->doSyncDispatchRays())
        mpm.add(createSyncHandlingPass());

    // Lower intrinsics to RTStack operations.
    mpm.add(createRayTracingIntrinsicLoweringPass());
    mpm.add(createCFGSimplificationPass());
    {
        // When we arrive here, the continuations will all be marked as
        // 'alwaysinline' but __mergeContinuations will not be.
        // __mergeContinuations and the continuations form a mutually recursive
        // call graph.
        mpm.add(createAlwaysInlinerLegacyPass());
        // Now, we only have calls to __mergeContinuations tail recursive to
        // itself.  This will leave us with just branches back to the top of
        // the function.
        mpm.add(createTailCallEliminationPass());
    }
    // inline __mergeContinuations.
    mpm.add(createInlineMergeCallsPass());
    if (pContext->tryPayloadSinking())
    {
        // attempt to sink payload writes into inlined continuations in order
        // to eliminate stores/loads to/from the payload in some cases.
        mpm.add(createPayloadSinkingPass());
    }
    // Do finalization (right now, alignment adjust and intrinsic moving).
    mpm.add(createRayTracingFinalizePass());

    // if BTD continuations are enabled, need to run ProcessFuncAttribute pass
    // to set IndirectCall attribute
    mpm.add(createProcessFuncAttributesPass());
    mpm.add(new PrivateMemoryUsageAnalysis());
    mpm.add(createGlobalDCEPass());
    mpm.add(new BreakConstantExpr());

    // printf needs both driver support and the EnableRTPrintf flag.  The
    // printf analysis is registered before AddImplicitArgs and the
    // resolution passes after it.
    // NOTE(review): presumably the analysis requests implicit arguments that
    // AddImplicitArgs then materializes -- confirm.
    bool supportPrintf = (pContext->m_DriverInfo.SupportsRTPrintf() && IGC_IS_FLAG_ENABLED(EnableRTPrintf));
    if (supportPrintf)
    {
        mpm.add(new OpenCLPrintfAnalysis());
    }
    mpm.add(new AddImplicitArgs());
    if (supportPrintf)
    {
        mpm.add(new OpenCLPrintfResolution());
        mpm.add(new ResolveOCLAtomics());
        mpm.add(createRayTracingPrintfPostProcessPass());
    }

    // Execute everything registered above on the context's module.
    mpm.run(*pContext->getModule());

    // Run verification after generating continuation functions to ensure
    // that we still have well formed IR with all the spill/fill code generated.
    // llvm::verifyModule() returns true when the module is broken and writes
    // its diagnostics to the given stream (dbgs() here).
    IGC_ASSERT(false == llvm::verifyModule(*pContext->getModule(), &dbgs()));

    COMPILER_TIME_END(pContext, TIME_RT_PASSES);
    COMPILER_TIME_END(pContext, TIME_RT_SETUP);

    DumpLLVMIR(pContext, "AfterRTLowering");
}

// Builds and runs the reduced lowering pipeline that only handles
// TraceRayInline: it prepares and lowers the TraceRayInline intrinsics and
// lowers the RTGlobals pointer.  No continuation splitting is registered
// here.
void RayTracingInlineLowering(CodeGenContext* pContext)
{
    IGCPassManager PM(pContext, "RayTracingInlineLowering");

    // Allocate the address spaces / surface state offsets for the raytracing
    // memory regions before any pass below gets to run.
    setupRegionBTIs(pContext);

    // Wrapper pass that carries the codegen context into the pass manager.
    PM.add(new CodeGenContextWrapper(pContext));

    PM.add(createTraceRayInlinePrepPass());

    // Optional latency scheduling for TraceRayInline, followed by a CFG
    // cleanup; only added when EnableRQHideLatency is set.
    if (IGC_IS_FLAG_ENABLED(EnableRQHideLatency))
    {
        PM.add(createTraceRayInlineLatencySchedulerPass());
        PM.add(createCFGSimplificationPass());
    }

    PM.add(CreateTraceRayInlineLoweringPass());
    PM.add(CreateRTGlobalsPointerLoweringPass());

#ifdef _DEBUG
    // Debug builds only: verify at the end of the pipeline that the IR is
    // still well formed.  The 'false' argument is the verifier pass's
    // FatalErrors parameter.
    PM.add(createVerifierPass(false));
#endif // _DEBUG

    PM.run(*pContext->getModule());
}

} // namespace IGC