File: GatingSimilarSamples.cpp

package info (click to toggle)
intel-graphics-compiler 1.0.12504.6-1%2Bdeb12u1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 83,912 kB
  • sloc: cpp: 910,147; lisp: 202,655; ansic: 15,197; python: 4,025; yacc: 2,241; lex: 1,570; pascal: 244; sh: 104; makefile: 25
file content (508 lines) | stat: -rw-r--r-- 18,489 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

#include "GatingSimilarSamples.hpp"
#include "common/IGCIRBuilder.h"
#include "common/igc_regkeys.hpp"
#include "GenISAIntrinsics/GenIntrinsics.h"
#include "IGC/LLVM3DBuilder/BuiltinsFrontend.hpp"
#include "common/LLVMWarningsPush.hpp"
#include <llvm/IR/Function.h>
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/InstrTypes.h>
#include <llvm/IR/Constant.h>
#include <llvm/IR/Dominators.h>
#include <llvm/IR/IntrinsicInst.h>
#include <llvm/Transforms/Utils/BasicBlockUtils.h>
#include <llvm/ADT/ilist.h>
#include <llvmWrapper/IR/InstrTypes.h>
#include "common/LLVMWarningsPop.hpp"
#include "Probe/Assertion.h"

using namespace llvm;
using namespace IGC;

// Checks the "scale every sample, then accumulate" averaging pattern.
//
// For each instruction in similarSampleInsts, the instructions that immediately follow it
// in its basic block must be:
//   3 x ExtractElement   (the texel components of that sample), followed by
//   3 x FMul             (each one multiplying a distinct one of those components by the
//                         float constant 1 / (N + 1))
// where N = similarSampleInsts.size(); the "+ 1" accounts for the texel sample the others
// are similar to, which is not part of the vector.
// The constant may be either operand of the FMul.
//
// Only the instructions in similarSampleInsts are inspected here; the scaling applied to the
// motion/texel sample results is not verified by this function.
//
// Returns true when every sample matches the pattern, false on the first mismatch.
static bool samplesAveragedEqually(const std::vector<Instruction*>& similarSampleInsts)
{
    unsigned similarToTexelSampleInstsCount = similarSampleInsts.size();
    unsigned totalSimilarSamples = similarToTexelSampleInstsCount + 1; //texel(sample2) + similar to texel(sample3,4,5)
    const float cmpAveragingFactor = (float)1.0 / (float(totalSimilarSamples));

    // True when V is a float-typed ConstantFP equal to the expected averaging factor.
    // The type is tested first because convertToFloat() is only valid on float constants.
    auto isAveragingFactor = [cmpAveragingFactor](Value* V)
    {
        ConstantFP* CF = dyn_cast<ConstantFP>(V);
        return CF != nullptr &&
            CF->getType()->isFloatTy() &&
            CF->getValueAPF().convertToFloat() == cmpAveragingFactor;
    };

    for (Instruction* sampleInst : similarSampleInsts)
    {
        BasicBlock::iterator instItr = sampleInst->getIterator();
        std::set<Value*> texels; //for storing texel_x, texel_y, texel_z of this sampleInst
        for (int i = 0; i < 3; i++)
        {
            ++instItr;
            if (instItr->getOpcode() != Instruction::ExtractElement)
            {
                return false; //Sample->followed by 3 EE == this  pattern is not matching
            }
            texels.insert(&*instItr);
        }

        for (int i = 0; i < 3; i++)
        {
            ++instItr;
            if (instItr->getOpcode() != Instruction::FMul)
            {
                return false; //3 EE -> followed by 3 FMuls == this  pattern is not matching
            }

            //% 29 = fmul fast float %texel_x, 2.500000e-01   (or with the operands swapped)
            Value* lhs = instItr->getOperand(0);
            Value* rhs = instItr->getOperand(1);
            Value* texel = nullptr;
            Value* factor = nullptr;
            if (texels.find(lhs) != texels.end())
            {
                texel = lhs;
                factor = rhs;
            }
            else if (texels.find(rhs) != texels.end())
            {
                texel = rhs;
                factor = lhs;
            }
            else
            {
                return false; //FMul does not consume a not-yet-scaled texel component
            }

            if (!isAveragingFactor(factor))
            {
                return false; //texel x/y/z not multiplied by const avg factor
            }

            // Remove the component that was actually used (it can be either operand) so the
            // same component cannot satisfy more than one of the three FMuls.
            texels.erase(texel);
        }
        IGC_ASSERT_MESSAGE(texels.size() == 0, " All texels.x/y/z were not multiplied by same float");
    }
    return true;
}

// detect the pattern where all sample results are added together then
// multiply by constant
static bool
detectSampleAveragePattern2(const std::vector<Instruction*>& sampleInsts, Instruction* texSample)
{
    unsigned nSampleInsts = sampleInsts.size();
    float averagingFactor = float(1.0 / (nSampleInsts + 1));

    Instruction* base[3];
    for (auto* UI : texSample->users())
    {
        ExtractElementInst* ui = dyn_cast<ExtractElementInst>(UI);
        if (ui == nullptr)
        {
            return false;
        }
        ConstantInt* ci = dyn_cast<ConstantInt>(ui->getIndexOperand());
        if (ci == nullptr)
        {
            return false;
        }
        unsigned idx = static_cast<unsigned>(ci->getZExtValue());
        if (idx <= 2)
        {
            base[idx] = ui;
        }
    }

    Instruction* rgb[3] = { nullptr };

    for (unsigned i = 0; i < nSampleInsts; i++)
    {
        Instruction* sampleInst = sampleInsts[i];
        BasicBlock::iterator II = sampleInst->getIterator();
        for (unsigned j = 0; j < 3; j++)
        {
            II++;
            ExtractElementInst* ei = dyn_cast<ExtractElementInst>(II);
            if (!ei)
            {
                return false;
            }
            ConstantInt* ci = dyn_cast<ConstantInt>(ei->getIndexOperand());
            if (!ci)
            {
                return false;
            }
            unsigned idx = static_cast<unsigned>(ci->getZExtValue());
            if (idx > 2)
            {
                return false;
            }
            if (ei->hasNUsesOrMore(2))
            {
                return false;
            }

            if (i == 0)
            {
                rgb[idx] = ei;
            }
            else
            {
                Instruction* fadd = dyn_cast<Instruction>(*ei->users().begin());
                if (fadd == nullptr || fadd->getOpcode() != Instruction::FAdd ||
                    fadd->hasNUsesOrMore(2))
                {
                    return false;
                }
                if (fadd->getOperand(0) != rgb[idx] &&
                    fadd->getOperand(1) != rgb[idx])
                {
                    return false;
                }
                rgb[idx] = fadd;
            }
        }
        II++;
        if (isa<ExtractElementInst>(II))
        {
            return false;
        }
    }

    for (unsigned i = 0; i < 3; i++)
    {
        Instruction* fadd = dyn_cast<Instruction>(*rgb[i]->users().begin());
        if (fadd == nullptr || fadd->getOpcode() != Instruction::FAdd ||
            fadd->hasNUsesOrMore(2))
        {
            return false;
        }
        if (fadd->getOperand(0) != base[i] &&
            fadd->getOperand(1) != base[i])
        {
            return false;
        }

        Instruction* fmul = dyn_cast<Instruction>(*fadd->users().begin());
        if (fmul == nullptr || fmul->getOpcode() != Instruction::FMul ||
            fmul->hasNUsesOrMore(2))
        {
            return false;
        }
        ConstantFP* cf;
        if (fmul->getOperand(0) == fadd)
        {
            cf = dyn_cast<ConstantFP>(fmul->getOperand(1));
        }
        else
        {
            cf = dyn_cast<ConstantFP>(fmul->getOperand(0));
        }
        if (cf == nullptr ||
            !cf->getType()->isFloatTy() ||
            cf->getValueAPF().convertToFloat() != averagingFactor)
        {
            return false;
        }
    }
    return true;
}

// Need to match a very specific pattern here
// @llvm.genx.GenISA.sampleptr1 => samples(tex0....) ---> This will be motionSample, sampling from tex0
// @llvm.genx.GenISA.sampleptr2 => samples(tex1....) ---> This will be texelSample, sampling from tex1. We search similar to this
// @llvm.genx.GenISA.sampleptr3 => samples(tex1....)
// @llvm.genx.GenISA.sampleptr4 => samples(tex1....)
// @llvm.genx.GenISA.sampleptr5 => samples(tex1....)
bool GatingSimilarSamples::checkAndSaveSimilarSampleInsts()
{
    for (auto& I : BB->getInstList())
    {
        if (SampleIntrinsic * SI = dyn_cast<SampleIntrinsic>(&I))
        {
            if (motionSample == nullptr)
            {
                motionSample = SI;
                continue;
            }
            if (!texelSample)
            {
                texelSample = SI;
                continue;
            }
            if (areSampleInstructionsSimilar(texelSample, SI))
            {
                similarSampleInsts.push_back(SI);
            }
            else
            { //we can't have a different texel sample between 2 matching(similar) texel samples!
                return false;
            }
        }
    }
    if (similarSampleInsts.size() == 0)
        return false;
    return true;
}

// Records or verifies the gating value for one texture coordinate.
// gatingValueToCmp1 - in/out: when still null it is set to mulInst; otherwise it is only read.
// mulInst           - instruction feeding the coordinate computation of the current sample.
// texelSampleInst   - the sample being processed; only used by the debug assertion.
// Returns true when the gating value was just recorded, or when the already recorded value
// is operand 0 or operand 1 of mulInst; returns false otherwise.
bool GatingSimilarSamples::setOrCmpGatingValue(Value*& gatingValueToCmp1, Instruction* mulInst, const Instruction* texelSampleInst)
{
    if (gatingValueToCmp1 != nullptr)
    {
        // A gating value is already known: mulInst has to consume it.
        const bool consumesGatingValue =
            mulInst->getOperand(0) == gatingValueToCmp1 ||
            mulInst->getOperand(1) == gatingValueToCmp1;
        return consumesGatingValue;
    }

    //This is the first texel sample inst from the loop after unrolled
    IGC_ASSERT_MESSAGE(texelSampleInst == similarSampleInsts[0], "incorrect inst sequence while extracting the loop gating value");
    gatingValueToCmp1 = mulInst;
    return true;
}

//This function makes sure that all similar sample insts calculate cords such that they use same gating value motion.xy
//Outside the loop, we're looking at this:
//          motion.xy = (motion.xy - 0.5) * vec2(0.0666667, .125);
//          motion.xy *= texel.a;
//Check that inside the loop, we're looking at something like this:
//          vec2 tc = out_texcoord0 - motion.xy * float(i);
//          color += texture2D(texture_unit0, tc).xyz / float(n);
bool GatingSimilarSamples::findAndSetCommonGatingValue()
{
    gatingValue_mul1 = nullptr;
    gatingValue_mul2 = nullptr;

    for (auto& texelSampleInstInLoop : similarSampleInsts)
    {
        Instruction* firstOp = dyn_cast<Instruction>(texelSampleInstInLoop->getOperand(0)); //tc.1
        Instruction* secondOp = dyn_cast<Instruction>(texelSampleInstInLoop->getOperand(1)); //tc.2
        if (!(firstOp && secondOp)) return false;
        if (firstOp->getOpcode() == Instruction::FSub || firstOp->getOpcode() == Instruction::FAdd)
        {//i.e. (texcoord0 (+/-) something)
            Instruction* mayBeMulInst = dyn_cast<Instruction>(firstOp->getOperand(1));
            if (!mayBeMulInst) return false;
            //that "texcoord0 - something" might be "texcoord0 - FMul" OR it might be "tc - (0 - -FMul)"
            if (mayBeMulInst->getOpcode() == Instruction::FMul)
            {//i.e. something is FMul!
                if (!setOrCmpGatingValue(gatingValue_mul1, mayBeMulInst, texelSampleInstInLoop))
                    return false;
            }
            else if (mayBeMulInst->getOpcode() == Instruction::FSub || mayBeMulInst->getOpcode() == Instruction::FAdd)
            {//that means we have this "tc - (0 - -FMul)"
                Instruction* realMulInst = dyn_cast<Instruction>(mayBeMulInst->getOperand(1));
                if (!realMulInst) return false;
                if (ConstantFP * mustBeZero = dyn_cast<ConstantFP>(mayBeMulInst->getOperand(0)))
                {
                    if (!mustBeZero->getType()->isFloatTy() || mustBeZero->getValueAPF().convertToFloat() != 0.0f)
                        return false;
                }
                else
                {
                    return false;
                }
                if (!setOrCmpGatingValue(gatingValue_mul1, realMulInst, texelSampleInstInLoop))
                    return false;
            }
            else
            {
                return false;
            }
        }
        else
        {
            return false;
        }
        if (secondOp->getOpcode() == Instruction::FSub || secondOp->getOpcode() == Instruction::FAdd)
        {
            //i.e. (out_texcoord0 (+/-) something)
            Instruction* mayBeMulInst = dyn_cast<Instruction>(secondOp->getOperand(1));
            if (!mayBeMulInst) return false;
            //that "tc - something" might be "tc - FMul" OR it might be "tc - (0 - -FMul)"
            if (mayBeMulInst->getOpcode() == Instruction::FMul)
            {//i.e. something is FMul!
                if (!setOrCmpGatingValue(gatingValue_mul2, mayBeMulInst, texelSampleInstInLoop))
                    return false;
            }
            else if (mayBeMulInst->getOpcode() == Instruction::FSub || mayBeMulInst->getOpcode() == Instruction::FAdd)
            {//that means we have this "tc - (0 - -FMul)"
                Instruction* realMulInst = dyn_cast<Instruction>(mayBeMulInst->getOperand(1));
                if (!realMulInst) return false;
                if (ConstantFP * mustBeZero = dyn_cast<ConstantFP>(mayBeMulInst->getOperand(0)))
                {
                    if (!mustBeZero->getType()->isFloatTy() || mustBeZero->getValueAPF().convertToFloat() != 0.0f)
                        return false;
                }
                else
                {
                    return false;
                }
                if (!setOrCmpGatingValue(gatingValue_mul2, realMulInst, texelSampleInstInLoop))
                    return false;
            }
            else
            {
                return false;
            }
        }
        else
        {
            return false;
        }
    }
    return true; //a common gating value was found and set
}

// Decides whether two sample instructions are "similar", i.e. have the same opcode, the same
// number of operands, and identical operands from index 2 onwards. The first two operands
// are deliberately not compared.
// Returns false when either pointer is null or any compared property differs.
bool GatingSimilarSamples::areSampleInstructionsSimilar(Instruction* firstSampleInst, Instruction* secondSampleInst)
{
    if (firstSampleInst == nullptr || secondSampleInst == nullptr)
    {
        return false;
    }
    IGC_ASSERT(isSampleInstruction(firstSampleInst));
    IGC_ASSERT(isSampleInstruction(secondSampleInst));

    const unsigned int operandCount = firstSampleInst->getNumOperands();
    if (operandCount != secondSampleInst->getNumOperands() ||
        firstSampleInst->getOpcode() != secondSampleInst->getOpcode())
    {
        return false;
    }

    // Advance past every matching operand; stopping early means a mismatch was found.
    unsigned int idx = 2;
    while (idx < operandCount &&
        firstSampleInst->getOperand(idx) == secondSampleInst->getOperand(idx))
    {
        ++idx;
    }
    return idx >= operandCount;
}


//This pass assumes loop unrolling has been performed
//
//Entry point of the pass. Works only on functions that consist of a single basic block.
//Steps:
//  1. collect motionSample, texelSample and the samples similar to texelSample;
//  2. require one of the two averaging patterns (samplesAveragedEqually or detectSampleAveragePattern2);
//  3. require a common gating value for both coordinates of all similar samples;
//  4. locate the last GenISA_OUTPUT intrinsic in the block (resultInst);
//  5. guard everything from the first similar sample up to (not including) resultInst with
//     "if (gatingValue_mul1 != 0 || gatingValue_mul2 != 0)" and feed operands 0..2 of resultInst
//     through PHI nodes that pick the averaged value when the guarded block ran and the
//     texel sample's own component otherwise.
//Returns true when the function was modified, false otherwise.
bool GatingSimilarSamples::runOnFunction(llvm::Function& F)
{
    BB = nullptr; //opt runs only if single BB in function
    motionSample = nullptr;
    texelSample = nullptr;
    resultInst = nullptr;
    gatingValue_mul1 = nullptr;
    gatingValue_mul2 = nullptr;
    similarSampleInsts.clear();

    if (IGC_GET_FLAG_VALUE(DisableGatingSimilarSamples))
        return false;
    if (F.getBasicBlockList().size() != 1)
        return false;
    BB = &*F.getBasicBlockList().begin();

    if (!checkAndSaveSimilarSampleInsts())
        return false;

    //Either averaging pattern is sufficient; the second one is only examined when the first fails.
    if (!samplesAveragedEqually(similarSampleInsts) &&
        !detectSampleAveragePattern2(similarSampleInsts, texelSample))
    {
        return false;
    }

    //By now we know that all similar sample inst results are divided by the same values and added with equal weights.
    if (!findAndSetCommonGatingValue())
        return false;

    //save the final result inst
    for (BasicBlock::reverse_iterator rItr = BB->rbegin(); rItr != BB->rend(); rItr++)
    {
        GenIntrinsicInst* GenI = dyn_cast<GenIntrinsicInst>(&*rItr);
        if (GenI != nullptr && GenI->getIntrinsicID() == GenISAIntrinsic::GenISA_OUTPUT)
        {
            resultInst = &*rItr;
            break;
        }
    }
    if (resultInst == nullptr)
        return false;

    //extract original texel.xyz: the three instructions following texelSample must be ExtractElement.
    //NOTE(review): the extract indices are assumed to be 0, 1, 2 in this order - not verified here.
    Value* texel[3] = { nullptr, nullptr, nullptr };
    BasicBlock::iterator temp = texelSample->getIterator();
    for (Value*& component : texel)
    {
        ++temp;
        if (temp->getOpcode() != Instruction::ExtractElement)
            return false;
        component = &*temp;
    }

    //---- No IR has been changed up to this point; everything below modifies the function. ----

    //create a if-then basic block with the gating condition
    IGCIRBuilder<> IRB(F.getContext());
    FastMathFlags FMF;
    FMF.setFast();
    IRB.setFastMathFlags(FMF);
    IRB.SetInsertPoint(similarSampleInsts[0]);
    Value* gatingVal1 = IRB.CreateBitCast(gatingValue_mul1, IRB.getFloatTy());
    Value* cnd1 = IRB.CreateFCmpONE(gatingVal1, ConstantFP::get(IRB.getFloatTy(), 0.0f));
    Value* gatingVal2 = IRB.CreateBitCast(gatingValue_mul2, IRB.getFloatTy());
    Value* cnd2 = IRB.CreateFCmpONE(gatingVal2, ConstantFP::get(IRB.getFloatTy(), 0.0f));
    Value* isGatingValueNotZero = IRB.CreateOr(cnd1, cnd2);
    IGCLLVM::TerminatorInst* thenBlockTerminator = SplitBlockAndInsertIfThen(isGatingValueNotZero, similarSampleInsts[0], false);
    BasicBlock* thenBlock = thenBlockTerminator->getParent();
    if (thenBlockTerminator->getNumSuccessors() != 1)
    {
        //The compares and the block split above already changed the function, so the pass
        //has to report a modification even though the transformation is abandoned.
        return true;
    }


    //move all insts starting from similarSampleInst[0] upto resultInst(non-inluding) into the new then block
    //NOTE(review): values defined in the moved range that are used by resultInst operands other than
    //0..2, or by instructions after resultInst, would no longer dominate their uses - confirm this cannot occur.
    BasicBlock* tailBlock = thenBlockTerminator->getSuccessor(0);
    thenBlock->getInstList().splice(thenBlock->begin(), tailBlock->getInstList(), similarSampleInsts[0]->getIterator(), resultInst->getIterator());

    //Add 3 phi nodes - for x (result op0), y(result op1) and z(result op2).
    //After the splice resultInst is the first instruction of the tail block, so the PHIs
    //inserted in front of it sit at the start of that block.
    IRB.SetInsertPoint(resultInst);
    for (unsigned channel = 0; channel < 3; ++channel)
    {
        Value* avgColor = resultInst->getOperand(channel);
        PHINode* PN = IRB.CreatePHI(avgColor->getType(), 2);
        PN->addIncoming(avgColor, thenBlock);
        PN->addIncoming(texel[channel], texelSample->getParent());
        resultInst->setOperand(channel, PN);
    }

    return true;
}

// Definition of the pass's static ID member, initialised to 0.
char IGC::GatingSimilarSamples::ID = 0;

// Pass registration: the pass is registered under the argument name "loop-gating" with the
// description "Loop Gating Optimization" and declares a dependency on CodeGenContextWrapper.
// NOTE(review): the two trailing `false` arguments presumably mean "not CFG-only" and
// "not an analysis", as in LLVM's INITIALIZE_PASS macros - confirm against the IGC macro definition.
IGC_INITIALIZE_PASS_BEGIN(GatingSimilarSamples, "loop-gating",
    "Loop Gating Optimization", false, false)
    INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
    IGC_INITIALIZE_PASS_END(GatingSimilarSamples, "loop-gating",
        "Loop Gating Optimization", false, false)

    // Factory for the pass: heap-allocates a new GatingSimilarSamples and returns it as a
    // FunctionPass pointer. The caller receives the raw pointer and is responsible for it.
    llvm::FunctionPass* IGC::CreateGatingSimilarSamples()
{
    llvm::FunctionPass* const gatingPass = new GatingSimilarSamples();
    return gatingPass;
}