1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
|
// =============================================================================
// === GPUQREngine/Include/GPUQREngine_Scheduler.hpp ===========================
// =============================================================================
//
// The Scheduler is a principal class in the GPUQREngine.
//
// This class manages the input set of Fronts, creates BucketLists when
// necessary for factorization, and contains all logic required to coordinate
// the factorization and assembly tasks with the GPU.
//
// =============================================================================
#ifndef GPUQRENGINE_SCHEDULER_HPP
#define GPUQRENGINE_SCHEDULER_HPP
#include "GPUQREngine_Common.hpp"
#include "GPUQREngine_FrontState.hpp"
#include "GPUQREngine_TaskDescriptor.hpp"
#include "GPUQREngine_BucketList.hpp"
#include "GPUQREngine_LLBundle.hpp"
#include "GPUQREngine_Front.hpp"
#define SSGPU_MINAPPLYGRANULARITY 16
// ssgpu_maxQueueSize: return the size of the scheduler queue.
//   gpuMemorySize : size of GPU memory, in bytes
size_t ssgpu_maxQueueSize(size_t gpuMemorySize);
// Scheduler: manages the input set of Fronts, owns the per-front BucketLists,
// and coordinates factorization and assembly tasks with the GPU.  The bodies
// of most members live in the Scheduler*.cpp files named in the section
// comments below; only the small inline helpers are defined here.
class Scheduler
{
private:

    /* Scheduler.cpp */

    // Returns a bool status; gpuMemorySize is presumably the size of GPU
    // memory in bytes, as for ssgpu_maxQueueSize (confirm in Scheduler.cpp).
    bool initialize(size_t gpuMemorySize);

    /* Scheduler_Front.cpp */

    // f is the index of the front to operate on.
    bool pullFrontData(Int f);

    /* Scheduler_FillWorkQueue.cpp */

    void fillTasks
    (
        Int f,                      // INPUT: Current front
        TaskDescriptor *queue,      // INPUT: CPU Task entries
        Int *queueIndex             // IN/OUT: The index of the current entry
    );

public:

    bool memory_ok;                 // Flag for the creating function to
                                    // determine whether we had enough
                                    // memory to initialize the Scheduler.

    bool cuda_ok;                   // Flag for the creating function to
                                    // determine whether we could
                                    // successfully invoke the cuda
                                    // initialization calls.

    Front *frontList;               // The fronts handed to the constructor
                                    // (presumably; confirm in Scheduler.cpp)
    Int numFronts;
    Int numFrontsCompleted;

    int activeSet;                  // Flipped by toggleQueue(); presumably
                                    // indexes the two-element arrays below
                                    // (workQueues, numTasks, kernelStreams)
                                    // -- TODO confirm against the .cpp files

    BucketList *bucketLists;        // Indexed by front (see
                                    // initializeBucketList)

    Int *afPerm;                    // Permutation of "active" fronts
    Int *afPinv;                    // Inverse permutation of "active" fronts
    Int numActiveFronts;

    Int maxQueueSize;
    Workspace *workQueues[2];
    Int numTasks[2];

    Int minApplyGranularity;        // The minimum number of tiles for which
                                    // we will group apply tasks

    bool *FrontDataPulled;          // A set of flags indicating whether R has
                                    // been pulled off the GPU.

    cudaEvent_t *eventFrontDataReady;   // A list of cudaEvents that are used
                                        // to coordinate when the R factor is
                                        // ready to be pulled from the GPU.

    cudaEvent_t *eventFrontDataPulled;  // A list of cudaEvents that are used
                                        // to coordinate when the R factor is
                                        // finally finished transferring off
                                        // the GPU.

    // Use multiple CUDA streams to coordinate kernel launches and asynchronous
    // memory transfers between the host and the device:
    //   kernelStreams : Launch kernels on alternating streams
    //   H2D           : Asynchronous memory transfer stream (Host-to-Device)
    //   D2H           : Asynchronous memory transfer stream (Device-to-Host)
    cudaStream_t kernelStreams[2];
    cudaStream_t memoryStreamH2D;
    cudaStream_t memoryStreamD2H;

    /* Scheduler.cpp */

    // Placement form of operator new: constructs a Scheduler in storage the
    // caller has already obtained, returning p unchanged and ignoring the
    // requested size.  The first parameter must be size_t for every
    // allocation function; spelling it "long unsigned int" only happens to
    // match size_t on LP64 targets and fails to compile where size_t is a
    // different type (e.g. 64-bit Windows).
    void *operator new(size_t, Scheduler *p) { return p; }

    Scheduler(Front *fronts, Int numFronts, size_t gpuMemorySize);
    ~Scheduler();

    /* Scheduler_Front.cpp */

    void activateFront
    (
        Int f                       // The index of the front to operate on
    );

    bool finishFront
    (
        Int f                       // The index of the front to operate on
    );

    // Calls Initialize() on the bucket list of front f, but only when that
    // bucket list has its useFlag set; otherwise does nothing.
    void initializeBucketList
    (
        Int f                       // The index of the front to operate on
    )
    {
        // NOTE: tested by SPQR/Tcov, but not flagged as such in cov results
        BucketList *dlbl = (&bucketLists[f]);
        if(dlbl->useFlag) dlbl->Initialize();
    }

    /* Scheduler_TransferData.cpp */

    void transferData
    (
        void
    );

    /* Scheduler_FillWorkQueue.cpp */

    void fillWorkQueue
    (
        void
    );

    /* Scheduler_LaunchKernel.cpp */

    void launchKernel
    (
        void
    );

    /* Scheduler_PostProcess.cpp */

    bool postProcess
    (
        void
    );

    // Flips the low bit of activeSet (0 <-> 1 when activeSet is 0 or 1).
    void toggleQueue
    (
        void
    )
    {
        activeSet ^= 1;
    }

    /* Stats */
    float kernelTime;
    Int numKernelLaunches;
    Int gpuFlops;

#ifdef GPUQRENGINE_RENDER
    /* Debug stuff */
    const char *TaskNames[21];
    const char *StateNames[9];
    int renderCount;
    void render();
#endif

#if 1
    void debugDumpFront(Front *front);
#endif
};
#endif
|