File: GPUQREngine_BucketList.hpp

package info (click to toggle)
suitesparse 1%3A5.8.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 152,716 kB
  • sloc: ansic: 774,385; cpp: 24,213; makefile: 6,310; fortran: 1,927; java: 1,826; csh: 1,686; ruby: 725; sh: 535; perl: 225; python: 209; sed: 164; awk: 60
file content (168 lines) | stat: -rw-r--r-- 5,767 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
// =============================================================================
// === GPUQREngine/Include/GPUQREngine_BucketList.hpp ==========================
// =============================================================================
//
// The BucketList is a principal class in the GPUQREngine.
//
// The BucketList manages a set of LLBundle structures in a doubly-linked list.
// During factorization, the BucketList logically manipulates the LLBundles,
// and depending on the configuration of each, generates GPU tasks to be added
// to the GPU work queue.
//
// =============================================================================

#ifndef GPUQRENGINE_BUCKETLIST_HPP
#define GPUQRENGINE_BUCKETLIST_HPP

#include <cstddef>

#include "GPUQREngine_Common.hpp"
#include "GPUQREngine_TaskDescriptor.hpp"
#include "GPUQREngine_LLBundle.hpp"
#include "GPUQREngine_Front.hpp"

// Forward declarations of the two types that BucketList below only uses
// through pointers or references (TaskDescriptor* in FillWorkQueue, LLBundle*
// and LLBundle& in the Bundles member and IsInternal).
struct TaskDescriptor;
class LLBundle;

// BucketList: per-front scheduler state.
//
// Holds the idle row tiles of a front grouped into column buckets (linked
// through the head/next/prev index arrays), the LLBundle objects built from
// those tiles, and the pool of VT blocks used by the bundles.  The member
// functions declared here are defined in other translation units unless a
// body is given inline.
class BucketList
{
public:
    bool useFlag;            // A flag indicating whether to use this
                             // NOTE(review): set outside this header; confirm
                             // exact meaning against the constructor.
    bool memory_ok;          // A flag indicating whether the object
                             // was constructed properly

    double *gpuF;            // The gpu front pointer

    Int *head;               // The head idle tile index in the bucket
    Int *next;               // The next idle tile index in the bucket
    Int *prev;               // The prev idle tile index in the bucket
    bool *triu;              // Flag indicating whether the tile index
                             // is upper triangular

    Int *bundleCount;        // The # of bundles native to bucket index
    Int *idleTileCount;      // The # of idle tiles in bucket index

    Front *front;            // The front this bucket list was built for
                             // (passed to the constructor)
    Int numRowTiles;         // # row tiles of F
    Int numColTiles;         // # col tiles of F
    Int numBuckets;          // min(numRowTiles, numColTiles)
    Int numIdleTiles;        // Total # of idle tiles stored in buckets
    Int PanelSize;           // Max # of rowtiles that can fit in one bundle
    Int TileSize;            // Dimensions of tiles
    Int Wavefront;           // Index of first non-completed colBucket
    Int LastBucket;          // Index of last colBucket with idleTiles
                             // or bundles

    Int ApplyGranularity;    // The desired granularity (in col tiles)
                             // for applies

    LLBundle *Bundles;       // The bundles maintained by this scheduler
    Int numBundles;          // Total # of bundles

    Workspace *wsMongoVT;    // The VT blocks this bucket list scheduler owns
    double **gpuVT;          // Array of available VT slots within the VT struct
    int VThead;              // Index of the first available entry in VTlist
                             // (presumably the gpuVT array above -- confirm)

    // Constructors

    // Placement form of operator new: constructs a BucketList in storage the
    // caller already owns and simply hands back p; nothing is allocated.
    //
    // The size parameter must be std::size_t.  Spelling it as
    // `long unsigned int` only compiles on targets where size_t happens to be
    // that type, and is rejected where size_t is `unsigned int` or
    // `unsigned long long` (e.g. 32-bit or LLP64 platforms).
    //
    // Because this is the only operator new declared in the class, a plain
    // `new BucketList(...)` expression does not find the global allocator.
    void *operator new(std::size_t, BucketList* p)
    {
        return p;
    }

    // f is stored as the front being scheduled; minApplyGranularity is the
    // requested lower bound for ApplyGranularity.
    // NOTE(review): both descriptions are inferred from the member comments
    // above; the definition is not in this header.
    BucketList(Front *f, Int minApplyGranularity);
    ~BucketList();

    // Bundle management functions

    // Insert adds an idle tile to a bucket, optionally flagging the tile as
    // upper triangular; Remove takes a tile back out of a bucket.
    void Insert(Int tile, Int bucket, bool upperTriangular = false);
    void Remove(Int tile, Int bucket);
    #ifdef GPUQRENGINE_PIPELINING
    // Only declared when pipelining is compiled in.
    // NOTE(review): presumably removes and returns the head tile of the
    // bucket -- confirm against the definition.
    Int RemoveHead(Int bucket);
    #endif

    // VT management functions

    // allocateVT hands out a VT block; freeVT gives one back.
    // NOTE(review): the meaning of freeVT's return value is not visible here.
    // Its parameter deliberately keeps the original name, which is the same
    // as the gpuVT data member.
    double *allocateVT();
    double *freeVT(double *gpuVT);

    // Returns true once no bundles remain.
    bool IsDone()
    {
        // We're done if we have no bundles left with tasks.
        return (numBundles == 0);
    }

//  // IsRReadyEarly experimental feature : not available in production use.
//  bool IsRReadyEarly()
//  {
//      // If we're doing a dense factorization, we're never done early.
//      if(front->isDense()) return false;
//
//      // We can't pull the R factor early if we also need the CBlock.
//      if(front->isStaged()) return false;
//
//      // If we're doing a sparse factorization, we're done early if we're
//      // past the pivot row.
//      return (TILESIZE * (Wavefront-1) > front->sparseMeta.fp);
//  }

    // Initialize takes the BucketList and adds rowtiles in positions
    // appropriate for the staircase of the problem.
    void Initialize();

    // AdvanceBundles advances existing bundles, leaving the First tile behind
    // and keeping a Shadow copy to support subsequent Apply tasks.
    void AdvanceBundles();

    #ifdef GPUQRENGINE_PIPELINING
    // GrowBundles looks for row tiles (or bundles) involved in a factorization
    // and attempts to add those bundles or row tiles to a task currently set
    // for a series of Apply tasks. This is also known as Pipelining.
    void GrowBundles();
    #endif

    // CreateBundles selects rowtiles up to PANELSIZE and creates a new bundle
    // ready for factorization.
    void CreateBundles();

    // PostProcess handles any cleanup operations following a kernel invocation
    // including merging delta tiles with the main bundle and other fixups.
    void PostProcess();

    // SkipBundleCreation determines whether we should skip creating a new
    // bundle for the specified tile in the specified column bucket.
    bool SkipBundleCreation
    (
        Int tile,
        Int colBucket
    );

    // IsInternal determines whether a tile is completely within the bounds
    // of the front because if it isn't then we will need to use the special
    // edge case kernels.
    // NOTE(review): jLast is presumably the last column index touched by the
    // bundle -- confirm against the definition.
    bool IsInternal
    (
        LLBundle& bundle,
        int jLast
    );

    // FillWorkQueue is responsible for filling the work queue with items and
    // resolving generic TaskType entries on the bundles into concrete tasks
    // to be performed by the GPU.
    // NOTE(review): the meaning of the Int return value is not visible in
    // this header.
    Int FillWorkQueue
    (
        TaskDescriptor *queue,  // The list of work items for the GPU
        Int *queueIndex         // The current index into the queue
    );
};

#endif