File: GPUQREngine_Internal.cpp

package info (click to toggle)
suitesparse 1%3A5.8.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 152,716 kB
  • sloc: ansic: 774,385; cpp: 24,213; makefile: 6,310; fortran: 1,927; java: 1,826; csh: 1,686; ruby: 725; sh: 535; perl: 225; python: 209; sed: 164; awk: 60
file content (88 lines) | stat: -rw-r--r-- 3,035 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
// =============================================================================
// === GPUQREngine/Source/GPUQREngine_Internal.cpp =============================
// =============================================================================
//
// GPUQREngine_Internal is the call-down from the dense and sparse polymorphic
// wrappers. This code is responsible for maintaining the Scheduler and
// coordinating the factorization in a main loop.
//
// =============================================================================

#include "GPUQREngine_Internal.hpp"
#include "GPUQREngine_Scheduler.hpp"
#include "GPUQREngine_Stats.hpp"


QREngineResultCode GPUQREngine_Internal
(
    size_t gpuMemorySize,   // The total available GPU memory size in bytes
    Front *fronts,          // The list of fronts to factorize
    Int numFronts,          // The number of fronts to factorize
    Int *Parent,            // The front-to-parent mapping
    Int *Childp,            // Front-to-child column pointers
    Int *Child,             // Child permutation
                            // (Child[Childp[f]] to Child[Childp[f+1]] are all
                            // the front identifiers for front "f"'s children.)
    QREngineStats *stats    // An optional parameter. If present, statistics
                            // are collected and passed back to the caller
                            // via this struct
)
{
    /* Allocate raw storage with the SuiteSparse allocator, then construct the
       Scheduler in place.  calloc + placement-new is used (instead of plain
       new) so that all memory goes through SuiteSparse_calloc/free. */
    Scheduler *scheduler = (Scheduler*) SuiteSparse_calloc(1,sizeof(Scheduler));
    if (scheduler == NULL)
    {
        return QRENGINE_OUTOFMEMORY;
    }

    new (scheduler) Scheduler(fronts, numFronts, gpuMemorySize);

    /* If we encountered problems initializing the scheduler, tear it down
       and report the most specific failure. */
    if (!(scheduler->memory_ok && scheduler->cuda_ok))
    {
        /* Save the flags before destroying the object that owns them. */
        bool memory_ok = scheduler->memory_ok ;
        /* Explicitly invoke the destructor, then free the raw storage.
           (No NULL check needed: scheduler is provably non-NULL here.) */
        scheduler->~Scheduler();
        scheduler = (Scheduler*) SuiteSparse_free (scheduler) ;
        return (memory_ok ? QRENGINE_GPUERROR : QRENGINE_OUTOFMEMORY) ;
    }

    /* Main loop: fill the work queue, ship data to the GPU, launch the
       kernel, and post-process, until every front is factorized. */
    bool completed = false;
    while(!completed)
    {
//      #ifdef GPUQRENGINE_RENDER
//      scheduler->render();
//      #endif
        scheduler->fillWorkQueue();
        scheduler->transferData();

        // Launch the kernel and break out of the loop if we encountered
        // a cuda error.
        scheduler->launchKernel();
        if(!scheduler->cuda_ok) break;

        completed = scheduler->postProcess();
        scheduler->toggleQueue();
    }

    /* BUG FIX: record whether the loop exited because of a CUDA failure.
       Previously the function returned QRENGINE_SUCCESS even when
       launchKernel() failed and the loop was abandoned via 'break'. */
    bool cuda_ok = scheduler->cuda_ok ;

    /* Report metrics back to the caller. */
    if(stats)
    {
        stats->kernelTime = scheduler->kernelTime;
        stats->numLaunches = scheduler->numKernelLaunches;
        stats->flopsActual = scheduler->gpuFlops;
    }

    /* Explicitly invoke the destructor, then free the raw storage. */
    scheduler->~Scheduler();
    scheduler = (Scheduler*) SuiteSparse_free(scheduler);

    return (cuda_ok ? QRENGINE_SUCCESS : QRENGINE_GPUERROR) ;
}