File: stream.cu

package info (click to toggle)
forge 1.0.1-6
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,324 kB
  • sloc: cpp: 12,447; ansic: 319; xml: 182; makefile: 19
file content (150 lines) | stat: -rw-r--r-- 4,346 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/*******************************************************
 * Copyright (c) 2015-2019, ArrayFire
 * All rights reserved.
 *
 * This file is distributed under 3-clause BSD license.
 * The complete license agreement can be obtained at:
 * http://arrayfire.com/licenses/BSD-3-Clause
 ********************************************************/

#include <forge.h>
#include <cuda_runtime.h>
#define USE_FORGE_CUDA_COPY_HELPERS
#include <ComputeCopy.h>

/* Dimensions handed to the forge::Window constructor in main(). */
const unsigned DIMX = 640;
const unsigned DIMY = 480;

/*
 * Sampling lattice: coordinates start at MINIMUM and advance by STEP
 * along every axis; MAXIMUM is also the upper axis limit of the chart.
 */
const float MINIMUM = 1.0f;
const float MAXIMUM = 20.0f;
const float STEP    = 2.0f;

/* Number of samples per axis; the float quotient is truncated to int. */
const int NELEMS    = static_cast<int>((MAXIMUM - MINIMUM + 1) / STEP);

/*
 * Launches the color kernel over NELEMS^3 elements. `colors` receives
 * three floats (R, G, B) per element; main() passes a cudaMalloc'd
 * buffer of 3*NELEMS^3 floats.
 */
void generateColors(float* colors);

/*
 * Launches the point kernel over the NELEMS x NELEMS x NELEMS lattice.
 * `points` and `dirs` each receive three floats (x, y, z) per element;
 * main() passes cudaMalloc'd buffers of 3*NELEMS^3 floats for both.
 */
void generatePoints(float* points, float* dirs);

/*
 * Integer division of `a` by `b`, biased upward by adding (b - 1) to the
 * numerator first. For a >= 0 and b > 0 this equals ceil(a / b), which is
 * how the launch helpers in this file use it to size their grids.
 */
inline int divup(int a, int b)
{
    const int biased = a + b - 1;
    return biased / b;
}

/*
 * Demo entry point: builds a 3D chart with one vector field, fills the
 * field's vertex/color/direction buffers from CUDA-generated data and
 * keeps drawing the chart until the window reports it should close.
 */
int main(void)
{
    /* Slots shared by the device buffers and their GL handles. */
    enum { POINTS = 0, COLORS = 1, DIRS = 2, NUM_BUFFERS = 3 };

    /*
     * The window must be the first Forge object created: constructing
     * it provides the OpenGL context that every other forge::* object
     * needs in order to be created successfully.
     */
    forge::Window wnd(DIMX, DIMY, "3D Vector Field Demo");
    wnd.makeCurrent();

    /* All three axes share the same [MINIMUM-1, MAXIMUM] range. */
    const float axisLow = MINIMUM-1.0f;
    forge::Chart chart(FG_CHART_3D);
    chart.setAxesLimits(axisLow, MAXIMUM,
                        axisLow, MAXIMUM,
                        axisLow, MAXIMUM);
    chart.setAxesTitles("x-axis", "y-axis", "z-axis");

    const int numElems = NELEMS*NELEMS*NELEMS;
    forge::VectorField field = chart.vectorField(numElems, forge::f32);
    field.setColor(0.f, 1.f, 0.f, 1.f);

    /* One device buffer per attribute, three floats per element each. */
    float* devBufs[NUM_BUFFERS];
    for (int b = 0; b < NUM_BUFFERS; ++b) {
        FORGE_CUDA_CHECK(cudaMalloc((void**)&devBufs[b], 3*numElems*sizeof(float)));
    }

    generatePoints(devBufs[POINTS], devBufs[DIRS]);
    generateColors(devBufs[COLORS]);

    /* Wrap the field's GL buffers so the CUDA data can be copied in. */
    GfxHandle* handles[NUM_BUFFERS];
    createGLBuffer(&handles[POINTS], field.vertices(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[COLORS], field.colors(), FORGE_VERTEX_BUFFER);
    createGLBuffer(&handles[DIRS], field.directions(), FORGE_VERTEX_BUFFER);

    copyToGLBuffer(handles[POINTS], (ComputeResourceHandle)devBufs[POINTS], field.verticesSize());
    copyToGLBuffer(handles[COLORS], (ComputeResourceHandle)devBufs[COLORS], field.colorsSize());
    copyToGLBuffer(handles[DIRS], (ComputeResourceHandle)devBufs[DIRS], field.directionsSize());

    /* Draw at least once, then repeat until the window asks to close. */
    for (;;) {
        wnd.draw(chart);
        if (wnd.close()) {
            break;
        }
    }

    for (int b = 0; b < NUM_BUFFERS; ++b) {
        releaseGLBuffer(handles[b]);
    }

    for (int b = 0; b < NUM_BUFFERS; ++b) {
        FORGE_CUDA_CHECK(cudaFree(devBufs[b]));
    }

    return 0;
}

/*
 * Writes one RGB triple per element into `colors`: even element indices
 * get the orange table entry, odd indices the blue one. Only the first
 * three components of each table are written; the fourth is unused here.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per element. Threads
 * whose global index is >= nelems do nothing.
 *
 * colors  output buffer, 3 floats per element
 * nelems  number of elements to color
 */
__global__
void genColorsKernel(float* colors, int nelems)
{
    const float AF_BLUE[4] = {0.0588f , 0.1137f , 0.2745f , 1.0f};
    const float AF_ORANGE[4] = {0.8588f , 0.6137f , 0.0745f , 1.0f};

    const int elem = blockDim.x * blockIdx.x + threadIdx.x;

    /* Tail threads of the last block fall outside the data. */
    if (elem >= nelems) {
        return;
    }

    const bool useOrange = (elem % 2 == 0);
    float* rgb = colors + 3*elem;

    for (int c = 0; c < 3; ++c) {
        rgb[c] = useOrange ? AF_ORANGE[c] : AF_BLUE[c];
    }
}

/*
 * Fills `colors` with one RGB triple for each of the NELEMS^3 field
 * elements by launching genColorsKernel with 512-thread 1D blocks.
 *
 * colors  output buffer of at least 3*NELEMS^3 floats; main() passes a
 *         cudaMalloc'd pointer
 *
 * A kernel launch does not return a status itself, so the launch result
 * is fetched with cudaGetLastError() and routed through
 * FORGE_CUDA_CHECK, the same macro main() uses for runtime API calls.
 */
void generateColors(float* colors)
{
    const int numElems = NELEMS*NELEMS*NELEMS;
    static const dim3 threads(512);
    const dim3 blocks(divup(numElems, threads.x));

    genColorsKernel<<<blocks, threads>>>(colors, numElems);

    /* Surface an invalid launch configuration instead of ignoring it. */
    FORGE_CUDA_CHECK(cudaGetLastError());
}

/*
 * Generates the lattice positions and their direction vectors.
 *
 * Launch layout: 2D blocks on a 2D grid. blockIdx.x packs two values:
 * the z index (blockIdx.x / nBBS0) and the block's position along x
 * (the remainder). blockIdx.y covers the y axis. Threads that land
 * outside the nelems^3 lattice do nothing.
 *
 * points   output, 3 floats per element: minimum + index*step per axis
 * dirs     output, 3 floats per element: the position minus 10 per axis
 * nBBS0    number of blocks used along x for a single z index
 * nelems   number of samples along each axis
 * minimum  coordinate of sample 0 on every axis
 * step     spacing between consecutive samples
 */
__global__
void pointGenKernel(float* points, float* dirs, int nBBS0, int nelems, float minimum, float step)
{
    /* Unpack the z index and the x block position from blockIdx.x. */
    const int zi    = blockIdx.x / nBBS0;
    const int tileX = blockIdx.x - zi*nBBS0;

    const int xi = blockDim.x * tileX + threadIdx.x;
    const int yi = blockDim.y * blockIdx.y + threadIdx.y;

    if (xi >= nelems || yi >= nelems || zi >= nelems) {
        return;
    }

    const float px = minimum + xi*step;
    const float py = minimum + yi*step;
    const float pz = minimum + zi*step;

    /* x varies fastest, then y, then z; three floats per element. */
    const int base = 3 * ((zi*nelems + yi)*nelems + xi);

    float* p = points + base;
    p[0] = px;
    p[1] = py;
    p[2] = pz;

    float* d = dirs + base;
    d[0] = px-10.f;
    d[1] = py-10.f;
    d[2] = pz-10.f;
}

/*
 * Fills `points` and `dirs` for the NELEMS x NELEMS x NELEMS lattice by
 * launching pointGenKernel with 8x8 thread blocks.
 *
 * Grid layout: grid.x holds blk_x blocks for each of the NELEMS z
 * indices (the kernel splits blockIdx.x back apart using blk_x), and
 * grid.y holds the blocks covering the y axis.
 *
 * points  output buffer of at least 3*NELEMS^3 floats
 * dirs    output buffer of at least 3*NELEMS^3 floats
 *         (main() passes cudaMalloc'd pointers for both)
 *
 * A kernel launch does not return a status itself, so the launch result
 * is fetched with cudaGetLastError() and routed through
 * FORGE_CUDA_CHECK, the same macro main() uses for runtime API calls.
 */
void generatePoints(float* points, float* dirs)
{
    static const dim3 threads(8, 8);

    const int blk_x = divup(NELEMS, threads.x);
    const int blk_y = divup(NELEMS, threads.y);

    const dim3 blocks(blk_x*NELEMS, blk_y);

    pointGenKernel<<<blocks, threads>>>(points, dirs, blk_x, NELEMS, MINIMUM, STEP);

    /* Surface an invalid launch configuration instead of ignoring it. */
    FORGE_CUDA_CHECK(cudaGetLastError());
}