File: ext-potential.cpp

package info (click to toggle)
gpaw 25.7.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 18,888 kB
  • sloc: python: 174,804; ansic: 17,564; cpp: 5,668; sh: 972; csh: 139; makefile: 45
file content (126 lines) | stat: -rw-r--r-- 3,932 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#include <stdio.h>
#include <time.h>
#include <sys/types.h>
#include <sys/time.h>

#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL GPAW_ARRAY_API
#define NO_IMPORT_ARRAY
#include <numpy/arrayobject.h>

#include "../gpu.h"
#include "../gpu-complex.h"

#ifndef GPU_USE_COMPLEX
#define BLOCK_SIZEX 32
#define BLOCK_SIZEY 8
#define XDIV 4
#endif


__global__ void Zgpu(add_linear_field_kernel)(
        const Tgpu *a, const int3 c_sizea, Tgpu *b, const int3 c_n,
        const int3 c_beg, const double3 strength, int blocks)
{
    int xx = gridDim.x / XDIV;
    int yy = gridDim.y / blocks;

    int blocksi = blockIdx.y / yy;
    int i1bl = blockIdx.y - yy * blocksi;
    int i1tid = threadIdx.y;
    int i1 = i1bl * BLOCK_SIZEY + i1tid;

    int xind = blockIdx.x / xx;
    int i2bl = blockIdx.x - xind * xx;
    int i2 = i2bl * BLOCK_SIZEX + threadIdx.x;

    int xlen = (c_n.x + XDIV - 1) / XDIV;
    int xstart = xind * xlen;
    int xend = MIN(xstart + xlen, c_n.x);

    b += c_sizea.x * c_sizea.y * c_sizea.z * blocksi;
    a += c_sizea.x * c_sizea.y * c_sizea.z * blocksi;

    b += i2 + i1 * c_sizea.z + xstart * c_sizea.y * c_sizea.z;
    a += i2 + i1 * c_sizea.z + xstart * c_sizea.y * c_sizea.z;

    double yz = (i1 + c_beg.y) * strength.y + (i2 + c_beg.z) * strength.z;
    for (int i0=xstart; i0 < xend; i0++) {
        if ((i2 < c_n.z) && (i1 < c_n.y)) {
            IADD(b[0], MULDT(((i0 + c_beg.x) * strength.x + yz), a[0]));
        }
        b += c_sizea.y * c_sizea.z;
        a += c_sizea.y * c_sizea.z;
    }
}

#ifndef GPU_USE_COMPLEX
#define GPU_USE_COMPLEX
#include "ext-potential.cpp"

extern "C"
PyObject* add_linear_field_gpu(PyObject *self, PyObject *args)
{
    void *a_gpu;
    void *b_gpu;
    PyObject *shape;
    PyArrayObject *c_ni, *c_begi, *c_vi, *strengthi;
    PyArray_Descr *type;
    int blocks=1;

    int3 hc_sizea, hc_n, hc_beg;
    double3 h_strength;

    if (!PyArg_ParseTuple(args, "nOOnOOOO", &a_gpu, &shape, &type,
                          &b_gpu, &c_ni, &c_begi, &c_vi, &strengthi))
        return NULL;

    int nd = PyTuple_Size(shape);
    if (nd == 4)
        blocks = (int) PyLong_AsLong(PyTuple_GetItem(shape, 0));

    hc_sizea.x = (int) PyLong_AsLong(PyTuple_GetItem(shape, nd-3+0));
    hc_sizea.y = (int) PyLong_AsLong(PyTuple_GetItem(shape, nd-3+1));
    hc_sizea.z = (int) PyLong_AsLong(PyTuple_GetItem(shape, nd-3+2));

    hc_n.x = ((long*) PyArray_DATA(c_ni))[0];
    hc_n.y = ((long*) PyArray_DATA(c_ni))[1];
    hc_n.z = ((long*) PyArray_DATA(c_ni))[2];

    hc_beg.x = ((long*) PyArray_DATA(c_begi))[0];
    hc_beg.y = ((long*) PyArray_DATA(c_begi))[1];
    hc_beg.z = ((long*) PyArray_DATA(c_begi))[2];

    h_strength.x = ((double*) PyArray_DATA(strengthi))[0]
                 * ((double*) PyArray_DATA(c_vi))[0+0*3];
    h_strength.y = ((double*) PyArray_DATA(strengthi))[1]
                 * ((double*) PyArray_DATA(c_vi))[1+1*3];
    h_strength.z = ((double*) PyArray_DATA(strengthi))[2]
                 * ((double*) PyArray_DATA(c_vi))[2+2*3];

    int gridy = blocks * ((hc_n.y + BLOCK_SIZEY - 1) / BLOCK_SIZEY);
    int gridx = XDIV * ((hc_n.z + BLOCK_SIZEX - 1) / BLOCK_SIZEX);

    dim3 dimBlock(BLOCK_SIZEX, BLOCK_SIZEY);
    dim3 dimGrid(gridx, gridy);

    if (type->type_num == NPY_DOUBLE) {
        gpuLaunchKernel(add_linear_field_kernel,
                        dimGrid, dimBlock, 0, 0,
                        (double*) a_gpu, hc_sizea,
                        (double*) b_gpu, hc_n, hc_beg,
                        h_strength, blocks);
    } else {
        gpuLaunchKernel(add_linear_field_kernelz,
                        dimGrid, dimBlock, 0, 0,
                        (gpuDoubleComplex*) a_gpu, hc_sizea,
                        (gpuDoubleComplex*) b_gpu, hc_n, hc_beg,
                        h_strength, blocks);
    }
    gpuCheckLastError();
    if (PyErr_Occurred())
        return NULL;
    else
        Py_RETURN_NONE;
}
#endif