File: gpu-complex.h

package info (click to toggle)
gpaw 25.7.0-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 18,888 kB
  • sloc: python: 174,804; ansic: 17,564; cpp: 5,668; sh: 972; csh: 139; makefile: 45
file content (133 lines) | stat: -rw-r--r-- 3,625 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#ifdef GPAW_CUDA
#include <cuComplex.h>
#endif
#ifdef GPAW_HIP
#include <hip/hip_complex.h>
#endif
#include "gpu-runtime.h"

/*
 * Reset the scalar-abstraction macros.
 *
 * The macro definitions that follow sit outside the GPU_COMPLEX_H include
 * guard, so they are processed on every inclusion of this header.  Dropping
 * any earlier definitions first lets a later inclusion (for instance with a
 * different GPU_USE_COMPLEX setting) define them again without macro
 * redefinition diagnostics.
 */

/* Type and symbol-name selection. */
#undef Tgpu
#undef Zgpu

/* Construction and component access. */
#undef MAKED
#undef REAL
#undef IMAG
#undef CONJ
#undef NEG

/* Multiplication. */
#undef MULTT
#undef MULTD
#undef MULDT

/* Addition. */
#undef ADD
#undef ADD3
#undef ADD4
#undef IADD

/*
 * Scalar abstraction layer.
 *
 * The same source text can be compiled for real (double) or complex
 * (gpuDoubleComplex) data depending on whether GPU_USE_COMPLEX is defined
 * at the point of inclusion:
 *
 *   Tgpu           element type
 *   Zgpu(f)        symbol name: f in real mode, f with a 'z' suffix in
 *                  complex mode
 *   MULTT(a,b)     Tgpu * Tgpu
 *   MULTD(a,b)     Tgpu * double
 *   MULDT(a,b)     double * Tgpu (operands swapped, then MULTD)
 *   ADD/ADD3/ADD4  sum of two / three / four Tgpu values
 *   IADD(a,b)      in-place a += b; use it as a statement followed by ';'
 *   MAKED(a)       build a Tgpu from a double (imaginary part 0)
 *   CONJ/REAL/IMAG/NEG  conjugate, real part, imaginary part, negation
 *
 * The gpuC* helpers and make_gpuDoubleComplex used by the complex variants
 * are not all defined in this header; they are presumably supplied by
 * gpu-runtime.h -- confirm when changing this layer.
 */
#ifndef GPU_USE_COMPLEX
/* Real build: everything maps onto plain double arithmetic. */
#  define Tgpu           double
#  define Zgpu(f)        f
#  define MULTT(a,b)     ((a) * (b))
#  define MULTD(a,b)     ((a) * (b))
#  define MULDT(a,b)     ((a) * (b))
#  define ADD(a,b)       ((a) + (b))
#  define ADD3(a,b,c)    ((a) + (b) + (c))
#  define ADD4(a,b,c,d)  ((a) + (b) + (c) + (d))
#  define IADD(a,b)      ((a) += (b))
#  define MAKED(a)       (a)
#  define CONJ(a)        (a)
#  define REAL(a)        (a)
#  define IMAG(a)        (0)
#  define NEG(a)         (-(a))
#else
/* Complex build: every operation is routed through the gpuC* helpers. */
#  define Tgpu           gpuDoubleComplex
#  define Zgpu(f)        f ## z
#  define MULTT(a,b)     gpuCmul((a), (b))
#  define MULTD(a,b)     gpuCmulD((a), (b))
#  define MULDT(b,a)     MULTD((a), (b))
#  define ADD(a,b)       gpuCadd((a), (b))
#  define ADD3(a,b,c)    gpuCadd3((a), (b), (c))
#  define ADD4(a,b,c,d)  gpuCadd4((a), (b), (c), (d))
/*
 * Wrapped in do { ... } while (0) so that "IADD(a, b);" is exactly one
 * statement; a bare { ... } block followed by ';' would detach a following
 * 'else' in an unbraced if/else.  The right-hand side is copied into a
 * temporary first so that it is evaluated only once.
 */
#  define IADD(a,b)      do { \
        const gpuDoubleComplex gpu_iadd_rhs_ = (b); \
        (a).x += gpuCreal(gpu_iadd_rhs_); \
        (a).y += gpuCimag(gpu_iadd_rhs_); \
    } while (0)
#  define MAKED(a)       make_gpuDoubleComplex((a), 0)
#  define CONJ(a)        gpuConj(a)
#  define REAL(a)        gpuCreal(a)
#  define IMAG(a)        gpuCimag(a)
#  define NEG(a)         gpuCneg(a)
#endif

#ifndef GPU_COMPLEX_H
#define GPU_COMPLEX_H

/*
 * Multiply a double-precision complex value by a real factor.
 *
 * x: complex operand
 * y: real factor applied to both the real and the imaginary part of x
 *
 * Returns a new complex value; x itself is passed by value and is not
 * modified.
 */
__host__ __device__ static __inline__ gpuDoubleComplex gpuCmulD(
        gpuDoubleComplex x, double y)
{
    const double scaled_re = gpuCreal(x) * y;
    const double scaled_im = gpuCimag(x) * y;

    return make_gpuDoubleComplex(scaled_re, scaled_im);
}

#ifdef __cplusplus
/*
 * complex * real for double precision (C++ only).
 * Forwards both operands unchanged to gpuCmulD and returns its result.
 */
__host__ __device__ static __inline__ gpuDoubleComplex operator*(
        gpuDoubleComplex x, double y)
{
    const gpuDoubleComplex scaled = gpuCmulD(x, y);

    return scaled;
}

/*
 * complex * real for single precision (C++ only).
 * Multiplies the .x and .y members of x by y and packs the two products
 * into a new value with make_gpuFloatComplex.
 */
__host__ __device__ static __inline__ gpuFloatComplex operator*(
        gpuFloatComplex x, float y)
{
    const float scaled_x = x.x * y;
    const float scaled_y = x.y * y;

    return make_gpuFloatComplex(scaled_x, scaled_y);
}

/*
 * complex * complex for double precision (C++ only).
 * Passes x and y, in that order, to gpuCmul and returns its result.
 */
__host__ __device__ static __inline__ gpuDoubleComplex operator*(
        gpuDoubleComplex x, gpuDoubleComplex y)
{
    const gpuDoubleComplex product = gpuCmul(x, y);

    return product;
}

/*
 * complex * complex for single precision (C++ only).
 * Passes x and y, in that order, to gpuCmulf and returns its result.
 */
__host__ __device__ static __inline__ gpuFloatComplex operator*(
        gpuFloatComplex x, gpuFloatComplex y)
{
    const gpuFloatComplex product = gpuCmulf(x, y);

    return product;
}

/*
 * complex - complex for double precision (C++ only).
 * Passes x and y, in that order, to gpuCsub and returns its result.
 */
__host__ __device__ static __inline__ gpuDoubleComplex operator-(
        gpuDoubleComplex x, gpuDoubleComplex y)
{
    const gpuDoubleComplex difference = gpuCsub(x, y);

    return difference;
}

/*
 * complex - complex for single precision (C++ only).
 * Passes x and y, in that order, to gpuCsubf and returns its result.
 */
__host__ __device__ static __inline__ gpuFloatComplex operator-(
        gpuFloatComplex x, gpuFloatComplex y)
{
    const gpuFloatComplex difference = gpuCsubf(x, y);

    return difference;
}

/*
 * complex + complex for double precision (C++ only).
 * Passes x and y, in that order, to gpuCadd and returns its result.
 */
__host__ __device__ static __inline__ gpuDoubleComplex operator+(
        gpuDoubleComplex x, gpuDoubleComplex y)
{
    const gpuDoubleComplex sum = gpuCadd(x, y);

    return sum;
}

/*
 * complex + complex for single precision (C++ only).
 * Passes x and y, in that order, to gpuCaddf and returns its result.
 */
__host__ __device__ static __inline__ gpuFloatComplex operator+(
        gpuFloatComplex x, gpuFloatComplex y)
{
    const gpuFloatComplex sum = gpuCaddf(x, y);

    return sum;
}
#endif

/*
 * Negate a double-precision complex value.
 *
 * Returns a new value whose real and imaginary parts are the negated real
 * and imaginary parts of x.
 */
__host__ __device__ static __inline__ gpuDoubleComplex gpuCneg(
        gpuDoubleComplex x)
{
    const double negated_re = -gpuCreal(x);
    const double negated_im = -gpuCimag(x);

    return make_gpuDoubleComplex(negated_re, negated_im);
}

/*
 * Sum of three double-precision complex values.
 *
 * Real and imaginary parts are accumulated separately, adding x, then y,
 * then z, i.e. (x + y) + z component-wise.
 */
__host__ __device__ static __inline__ gpuDoubleComplex gpuCadd3(
        gpuDoubleComplex x, gpuDoubleComplex y, gpuDoubleComplex z)
{
    double sum_re = gpuCreal(x);
    double sum_im = gpuCimag(x);

    sum_re += gpuCreal(y);
    sum_im += gpuCimag(y);

    sum_re += gpuCreal(z);
    sum_im += gpuCimag(z);

    return make_gpuDoubleComplex(sum_re, sum_im);
}

/*
 * Sum of four double-precision complex values.
 *
 * Real and imaginary parts are accumulated separately, adding x, y, z and
 * w in that order, i.e. ((x + y) + z) + w component-wise.
 */
__host__ __device__ static __inline__ gpuDoubleComplex gpuCadd4(
        gpuDoubleComplex x, gpuDoubleComplex y, gpuDoubleComplex z,
        gpuDoubleComplex w)
{
    double sum_re = gpuCreal(x);
    double sum_im = gpuCimag(x);

    sum_re += gpuCreal(y);
    sum_im += gpuCimag(y);

    sum_re += gpuCreal(z);
    sum_im += gpuCimag(z);

    sum_re += gpuCreal(w);
    sum_im += gpuCimag(w);

    return make_gpuDoubleComplex(sum_re, sum_im);
}

#endif