File: blas.pyx

package info (click to toggle)
libgpuarray 0.7.6-13
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 3,176 kB
  • sloc: ansic: 19,235; python: 4,591; makefile: 208; javascript: 71; sh: 15
file content (211 lines) | stat: -rw-r--r-- 7,235 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
from pygpu.gpuarray import GpuArrayException
from pygpu.gpuarray cimport (_GpuArray, GpuArray, GA_NO_ERROR, GpuArray_error,
                             pygpu_copy, pygpu_empty, pygpu_zeros,
                             GA_ANY_ORDER, GA_F_ORDER, GpuArray_ISONESEGMENT)

# Transpose selector enum declared by the C header gpuarray/buffer_blas.h.
# In this file only cb_no_trans and cb_trans are ever chosen (from the
# Python-level trans_a / trans_b flags); cb_conj_trans is declared but not
# used here.
# NOTE(review): the enumerator names suggest "no transpose", "transpose" and
# "conjugate transpose" -- confirm against the C header.
cdef extern from "gpuarray/buffer_blas.h":
    ctypedef enum cb_transpose:
        cb_no_trans,
        cb_trans,
        cb_conj_trans

# Prototypes of the C routines from gpuarray/blas.h that the pygpu_blas_*
# wrappers in this file call.  Each returns an int status which the wrappers
# compare against GA_NO_ERROR.  The last argument of every routine is a
# `nocopy` flag; the Python-level functions in this file always pass 0.
# NOTE(review): the exact meaning of `nocopy` is defined by the C library and
# is not visible from this file -- confirm in the libgpuarray headers.
cdef extern from "gpuarray/blas.h":
    # Takes two inputs X, Y and an output Z (presumably the dot product).
    int GpuArray_rdot(_GpuArray *X, _GpuArray *Y, _GpuArray *Z, int nocopy)
    # Matrix A, vector X, in/out vector Y, scalars alpha and beta.
    int GpuArray_rgemv(cb_transpose transA, double alpha, _GpuArray *A,
                       _GpuArray *X, double beta, _GpuArray *Y, int nocopy)
    # Matrices A, B, in/out matrix C, scalars alpha and beta.
    int GpuArray_rgemm(cb_transpose transA, cb_transpose transB,
                       double alpha, _GpuArray *A, _GpuArray *B,
                       double beta, _GpuArray *C, int nocopy)
    # Vectors X, Y, in/out matrix A, scalar alpha.
    int GpuArray_rger(double alpha, _GpuArray *X, _GpuArray *Y, _GpuArray *A,
                      int nocopy)
    # Same argument layout as GpuArray_rgemm; the Python wrapper below
    # requires A and B to be 3-dimensional before calling it.
    int GpuArray_rgemmBatch_3d(
        cb_transpose transA, cb_transpose transB, double alpha,
        _GpuArray *A, _GpuArray *B, double beta, _GpuArray *C, int nocopy)

cdef api int pygpu_blas_rdot(GpuArray X, GpuArray Y, GpuArray Z, bint nocopy) except -1:
    """Invoke GpuArray_rdot on the arrays wrapped by X, Y and Z.

    Returns 0 when the C call reports GA_NO_ERROR.  Any other status is
    turned into a GpuArrayException whose arguments are the message obtained
    from GpuArray_error (looked up through X) and the raw status code.
    """
    cdef int status = GpuArray_rdot(&X.ga, &Y.ga, &Z.ga, nocopy)
    if status == GA_NO_ERROR:
        return 0
    raise GpuArrayException(GpuArray_error(&X.ga, status), status)

cdef api int pygpu_blas_rgemv(cb_transpose transA, double alpha, GpuArray A,
                              GpuArray X, double beta, GpuArray Y,
                              bint nocopy) except -1:
    """Invoke GpuArray_rgemv on the arrays wrapped by A, X and Y.

    Returns 0 when the C call reports GA_NO_ERROR.  Any other status is
    turned into a GpuArrayException whose arguments are the message obtained
    from GpuArray_error (looked up through A) and the raw status code.
    """
    cdef int status = GpuArray_rgemv(transA, alpha, &A.ga, &X.ga,
                                     beta, &Y.ga, nocopy)
    if status == GA_NO_ERROR:
        return 0
    raise GpuArrayException(GpuArray_error(&A.ga, status), status)

cdef api int pygpu_blas_rgemm(cb_transpose transA, cb_transpose transB,
                              double alpha, GpuArray A, GpuArray B,
                              double beta, GpuArray C, bint nocopy) except -1:
    """Invoke GpuArray_rgemm on the arrays wrapped by A, B and C.

    Returns 0 when the C call reports GA_NO_ERROR.  Any other status is
    turned into a GpuArrayException whose arguments are the message obtained
    from GpuArray_error (looked up through A) and the raw status code.
    """
    cdef int status = GpuArray_rgemm(transA, transB, alpha, &A.ga, &B.ga,
                                     beta, &C.ga, nocopy)
    if status == GA_NO_ERROR:
        return 0
    raise GpuArrayException(GpuArray_error(&A.ga, status), status)

cdef api int pygpu_blas_rger(double alpha, GpuArray X, GpuArray Y, GpuArray A,
                             bint nocopy) except -1:
    """Invoke GpuArray_rger on the arrays wrapped by X, Y and A.

    Returns 0 when the C call reports GA_NO_ERROR.  Any other status is
    turned into a GpuArrayException whose arguments are the message obtained
    from GpuArray_error (looked up through X) and the raw status code.
    """
    cdef int status = GpuArray_rger(alpha, &X.ga, &Y.ga, &A.ga, nocopy)
    if status == GA_NO_ERROR:
        return 0
    raise GpuArrayException(GpuArray_error(&X.ga, status), status)

cdef api int pygpu_blas_rgemmBatch_3d(cb_transpose transA, cb_transpose transB,
                                      double alpha, GpuArray A, GpuArray B,
                                      double beta, GpuArray C, bint nocopy) except -1:
    """Invoke GpuArray_rgemmBatch_3d on the arrays wrapped by A, B and C.

    Returns 0 when the C call reports GA_NO_ERROR.  Any other status is
    turned into a GpuArrayException whose arguments are the message obtained
    from GpuArray_error (looked up through A) and the raw status code.
    """
    cdef int status = GpuArray_rgemmBatch_3d(transA, transB, alpha,
                                             &A.ga, &B.ga, beta, &C.ga,
                                             nocopy)
    if status == GA_NO_ERROR:
        return 0
    raise GpuArrayException(GpuArray_error(&A.ga, status), status)


def dot(GpuArray X, GpuArray Y, GpuArray Z=None, overwrite_z=False):
    """dot(X, Y, Z=None, overwrite_z=False)

    Run the rdot BLAS routine on `X` and `Y`, storing the result in `Z`
    (presumably the dot product of the two arrays; the arithmetic is done
    by the underlying GpuArray_rdot call).

    :param X: first input array
    :param Y: second input array
    :param Z: optional result array.  When omitted, a new 0-dimensional
              array with the typecode and context of `X` is allocated.
    :param overwrite_z: when false and `Z` was supplied, the routine works
                        on a copy of `Z` so the caller's array is untouched.
    :returns: the array that received the result (`Z`, its copy, or the
              freshly allocated array)
    :raises GpuArrayException: if the underlying call reports an error
    """
    if Z is None:
        # Nothing of the caller's to protect: allocate the 0-d output.
        Z = pygpu_empty(0, NULL, X.typecode, GA_ANY_ORDER, X.context, None)
    elif not overwrite_z:
        # Caller wants their Z preserved, so operate on a private copy.
        Z = pygpu_copy(Z, GA_ANY_ORDER)

    pygpu_blas_rdot(X, Y, Z, 0)
    return Z

def gemv(double alpha, GpuArray A, GpuArray X, double beta=0.0,
         GpuArray Y=None, trans_a=False, overwrite_y=False):
    """gemv(alpha, A, X, beta=0.0, Y=None, trans_a=False, overwrite_y=False)

    Run the rgemv BLAS routine (presumably Y = alpha * op(A) . X + beta * Y,
    the standard gemv contract; the arithmetic is done by GpuArray_rgemv).

    :param alpha: scalar forwarded to the routine
    :param A: 2-dimensional input array
    :param X: input array
    :param beta: scalar forwarded to the routine; must be 0 if `Y` is omitted
    :param Y: optional in/out array.  When omitted, a zero-filled
              1-dimensional array is allocated whose length is
              ``A.shape[1]`` if `trans_a` is true, else ``A.shape[0]``.
    :param trans_a: truthy to request the transposed operation on `A`
    :param overwrite_y: when false and `Y` was supplied, the routine works
                        on a copy of `Y` so the caller's array is untouched.
    :returns: the array that received the result
    :raises TypeError: if `A` is not 2-dimensional
    :raises ValueError: if `Y` is omitted while `beta` is non-zero
    :raises GpuArrayException: if the underlying call reports an error
    """
    cdef cb_transpose op_a
    cdef size_t out_len

    op_a = cb_trans if trans_a else cb_no_trans

    if A.ga.nd != 2:
        raise TypeError("A is not a matrix")

    # Length of the result: columns of A when transposed, rows otherwise.
    out_len = A.ga.dimensions[1] if op_a == cb_trans else A.ga.dimensions[0]

    if Y is None:
        if beta != 0.0:
            raise ValueError("Y not provided and beta != 0")
        Y = pygpu_zeros(1, &out_len, A.ga.typecode, GA_ANY_ORDER, A.context,
                        None)
    elif not overwrite_y:
        # Caller wants their Y preserved, so operate on a private copy.
        Y = pygpu_copy(Y, GA_ANY_ORDER)

    pygpu_blas_rgemv(op_a, alpha, A, X, beta, Y, 0)
    return Y

def gemm(double alpha, GpuArray A, GpuArray B, double beta, GpuArray C=None,
         trans_a=False, trans_b=False, overwrite_c=False):
    """gemm(alpha, A, B, beta, C=None, trans_a=False, trans_b=False, overwrite_c=False)

    Run the rgemm BLAS routine (presumably
    C = alpha * op(A) . op(B) + beta * C, the standard gemm contract; the
    arithmetic is done by GpuArray_rgemm).

    :param alpha: scalar forwarded to the routine
    :param A: 2-dimensional input array
    :param B: 2-dimensional input array
    :param beta: scalar forwarded to the routine; must be 0 if `C` is omitted
    :param C: optional in/out array.  When omitted, an uninitialised
              2-dimensional array is allocated; its first extent comes from
              `A` (axis 1 if `trans_a`, else axis 0) and its second from `B`
              (axis 0 if `trans_b`, else axis 1).
    :param trans_a: truthy to request the transposed operation on `A`
    :param trans_b: truthy to request the transposed operation on `B`
    :param overwrite_c: when false and `C` was supplied, the routine works
                        on a copy of `C` so the caller's array is untouched.
    :returns: the array that received the result
    :raises TypeError: if `A` or `B` is not 2-dimensional
    :raises ValueError: if `C` is omitted while `beta` is non-zero
    :raises GpuArrayException: if the underlying call reports an error
    """
    cdef cb_transpose op_a
    cdef cb_transpose op_b
    cdef size_t[2] out_shape

    op_a = cb_trans if trans_a else cb_no_trans
    op_b = cb_trans if trans_b else cb_no_trans

    if A.ga.nd != 2:
        raise TypeError("A is not a matrix")
    if B.ga.nd != 2:
        raise TypeError("B is not a matrix")

    # Result extents, taking the requested transpositions into account.
    out_shape[0] = A.ga.dimensions[1] if op_a == cb_trans else A.ga.dimensions[0]
    out_shape[1] = B.ga.dimensions[0] if op_b == cb_trans else B.ga.dimensions[1]

    if C is None:
        if beta != 0.0:
            raise ValueError("C not provided and beta != 0")
        C = pygpu_empty(2, out_shape, A.ga.typecode, GA_ANY_ORDER, A.context,
                        None)
    elif not overwrite_c:
        # Caller wants their C preserved, so operate on a private copy.
        C = pygpu_copy(C, GA_ANY_ORDER)

    pygpu_blas_rgemm(op_a, op_b, alpha, A, B, beta, C, 0)
    return C

def ger(double alpha, GpuArray X, GpuArray Y, GpuArray A=None,
        overwrite_a=False):
    """ger(alpha, X, Y, A=None, overwrite_a=False)

    Run the rger BLAS routine (presumably the rank-1 update
    A = alpha * X . Y^T + A, the standard ger contract; the arithmetic is
    done by GpuArray_rger).

    :param alpha: scalar forwarded to the routine
    :param X: input array; its first extent gives the first extent of the
              result when `A` is omitted
    :param Y: input array; its first extent gives the second extent of the
              result when `A` is omitted
    :param A: optional in/out array.  When omitted, a zero-filled
              2-dimensional array with the typecode and context of `X` is
              allocated.
    :param overwrite_a: when false and `A` was supplied, the routine works
                        on a copy of `A` so the caller's array is untouched.
    :returns: the array that received the result
    :raises TypeError: if `A` is omitted and `X` or `Y` has no dimensions
    :raises GpuArrayException: if the underlying call reports an error
    """
    cdef size_t[2] Ashp

    if A is None:
        # dimensions[0] is only meaningful when the array has at least one
        # dimension; reading it on a 0-d array would be an out-of-bounds
        # access and would feed a garbage size to the allocation below.
        # Other ranks are deliberately left for the underlying rger call to
        # validate so its error reporting is unchanged.
        if X.ga.nd == 0:
            raise TypeError("X is not a vector")
        if Y.ga.nd == 0:
            raise TypeError("Y is not a vector")
        Ashp[0] = X.ga.dimensions[0]
        Ashp[1] = Y.ga.dimensions[0]
        A = pygpu_zeros(2, Ashp, X.ga.typecode, GA_ANY_ORDER, X.context, None)
        # The fresh array belongs to us, so no protective copy is needed.
        overwrite_a = True

    if not overwrite_a:
        # Caller wants their A preserved, so operate on a private copy.
        A = pygpu_copy(A, GA_ANY_ORDER)
    pygpu_blas_rger(alpha, X, Y, A, 0)

    return A

def gemmBatch_3d(double alpha, GpuArray A, GpuArray B,
                 double beta, GpuArray C=None,
                 trans_a=False, trans_b=False, overwrite_c=False):
    """gemmBatch_3d(alpha, A, B, beta, C=None, trans_a=False, trans_b=False, overwrite_c=False)

    Run the batched rgemm routine on 3-dimensional arrays (presumably one
    gemm per index along axis 0; the arithmetic is done by
    GpuArray_rgemmBatch_3d).

    :param alpha: scalar forwarded to the routine
    :param A: 3-dimensional input array
    :param B: 3-dimensional input array
    :param beta: scalar forwarded to the routine; must be 0 if `C` is omitted
    :param C: optional in/out array.  When omitted, an uninitialised
              3-dimensional array is allocated: extent 0 is taken from axis 0
              of `A`, extent 1 from `A` (axis 2 if `trans_a`, else axis 1)
              and extent 2 from `B` (axis 1 if `trans_b`, else axis 2).
    :param trans_a: truthy to request the transposed operation on `A`
    :param trans_b: truthy to request the transposed operation on `B`
    :param overwrite_c: when false and `C` was supplied, the routine works
                        on a copy of `C` so the caller's array is untouched.
    :returns: the array that received the result
    :raises TypeError: if `A` or `B` is not 3-dimensional
    :raises ValueError: if `C` is omitted while `beta` is non-zero
    :raises GpuArrayException: if the underlying call reports an error
    """
    cdef cb_transpose op_a
    cdef cb_transpose op_b
    cdef size_t[3] out_shape

    op_a = cb_trans if trans_a else cb_no_trans
    op_b = cb_trans if trans_b else cb_no_trans

    if A.ga.nd != 3:
        raise TypeError("A is not a batch of matrices")
    if B.ga.nd != 3:
        raise TypeError("B is not a batch of matrices")

    # Axis 0 is carried over from A; the two trailing extents depend on the
    # requested transpositions.
    out_shape[0] = A.ga.dimensions[0]
    out_shape[1] = A.ga.dimensions[2] if op_a == cb_trans else A.ga.dimensions[1]
    out_shape[2] = B.ga.dimensions[1] if op_b == cb_trans else B.ga.dimensions[2]

    if C is None:
        if beta != 0.0:
            raise ValueError("C not provided and beta != 0")
        C = pygpu_empty(3, out_shape, A.ga.typecode, GA_ANY_ORDER, A.context,
                        None)
        # The fresh array belongs to us, so no protective copy is needed.
        overwrite_c = True

    if not overwrite_c:
        # Caller wants their C preserved, so operate on a private copy.
        C = pygpu_copy(C, GA_ANY_ORDER)

    pygpu_blas_rgemmBatch_3d(op_a, op_b, alpha, A, B, beta, C, 0)
    return C