File: collectives.pxd

package info (click to toggle)
libgpuarray 0.7.6-13
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 3,176 kB
  • sloc: ansic: 19,235; python: 4,591; makefile: 208; javascript: 71; sh: 15
file content (70 lines) | stat: -rw-r--r-- 3,003 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from pygpu.gpuarray cimport (gpucontext, GpuContext, _GpuArray, GpuArray)

cdef extern from "gpuarray/buffer_collectives.h":
    ctypedef struct gpucomm:
        pass

    enum gpucomm_reduce_ops:
        GA_SUM,
        GA_PROD,
        GA_MAX,
        GA_MIN

    enum: GA_COMM_ID_BYTES

    ctypedef struct gpucommCliqueId:
        char[GA_COMM_ID_BYTES] internal

    int gpucomm_new(gpucomm** comm, gpucontext* ctx,
                    gpucommCliqueId comm_id, int ndev, int rank)
    void gpucomm_free(gpucomm* comm)
    gpucontext* gpucomm_context(gpucomm* comm)
    int gpucomm_gen_clique_id(gpucontext* ctx, gpucommCliqueId* comm_id)
    int gpucomm_get_count(gpucomm* comm, int* gpucount)
    int gpucomm_get_rank(gpucomm* comm, int* rank)

cdef extern from "gpuarray/collectives.h" nogil:
    int GpuArray_reduce_from(const _GpuArray* src, int opcode,
                             int root, gpucomm* comm)
    int GpuArray_reduce(const _GpuArray* src, _GpuArray* dest,
                        int opcode, int root, gpucomm* comm)
    int GpuArray_all_reduce(const _GpuArray* src, _GpuArray* dest,
                            int opcode, gpucomm* comm)
    int GpuArray_reduce_scatter(const _GpuArray* src, _GpuArray* dest,
                                int opcode, gpucomm* comm)
    int GpuArray_broadcast(_GpuArray* array, int root, gpucomm* comm)
    int GpuArray_all_gather(const _GpuArray* src, _GpuArray* dest, gpucomm* comm)

cdef api class GpuCommCliqueId [type PyGpuCliqueIdType, object PyGpuCliqueIdObject]:
    cdef gpucommCliqueId c_comm_id
    cdef readonly GpuContext context


cdef api class GpuComm [type PyGpuCommType, object PyGpuCommObject]:
    cdef gpucomm* c
    cdef object __weakref__


cdef int to_reduce_opcode(op) except -1

cdef gpucontext* comm_context(GpuComm comm) except NULL
cdef int comm_generate_id(gpucontext* ctx, gpucommCliqueId* comm_id) except -1
cdef int comm_get_count(GpuComm comm, int* gpucount) except -1
cdef int comm_get_rank(GpuComm comm, int* gpurank) except -1
cdef int comm_reduce_from(GpuComm comm, GpuArray src, int opcode,
                          int root) except -1
cdef int comm_reduce(GpuComm comm, GpuArray src, GpuArray dest, int opcode,
                     int root) except -1
cdef int comm_all_reduce(GpuComm comm, GpuArray src, GpuArray dest,
                         int opcode) except -1
cdef int comm_reduce_scatter(GpuComm comm, GpuArray src, GpuArray dest,
                             int opcode) except -1
cdef int comm_broadcast(GpuComm comm, GpuArray arr, int root) except -1
cdef int comm_all_gather(GpuComm comm, GpuArray src, GpuArray dest) except -1

cdef api:
    GpuArray pygpu_make_reduced(GpuComm comm, GpuArray src, int opcode)
    GpuArray pygpu_make_all_reduced(GpuComm comm, GpuArray src, int opcode)
    GpuArray pygpu_make_reduce_scattered(GpuComm comm, GpuArray src, int opcode)
    GpuArray pygpu_make_all_gathered(GpuComm comm, GpuArray src,
                                     unsigned int nd_up)