File: use_numba.py

Package: mpi4py 4.1.0-3
# Demonstrate how to work with Python GPU arrays using CUDA-aware MPI.
# A GPU array is allocated and manipulated through Numba, which is
# compliant with the __cuda_array_interface__ standard.
#
# Run this script using the following command:
# mpiexec -n 2 python use_numba.py

import numpy
from numba import cuda

from mpi4py import MPI

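# Not part of the original demo: a minimal sanity check illustrating the
# __cuda_array_interface__ attribute mentioned above, which mpi4py inspects
# to obtain the device pointer of a GPU buffer ("_probe" is an illustrative
# name introduced here).
_probe = cuda.device_array((1,), dtype="f")
assert hasattr(_probe, "__cuda_array_interface__")
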

@cuda.jit()
def add_const(arr, value):
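    # global index of this thread within the 1D grid of blocks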
    x = cuda.grid(1)
    if x < arr.size:
        arr[x] += value


comm = MPI.COMM_WORLD
size = comm.Get_size()
rank = comm.Get_rank()

# Send-Recv
if rank == 0:
    buf = cuda.device_array((20,), dtype="f")
    buf[:] = range(20)
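    # 1D launch configuration: enough 32-thread blocks to cover all 20 elements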
    block = 32
    grid = (buf.size + block - 1) // block
    add_const[grid, block](buf, 100)
    # always make sure the GPU buffer is ready before any MPI operation
    cuda.default_stream().synchronize()
    comm.Send(buf, dest=1, tag=77)
else:
    buf = cuda.device_array((20,), dtype="f")
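    # the freshly allocated receive buffer must also be ready before MPI writes into it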
    cuda.default_stream().synchronize()
    comm.Recv(buf, source=0, tag=77)
    buf = buf.copy_to_host()
    assert numpy.allclose(buf, 100 + numpy.arange(20, dtype="f"))
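
# Allreduce (a hedged sketch, not in the original demo): the same CUDA-aware
# pattern applies to collectives; every rank contributes a device buffer and
# receives the element-wise sum. "sendbuf" and "recvbuf" are illustrative names.
sendbuf = cuda.to_device(numpy.arange(20, dtype="f"))
recvbuf = cuda.device_array_like(sendbuf)
# ensure the host-to-device copy has finished before MPI reads the buffer
cuda.default_stream().synchronize()
comm.Allreduce(sendbuf, recvbuf, op=MPI.SUM)
assert numpy.allclose(recvbuf.copy_to_host(), size * numpy.arange(20, dtype="f"))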