1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107
|
"""Shows the use of a raw opencl Kernel but written using pure Python. It
makes use of local memory allocated on the host.
Note that the local memory is allocated as a multiple of workgroup size times
the size of the data type automatically.
This is a raw opencl kernel so will not work on Cython!
"""
import numpy as np
from math import pi
import time
from compyle.api import annotate, declare, get_config, wrap
from compyle.low_level import (Kernel, LocalMem, local_barrier,
LID_0, LDIM_0, GDIM_0)
@annotate(double='xi, yi, xj, yj, gamma', result='doublep')
def point_vortex(xi, yi, xj, yj, gamma, result):
xij = xi - xj
yij = yi - yj
r2ij = xij*xij + yij*yij
if r2ij < 1e-14:
result[0] = 0.0
result[1] = 0.0
else:
tmp = gamma/(2.0*pi*r2ij)
result[0] = -tmp*yij
result[1] = tmp*xij
@annotate(nv='int', gdoublep='x, y, gamma, u, v', ldoublep='xc, yc, gc')
def velocity(x, y, gamma, u, v, xc, yc, gc, nv):
i, gid, nb = declare('int', 3)
j, ti, nt, jb = declare('int', 4)
ti = LID_0
nt = LDIM_0
gid = GID_0
i = gid*nt + ti
idx = declare('int')
tmp = declare('matrix(2)')
uj, vj = declare('double', 2)
nb = GDIM_0
if i < nv:
xi = x[i]
yi = y[i]
uj = 0.0
vj = 0.0
for jb in range(nb):
idx = jb*nt + ti
if idx < nv:
xc[ti] = x[idx]
yc[ti] = y[idx]
gc[ti] = gamma[idx]
else:
gc[ti] = 0.0
local_barrier()
if i < nv:
for j in range(nt):
point_vortex(xi, yi, xc[j], yc[j], gc[j], tmp)
uj += tmp[0]
vj += tmp[1]
local_barrier()
if i < nv:
u[i] = uj
v[i] = vj
def make_vortices(nv, backend):
x = np.linspace(-1, 1, nv)
y = x.copy()
gamma = np.ones(nv)
u = np.zeros_like(x)
v = np.zeros_like(x)
x, y, gamma, u, v = wrap(x, y, gamma, u, v, backend=backend)
xc, yc, gc = (LocalMem(1, backend), LocalMem(1, backend),
LocalMem(1, backend))
return x, y, gamma, u, v, xc, yc, gc, nv
def run(nv, backend):
e = Kernel(velocity, backend=backend)
args = make_vortices(nv, backend)
t1 = time.time()
gs = ((nv + 128 - 1)//128)*128
e(*args, global_size=(gs,))
print(time.time() - t1)
u = args[3]
u.pull()
print(u.data)
return e, args
if __name__ == '__main__':
from compyle.utils import ArgumentParser
p = ArgumentParser()
p.add_argument('-n', action='store', type=int, dest='n',
default=10000, help='Number of particles.')
o = p.parse_args()
assert o.backend in ['opencl', 'cuda'], ("Only OpenCL/CUDA backend is "
"supported.")
run(o.n, o.backend)
|