# -*- encoding=utf-8 -*-
#
# Possible executions of this script
### Parallel:
# mpiexec -n 4 yade-mpi -n -x testMPIxN.py
# mpiexec -n 4 yade-mpi -n -x testMPIxN.py N M # (n-1) subdomains with NxM spheres each
### Monolithic:
# yade-mpi -n -x testMPIxN.py
# yade-mpi -n -x testMPIxN.py N M
# yade-mpi -n -x testMPIxN.py N M n
# in last line the optional argument 'n' has the same meaning as with mpiexec, i.e. total number of bodies will be (n-1)*N*M but on single core
### Openmp:
# yade-mpi -j4 -n -x testMPIxN.py N M n
### Nested MPI * OpenMP
# needs testing...
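# A hybrid run might look like the line below (untested, as noted above; it assumes that
# yade-mpi's -j flag sets the OpenMP thread count of each MPI worker, as in the OpenMP case):
# mpiexec -n 4 yade-mpi -j2 -n -x testMPIxN.py 50 50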
'''
This script simulates spheres falling on a plate using a distributed-memory approach based on the mpy module.
The number of spheres assigned to one particular process (aka 'worker') is N*M; they form a regular pattern.
The master process (rank=0) has no spheres assigned; it is in charge of getting the total force on the plate.
The number of subdomains depends on the argument 'n' of mpiexec. Since rank=0 is not assigned a regular subdomain, the total number of spheres is (n-1)*N*M.
'''
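# Hypothetical helper (illustration only, not used below): the sphere count described in the
# docstring, as a function of mpiexec's 'n' and the per-worker grid size N x M.
def expectedSphereCount(n, N, M):
    return (n - 1) * N * M  # e.g. expectedSphereCount(4, 50, 50) == 7500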
NSTEPS = 1000  #turn it >0 to see time iterations, else only initialization
#NSTEPS=50  #turn it >0 to see time iterations, else only initialization
N = 50
M = 50
#(columns, rows) per thread
if ("-ms" in sys.argv):
sys.argv.remove("-ms")
mergeSplit = True
else:
mergeSplit = False
if ("-bc" in sys.argv):
sys.argv.remove("-bc")
bodyCopy = True
else:
bodyCopy = False
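# Example invocations with these optional switches (a sketch; the switches are stripped from
# sys.argv before N and M are read, so they may appear before or after the size arguments):
# mpiexec -n 4 yade-mpi -n -x testMPIxN.py 50 50 -ms   # enable MERGE_SPLIT
# mpiexec -n 4 yade-mpi -n -x testMPIxN.py 50 50 -bc   # enable mirror-body copies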
#################
# Check MPI world
# This is to know if it was run with or without mpiexec (see preamble of this script)
import os
#import yade's mpi module
from yade import mpy as mp
rank, numThreads = mp.initialize() #fixme: prefix and prog should be accessed directly from mpy.py, but how ?
if rank is None:  #non-mpi execution, numThreads will still be used as multiplier for the problem size (2 => multiplier is 1)
    numThreads = 2 if len(sys.argv) < 4 else (int(sys.argv[3]))
    print("numThreads", numThreads)
if len(sys.argv) > 1:  #we then assume N,M are provided as 1st and 2nd cmd line arguments
    N = int(sys.argv[1])
    M = int(sys.argv[2])
############ Build a scene (we use Yade's pre-filled scene) ############
# sequential grain colors
import colorsys
colorScale = (Vector3(colorsys.hsv_to_rgb(value * 1.0 / numThreads, 1, 1)) for value in range(0, numThreads))
#add spheres
for sd in range(0, numThreads - 1):
    col = next(colorScale)
    ids = []
    for i in range(N):  #(numThreads-1) x N x M spheres, one thread is for master and will keep only the wall, others handle spheres
        for j in range(M):
            id = O.bodies.append(
                    sphere((sd * N + i + j / 30., j, 0), 0.500, color=col)
            )  #a small shift in x-positions of the rows to break symmetry
            ids.append(id)
    if rank is not None:  #assigning subdomain!=0 in single thread would freeze the particles (Newton skips them)
        for id in ids:
            O.bodies[id].subdomain = sd + 1
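# Positions sketch (derived from the formula above): e.g. with N = M = 2, worker 1 places spheres
# near (0,0), (~0.03,1), (1,0), (~1.03,1), and worker 2 the same pattern shifted by N = 2 along x.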
WALL_ID = O.bodies.append(box(center=(numThreads * N * 0.5, -0.5, 0), extents=(2 * numThreads * N, 0, 2), fixed=True))
collider.verletDist = 0.5
newton.gravity = (0, -10, 0) #else nothing would move
tsIdx = O.engines.index(timeStepper) #remove the automatic timestepper. Very important: we don't want subdomains to use many different timesteps...
O.engines = O.engines[0:tsIdx] + O.engines[tsIdx + 1:]
O.dt = 0.001 #this very small timestep will make it possible to run 2000 iter without merging
#O.dt=0.1*PWaveTimeStep() #very important, we don't want subdomains to use many different timesteps...
######### RUN ##########
def collectTiming():
    created = os.path.isfile("collect.dat")
    f = open('collect.dat', 'a')
    if not created:
        f.write("numThreads mpi omp Nspheres N M runtime \n")
    from yade import timing
    #str() around the env variables avoids a TypeError when they are unset (getenv returns None)
    f.write(
            str(numThreads) + " " + str(os.getenv('OMPI_COMM_WORLD_SIZE')) + " " + str(os.getenv('OMP_NUM_THREADS')) + " " + str(N * M * (numThreads - 1)) +
            " " + str(N) + " " + str(M) + " " + str(timing.runtime()) + "\n"
    )
    f.close()
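# With the defaults above and 'mpiexec -n 4', the appended line would look roughly like
#   4 4 None 7500 50 50 <runtime>
# (assuming OMP_NUM_THREADS is unset; the runtime column depends on the machine).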
if rank is None:  ####### Single-core ######
    O.timingEnabled = True
    O.run(NSTEPS, True)
    #print("num bodies:", len(O.bodies))
    from yade import timing
    timing.stats()
    collectTiming()
    print("num. bodies:", len([b for b in O.bodies]), len(O.bodies))
    print("Total force on floor=", O.forces.f(WALL_ID)[1])
else:  ####### MPI ######
    # customize
    mp.ACCUMULATE_FORCES = True  #trigger force summation on master's body (here WALL_ID)
    mp.VERBOSE_OUTPUT = False
    mp.ERASE_REMOTE = False  #erase bodies not interacting with a given subdomain?
    mp.OPTIMIZE_COM = True  #L1-optimization: pass a list of doubles instead of a list of states
    mp.USE_CPP_MPI = True and mp.OPTIMIZE_COM  #L2-optimization: bypass python by passing a vector<double> at the c++ level
    mp.MERGE_SPLIT = mergeSplit
    mp.COPY_MIRROR_BODIES_WHEN_COLLIDE = bodyCopy and not mergeSplit

    mp.mpirun(NSTEPS)
    print("num. bodies:", len([b for b in O.bodies]), len(O.bodies))
    if rank == 0:
        mp.mprint("Total force on floor=" + str(O.forces.f(WALL_ID)[1]))
        collectTiming()
    else:
        mp.mprint("Partial force on floor=" + str(O.forces.f(WALL_ID)[1]))
    mp.mergeScene()  #gather all bodies on master so that the scene saved below is really the merged one
    if rank == 0:
        O.save('mergedScene.yade')
    #mp.MPI.Finalize()
    #exit()
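# A possible follow-up (sketch, not part of this script): the merged scene can be reloaded
# later in a plain single-core yade session to inspect the final state, e.g.
# O.load('mergedScene.yade')
# print("reloaded bodies:", len(O.bodies))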