File: testMPI_2D_interactive.py

# -*- encoding=utf-8 -*-

#
# Possible executions of this script
### Parallel:
# mpiexec -n 4 yade-mpi -n -x testMPI_2D_interactive.py
# mpiexec -n 4 yade-mpi -n -x testMPI_2D_interactive.py N M # (n-1) subdomains with NxM spheres each
### Monolithic:
# yade-mpi -n -x testMPI_2D_interactive.py
# yade-mpi -n -x testMPI_2D_interactive.py N M
# yade-mpi -n -x testMPI_2D_interactive.py N M n
# In the last line the optional argument 'n' has the same meaning as with mpiexec, i.e. the total number of bodies will be (n-1)*N*M, but on a single core
### OpenMP:
# yade-mpi -j4 -n -x testMPI_2D_interactive.py N M n
### Nested MPI * OpenMP:
# needs testing...
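# An illustrative (untested) combination of the two modes above might look like:
# mpiexec -n 3 yade-mpi -j2 -n -x testMPI_2D_interactive.py N M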
'''
This script simulates spheres falling on a plate using a distributed-memory approach based on the mpy module.
The number of spheres assigned to one particular process (aka 'worker') is N*M; they form a regular pattern.
The master process (rank=0) has no spheres assigned; it is in charge of getting the total force on the plate.
The number of subdomains depends on the argument 'n' of mpiexec. Since rank=0 is not assigned a regular subdomain, the total number of spheres is (n-1)*N*M.

'''

NSTEPS = 1000  #set it >0 to see time iterations, else only initialization TODO!HACK
#NSTEPS=50 #set it >0 to see time iterations, else only initialization
N = 50
M = 50
#(columns, rows) per thread

if ("-ms" in sys.argv):
	sys.argv.remove("-ms")
	mergeSplit = True
else:
	mergeSplit = False

if ("-bc" in sys.argv):
	sys.argv.remove("-bc")
	bodyCopy = True
else:
	bodyCopy = False
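# Note: these two switches map to mpy options set further below: '-ms' enables mp.MERGE_SPLIT,
# '-bc' enables mp.COPY_MIRROR_BODIES_WHEN_COLLIDE (only effective when '-ms' is not given).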

#################
# Check MPI world
# This tells us whether the script was run with or without mpiexec (see the preamble of this script)
import os
#import yade's mpi module
from yade import mpy as mp

rank, numThreads = mp.initialize()  #fixme: prefix and prog should be accessed directly from mpy.py, but how?
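# mp.initialize() returns this process's rank and the number of MPI processes;
# rank is None when the script was not started through mpiexec (handled just below)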

if rank is None:  #non-mpi execution, numThreads will still be used as a multiplier for the problem size (2 => multiplier is 1)
	numThreads = 2 if len(sys.argv) < 4 else (int(sys.argv[3]))
	print("numThreads", numThreads)

if len(sys.argv) > 1:  #we then assume N,M are provided as 1st and 2nd cmd line arguments
	N = int(sys.argv[1])
	M = int(sys.argv[2])
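# Optional sanity echo (illustrative sketch, not part of the original script):
#print("Run parameters: N =", N, "M =", M, "numThreads =", numThreads, "->", N * M * (numThreads - 1), "spheres")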

############  Build a scene (we use Yade's pre-filled scene)  ############

# sequential grain colors
import colorsys

colorScale = (Vector3(colorsys.hsv_to_rgb(value * 1.0 / numThreads, 1, 1)) for value in range(0, numThreads))

#add spheres
for sd in range(0, numThreads - 1):
	col = next(colorScale)
	ids = []
	for i in range(N):  #(numThreads-1) x N x M spheres, one thread is for master and will keep only the wall, others handle spheres
		for j in range(M):
			id = O.bodies.append(
			        sphere((sd * N + i + j / 30., j, 0), 0.500, color=col)
			)  #a small shift in x-positions of the rows to break symmetry
			ids.append(id)
	if rank is not None:  # assigning subdomain!=0 in single thread would freeze the particles (Newton skips them)
		for id in ids:
			O.bodies[id].subdomain = sd + 1

WALL_ID = O.bodies.append(box(center=(numThreads * N * 0.5, -0.5, 0), extents=(2 * numThreads * N, 0, 2), fixed=True))

collider.verletDist = 0.5
newton.gravity = (0, -10, 0)  #else nothing would move
tsIdx = O.engines.index(timeStepper)  #remove the automatic timestepper. Very important: we don't want subdomains to use many different timesteps...
O.engines = O.engines[0:tsIdx] + O.engines[tsIdx + 1:]
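# A possibly equivalent one-liner (a sketch, not the original code; it assumes engines compare
# the same way O.engines.index() relies on above):
#O.engines = [e for e in O.engines if e != timeStepper]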
O.dt = 0.001  #this very small timestep will make it possible to run 2000 iter without merging

#O.dt=0.1*PWaveTimeStep() #very important, we don't want subdomains to use many different timesteps...


#########  RUN  ##########
def collectTiming():
	existed = os.path.isfile("collect.dat")  #write the header only when the file is being created
	f = open('collect.dat', 'a')
	if not existed:
		f.write("numThreads mpi omp Nspheres N M runtime \n")
	from yade import timing
	f.write(
	        str(numThreads) + " " + str(os.getenv('OMPI_COMM_WORLD_SIZE')) + " " + str(os.getenv('OMP_NUM_THREADS')) + " " + str(N * M * (numThreads - 1)) +
	        " " + str(N) + " " + str(M) + " " + str(timing.runtime()) + "\n"
	)
	f.close()
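# 'collect.dat' is whitespace-separated with a one-line header; assuming all columns are numeric
# (i.e. the OMPI_COMM_WORLD_SIZE/OMP_NUM_THREADS variables were set), it could be loaded e.g. with:
#import numpy as np
#data = np.loadtxt('collect.dat', skiprows=1)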


if rank is None:  #######  Single-core  ######
	O.timingEnabled = True
	O.run(NSTEPS, True)
	#print "num bodies:",len(O.bodies)
	from yade import timing
	timing.stats()
	collectTiming()
	print("num. bodies:", len([b for b in O.bodies]), len(O.bodies))
	print("Total force on floor=", O.forces.f(WALL_ID)[1])
else:  #######  MPI  ######
	# customize
	mp.ACCUMULATE_FORCES = True  #trigger force summation on master's body (here WALL_ID)
	mp.VERBOSE_OUTPUT = False
	mp.ERASE_REMOTE = False  #erase bodies not interacting with a given subdomain?
	mp.OPTIMIZE_COM = True  #L1-optimization: pass a list of doubles instead of a list of states
	mp.USE_CPP_MPI = True and mp.OPTIMIZE_COM  #L2-optimization: work around python by passing a vector<double> at the C++ level
	mp.MERGE_SPLIT = mergeSplit
	mp.COPY_MIRROR_BODIES_WHEN_COLLIDE = bodyCopy and not mergeSplit

	mp.mpirun(NSTEPS)
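	# mpirun executes NSTEPS iterations using the domain decomposition configured above;
	# thanks to ACCUMULATE_FORCES the force on the master's wall (WALL_ID) is summed across workers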
	print("num. bodies:", len([b for b in O.bodies]), len(O.bodies))
	if rank == 0:
		mp.mprint("Total force on floor=" + str(O.forces.f(WALL_ID)[1]))
		collectTiming()
	else:
		mp.mprint("Partial force on floor=" + str(O.forces.f(WALL_ID)[1]))
	#mp.mergeScene()
	if rank == 0:
		O.save('mergedScene.yade')
	#mp.MPI.Finalize()
#exit()
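# The scene saved on rank 0 above can later be inspected in a regular (non-MPI) yade session,
# e.g. with O.load('mergedScene.yade') (an illustration, not part of the original workflow)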