File: runmultinode.sh

package info (click to toggle)
mpich 4.3.2-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 101,184 kB
  • sloc: ansic: 1,040,629; cpp: 82,270; javascript: 40,763; perl: 27,933; python: 16,041; sh: 14,676; xml: 14,418; f90: 12,916; makefile: 9,270; fortran: 8,046; java: 4,635; asm: 324; ruby: 103; awk: 27; lisp: 19; php: 8; sed: 4
file content (107 lines) | stat: -rwxr-xr-x 2,537 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/bin/bash

# Parse the command line into the global settings used by the rest of the
# script:
#   hosts        array of host names (split from a comma-separated list)
#   ppn          processes per node
#   provider     value passed to fi_multinode -p
#   iterations   value passed to fi_multinode -I
#   pattern      either empty or "-z <pattern>"
#   capability   value passed to fi_multinode -C
#   cleanup      true/false: only kill stragglers, do not run tests
#   help         true/false: print usage and exit
#   ci           prefix prepended to the fi_multinode command
# Defaults are (re)applied on every call.
# Exits with status 2 when getopt rejects the command line.
parse_args() {
	local parsed

	# Must be an empty array: "hosts=[]" would store the literal string "[]"
	# as a single bogus host.
	hosts=()
	ppn=1
	provider=""
	iterations=1
	pattern=""
	capability="msg"
	cleanup=false
	help=false
	ci=""

	# Long options need the enhanced (util-linux style) getopt.
	# Note: -h is --hosts; --help has no short form.
	parsed=$(getopt --options h:n:p:I:C:z: \
		--longoptions hosts:,processes-per-node:,provider:,capability:,iterations:,pattern:,ci:,cleanup,help \
		-- "$@") || exit 2

	# getopt emits a shell-quoted argument list; eval is the documented way
	# to load it back into the positional parameters.
	eval set -- "$parsed"

	while (( $# > 0 )); do
		case "$1" in
			-h|--hosts)
				IFS=',' read -r -a hosts <<< "$2"
				shift 2 ;;
			-n|--processes-per-node)
				ppn=$2
				shift 2 ;;
			-p|--provider)
				provider="$2"
				shift 2 ;;
			-I|--iterations)
				iterations=$2
				shift 2 ;;
			-z|--pattern)
				pattern="-z $2"
				shift 2 ;;
			--cleanup)
				cleanup=true
				shift ;;
			-C|--capability)
				capability="$2"
				shift 2 ;;
			--ci)
				ci="$2"
				shift 2 ;;
			--help)
				help=true
				shift ;;
			--)
				shift
				break ;;
			*)
				# Without this arm an unmatched word would never be
				# shifted and the loop would spin forever.
				echo "unexpected argument: $1" >&2
				exit 2 ;;
		esac
	done
}

parse_args "$@"

# Print the usage text on stdout.
# printf is used instead of echo because bash's builtin echo does not expand
# "\t" by default, so the option lines would start with a literal "\t".
print_help() {
	printf '%s\n' \
		"Run the multinode test suite on the nodes provided for many processes" \
		"multinode tests are run in performance mode" \
		"Options"
	printf '\t%s\n' \
		"-h,--hosts comma-separated list of host names to run the tests on" \
		"-n,--processes-per-node number of processes to be run on each node. Total number of fi_multinode tests run will be n*number of hosts" \
		"-p,--provider libfabric provider to run the multinode tests on" \
		"-C,--capability multinode capability to use (rma or default: msg)" \
		"-I,--iterations number of iterations for the multinode test to run each pattern on" \
		"-z pattern passed through to fi_multinode as its -z argument" \
		"--ci prefix prepended to the fi_multinode command; when set, server output is also shown on the terminal" \
		"--cleanup end straggling processes. Does not rerun tests" \
		"--help show this message"
}

# Show usage and stop when --help was given.  The exit status of 1 is kept
# from the original script so existing callers see no change.
if [[ "${help:-false}" == true ]]; then
	print_help
	exit 1
fi
		
# Values derived from the parsed options.
start_server=0                 # flips to 1 once the server process is launched
ret=0                          # script exit status; stays 0 if no test is run
num_hosts=${#hosts[@]}
server=${hosts[0]}             # first host in the list is passed as -s
max_ranks=$(( ${num_hosts} * ${ppn} ))
ranks=${max_ranks}
printf -v output 'multinode_server_%s_%s.log' "${num_hosts}" "${ppn}"

# Launch the test unless only cleanup was requested.
# ppn processes are started on every host over ssh.  The very first one is
# treated as the server: its output goes to $output and its exit status
# becomes the script's exit status ($ret).  Output of all other processes is
# discarded.
if ! $cleanup ; then
	# The single quotes around the provider are interpreted by the remote
	# shell, since ssh hands the whole string to it.
	cmd="${ci}fi_multinode -n $ranks -s $server -p '$provider' -C $capability $pattern -I $iterations -T"
	printf '%s\n' "$cmd"
	server_pid=""
	for node in "${hosts[@]}"; do
		for ((i = 1; i <= ppn; i++)); do
			if [[ "$start_server" -eq 0 ]]; then
				echo STARTING SERVER
				if [[ -z "$ci" ]]; then
					ssh "$node" "$cmd" &> "$output" &
				else
					# Run the pipeline in a group that exits with ssh's
					# status.  With a bare "ssh | tee &", $! is tee's PID
					# and wait would report tee's status instead of the
					# server's.
					{ ssh "$node" "$cmd" | tee "$output"; exit "${PIPESTATUS[0]}"; } &
				fi
				server_pid=$!
				start_server=1
				# Give the server a head start before the other ranks.
				sleep .5
			else
				echo "starting proc $i/$ppn on $node"
				if [[ -z "$ci" ]]; then
					# Move the cursor up one line so the next progress
					# message overwrites this one (skipped when ci is set).
					tput cuu1
				fi
				ssh "$node" "$cmd" &> /dev/null &
			fi
			sleep .05
		done
	done

	echo "Wait for processes to finish..."
	if [[ -n "$server_pid" ]]; then
		wait "$server_pid"
		ret=$?
	else
		# An empty host list or ppn < 1 starts nothing; a bare "wait" would
		# return 0 and hide that.
		echo "No server process was started (no hosts given?)" >&2
		ret=1
	fi
fi

# Force-kill any process whose command name starts with fi_multinode on every
# host.  This is best effort: output and failures of the remote command are
# deliberately discarded (xargs/kill complain when nothing matches).
echo Cleaning up
for node in "${hosts[@]}"; do
	# \$2 is escaped so that awk on the remote side sees $2 (the PID column).
	ssh "$node" "ps -eo comm,pid | grep '^fi_multinode' | awk '{print \$2}' | xargs kill -9" > /dev/null 2>&1
done

# Point the user at the server log, but only when tests were actually run
# (cleanup holds the command name "true" or "false").
$cleanup || echo "Output: $PWD/$output"

# ret is the server's wait status, or its initial value in cleanup-only mode.
exit $ret