File: runmultinode.sh

package info (click to toggle)
libfabric 2.1.0-1.1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 26,108 kB
  • sloc: ansic: 387,262; python: 3,171; sh: 2,555; makefile: 1,313; cpp: 617; perl: 474; ruby: 123; asm: 27
file content (114 lines) | stat: -rwxr-xr-x 2,840 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/bin/bash
#
# Launch fi_multinode over ssh on a list of hosts, wait for the first
# (server) process to finish, then kill any fi_multinode processes left
# behind on those hosts. Run with --help for the option list.

# Normalize the given command line with getopt(1) and print the result.
# Arguments: the raw command-line arguments.
# Outputs:   the re-quoted option list, terminated by "--", on stdout.
# Returns:   getopt's exit status (non-zero on an unknown option or a
#            missing option argument).
normalize_options() {
	# Short options are listed back to back; getopt's short-option string
	# is not comma-separated. "pattern:" is declared so that the
	# --pattern spelling handled by the option loop is accepted.
	getopt --name "${0##*/}" \
		--options h:n:p:I:x:E:z: \
		--longoptions hosts:,processes-per-node:,provider:,xfer-method:,env:,iterations:,pattern:,ci:,cleanup,help \
		-- "$@"
}

# Stop on a malformed command line instead of running with whatever subset
# of the options getopt managed to parse.
if ! Options=$(normalize_options "$@"); then
	echo "Try '${0##*/} --help' for the list of options" >&2
	exit 1
fi

# Replace the positional parameters with getopt's normalized list.
eval set -- "$Options"

# Defaults for every option the parsing loop may override.
# hosts must start as an empty array: "hosts=[]" would store the literal
# string "[]" as its first element, so a run without -h would count one
# host and try to ssh to "[]".
hosts=()
ppn=1              # processes started per host
iterations=1       # passed to fi_multinode as -I
pattern=""         # "-z <pattern>" once -z is given, otherwise empty
xfer_method="msg"  # passed to fi_multinode as -x
cleanup=false      # true: only kill leftover processes, do not run tests
help=false         # true: print usage and exit
ci=""              # prefix prepended to the fi_multinode command name

# Walk the normalized option list and record each setting in the script's
# global variables (hosts, ppn, provider, iterations, pattern, cleanup,
# xfer_method, EXPORT_ENV, ci, help).
# Arguments: the option list, normally as produced by getopt and ending
#            with "--".
# Exits with status 1 on an argument no arm recognizes or on a -E value
# that is not of the form NAME=VALUE.
parse_args() {
	local env_name env_value

	# Loop on the argument count so an empty or unterminated list ends the
	# loop instead of spinning forever.
	while [ $# -gt 0 ]; do
		case "$1" in
			-h|--hosts)
				# Comma-separated host names become the hosts array.
				IFS=',' read -r -a hosts <<< "$2"; shift 2 ;;
			-n|--processes-per-node)
				ppn=$2; shift 2 ;;
			-p|--provider)
				provider="$2"; shift 2 ;;
			-I|--iterations)
				iterations=$2; shift 2 ;;
			-z|--pattern)
				pattern="-z $2"; shift 2 ;;
			--cleanup)
				cleanup=true; shift ;;
			-x|--xfer-method)
				xfer_method="$2"; shift 2 ;;
			-E|--env)
				case "$2" in
					*=*) ;;
					*)
						echo "${0##*/}: -E expects NAME=VALUE, got '$2'" >&2
						exit 1 ;;
				esac
				# Split at the first '=' so the value may itself contain '='.
				env_name=${2%%=*}
				env_value=${2#*=}
				EXPORT_ENV="${EXPORT_ENV}export ${env_name}=\"${env_value}\"; "
				shift 2 ;;
			--ci)
				ci="$2"; shift 2 ;;
			--help)
				help=true; shift ;;
			--)
				shift; break ;;
			*)
				echo "${0##*/}: unexpected argument '$1'" >&2
				exit 1 ;;
		esac
	done
}

parse_args "$@"

# Print the usage text on stdout.
# printf is used so every option line really starts with a tab; bash's
# echo prints "\t" literally unless it is given -e.
print_usage() {
	printf '%s\n' \
		"Run the multinode test suite on the nodes provided for many processes" \
		"multinode tests are run in performance mode" \
		"Options"
	printf '\t%s\n' \
		"-h,--hosts comma-separated list of host names to run the tests on" \
		"-n,--processes-per-node number of processes to be run on each node. Total number of fi_multinode tests run will be n*number of hosts" \
		"-p,--provider libfabric provider to run the multinode tests on" \
		"-x,--xfer-method multinode transfer method/capability to use (rma or default: msg)" \
		"-z pattern, passed through to fi_multinode as -z" \
		"-E,--env export provided variable name and value" \
		"-I,--iterations number of iterations for the multinode test to run each pattern on" \
		"--ci prefix prepended to the fi_multinode command; when set, the server output is also shown on the terminal" \
		"--cleanup end straggling processes. Does not rerun tests" \
		"--help show this message"
}

# --help prints the usage text and stops; the exit status stays 1.
if [ "$help" = true ]; then
	print_usage
	exit 1
fi

# Values derived from the parsed options, used by the launch and cleanup
# steps below.
ret=0                     # exit status of the script; replaced by the server's status
start_server=0            # stays 0 until the server process has been started
server="${hosts[0]}"      # the first host is the one fi_multinode is pointed at with -s
num_hosts="${#hosts[@]}"
max_ranks=$(( $num_hosts * $ppn ))   # one rank per process on every host
ranks="$max_ranks"
# Log file that receives the server process's output.
printf -v output 'multinode_server_%s_%s.log' "$num_hosts" "$ppn"

# Start the server process in the background and record the pid to wait
# on in the global server_pid.
# Arguments: $1 - host to ssh to
#            $2 - command line to run on that host
# Globals:   ci (read), output (read), server_pid (written)
# $1 is left unquoted, as in the original, so whitespace around a host
# name (for example from -h "a, b") is dropped by word splitting.
launch_server() {
	local node=$1 remote_cmd=$2

	if [ "$ci" == "" ]; then
		ssh $node "$remote_cmd" &> "$output" &
	else
		# Background a group that exits with ssh's status. Backgrounding
		# "ssh | tee" directly would make $! the pid of tee, so the later
		# wait would report tee's status and hide a failed test run.
		{ ssh $node "$remote_cmd" | tee "$output"; exit "${PIPESTATUS[0]}"; } &
	fi
	server_pid=$!
}

# Unless only cleanup was requested: start ppn processes on every host
# (the very first one is the server), then wait for the server and keep
# its exit status in ret.
if ! $cleanup ; then
	cmd="${EXPORT_ENV} ${ci}fi_multinode -n $ranks -s $server -p '$provider' -x $xfer_method $pattern -I $iterations -T"
	echo "$cmd"
	server_pid=""
	for node in "${hosts[@]}"; do
		for (( i = 1; i <= ppn; i++ )); do
			if [ "$start_server" -eq 0 ]; then
				echo STARTING SERVER
				launch_server "$node" "$cmd"
				start_server=1
				# Pause before starting the remaining processes.
				sleep .5
			else
				echo "starting proc $i/$ppn on $node"
				if [ "$ci" == "" ]; then
					# Move the cursor up one line so the next progress
					# message overwrites this one.
					tput cuu1
				fi
				# $cmd is passed as a single quoted word so it reaches the
				# remote shell unmodified (no local splitting or globbing).
				ssh $node "$cmd" &> /dev/null &
			fi
			sleep .05
		done
	done

	echo "Wait for processes to finish..."
	# Nothing was started when there are no hosts or ppn is 0; ret then
	# keeps its previous value.
	if [ -n "$server_pid" ]; then
		wait "$server_pid"
		ret=$?
	fi
fi

# Kill every process whose command name starts with fi_multinode on each
# host. All output of the remote command, including errors, is discarded.
echo "Cleaning up"
kill_stragglers="ps -eo comm,pid | grep '^fi_multinode' | awk '{print \$2}' | xargs kill -9"
for node in "${hosts[@]}"; do
	ssh $node "$kill_stragglers" > /dev/null 2>&1
done

# After a test run (not a cleanup-only run), say where the server log is.
$cleanup || echo "Output: ${PWD}/${output}"

# Finish with the status stored in ret.
exit $ret