File: mpirun.pg.in

package info (click to toggle)
mpich 1.1.0-3
  • links: PTS
  • area: main
  • in suites: hamm
  • size: 22,116 kB
  • ctags: 27,349
  • sloc: ansic: 193,435; sh: 11,172; fortran: 6,545; makefile: 5,801; cpp: 5,020; tcl: 3,548; asm: 3,536; csh: 1,079; java: 614; perl: 183; awk: 168; sed: 70; f90: 62
file content (191 lines) | stat: -rwxr-xr-x 6,110 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
#! /bin/sh
#
# This file is used to generate a p4-style procgroup file
#
# This should be used only for pg and execer runs.  It generates a list from
# the machines.$arch file
#
# The original csh code made extensive use of csh arrays, which are quite
# nice for reading parts of a file into a variable.
#
# Format of machines list is lines containing either a single
#    machinename
# or
#    machinename:n
# where n is the number of available processors.
# We use : separators instead of spaces to simplify parsing of the file
# 
#
# Read the settings from mpirun.args, but only while argsset is still empty;
# a non-empty argsset means there is nothing to load here.
case "$argsset" in
    "") . mpirun.args ;;
esac
#
# Get the defaults (this is in preparation for multi-architecture versions)
# 
# When no per-architecture settings were given (narch is 0), describe the run
# as a single architecture numbered 1, built from the unnumbered values.
[ $narch = 0 ] && {
    np1=$np
    archlist1="$archlist"
    archlocal=$arch
    arch1=$arch
    narch=1
}
# procFound is the number of processes already accounted for before any
# machine is picked from the machines file.  By default one local instance
# is run, so the count starts at 1.
# Note that in the SMP cluster case, we may need to increase this
procFound=1
if [ $nolocal = 1 ] ; then
    # The job is not to be run locally, so this machine is not counted
    # and is not included in the list.
    procFound=0
fi
#
# Parallel result lists; the per-architecture loop appends one word to each
# of them for every machine it selects.
nprocuselist=
archuselist=
machinelist=
# Start with architecture number 1.
curarch=1
# The first architecture is the local one.
archlocal=$arch1
# The loop overwrites nolocal; keep the incoming value so it can be put back.
nolocalsave=$nolocal
#
# The first time through the loop, we use the default machinefile, unless
# one has been specified.
# 
# We use \$ instead of $ since some systems seem to require \$ and that
# appears to be correct.
#
# Build the machine lists, one architecture per pass.
#
# Reads:   narch, curarch, procFound, nolocal, machineFile, machine,
#          LOGNAME, MPIR_HOME, MPI_HOST, MPI_MAX_CLUSTER_SIZE and, for pass N,
#          archN, archlistN, npN and machineFileN.
# Appends: one word per selected machine to machinelist (host name),
#          archuselist (architecture) and nprocuselist (process count).
# Exits:   with status 1 if a required machines file cannot be read, or if
#          no further machines can be found for an architecture.
# After each pass curarch is incremented, procFound is reset to 0, nolocal is
# forced to 1 and machineFile is cleared.
while [ "$curarch" -le "$narch" ] ; do
    # Copy the numbered settings for this pass (arch1, np1, ...) into the
    # unnumbered working variables.
    eval arch=\$"arch$curarch"
    eval archlist=\$"archlist$curarch"
    if [ -z "$archlist" ] ; then archlist=$arch ; fi
    eval np=\$"np$curarch"
    if [ -z "$np" ] ; then
        echo "Warning: unspecified -np value for $arch.  Assuming -np 1."
        np=1
    fi
    # A readable per-architecture machine file overrides the current one.
    eval mFile=\$"machineFile$curarch"
    if [ -n "$mFile" -a -r "$mFile" ] ; then machineFile=$mFile ; fi
    #
    # Get default machine file
    if [ -z "$machineFile" ] ; then
        # If on the ANL SPx, use getjid to get the machine list...
        if [ "$machine" = "ibmspx" -a -x /usr/local/bin/getjid ] ; then
            machineFile="/sphome/$LOGNAME/SPnodes.`/usr/local/bin/getjid`"
        else
            # Take the first readable machines.<arch> file; if none is
            # readable, machineFile is left at the last name tried.
            for march in $archlist ; do
                machineFile="${MPIR_HOME}/util/machines/machines.${march}"
                if [ -r "$machineFile" ] ; then break ; fi
            done
        fi
    fi
    #
    # A missing, empty or unreadable machines file is fatal unless this is a
    # purely local single-process run (nolocal is 0 and np is 1).
    if [ -z "$machineFile" -o ! -s "$machineFile" -o \
        ! -r "$machineFile" ] ; then
        if [ "$nolocal" != 0 -o "$np" != 1 ] ; then
            echo Cannot read $machineFile.  
            echo Looked for files with extension $archlist in 
            echo directory ${MPIR_HOME}/util/machines .
            exit 1
        fi
    fi
    #
    # Find the machines to use (build in file).  Note that "local"
    # refers only to the first architecture.
    #
    # The sed script deletes full-line comments, strips trailing comments
    # ('$' anchors at end of line; a '^' in that position is a literal
    # character, so it would strip nothing) and drops lines that are blank,
    # so that they do not use up any of the $np lines kept by head.
    # The file is read only if it is readable: in the one case where an
    # unreadable file is tolerated above, no machines are needed from it.
    #
    machineavail=""
    if [ -r "$machineFile" ] ; then
        if [ "$nolocal" = 0 -o "$curarch" -gt 1 ] ; then
            machineavail=`sed -e '/^#/d' -e 's/#.*$//' \
                -e '/^[[:space:]]*$/d' "$machineFile" | \
                head -n "$np" | tr '\012' ' '`
        elif [ "$np" -gt 1 -o "$nolocal" = 1 ] ; then
            # Remove host from the list of available machines....
            # Thanks to Bjarne Herland for modification to
            # remove EXACTLY the host machine (eg., bar and not bark)
            # NOTE(review): in the bracket expression ' -\' is a range from
            # space to backslash, and a line holding only the host name (no
            # character after it) is not matched -- confirm this is intended.
            machineavail=`sed -e '/^#/d' -e 's/#.*$//' \
                -e '/^[[:space:]]*$/d' "$machineFile" | \
                grep -v "^$MPI_HOST\([ -\.:]\)" | \
                head -n "$np" | tr '\012' ' '`
        fi
    fi
    #
    # One final refinement.  We could make each job start with
    # different processors with code like
    #r=`date +%S`
    #r=`expr 1 + $r / 10`
    #m1=`echo $machineavail | cut -c $r- -d ' '`
    #r=`expr $r - 1`
    #m2=`echo $machineavail | cut -c 1-$r -d ' '`
    #machineavail="$m1 $m2"
    #
    # Get the machine list for the job
    # KeepHost suggested by Marc A. Viredaz; allows the same machine
    # to be listed multiple times in the machines file.
    # This will loop around the machines list until it
    # finds enough machines (the nfound is used to detect infinite loops)
    # Allow the host to be used if it is the only system found
    #
    KeepHost=0
    loopcnt=0
    nleft=`expr "$np" - "$procFound"`
    while [ "$procFound" -lt "$np" ] ; do
        nfound=0
        # Word splitting of $machineavail is intended: one entry per word.
        for machineName in $machineavail ; do
            # Split off the number of processors, if present
            nprocmachine=1
            # Some systems return empty for no match; others return 0.  Empty
            # is what they should use (0 is ambiguous), but we have to be
            # prepared for either.
            # The leading X keeps expr from reading a host name as one of
            # its own operators or keywords.
            ntest=`expr "X$machineName" : 'X.*:\([0-9]*\)'`
            if [ -n "$ntest" -a "$ntest" != "0" ] ; then
                nprocmachine=$ntest
                machineName=`expr "X$machineName" : 'X\(.*\):.*'`
            fi
            #
            # MPI_HOST itself is skipped unless nolocal is 1 or KeepHost
            # has been switched on by an earlier entry.
            if [ "$nolocal" = 1 -o "$KeepHost" = 1 -o \
                 "$machineName" != "$MPI_HOST" ] ; then
                # Should we convert this the ${machineName}:$nprocmachine
                # (with a care taken for nprocmachine > np - procFound).
                if [ "$nprocmachine" -gt "$nleft" ] ; then
                    nprocmachine=$nleft
                fi
                # Limit the allowed number: one process per entry unless
                # MPI_MAX_CLUSTER_SIZE is set, which then acts as the cap.
                if [ -z "$MPI_MAX_CLUSTER_SIZE" ] ; then
                    nprocmachine=1
                elif [ "$MPI_MAX_CLUSTER_SIZE" -lt "$nprocmachine" ] ; then
                    nprocmachine=$MPI_MAX_CLUSTER_SIZE
                fi
                # Even better, we should use
                # machinename:nproc:arch, so that everything is together.
                machinelist="$machinelist $machineName"
                archuselist="$archuselist $arch"
                nprocuselist="$nprocuselist $nprocmachine"
                procFound=`expr "$procFound" + "$nprocmachine"`
                nfound=`expr "$nfound" + "$nprocmachine"`
                nleft=`expr "$nleft" - "$nprocmachine"`
            fi
            if [ "$procFound" = "$np" ] ; then break ; fi
            # Once the host has been seen in the list, later entries
            # (including on later sweeps) may use it.
            if [ "$machineName" = "$MPI_HOST" ] ; then KeepHost=1 ; fi
        done
        loopcnt=`expr "$loopcnt" + 1`
        # After the first time, we may have found no hosts BUT now
        # allow KeepHost.
        if [ "$nfound" = 0 -a "$loopcnt" -gt 1 ] ; then
            echo "Could not find enough machines for architecture $arch"
            exit 1
        fi
    done
    # Set up for the next architecture: nothing placed yet, no local
    # instance, and no machine file carried over.
    curarch=`expr "$curarch" + 1`
    procFound=0
    nolocal=1
    machineFile=""
done
# Put back the nolocal value that was saved before the architecture loop.
nolocal=$nolocalsave
#
if [ "$argsset" = "1" ] ; then
    # Set machinehead to the first word of machinelist; when the list is
    # empty the loop body never runs and machinehead keeps its old value.
    for machinehead in $machinelist ; do break ; done
else
    # Otherwise print the machine and architecture lists, one per line,
    # and clear machinehead.
    echo $machinelist
    echo $archuselist
    machinehead=""
fi