File: mpirun.anlspx.in

package info (click to toggle)
mpich 1.1.0-3
  • links: PTS
  • area: main
  • in suites: hamm
  • size: 22,116 kB
  • ctags: 27,349
  • sloc: ansic: 193,435; sh: 11,172; fortran: 6,545; makefile: 5,801; cpp: 5,020; tcl: 3,548; asm: 3,536; csh: 1,079; java: 614; perl: 183; awk: 168; sed: 70; f90: 62
file content (314 lines) | stat: -rwxr-xr-x 8,450 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
#! /bin/sh
#
# Work out where the MPICH tree lives and load the shared mpirun argument
# handling.  The #MPIR_HOME# and #MPIRUN_BIN# tokens look like placeholders
# that are filled in when the script is generated from this template
# (assumption -- confirm against the build).  The "#""NAME""#" spelling
# yields the same text without containing the token literally, which lets
# the script recognise a placeholder that was never filled in.
#
# An MPIR_HOME supplied by the caller takes precedence over the built-in one.
case "$MPIR_HOME" in
    "")
	MPIR_HOME=#MPIR_HOME#
	;;
esac
# Placeholder still present: fall back to the parent of the current directory.
case "$MPIR_HOME" in
    "#""MPIR_HOME""#")
	MPIR_HOME=`pwd`/..
	;;
esac
# Directory holding the mpirun helper scripts; "bin" unless one was configured.
case "#MPIRUN_BIN#" in
    "#""MPIRUN_BIN""#")
	MPIRUN_HOME=$MPIR_HOME/bin
	;;
    *)
	MPIRUN_HOME=$MPIR_HOME/#MPIRUN_BIN#
	;;
esac
# Source the argument handling only once; argsset records that it was done.
if [ -z "$argsset" ] ; then
    . $MPIRUN_HOME/mpirun.args
    argsset=1
fi
# This is for the ANL SP1/2, using the ANL "spsubmit" program.
# Users of "loadleveler" will probably want something like this
#
# One special feature is that this script can use a pre-existing
# partition if one is present.  It uses the environment variable
# MPI_PARTITION to indicate a file that contains a list of nodes to
# use, one per line.  If this file is found, then the partition is
# assumed to already be available.
#
# This is the copy program
spxcp=$MPIR_HOME/lib/rs6000/ch_mpl/spxcp
#
# Check to see if we need to move the file.  -mvhome is turned on
# automatically only when it has not been requested already and the
# program was not named by an absolute path.
# NOTE(review): localpgm is not assigned anywhere above this point in this
# file; unless mpirun.args sets it, firstchar is empty and the path test
# below is always true.  Confirm whether $progname was intended here.
firstchar=`expr "$localpgm" : '\(.\)'`
if [ "$mvhome" = 0 -a "$firstchar" != "/" ] ; then
    # May need to set mvhome.  This looks for /bonnie or /clyde in the
    # current directory (automounter prefixes are stripped first).
    curdir=`pwd | sed -e 's%/tmp_mnt%%g' -e 's%/Net%%g'`
    curname6=`expr "$curdir" : '/\(......\)'`
    curname5=`expr "$curdir" : '/\(.....\)'`
    if [ "$curname6" != "bonnie" -a "$curname5" != "clyde" ] ; then
	mvhome=1
	echo "Setting -mvhome for you (executable appears to not be"
	echo "in /sphome file system)."
    fi
fi
#
# Decide which executable path the job will use (localpgm) and which
# directory it will run in (PWD_LOCAL).  With -mvhome the program is
# copied into /sphome/$LOGNAME; rmhome records whether that copy should be
# deleted again once the job is over.
# Also, make sure that path is absolute; users can be surprised by things
# in their path.
#
if [ "$mvhome" = 1 ] ; then
    localpgm=`basename $progname`
    # if mvhome set, and the executable does NOT exist, then
    # remove it when we exit
    localpgm=/sphome/$LOGNAME/$localpgm
    if [ -x "$localpgm" ] ; then
	rmhome=0
    else
	rmhome=1
    fi
    if [ "$just_testing" = 1 ] ; then
	echo cp $progname $localpgm
    else
	# If you don't remove the file first, sometimes the 
	# WRONG FILE (the OLD ONE) is found by the system.
	/bin/rm -f "$localpgm"
	if [ -s "$localpgm" ] ; then
	    # localpgm already carries the /sphome/$LOGNAME prefix, so it
	    # is reported as is.
	    echo "Could not move $progname to $localpgm"
	    if [ -d "$localpgm" ] ; then
		echo "Destination is a directory!"
	    fi
	    exit 1
	fi
	# Stop here when the copy fails; otherwise a job would be submitted
	# for an executable that is not there.
	cp "$progname" "$localpgm" || {
	    echo "Could not copy $progname to $localpgm"
	    exit 1
	}
    fi
    PWD_LOCAL=/sphome/$LOGNAME
else
    # Run the program where it is; nothing to clean up afterwards.
    rmhome=0
    localpgm=$progname
    PWD_LOCAL=$PWD_TRIAL
fi
#
# Decide where the generated job script will be written: $PWD_LOCAL when a
# small scratch file can be created there, otherwise $HOME.
pitest_file=$PWD_LOCAL/PItest$$
SCRIPT_DIR=$PWD_LOCAL
/bin/rm -f $pitest_file
echo "Test" > $pitest_file
if test ! -s $pitest_file ; then
    SCRIPT_DIR=$HOME
fi
# Remove the scratch file and anything left behind under this PID.
/bin/rm -f $pitest_file $SCRIPT_DIR/PIrun.$$ /sphome/$LOGNAME/job.output.$$
# The program reads from /dev/null unless an input file was supplied.
stdinfile=${stdinfile:-/dev/null}
#
# The actual script depends on how we're running
#
# If a partition is available, use it.  Otherwise, get time from the scheduler
#
# This branch handles a partition that already exists: MPI_PARTITION names
# a non-empty node file.  A job script is written to $SCRIPT_DIR/PIrun.$$,
# run with rsh on the first node listed in that file, and then cleaned up.
#
# The here-document is expanded by THIS shell while the job script is being
# written: plain $name and `...` are substituted now, and only \$name
# remains a variable reference inside the generated script.
# NOTE(review): "poe $localpgm" is therefore fixed to the path known here;
# the localpgm="/tmp/..." assignment in the generated spxcp branch does not
# appear to influence it.  Confirm whether poe was meant to run the /tmp copy.
if [ -n "$MPI_PARTITION" -a -s "$MPI_PARTITION" ] ; then
    # Truncate instead of appending (as the scheduler branch further down
    # does) so leftover content can never end up in front of the new script.
    cat > $SCRIPT_DIR/PIrun.$$ <<EOF
#! /bin/sh
cd $PWD_LOCAL
MP_EUILIB=us
MP_RMPOOL=0
MP_HOSTFILE=$MPI_PARTITION
MP_PROCS=$np
MP_INFOLEVEL=0
MP_HOLD_STDIN=YES
MP_PULSE=0
export MP_EUILIB
export MP_RMPOOL
export MP_HOSTFILE
export MP_PROCS
export MP_INFOLEVEL
export MP_HOLD_STDIN
export MP_PULSE
# For MPI_INIT
MPIRUN_DEVICE=ch_mpl
export MPIRUN_DEVICE
#
if [ $polling_mode = 0 ] ; then
   MP_CSS_INTERRUPT=YES
   export MP_CSS_INTERRUPT
fi
/bin/rm -f /sphome/$LOGNAME/job.output.$$
# Wait for no-zero-sized SPnodes file
while test \! -s \$MP_HOSTFILE ; do sleep 1 ; done
#
# If the spxcp program is available, use it and change localpgm
if [ $np -gt 2 -a -x "$spxcp" ] ; then
    newlocalpgm=`basename $localpgm`
    cp $localpgm /tmp/\$newlocalpgm
    localpgm="/tmp/\$newlocalpgm"
    eval $spxcp \$localpgm
fi
#
if [ $mpirun_verbose = 1 ] ; then
    echo "About to run poe ... "
fi
poe $localpgm $cmdLineArgs < $stdinfile 2>&1
if [ $mpirun_verbose = 1 ] ; then
    echo "Poe exited ..."
fi
# Attempt to fix file cache problems
#SYNCLOC#
#sleep 2
exit 0
EOF
   # End of script for running program
   chmod a+x $SCRIPT_DIR/PIrun.$$
   # Get the home node (first line of the partition file)
   homenode=`head -1 $MPI_PARTITION`
   if [ $just_testing = 1 ] ; then
       echo rsh $homenode -n $SCRIPT_DIR/PIrun.$$
   else
       rsh $homenode -n $SCRIPT_DIR/PIrun.$$
   fi
   # Remove the staged executable only when this run put it there.
   if [ $rmhome = 1 ] ; then
	/bin/rm $localpgm
   fi
   /bin/rm -f $SCRIPT_DIR/PIrun.$$ 
   # Bring the files named in mvback back to the original directory.
   if [ "$mvback" != "" ] ; then
	(cd $PWD_LOCAL ; for file in $mvback ; do \
	if [ -f $file ] ; then cp $file $PWD_TRIAL ; /bin/rm $file ; \
	fi ; done )
   fi
   exit 0
fi
#
# If we reach here, there was no partition available.  This script will be
# run in a partition that will be allocated and removed
#
# The here-document (terminated by a line holding a single ".") is expanded
# by THIS shell while the job script is being written: plain $name and
# `...` are substituted now, and only \$name / \`...\` are evaluated when
# the generated script itself runs.
# The generated trap names signals 2 and 3, matching the trap used by the
# submitting shell further down; without a signal list the trap command
# installs no handler at all.
# NOTE(review): "poe $localpgm" is fixed to the path known here; the
# localpgm="/tmp/..." assignment in the generated spxcp branch does not
# appear to influence it.  Confirm whether poe was meant to run the /tmp copy.
#
cat >$SCRIPT_DIR/PIrun.$$ <<.
#! /bin/sh
JID=\`/usr/local/bin/getjid\`
trap "sprelease \$JID" 2 3
cd $PWD_LOCAL
MP_EUILIB=us
MP_RMPOOL=0
MP_HOSTFILE=/sphome/$LOGNAME/SPnodes.\$JID    
MP_PROCS=$np
MP_INFOLEVEL=0
MP_HOLD_STDIN=YES
MP_PULSE=0
export MP_EUILIB
export MP_RMPOOL
export MP_HOSTFILE
export MP_PROCS
export MP_INFOLEVEL
export MP_HOLD_STDIN
export MP_PULSE
# For MPI_INIT
MPIRUN_DEVICE=ch_mpl
export MPIRUN_DEVICE
#
if [ $polling_mode = 0 ] ; then
   MP_CSS_INTERRUPT=YES
   export MP_CSS_INTERRUPT
fi
/bin/rm -f /sphome/$LOGNAME/job.output.$$
# Wait for no-zero-sized SPnodes file
while test \! -s \$MP_HOSTFILE ; do sleep 1 ; done
#
# If the spxcp program is available, use it and change localpgm
if [ $np -gt 2 -a -x "$spxcp" ] ; then
    newlocalpgm=`basename $localpgm`
    cp $localpgm /tmp/\$newlocalpgm
    localpgm="/tmp/\$newlocalpgm"
    eval $spxcp \$localpgm
fi
#
if [ $mpirun_verbose = 1 ] ; then
    echo "About to run poe ... " >> /sphome/$LOGNAME/job.output.$$
fi
poe $localpgm $cmdLineArgs < $stdinfile >> /sphome/$LOGNAME/job.output.$$ 2>&1
if [ $mpirun_verbose = 1 ] ; then
    echo "Poe exited ..." >> /sphome/$LOGNAME/job.output.$$
fi
# Attempt to fix file cache problems
#SYNCLOC#
sleep 2
sprelease \$JID
exit 0
.
#
# The job script has been written; make it executable.
chmod a+x $SCRIPT_DIR/PIrun.$$
#
# A CAC supplied by the caller is used unchanged.  Otherwise take field 9
# of the first line printed by whatcac, with double quotes and the first
# colon removed.
if [ -z "$CAC" ] ; then
    CAC=`whatcac |
	head -1 |
	cut -d' ' -f 9 |
	sed -e s/\"//g -e s/://`
fi
# Dry run (just_testing = 1): only print a command sequence, run nothing.
# Otherwise: submit the job script with sptest, wait for the job with
# spwait, clean up, and copy the captured job output to stdout.
# NOTE(review): the dry run prints an spsubmit/getjid sequence, while the
# live branch calls sptest; the printed text looks like it mirrors the
# commented-out spsubmit code below -- confirm it is still what users
# should be shown.
if [ $just_testing = 1 ] ; then
    echo "spsubmit -nomail <<."
    echo $CAC
    echo $max_time
    echo $np
    echo B
    echo M
    echo n
    echo $SCRIPT_DIR/PIrun.$$
    echo 
    echo C
    echo .
    echo JID=\`/usr/local/bin/getjid\`
    echo spwait \$JID
    if [ $rmhome = 1 ] ; then
	echo "/bin/rm $localpgm"
    fi
    if [ "$mvback" != "" ] ; then
	echo "(cd $PWD_LOCAL ; cp $mvback $PWD_TRIAL ; /bin/rm $mvback )"
    fi
else
    # Send output to /dev/null in case we're trying to 
    # use this to match up output
    #JID=`/usr/local/bin/getjid`
    #if [ "$JID" != "" ] ; then 
    #	echo "Can not start SP job while one is in the queue"
    #	echo "Job $JID was found"
    #	exit 1
    #fi
    # 
    # I've had lots of problems with files not being present that I'm going
    # to chalk up to NFS problems.  To work around this, I'm going to 
    # add some sleeps and syncs
    #SYNCLOC# > /dev/null 2>&1
    sleep 2
    # Whatever sptest writes to stdout is taken as the job id.
    JID=`sptest -cac $CAC -min $max_time -nodes $np -batch -mpl -us \
    	-path $SCRIPT_DIR/PIrun.$$ -noemail` 
#    spsubmit -nomail <<. > /dev/null 2>&1
#$CAC
#$max_time
#$np
#B
#M
#n
#$SCRIPT_DIR/PIrun.$$
#
#C
#.
#    JID=`/usr/local/bin/getjid`
    # While waiting, signals 2 and 3 run sprelease on the job.
    trap "sprelease ${JID}" 2 3
    # There may be some filesystem race condition in spwait
    # THIS IS FIXED IN THE MOST RECENT ANL CODE; IF YOU HAVE TROUBLE,
    # GET A NEW VERSION OF spq and spwait
    #SYNCLOC# > /dev/null 2>&1
    spwait $JID
    # With a numeric first operand, trap resets the listed signals (2 and 3)
    # to their default handling.
    trap 2 3
    # 
    # I've had lots of problems with files not being present (or not being
    # written!) that I'm going to chalk up to NFS problems.  To work around 
    # this, I'm going to add some sleeps and syncs 
    # (sleeps removed now that spwait works!)
    #SYNCLOC# > /dev/null 2>&1
    # rmhome = 1 marks the program file for deletion once the job is done.
    if [ $rmhome = 1 ] ; then
	/bin/rm $localpgm
    fi
    # Drop the job's node list and the generated job script.
    /bin/rm -f /sphome/$LOGNAME/SPnodes.$JID $SCRIPT_DIR/PIrun.$$ 
    # Bring each existing file named in mvback back to $PWD_TRIAL and remove
    # it from $PWD_LOCAL.
    if [ "$mvback" != "" ] ; then
	(cd $PWD_LOCAL ; for file in $mvback ; do \
	if [ -f $file ] ; then cp $file $PWD_TRIAL ; /bin/rm $file ; \
	fi ; done )
    fi
    # Print the job's output file if it is non-empty, then delete it.
    if [ -s /sphome/$LOGNAME/job.output.$$ ] ; then 
	# Try a touch to synchronize file caches... Sigh... 
	touch /sphome/$LOGNAME/job.output.$$
	cat /sphome/$LOGNAME/job.output.$$
	/bin/rm -f /sphome/$LOGNAME/job.output.$$
    # Note that there is no good way to identify problems with the run
    # now...
#    else
#	echo "mpirun: No output file!  Job did not run! "
    fi
fi