#! /bin/sh
# Work out where MPICH lives.  A value of MPIR_HOME already present in the
# environment is kept; otherwise the placeholder below is used (presumably
# rewritten with the real path when this file is installed -- TODO confirm).
if [ -z "$MPIR_HOME" ] ; then
    MPIR_HOME=#MPIR_HOME#
fi
# The marker is spelled in pieces here so that it still reads as the marker
# even if the contiguous form above has been rewritten.  A match means no
# real path was filled in, so fall back to the parent of the current
# directory.
case "$MPIR_HOME" in
    "#""MPIR_HOME""#")
        MPIR_HOME=`pwd`/..
        ;;
esac
# Same scheme for the directory holding the mpirun pieces: default to
# $MPIR_HOME/bin when the placeholder was left untouched.
case "#MPIRUN_BIN#" in
    "#""MPIRUN_BIN""#")
        MPIRUN_HOME=$MPIR_HOME/bin
        ;;
    *)
        MPIRUN_HOME=$MPIR_HOME/#MPIRUN_BIN#
        ;;
esac
# Read the shared argument handling once only; argsset records that it has
# already been done.
[ -n "$argsset" ] || {
    . $MPIRUN_HOME/mpirun.args
    argsset=1
}
# This is for the ANL SP1/2, using the ANL "spsubmit" program.
# Users of "loadleveler" will probably want something like this.
#
# One special feature is that this script can use a pre-existing
# partition if one is present. It uses the environment variable
# MPI_PARTITION to indicate a file that contains a list of nodes to
# use, one per line. If this file is found, then the partition is
# assumed to already be available.
#
# This is the copy program
spxcp=$MPIR_HOME/lib/rs6000/ch_mpl/spxcp
#
# Check to see if we need to move the file
#
# _mpirun_check_mvhome
#   Turns -mvhome on automatically when the user did not ask for it, the
#   program was not given by absolute path, and the current directory is
#   not under /bonnie or /clyde.
#   Reads:  mvhome, localpgm
#   Writes: firstchar, curdir, curname6, curname5, and mvhome (set to 1
#           when the move is needed)
#   Output: a two-line notice on stdout when mvhome is switched on
_mpirun_check_mvhome() {
    firstchar=`expr "$localpgm" : '\(.\)'`
    # The original test read "$firschar" (misspelled, hence always empty),
    # so the absolute-path guard never took effect.
    if [ "$mvhome" = 0 ] && [ "$firstchar" != "/" ] ; then
        # May need to set mvhome.  This looks for /bonnie or /clyde in the
        # current directory, after dropping any /tmp_mnt and /Net
        # components (presumably automounter prefixes -- TODO confirm).
        curdir=`pwd | sed -e 's%/tmp_mnt%%g' -e 's%/Net%%g'`
        curname6=`expr "$curdir" : '/\(......\)'`
        curname5=`expr "$curdir" : '/\(.....\)'`
        if [ "$curname6" != "bonnie" ] && [ "$curname5" != "clyde" ] ; then
            mvhome=1
            echo "Setting -mvhome for you (executable appears to not be"
            echo "in /sphome file system)."
        fi
    fi
}
_mpirun_check_mvhome
#
# Create a new file.  Also, make sure that
# path is absolute; users can be surprised by things in their path.
#
# _mpirun_stage_program
#   With mvhome = 1, copies $progname to /sphome/$LOGNAME and points
#   localpgm / PWD_LOCAL there; otherwise runs the program in place.
#   Reads:  mvhome, progname, LOGNAME, just_testing, PWD_TRIAL
#   Writes: localpgm, rmhome (1 = remove the staged copy at the end),
#           PWD_LOCAL
#   Exits the shell with status 1 if the destination cannot be cleared or
#   the copy fails.
_mpirun_stage_program() {
    if [ "$mvhome" = 1 ] ; then
        localpgm=/sphome/$LOGNAME/`basename "$progname"`
        # Only a copy that did not already exist (as an executable) is
        # removed again when the run is over.
        if [ -x "$localpgm" ] ; then
            rmhome=0
        else
            rmhome=1
        fi
        if [ "$just_testing" = 1 ] ; then
            echo cp $progname $localpgm
        else
            # If you don't remove the file first, sometimes the
            # WRONG FILE (the OLD ONE) is found by the system.
            /bin/rm -f "$localpgm"
            if [ -s "$localpgm" ] ; then
                # localpgm already carries the /sphome/$LOGNAME prefix;
                # do not print it twice.
                echo "Could not move $progname to $localpgm"
                if [ -d "$localpgm" ] ; then
                    echo "Destination is a directory!"
                fi
                exit 1
            fi
            # Stop here rather than submit a job whose executable is missing.
            cp "$progname" "$localpgm" || {
                echo "Could not copy $progname to $localpgm"
                exit 1
            }
        fi
        PWD_LOCAL=/sphome/$LOGNAME
    else
        rmhome=0
        localpgm=$progname
        PWD_LOCAL=$PWD_TRIAL
    fi
}
_mpirun_stage_program
#
# Pick the directory that will hold the generated run script: $PWD_LOCAL
# when a small probe file can be created there, $HOME otherwise.
SCRIPT_DIR=$PWD_LOCAL
/bin/rm -f $PWD_LOCAL/PItest$$
echo "Test" > $PWD_LOCAL/PItest$$
[ ! -s $PWD_LOCAL/PItest$$ ] && SCRIPT_DIR=$HOME
# Drop the probe, plus any run script or job output file left over under
# this process id.
/bin/rm -f $PWD_LOCAL/PItest$$ $SCRIPT_DIR/PIrun.$$ /sphome/$LOGNAME/job.output.$$
# A job with no stdin file given reads from /dev/null.
: "${stdinfile:=/dev/null}"
#
# The actual script depends on how we're running
#
# If a partition is available, use it. Otherwise, get time from the scheduler
#
# Pre-existing partition: MPI_PARTITION must name a non-empty file.  In that
# case a run script is written to $SCRIPT_DIR/PIrun.$$, started with rsh on
# the first host listed in the partition file, and this script exits 0
# without going on to the scheduler path below.
#
# The here-document delimiter is unquoted, so everything written as $name or
# in backquotes ($np, $localpgm, $polling_mode, $mpirun_verbose, the basename
# call, ...) is expanded NOW, while the file is being written.  Only the
# references escaped as \$name (\$MP_HOSTFILE, \$newlocalpgm, \$localpgm) are
# left for the generated script to evaluate when it runs.
if [ -n "$MPI_PARTITION" -a -s "$MPI_PARTITION" ] ; then
cat >> $SCRIPT_DIR/PIrun.$$ <<EOF
#! /bin/sh
cd $PWD_LOCAL
MP_EUILIB=us
MP_RMPOOL=0
MP_HOSTFILE=$MPI_PARTITION
MP_PROCS=$np
MP_INFOLEVEL=0
MP_HOLD_STDIN=YES
MP_PULSE=0
export MP_EUILIB
export MP_RMPOOL
export MP_HOSTFILE
export MP_PROCS
export MP_INFOLEVEL
export MP_HOLD_STDIN
export MP_PULSE
# For MPI_INIT
MPIRUN_DEVICE=ch_mpl
export MPIRUN_DEVICE
#
if [ $polling_mode = 0 ] ; then
MP_CSS_INTERRUPT=YES
export MP_CSS_INTERRUPT
fi
/bin/rm -f /sphome/$LOGNAME/job.output.$$
# Wait for no-zero-sized SPnodes file
while test \! -s \$MP_HOSTFILE ; do sleep 1 ; done
#
# If the spxcp program is available, use it and change localpgm
if [ $np -gt 2 -a -x "$spxcp" ] ; then
newlocalpgm=`basename $localpgm`
cp $localpgm /tmp/\$newlocalpgm
localpgm="/tmp/\$newlocalpgm"
eval $spxcp \$localpgm
fi
#
if [ $mpirun_verbose = 1 ] ; then
echo "About to run poe ... "
fi
poe $localpgm $cmdLineArgs < $stdinfile 2>&1
if [ $mpirun_verbose = 1 ] ; then
echo "Poe exited ..."
fi
# Attempt to fix file cache problems
#SYNCLOC#
#sleep 2
exit 0
EOF
# End of script for running program
chmod a+x $SCRIPT_DIR/PIrun.$$
# Get the home node
homenode=`head -1 $MPI_PARTITION`
# In test mode only show the command; otherwise run the script remotely.
# The job's output arrives on this script's stdout through rsh.
if [ $just_testing = 1 ] ; then
echo rsh $homenode -n $SCRIPT_DIR/PIrun.$$
else
rsh $homenode -n $SCRIPT_DIR/PIrun.$$
fi
# Cleanup.  Note that this runs in test mode as well: it is outside the
# if/else above.
if [ $rmhome = 1 ] ; then
/bin/rm $localpgm
fi
/bin/rm -f $SCRIPT_DIR/PIrun.$$
# Each file named in mvback that exists in $PWD_LOCAL is copied to
# $PWD_TRIAL and then removed from $PWD_LOCAL.
if [ "$mvback" != "" ] ; then
(cd $PWD_LOCAL ; for file in $mvback ; do \
if [ -f $file ] ; then cp $file $PWD_TRIAL ; /bin/rm $file ; \
fi ; done )
fi
# Always reports success; the status of rsh is not passed on.
exit 0
fi
#
# If we reach here, there was no partition available. This script will be
# run in a partition that will be allocated and removed
#
# The run script is written to $SCRIPT_DIR/PIrun.$$ (the here-document ends
# at the line holding a single ".").  As the delimiter is unquoted, plain
# $name references and the unescaped basename backquotes are expanded while
# the file is written; the escaped forms (\`...getjid\`, \$JID,
# \$MP_HOSTFILE, \$newlocalpgm, \$localpgm) are evaluated by the generated
# script itself.  All job output is appended to
# /sphome/$LOGNAME/job.output.$$, where $$ is the process id of THIS shell.
#
# NOTE(review): the generated "trap" line gives a command but no signal
# list, so it most likely installs no handler at all -- confirm which
# signals were intended (the submitting code further down traps 2 and 3).
cat >$SCRIPT_DIR/PIrun.$$ <<.
#! /bin/sh
JID=\`/usr/local/bin/getjid\`
trap "sprelease \$JID"
cd $PWD_LOCAL
MP_EUILIB=us
MP_RMPOOL=0
MP_HOSTFILE=/sphome/$LOGNAME/SPnodes.\$JID
MP_PROCS=$np
MP_INFOLEVEL=0
MP_HOLD_STDIN=YES
MP_PULSE=0
export MP_EUILIB
export MP_RMPOOL
export MP_HOSTFILE
export MP_PROCS
export MP_INFOLEVEL
export MP_HOLD_STDIN
export MP_PULSE
# For MPI_INIT
MPIRUN_DEVICE=ch_mpl
export MPIRUN_DEVICE
#
if [ $polling_mode = 0 ] ; then
MP_CSS_INTERRUPT=YES
export MP_CSS_INTERRUPT
fi
/bin/rm -f /sphome/$LOGNAME/job.output.$$
# Wait for no-zero-sized SPnodes file
while test \! -s \$MP_HOSTFILE ; do sleep 1 ; done
#
# If the spxcp program is available, use it and change localpgm
if [ $np -gt 2 -a -x "$spxcp" ] ; then
newlocalpgm=`basename $localpgm`
cp $localpgm /tmp/\$newlocalpgm
localpgm="/tmp/\$newlocalpgm"
eval $spxcp \$localpgm
fi
#
if [ $mpirun_verbose = 1 ] ; then
echo "About to run poe ... " >> /sphome/$LOGNAME/job.output.$$
fi
poe $localpgm $cmdLineArgs < $stdinfile >> /sphome/$LOGNAME/job.output.$$ 2>&1
if [ $mpirun_verbose = 1 ] ; then
echo "Poe exited ..." >> /sphome/$LOGNAME/job.output.$$
fi
# Attempt to fix file cache problems
#SYNCLOC#
sleep 2
sprelease \$JID
exit 0
.
#
# End of script for running program
chmod a+x $SCRIPT_DIR/PIrun.$$
#
# Determine CAC
# When CAC is not already set, it is taken from the first line printed by
# "whatcac": the 9th space-separated field, with every double quote and the
# first colon removed.
# NOTE(review): CAC is presumably the account the run is charged to -- confirm.
if [ "$CAC" = "" ] ; then
CAC=`whatcac | head -1 | cut -d' ' -f 9 | sed -e s/\"//g -e s/://`
fi
# Test mode: print a description of the submission instead of doing it.
# What is printed is an spsubmit dialogue, whereas the real branch below
# submits with sptest; the mvback line is likewise a simplified form of the
# loop used for real.
if [ $just_testing = 1 ] ; then
echo "spsubmit -nomail <<."
echo $CAC
echo $max_time
echo $np
echo B
echo M
echo n
echo $SCRIPT_DIR/PIrun.$$
echo
echo C
echo .
echo JID=\`/usr/local/bin/getjid\`
echo spwait \$JID
if [ $rmhome = 1 ] ; then
echo "/bin/rm $localpgm"
fi
if [ "$mvback" != "" ] ; then
echo "(cd $PWD_LOCAL ; cp $mvback $PWD_TRIAL ; /bin/rm $mvback )"
fi
else
# Send output to /dev/null in case we're trying to
# use this to match up output
#JID=`/usr/local/bin/getjid`
#if [ "$JID" != "" ] ; then
# echo "Can not start SP job while one is in the queue"
# echo "Job $JID was found"
# exit 1
#fi
#
# I've had lots of problems with files not being present that I'm going
# to chalk up to NFS problems. To work around this, I'm going to
# add some sleeps and syncs
#SYNCLOC# > /dev/null 2>&1
sleep 2
# Submit the generated script; whatever sptest prints is kept as the job id.
JID=`sptest -cac $CAC -min $max_time -nodes $np -batch -mpl -us \
-path $SCRIPT_DIR/PIrun.$$ -noemail`
# spsubmit -nomail <<. > /dev/null 2>&1
#$CAC
#$max_time
#$np
#B
#M
#n
#$SCRIPT_DIR/PIrun.$$
#
#C
#.
# JID=`/usr/local/bin/getjid`
# While waiting, signals 2 and 3 run sprelease on the job.  The job id is
# expanded into the trap string here, at the time the trap is set.
trap "sprelease ${JID}" 2 3
# There may be some filesystem race condition in spwait
# THIS IS FIXED IN THE MOST RECENT ANL CODE; IF YOU HAVE TROUBLE,
# GET A NEW VERSION OF spq and spwait
#SYNCLOC# > /dev/null 2>&1
spwait $JID
# The wait is over: put signals 2 and 3 back to their default handling.
trap 2 3
#
# I've had lots of problems with files not being present (or not being
# written!) that I'm going to chalk up to NFS problems. To work around
# this, I'm going to add some sleeps and syncs
# (sleeps removed now that spwait works!)
#SYNCLOC# > /dev/null 2>&1
# Cleanup: the staged executable (only when we created it), the node list
# for this job, and the generated run script.
if [ $rmhome = 1 ] ; then
/bin/rm $localpgm
fi
/bin/rm -f /sphome/$LOGNAME/SPnodes.$JID $SCRIPT_DIR/PIrun.$$
# Each file named in mvback that exists in $PWD_LOCAL is copied to
# $PWD_TRIAL and then removed from $PWD_LOCAL.
if [ "$mvback" != "" ] ; then
(cd $PWD_LOCAL ; for file in $mvback ; do \
if [ -f $file ] ; then cp $file $PWD_TRIAL ; /bin/rm $file ; \
fi ; done )
fi
# Relay the job's collected output to stdout, then delete the file.  An
# empty or missing output file is passed over silently.
if [ -s /sphome/$LOGNAME/job.output.$$ ] ; then
# Try a touch to synchronize file caches... Sigh...
touch /sphome/$LOGNAME/job.output.$$
cat /sphome/$LOGNAME/job.output.$$
/bin/rm -f /sphome/$LOGNAME/job.output.$$
# Note that there is no good way to identify problems with the run
# now...
# else
# echo "mpirun: No output file! Job did not run! "
fi
fi
#