1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
|
#!/usr/bin/env python
# Copyright (C) 2001-2023 Artifex Software, Inc.
# All Rights Reserved.
#
# This software is provided AS-IS with no warranty, either express or
# implied.
#
# This software is distributed under license and may not be copied,
# modified or distributed except as expressly authorized under the terms
# of the license contained in the file LICENSE in this distribution.
#
# Refer to licensing information at http://www.artifex.com or contact
# Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
# CA 94129, USA, for further information.
#
# This is a script to parallelize the regression runs "by hand"
# for running on a batch-mode cluster under the PBS queue system.
# It generates a custom testing.cfg and comparefiles directory
# for each node, creates a set of job description files, and runs them.
import os
import string
import re
import sys
import getopt
## globals -- edit these to make it work
# whether to submit the job after creating it
run = True
# fall back to an empty prefix when HOME is not set
home = os.environ.get("HOME", '')
base = os.getcwd()
# directory of files to run
testdir = home + "/tests/ps/ps3fts"
# template config file
configfile = "testing.cfg"
# list of files to run
files = os.listdir(testdir)

## defaults -- override from the command line
action = 'run'
# revision we're testing, if known
revision = None

# parse the command line: -r/--rev names the revision, and the first
# positional argument (if any) replaces the default action
opts, args = getopt.getopt(sys.argv[1:], "r:", ["rev="])
for option, argument in opts:
    if option == "-r" or option == "--rev":
        revision = argument
        print("parallel run for r" + revision)
if args:
    action = args[0]
## helper functions
def choosecluster(lines=None):
    '''Decide how many nodes of which cluster to run on.

    Parses the output of the "upnodes" command and picks the cluster
    reporting the most free nodes. The row named 'total' is never
    chosen. On a tie the row seen first wins.

    lines -- optional iterable of "upnodes" output lines to parse.
             When omitted, the "upnodes" command is run and its
             output is used.

    Returns a (cluster_name, node_count) tuple.
    Raises RuntimeError if no cluster reports any free nodes.'''
    if lines is None:
        upnodes = os.popen("upnodes")
        try:
            lines = upnodes.readlines()
        finally:
            # always release the pipe, even if reading fails
            upnodes.close()
    # a usable row is: leading whitespace, the cluster name, anything,
    # then two trailing integer columns captured as 'procs' and 'free'
    row = re.compile(r'^\s+(?P<cluster>\w+).*\s+(?P<procs>\d+)\s+(?P<free>\d+)\s*$')
    cluster = None
    nodes = 0
    for line in lines:
        m = row.match(line)
        if not m:
            continue
        name = m.group("cluster")
        free = int(m.group("free"))
        # remember the cluster with the most free nodes
        if free > nodes and name != 'total':
            nodes = free
            cluster = name
    if cluster is None:
        # previously this fell through to an unbound local variable
        raise RuntimeError("upnodes reports no cluster with free nodes")
    return (cluster, nodes)
def makepbs(filename):
    '''Make a pbs job description file for a command.

    filename -- name of the command to run. The job description is
                written to <filename>.pbs and runs ./<filename> via
                mpiexec after changing to the base directory.

    Reads the module-level cluster, nodes and base values.'''
    if cluster == 'red' and nodes > 1:
        # upnodes reports dual-core nodes twice, so request half as
        # many nodes with two processors each; // keeps the count an
        # integer however the interpreter treats a plain /
        resources = "#PBS -l nodes=%d:run:%s:ppn=2\n" % (nodes // 2, cluster)
    else:
        resources = "#PBS -l nodes=%d:run:%s\n" % (nodes, cluster)
    outfile = open(filename + ".pbs", "w")
    try:
        outfile.write(resources)
        outfile.write("cd %s\n" % base)
        outfile.write("mpiexec -comm none ./%s\n" % filename)
    finally:
        # close (and flush) the job file even if a write fails
        outfile.close()
def makepbscleanup(configfiles, comparefiledirs, jobid=None):
    '''Make a pbs job to cleanup after another.

    Writes run_regression_cleanup.pbs, a single-node job that changes
    to the module-level base directory and removes everything passed in.

    configfiles     -- sequence of files to remove (rm)
    comparefiledirs -- sequence of directories to remove (rm -rf)
    jobid           -- id of the job, if any, that the cleanup should
                       run after'''
    outfile = open("run_regression_cleanup.pbs", "w")
    try:
        outfile.write("#PBS -l nodes=1:run")
        # run this on nina by default since it's trivial
        #outfile.write(":nina")
        if jobid:
            outfile.write(" -W depend=afterany:%s\n" % jobid)
        else:
            outfile.write("\n")
        outfile.write("cd %s\n" % base)
        # walk the sequences that were actually passed in rather than
        # counting to the global node total, so nothing handed to us is
        # skipped and a length mismatch cannot raise IndexError
        for directory in comparefiledirs:
            outfile.write("rm -rf " + directory + "\n")
        for name in configfiles:
            outfile.write("rm " + name + "\n")
    finally:
        # close (and flush) the job file even if a write fails
        outfile.close()
def makepbsreport(jobid):
    '''Create a report from the output of a previous job.

    Writes run_regression_report.pbs, a single-node job that runs after
    the given job and appends a summary to regression-r<revision>.log
    when the module-level revision is set, or to regression.<jobid>.log
    otherwise.

    jobid -- id of the actual regression run, so we know which log
             to parse.

    Reads the module-level base, revision and nodes values.'''
    # redirect appended to every generated line, including the
    # variable assignments
    if revision:
        dest = " >> regression-r%s.log\n" % revision
    else:
        dest = " >> regression.%s.log\n" % jobid
    # hack: we want the stderr output from the regression run, but
    # the jobid pbs appends to the file isn't quite the same as
    # what qsub (or qstat) give us, so keep only the part before
    # the first '.'
    log = "run_regression.pbs.e%s" % jobid.split('.')[0]
    commands = [
        "echo Cluster-based regression report BETA -- may not be accurate",
        "echo Run split over %d nodes" % nodes,
        "JOBS=`cat %s | egrep ^Ran | wc -l`" % log,
        "echo Run completed $JOBS jobs",
        # elapsed time is the gap between the modification times of
        # the job description file and the stderr log
        "STARTT=`stat -c %Y run_regression.pbs`",
        "ENDT=`stat -c %Y " + log + "`",
        "echo elapsed time $(($ENDT - $STARTT)) seconds",
        "DIFFS=`cat %s | egrep DIFFER$ | wc -l`" % log,
        "echo Run shows $DIFFS differences",
        "echo",
        "cat %s | egrep ^Checking | sort" % log,
        "cat %s | grep 'relevant files'" % log,
    ]
    outfile = open("run_regression_report.pbs", "w")
    try:
        outfile.write("#PBS -l nodes=1:run")
        # run this on nina by default since it's trivial
        #outfile.write(":nina")
        # run after the regression job is complete
        outfile.write(" -W depend=afterany:%s\n" % jobid)
        outfile.write("cd %s\n" % base)
        for command in commands:
            outfile.write(command + dest)
    finally:
        # close (and flush) the job file even if a write fails
        outfile.close()
## create a config file from the template for each node
(cluster, nodes) = choosecluster()
print("choosing %s with %d cpus free" % (cluster, nodes))
configfiles = []
comparefiledirs = []
print("configuring job...")
# read the template once; every node gets its own edited copy
infile = open(configfile)
try:
    template = infile.readlines()
finally:
    infile.close()
for node in range(nodes):
    suffix = "." + str(node)
    outfilename = configfile + suffix
    # remember the filename for later cleanup
    configfiles.append(outfilename)
    outfile = open(outfilename, "w")
    try:
        for line in template:
            fields = line.split()
            if len(fields) != 2:
                # not a plain "key value" line; copy it through unchanged
                outfile.write(line)
                continue
            key, value = fields
            if key == "comparefiledir":
                # drop the last character and append ".<node>/"
                # (assumes the template value ends in "/")
                value = value[:-1] + suffix + "/"
                # remember this for cleanup
                comparefiledirs.append(value)
            elif key in ("log_stderr", "log_stdout"):
                # drop the last four characters and append ".<node>.log"
                # (assumes the template value ends in ".log")
                value = value[:-4] + suffix + ".log"
            outfile.write(key + "\t" + value + "\n")
    finally:
        # close the file so its contents are flushed to disk before
        # any job is submitted
        outfile.close()
    if len(comparefiledirs) != node + 1:
        # without exactly one entry per node the indexing below would
        # pick the wrong directory or fail with a bare IndexError
        sys.exit("%s must contain exactly one comparefiledir entry" % configfile)
    # create the per-node directories
    os.system("rm -rf " + comparefiledirs[node])
    os.mkdir(comparefiledirs[node])

# split the test files into directories for each node, round-robin
for index, name in enumerate(files):
    nodedir = comparefiledirs[index % len(comparefiledirs)]
    # link directly instead of spawning a shell per file, so names
    # containing spaces or shell metacharacters are handled correctly
    os.symlink(os.path.join(testdir, name), os.path.join(nodedir, name))

# create our job description files
makepbs("run_regression")
makepbs("make_testdb")


def _qsub(pbsfile):
    '''Submit pbsfile with qsub; return the first line of its output, stripped.'''
    job = os.popen("qsub " + pbsfile)
    try:
        return job.readline().strip()
    finally:
        job.close()


## submit the actual jobs
if run:
    # qsub the pbs file
    if action == 'update':
        jobid = _qsub("make_testdb.pbs")
    else:
        jobid = _qsub("run_regression.pbs")
    print("run submitted as %s" % jobid)
    # append a follow-up report generation job
    if action == 'run':
        makepbsreport(jobid)
        report_jobid = _qsub("run_regression_report.pbs")
        print("report job is %s" % report_jobid)
    # append a follow-up job to do the cleanup
    makepbscleanup(configfiles, comparefiledirs, jobid)
    cleanup_jobid = _qsub("run_regression_cleanup.pbs")
    print("cleanup job is %s" % cleanup_jobid)
|