1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
|
#!/usr/bin/env python
# spawns a ddd session for each process found in mpdlistjobs
# nco-specific: sends SIGUSR1 to nodeid>0 processes
# $Id: ddd_mpd.py,v 1.2 2005/09/26 23:13:38 wangd Exp $
# Usage:
# ./ddd_mpd.py
# -- spawn ddd sessions for each MPI process found via
# mpdlistjobs for a particular MPI job. Before spawning, send SIGUSR1
# to the processes of rank > 0 (the non-manager nodes) to reduce user
# tedium. If multiple jobs are found on MPD, prompt the user to select
# one of the jobs.
# -- If no jobs owned by the current user are found in mpd,
# complain and exit.
#
# Prefer something other than ddd? Try changing the argument at the
# call to mainthing(...). Support hasn't been checked for
# command-line arguments other than ddd/gdb, so you may have to hack
# spawnDebugger(...) as a short-term solution.
#
# Please direct feedback to the sourceforge forums for NCO. Thanks, -Daniel
from os import environ, getuid, getpid, path, getcwd, popen3
import os
class Job:
    """Plain attribute bag describing one process listed by MPD.

    Instances start out empty. newJob() and readJobInfo() attach fields
    (for example jobid, username, host, pid, sid, rank, pgm) after
    construction, so callers should use hasattr() before relying on a field.
    """
def newJob(jid, username, host, pid, sid, rank, path):
    """Build a Job from explicitly supplied field values.

    The arguments are stored unchanged under the attribute names jobid,
    username, host, pid, sid, rank and pgm (path is stored as pgm).
    Returns the populated Job.
    """
    fields = (
        ("jobid", jid),
        ("username", username),
        ("host", host),
        ("pid", pid),
        ("sid", sid),
        ("rank", rank),
        ("pgm", path),
    )
    record = Job()
    for name, value in fields:
        setattr(record, name, value)
    return record
def readJobInfo(jobtext):
    """Parse one block of "key = value" lines into a Job.

    jobtext -- text of a single record, one "key = value" pair per line.

    Every line containing "=" becomes an attribute on the returned Job:
    the attribute name is the stripped text before the first "=", and the
    attribute value is the stripped text after it (always a string).
    Lines without "=" and lines with an empty key are ignored, so an empty
    or unrecognised record yields a Job with no attributes.
    """
    job = Job()
    for line in jobtext.split("\n"):
        # Split on the first "=" only, so a value that itself contains "="
        # (e.g. a program path or argument) is kept intact instead of
        # raising ValueError on unpacking.
        pair = line.split("=", 1)
        if len(pair) < 2:
            continue
        key = pair[0].strip()
        if not key:
            continue
        # setattr instead of exec: the text comes from an external command,
        # so it must never be evaluated as Python source. This also keeps
        # quotes and backslashes in the value literal.
        setattr(job, key, pair[1].strip())
    return job
def readJobListMPD():
    """Run "mpdlistjobs" and return one Job per record in its output.

    The command's standard output is split on blank lines and each chunk
    is handed to readJobInfo(). Chunks that carry no "key = value" lines
    still produce an (attribute-less) Job, so callers should filter on the
    attributes they need. Standard error of the command is not inspected.
    """
    jobI, jobO, jobE = popen3("mpdlistjobs")
    try:
        allinfo = jobO.read()
    finally:
        # Release all three pipes even if the read fails; the original
        # left them open for the lifetime of the script.
        for pipe in (jobI, jobO, jobE):
            pipe.close()
    return [readJobInfo(jobtext) for jobtext in allinfo.split("\n\n")]
def makeSampleJobList():
    """Return a canned list of Job objects (dummy data for testing).

    The list holds three jobs of three ranks, three ranks and two ranks
    respectively, in the same order every call. It can stand in for
    readJobListMPD() when no MPD is running.
    """
    ncwa = "/home/wangd/nco/nco/mpi_bin/mpncwa"
    ncbo = "/home/wangd/nco/nco/mpi_bin/mpncbo"
    foreign = "/badpath/mpncbo"
    # columns: jobid, username, host, pid, sid, rank, program path
    samples = [
        ("21@dirt_3734", "wangd", "dirt", "11931", "11926", "0", ncwa),
        ("21@dirt_3734", "wangd", "dirt", "11929", "11927", "1", ncwa),
        ("21@dirt_3734", "wangd", "dirt", "11930", "11928", "2", ncwa),
        # next two jobs should be culled
        ("10@dirt_3734", "somebody", "dirt", "112", "110", "0", foreign),
        ("10@dirt_3734", "somebody", "dirt", "119", "111", "1", foreign),
        ("24@dirt_3734", "wangd", "dirt", "11951", "11946", "0", ncbo),
        ("24@dirt_3734", "wangd", "dirt", "11949", "11947", "1", ncbo),
        ("24@dirt_3734", "wangd", "dirt", "11950", "11948", "2", ncbo),
    ]
    return [newJob(*fields) for fields in samples]
def cutoutJobs(joblist, jobids, paths):
print "There seems to be more than one job in MPD. Which would you like?"
pick = ""
index = -1
while not pick in jobids:
for i in range(len(jobids)):
print i,jobids[i], paths[i]
print "which would you like(0..",len(jobids)-1,")?",
typed = raw_input()
try:
index = int(typed)
except ValueError:
pass
if index not in range(len(jobids)):
print "Sorry, bad choice: ",typed, " Try another."
else:
pick = jobids[index]
print "Chose id:", pick, " path:",paths[index]
# remove job objects that do not match
joblist = filter((lambda j: j.jobid == pick), joblist)
return joblist
def spawnDebugger(progname, joblist, debug):
shI, shO, shE = popen3("sh")
children = filter(lambda j: j.rank != "0", joblist)
for c in children: # resume the children first
s = "kill -USR1 %s\n" % (c.pid)
if not debug: shI.write(s)
else: print s,
print "Node 0 is pid:",filter(lambda j: j.rank == "0",joblist)[0].pid
shI.write("\n")
for job in joblist:
try:
#print job.jobid, job.pid, job.rank, job.pgm
dummy = job.pgm
dummy = job.pid
s = "%s %s %s &\n" % (progname, job.pgm, job.pid)
if not debug: shI.write(s)
#else:
print s,
except AttributeError:
continue
shI.close()
print shO.read().strip()
print shE.read().strip()
def mainthing(dbgprogname):
joblist = readJobListMPD()
#joblist = makeSampleJobList() # for simple testing
# now, go ahead and spawn ddd jobs.
# for now, just spit out the command line so we don't have to deal
# with the process management. *sigh*
####print len(joblist)
jobids = []
paths = []
joblist = filter(lambda j: hasattr(j, "jobid"), joblist)
myname = os.popen("whoami").read().strip()
joblist = filter(lambda j: j.username == myname, joblist)
if len(joblist) < 1:
print "No acceptable jobs found."
return
for job in joblist: ## check to see how many jobs there are
if not job.jobid in jobids:
jobids.append( job.jobid )
paths.append( job.pgm )
if len(jobids) > 1:
joblist = cutoutJobs(joblist, jobids, paths)
spawnDebugger(dbgprogname, joblist, False) #False for no debug
## -----------------
## main program body
## -----------------
# Executed unconditionally when this file is run (or imported): look up the
# user's MPD job and start the "ddd" debugger on each of its processes.
mainthing("ddd")
# if you like gdb or dbx, you may wish to try something like:
#
# mainthing ("xterm -e gdb")
#
# You will probably want to spawn xterms for each gdb to avoid
# managing multiple gdb sessions from a single terminal window.
|