File: parallelCommon.py

package info (click to toggle)
lamarc 2.1.10.1%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 77,052 kB
  • sloc: cpp: 112,339; xml: 16,769; sh: 3,528; makefile: 1,219; python: 420; perl: 260; ansic: 40
file content (239 lines) | stat: -rw-r--r-- 8,107 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
####################################################################
# parallelCommon.py
#   routines used by: divide_data.py
#                     combine_replicates.py
#                     combine_regions.py
####################################################################
# system imports
import fileinput
import getopt
import random
import re
import sys
import os.path
from xml.dom.minidom import parse, Document

# inclusive upper bound of the random integer that fixFormatTag draws
# (and then maps to draw*4+1) when it writes a new "seed" value
seedlim = 1000000


# share
# prints the help text describing the --lamarcfile and --outdir options
# to standard output
def usage():
    # a parenthesized single-argument print produces the same output as a
    # python 2 print statement and is also valid python 3
    print('''Usage:
        --lamarcfile REQUIRED <path to lamarc infile to fragment>
        --outdir     REQUIRED <path to output directory>
''')


# share
# parses the command line (sys.argv) and verifies the two required options
#
# input "barfIfOutdirExists": when true, an already existing output path
#   is treated as an error; when false an existing path is left as it is
#
# returns the tuple (lamarcfile, outdir, current working directory)
#
# exits the process instead of returning when
#   * help is requested                                     (status 0)
#   * the options cannot be parsed or a required one is
#     missing                                               (status 2)
#   * lamarcfile is not an existing file, or outdir exists
#     and barfIfOutdirExists is true                        (status 1)
def getOptionsAndVerify(barfIfOutdirExists):
    lamarcfile = None
    outdir = None

    try:
        opts, args = getopt.getopt(sys.argv[1:],"hl:o:",["help","lamarcfile=","outdir="])
    except getopt.GetoptError as err:
        # tell the user which option was rejected before showing the help
        print(str(err))
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-h","--help"):
            usage()
            sys.exit()
        elif o in ("-l", "--lamarcfile"):
            lamarcfile = a
        elif o in ("-o", "--outdir"):
            outdir = a
        else:
            # getopt only returns options declared above, so this is a
            # guard against the option lists and this loop drifting apart
            assert False, "unhandled option"

    # lamarc input file and output directory must both be given; a missing
    # one is a usage error, so report failure rather than status 0
    if lamarcfile is None or outdir is None:
        usage()
        sys.exit(2)

    # make sure lamarcfile exists and is a file
    if not os.path.isfile(lamarcfile):
        print("%s doesn't exist or isn't a file. exiting" % lamarcfile)
        sys.exit(1)

    # create outdir when it is missing; an existing path is only rejected
    # when the caller asked for that (its contents are not inspected)
    if os.path.exists(outdir):
        if barfIfOutdirExists:
            print("directory %s already exists. exiting" % outdir)
            sys.exit(1)
    else:
        os.makedirs(outdir)

    return (lamarcfile,outdir,os.getcwd())


# returns the first element that domElem.getElementsByTagName("tagname")
# yields, or None when it yields nothing
def getFirstTag(domElem,tagname):
    matches = domElem.getElementsByTagName(tagname)
    # next() with a default stands in for "first item, else None"
    return next(iter(matches), None)

# gets the sub-tag named "tagname" of element "elem" that is expected to
# be the only one
# EWFIX -- uniqueness is not checked; this simply hands back whatever
#          getFirstTag finds (None when there is no such tag)
def getSingleTag(elem,tagname):
    onlyTag = getFirstTag(elem,tagname)
    return onlyTag

# gets the integer value embedded in a tag
#
# converts the data of the first text child node of "elem" to an integer
# and returns it; returns None when "elem" has no text child node
#
# a first text node that does not hold an integer (for example one that
# is only whitespace) makes int() raise ValueError
def getLongVal(elem):
    for child in elem.childNodes:
        # EWFIX -- should check only one
        if child.nodeType == child.TEXT_NODE:
            # int() switches to a long by itself on python 2 when the value
            # needs it, and is the only integer type on python 3 (where
            # the long() builtin no longer exists)
            return int(child.data)
    return None
        

# overwrites the data of the first text child node of "elem" with "strData"
# does nothing when "elem" has no text child node
def setVal(elem,strData):
    # EWFIX -- should check only one
    firstText = next(
        (node for node in elem.childNodes if node.nodeType == node.TEXT_NODE),
        None)
    if firstText is not None:
        firstText.data = strData
        
# creates or replaces child tag "childTagName" of parent element
# "parentTag" so that it holds text of value "newValue"
#
# input "domTop" is the document used to create any new nodes
#
# * no such child tag: a new one holding "newValue" is appended to
#   "parentTag", followed by a newline text node
# * child tag with a text node: the first text node is overwritten
# * child tag without any text node (e.g. <seed/>): a text node holding
#   "newValue" is appended to it, so the value is not silently lost
def replaceOrSet(domTop,parentTag,childTagName,newValue):

    childTag = getSingleTag(parentTag,childTagName)
    if not childTag:
        childTag = domTop.createElement(childTagName)
        parentTag.appendChild(childTag)
        parentTag.appendChild(domTop.createTextNode("\n"))
        childTag.appendChild(domTop.createTextNode(newValue))
        return

    hasText = any(node.nodeType == node.TEXT_NODE
                  for node in childTag.childNodes)
    if hasText:
        setVal(childTag,newValue)
    else:
        # setVal only overwrites an existing text node, so an empty tag
        # needs its text node created here
        childTag.appendChild(domTop.createTextNode(newValue))
        

# set (or replace) the names of all inputs and outputs specified in the
# format tag "fmtTag" so that each carries the infix "idStr"
#
# also sets seed to a random value of the form 4*k+1 with k drawn from
# 0..seedlim (presumably the form lamarc wants for seeds -- confirm)
#
# input "domTop" is the document used to create any missing tags
# input "useIn" when true, turns on input summary file reading and names
#   the input summary file; when false, turns input summary reading off
def fixFormatTag(domTop,fmtTag,idStr,useIn):
    seedValue = random.randint(0,seedlim)*4+1
    replaceOrSet(domTop,fmtTag,"seed", "%d" % seedValue)

    # (tag name, value) in the order the tags are written; a value that
    # contains %s gets "idStr" filled in
    settings = [("results-file", "outfile_%s.txt")]
    if useIn:
        settings.append(("use-in-summary", "true"))
        settings.append(("in-summary-file", "insumfile_%s.xml"))
    else:
        settings.append(("use-in-summary", "false"))
    settings.extend([
        ("use-out-summary", "true"),
        ("out-summary-file", "outsumfile_%s.xml"),
        ("curvefile-prefix", "curve_%s.xml"),
        ("tracefile-prefix", "trace_%s.xml"),
        ("newicktreefile-prefix", "newick_%s.xml"),
        ("out-xml-file", "menusettings_%s.xml"),
        ("xml-report-file", "report_%s.xml"),
    ])

    for tagName, pattern in settings:
        if "%s" in pattern:
            tagValue = pattern % idStr
        else:
            tagValue = pattern
        replaceOrSet(domTop,fmtTag,tagName,tagValue)


#
#profilePattern = re.compile('\s*(\S+)\s+(.*)')
#def replaceEachProfile(profileText):
#    if not profileText:
#        return
#    remainingText = profileText
#    outText = ""
#    m = profilePattern.match(remainingText)
#    while m:
#        outText = "%s %s" % ( outText, "none")
#        remainingText = m.group(2)
#        m = profilePattern.match(remainingText)
#    return outText

# get population count from a top level lamarc XML document
#
# counts the "population" tags found under the first "region" tag of the
# "data" tag of "lamDom"
def getPopCount(lamDom):
    firstRegion = getFirstTag(getSingleTag(lamDom,"data"),"region")
    populations = firstRegion.getElementsByTagName("population")
    return len(populations)

# makes a string containing the word "none" "numProfiles" times
# a single space sits between each occurrence, before the first and
# after the last; for a count of zero the result is one space
def makeProfilesString(numProfiles):
    words = [" none" for _ in range(numProfiles)]
    return "".join(words) + " "

# turn off the profile for a given force
#
# when "forcesTag" contains a tag named "forceName", its "profiles" tag
# is set to "numProfiles" copies of the word none; a force that is not
# present is left alone
def turnOneProfileOff(domTop,forcesTag,forceName,numProfiles):
    forceTag = getSingleTag(forcesTag,forceName)
    if not forceTag:
        return
    replaceOrSet(domTop,forceTag,"profiles",makeProfilesString(numProfiles))


# turn off all profiles -- we use this because we don't want to
# do any profiling until the last run of lamarc on the brought-together
# data
#
# the number of profile entries written per force depends on the
# population count read from "domTop"
def turnProfilesOff(domTop,lamTag):
    numPops = getPopCount(domTop)
    forcesTag = getSingleTag(lamTag,"forces")
    # (force name, number of profile entries), handled in this order
    profileCounts = (
        ("coalescence", numPops),
        ("migration", numPops*numPops),
        ("growth", numPops),
        ("recombination", 1),
    )
    for forceName, numProfiles in profileCounts:
        turnOneProfileOff(domTop,forcesTag,forceName,numProfiles)

           
# python outputs XML with an initial tag <?xml version="1.0" ?>
# even though this is perfectly correct, the lamarc sum file reading
# routines cannot handle it. This is a kludge to work around that
#
# stripXmlInfo rewrites the file "filename" in place:
#   * a <? ... ?> tag on the first line is cut out; the rest of that line
#     is kept and ends with a newline
#   * every other line (and a first line without such a tag) is copied
#     unchanged
linePattern = re.compile(r'(.*)<\?.*\?>(.*)')
def stripXmlInfo(filename):
    # with inplace set, fileinput points sys.stdout at the file being
    # rewritten, so everything written to sys.stdout below lands in it
    lines = fileinput.input(filename,inplace=1)
    try:
        for lineNumber, line in enumerate(lines):
            if lineNumber == 0:
                m = linePattern.match(line)
                if m:
                    # "." does not match the line end, so put it back
                    line = "%s%s\n" % (m.group(1),m.group(2))
            sys.stdout.write(line)
    finally:
        # restores sys.stdout even when reading or writing fails part way
        fileinput.close()

# prints a notice that the sumfiles in "where" are being combined and
# that this can take a while
def sumfileCombineWarn(where):
    # single-argument print call: same output on python 2 and python 3
    print("Combining sumfiles in %s. This can take a while..." % where)

# prints a line of asterisks used to set off the messages of a script
def dividerLine():
    # single-argument print call: same output on python 2 and python 3
    print("****************************************************")

# prints a divider line followed by the name of the running script, its
# purpose and the -l / -o arguments it was given
def describeThisScript(scriptName,purpose,lamfile,lamdir):
    dividerLine()
    # single-argument print calls: same output on python 2 and python 3
    print("running python script: %s" % scriptName)
    print("purpose:               %s" % purpose)
    print("arguments:             -l %s" % lamfile)
    print("                       -o %s" % lamdir)

# prints instructions for the lamarc run(s) the user has to do next
#
# input "lamdir" is not used by this routine
# input "filelist" holds the names to list, one per output line
# input "nextIsLast" when false, the text speaks of "each of" the listed
#   names and asks for the generated files to be copied back
def nextStep(lamdir,filelist,nextIsLast):
    dividerLine()

    # the headline is assembled first and printed once; this yields the
    # same text as a chain of comma-terminated python 2 print statements
    headline = "next step: run lamarc on"
    if not nextIsLast:
        headline = "%s each of:" % headline
    print(headline)

    for fileName in filelist:
        print("               %s" % fileName)
    print("           if you are running on a different machine, copy")
    print("           all files in each of the above directories there")
    if not nextIsLast:
        print("           and copy all files generated by the run back to")
        print("           the same directory")
    # print("") rather than print(): on python 2 the latter prints "()"
    print("")

# prints instructions for the follow-up step: change back to directory
# "pydir" and run python script "pyscript" with arguments -l "lamfile"
# and -o "lamdir"; ends with a divider line
def finalStep(pydir,pyscript,lamfile,lamdir):
    # single-argument print calls: same output on python 2 and python 3
    print("then: change back to this directory")
    print("      (%s)" % pydir)
    print("      and run python script %s with arguments" % pyscript)
    print("           -l %s" % lamfile)
    print("           -o %s" % lamdir)
    dividerLine()