1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
|
#!/usr/bin/python3
"""
Returns all positions of a maf with any pwm score > threshold
The positions are projected onto human coordinates
"""
import sys
from bx.align import maf as align_maf
from bx.pwm.pwm_score_maf import MafMotifScorer
def isnan(x):
return not x == x
def main():
if len(sys.argv) < 4:
print(f"{sys.argv[0]} motif inmaf spec1,spec2,... ", file=sys.stderr)
sys.exit(0)
targmotif = sys.argv[1]
inmaf = open(sys.argv[2])
threshold = 0
species = []
for sp in sys.argv[3].split(","):
species.append(sp)
for maf in align_maf.Reader(inmaf):
mafchrom = maf.components[0].src.split(".")[1]
mafstart = maf.components[0].start
mafend = maf.components[0].end
reftext = maf.components[0].text
# maf block scores for each matrix
for scoremax, width, headers in MafMotifScorer(species, maf, targmotif):
blocklength = width
mafsrc, mafstart, mafend = headers[0]
mafchrom = mafsrc.split(".")[1]
# lists of scores for each position in scoremax
mx = scoremax
for offset in range(blocklength):
# scan all species with threshold
for i in range(len(species)):
if mx[i][offset] > threshold:
refstart = mafstart + offset - reftext.count("-", 0, offset)
refend = refstart + len(targmotif)
data = " ".join([f"{mx[x][offset]:.2f}" for x in range(len(species))])
# quote the motif
print(mafchrom, refstart, refend, "'" + targmotif + "'", data)
break
if __name__ == "__main__":
main()
|