1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
|
#!/usr/bin/python3
"""
Returns all positions of a maf with any pwm score > threshold
The positions are projected onto human coordinates
"""
import sys
import bx.pwm.position_weight_matrix as pwmx
from bx.align import maf as align_maf
from bx.pwm.pwm_score_maf import MafBlockScorer
def isnan(x):
return not x == x
def main():
if len(sys.argv) < 6:
print(f"{sys.argv[0]} transfac|basic pwmfile inmaf threshold spec1,spec2,... ", file=sys.stderr)
sys.exit(0)
pwm = {}
format = sys.argv[1]
for wm in pwmx.Reader(open(sys.argv[2]), format=format):
pwm[wm.id] = wm
inmaf = open(sys.argv[3])
threshold = float(sys.argv[4])
species = []
for sp in sys.argv[5].split(","):
species.append(sp)
for maf in align_maf.Reader(inmaf):
mafchrom = maf.components[0].src.split(".")[1]
mafstart = maf.components[0].start
mafend = maf.components[0].end
reftext = maf.components[0].text
# maf block scores for each matrix
for scoremax, width, headers in MafBlockScorer(pwm, species, maf):
blocklength = width
mafsrc, mafstart, mafend = headers[0]
mafchrom = mafsrc.split(".")[1]
# lists of scores for each position in scoremax
for id, mx in scoremax.items():
for offset in range(blocklength):
# scan all species with threshold
for i in range(len(species)):
if mx[i][offset] > threshold:
refstart = mafstart + offset - reftext.count("-", 0, offset)
refend = refstart + len(pwm[id])
data = " ".join([f"{mx[x][offset]:.2f}" for x in range(len(species))])
# underscore spaces in the name
print(mafchrom, refstart, refend, id.replace(" ", "_"), data)
break
if __name__ == "__main__":
main()
|