File: pwm_score_motifs.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (58 lines) | stat: -rwxr-xr-x 1,763 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/python3
"""
Returns all positions of a maf with any pwm score > threshold
The positions are projected onto human coordinates
"""

import sys

from bx.align import maf as align_maf
from bx.pwm.pwm_score_maf import MafMotifScorer


def isnan(x):
    return not x == x


def main():
    if len(sys.argv) < 4:
        print(f"{sys.argv[0]} motif inmaf spec1,spec2,... ", file=sys.stderr)
        sys.exit(0)

    targmotif = sys.argv[1]
    inmaf = open(sys.argv[2])
    threshold = 0

    species = []

    for sp in sys.argv[3].split(","):
        species.append(sp)

    for maf in align_maf.Reader(inmaf):
        mafchrom = maf.components[0].src.split(".")[1]
        mafstart = maf.components[0].start
        mafend = maf.components[0].end
        reftext = maf.components[0].text

        # maf block scores for each matrix
        for scoremax, width, headers in MafMotifScorer(species, maf, targmotif):
            blocklength = width
            mafsrc, mafstart, mafend = headers[0]
            mafchrom = mafsrc.split(".")[1]

            # lists of scores for each position in scoremax
            mx = scoremax
            for offset in range(blocklength):
                # scan all species with threshold
                for i in range(len(species)):
                    if mx[i][offset] > threshold:
                        refstart = mafstart + offset - reftext.count("-", 0, offset)
                        refend = refstart + len(targmotif)
                        data = " ".join([f"{mx[x][offset]:.2f}" for x in range(len(species))])
                        # quote the motif
                        print(mafchrom, refstart, refend, "'" + targmotif + "'", data)
                        break


if __name__ == "__main__":
    main()