File: pwm_score_positions.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (63 lines) | stat: -rwxr-xr-x 2,011 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/python3
"""
Returns all positions of a maf with any pwm score > threshold
The positions are projected onto human coordinates
"""

import sys

import bx.pwm.position_weight_matrix as pwmx
from bx.align import maf as align_maf
from bx.pwm.pwm_score_maf import MafBlockScorer


def isnan(x):
    return not x == x


def main():
    if len(sys.argv) < 6:
        print(f"{sys.argv[0]} transfac|basic pwmfile inmaf threshold spec1,spec2,... ", file=sys.stderr)
        sys.exit(0)

    pwm = {}
    format = sys.argv[1]
    for wm in pwmx.Reader(open(sys.argv[2]), format=format):
        pwm[wm.id] = wm

    inmaf = open(sys.argv[3])
    threshold = float(sys.argv[4])

    species = []

    for sp in sys.argv[5].split(","):
        species.append(sp)

    for maf in align_maf.Reader(inmaf):
        mafchrom = maf.components[0].src.split(".")[1]
        mafstart = maf.components[0].start
        mafend = maf.components[0].end
        reftext = maf.components[0].text

        # maf block scores for each matrix
        for scoremax, width, headers in MafBlockScorer(pwm, species, maf):
            blocklength = width
            mafsrc, mafstart, mafend = headers[0]
            mafchrom = mafsrc.split(".")[1]

            # lists of scores for each position in scoremax
            for id, mx in scoremax.items():
                for offset in range(blocklength):
                    # scan all species with threshold
                    for i in range(len(species)):
                        if mx[i][offset] > threshold:
                            refstart = mafstart + offset - reftext.count("-", 0, offset)
                            refend = refstart + len(pwm[id])
                            data = " ".join([f"{mx[x][offset]:.2f}" for x in range(len(species))])
                            # underscore spaces in the name
                            print(mafchrom, refstart, refend, id.replace(" ", "_"), data)
                            break


if __name__ == "__main__":
    main()