File: maf_select_motifs.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (73 lines) | stat: -rwxr-xr-x 2,126 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/usr/bin/python3
"""
Returns all positions of a maf with any pwm score > threshold
The positions are projected onto human coordinates
"""

import sys

import bx.pwm.position_weight_matrix as pwmx
from bx.align import maf as align_maf
from bx.pwm.pwm_score_maf import MafMotifSelect


def isnan(x):
    return not x == x


def main():
    if len(sys.argv) < 5:
        print(f"{sys.argv[0]} transfac|basic pwmfile inmaf threshold [motif]", file=sys.stderr)
        sys.exit(2)

    r = pwmx.Reader(open(sys.argv[2]), format=sys.argv[1])
    pwm = next(iter(r))
    inmaf = open(sys.argv[3])
    threshold = float(sys.argv[4])
    if len(sys.argv) > 5:
        motif = sys.argv[5]
    else:
        motif = None

    for maf in align_maf.Reader(inmaf):
        for mafmotif, pwm_score, motif_score in MafMotifSelect(maf, pwm, motif, threshold):
            print(mafmotif, pwm_score, motif_score)
            print("zzzzzzzzzzzzzzzzzzzzzzzzzzzzz")


def mafwrite(alignment, kvec=None, jvec=None, file=sys.stdout):
    file.write("a score=" + str(alignment.score))
    for key in alignment.attributes:
        file.write(f" {key}={alignment.attributes[key]}")
    file.write("\n")
    rows = []
    if not kvec:
        kvec = [0 for c in alignment.components]
    if not jvec:
        jvec = [0 for c in alignment.components]
    for c, x, y in zip(alignment.components, kvec, jvec):
        rows.append(("s", c.src, str(c.start), str(c.size), c.strand, str(c.src_size), c.text, f"{x:.2f}", str(y)))
        file.write(format_tabular(rows, "llrrrrrrr"))


def format_tabular(rows, align=None):
    if len(rows) == 0:
        return ""
    lengths = [len(col) for col in rows[0]]
    for row in rows[1:]:
        for i in range(0, len(row)):
            lengths[i] = max(lengths[i], len(row[i]))
    rval = ""
    for row in rows:
        for i in range(0, len(row)):
            if align and align[i] == "l":
                rval += row[i].ljust(lengths[i])
            else:
                rval += row[i].rjust(lengths[i])
            rval += " "
        rval += "\n"
    return rval


if __name__ == "__main__":
    main()