File: maf_filter_max_wc.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (35 lines) | stat: -rwxr-xr-x 808 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/python3

"""
Filter maf blocks for presence of wildcard columns. Blocks must meet the
criteria of having at least `min_good` columns, each of which has at most
`min_species` rows that are wildcard bases ('*'). Note that, despite its
name, `min_species` acts as an upper bound on the wildcard count per column.

TODO: Allow specifying the character of the wildcard base.

usage: %prog min_good min_species < maf > maf
"""

import sys

from bx.align import maf


def main():
    """
    Copy maf blocks from stdin to stdout, keeping only those with enough
    low-wildcard columns.

    Command-line arguments (read from `sys.argv`):

    - `min_good`: minimum number of qualifying columns a block must have
      in order to be written out.
    - `min_species`: despite its name, applied as the maximum number of
      wildcard characters ('*') a column may contain and still qualify.

    Any additional arguments are ignored. If either argument is missing or
    is not an integer, the script exits with a usage message on stderr and
    a non-zero status instead of an uncaught traceback.
    """
    prog = sys.argv[0] if sys.argv else "maf_filter_max_wc.py"
    usage = f"usage: {prog} min_good min_species < maf > maf"

    # Validate up front so a bad invocation produces a usage message rather
    # than an IndexError / ValueError traceback.
    if len(sys.argv) < 3:
        sys.exit(usage)
    try:
        min_good = int(sys.argv[1])
        min_species = int(sys.argv[2])
    except ValueError:
        sys.exit(f"min_good and min_species must be integers\n{usage}")

    maf_reader = maf.Reader(sys.stdin, parse_e_rows=True)
    maf_writer = maf.Writer(sys.stdout)

    for m in maf_reader:
        # A column counts as "good" when it holds no more than
        # `min_species` wildcard characters.
        good = sum(1 for col in m.column_iter() if col.count("*") <= min_species)
        if good >= min_good:
            maf_writer.write(m)


# Run the filter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()