File: maf_mask_cpg.py

package info (click to toggle)
python-bx 0.13.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 5,000 kB
  • sloc: python: 17,136; ansic: 2,326; makefile: 24; sh: 8
file content (43 lines) | stat: -rw-r--r-- 1,104 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
#!/usr/bin/python3

"""
Mask out potential CpG sites from a maf. Restricted or inclusive definition
of CpG sites can be used. The total fraction masked is printed to stderr.

usage: %prog < input > output
    -m, --mask=N: Character to use as mask ('?' is default)
    -r, --restricted: Use restricted definition of CpGs
"""

import sys

import bx.align
import bx.align.maf
import bx.align.sitemask.cpg
from bx.cookbook import doc_optparse


def main():
    """Mask potential CpG sites in a MAF read from stdin, writing to stdout.

    Options are parsed from the module docstring. The mask character is
    taken from ``--mask`` and falls back to "?" when it is not given. The
    restricted CpG filter is used when ``--restricted`` is set, otherwise
    the inclusive one. After filtering, the percentage of masked bases is
    printed to stderr.
    """
    options, _args = doc_optparse.parse(__doc__)
    try:
        # Fall back to the documented default when no mask was supplied.
        mask = options.mask or "?"
    except Exception:
        # NOTE(review): presumably reports a usage/parsing error and aborts;
        # confirm against bx.cookbook.doc_optparse.
        doc_optparse.exception()

    reader = bx.align.maf.Reader(sys.stdin)
    writer = bx.align.maf.Writer(sys.stdout)

    if options.restricted:
        cpgfilter = bx.align.sitemask.cpg.Restricted(mask=mask)
    else:
        cpgfilter = bx.align.sitemask.cpg.Inclusive(mask=mask)
    cpgfilter.run(reader, writer.write)

    # Guard against a zero total (presumably what an input with no
    # alignments produces -- confirm): report 0% instead of failing with
    # ZeroDivisionError after the output has already been written.
    total = float(cpgfilter.total)
    if total:
        percent_masked = float(cpgfilter.masked) / total * 100
    else:
        percent_masked = 0.0
    print(str(percent_masked) + "% bases masked.", file=sys.stderr)


# Run the command-line entry point only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()