File: randsample_cmd.py

package info (click to toggle)
macs 3.0.2-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 378,728 kB
  • sloc: ansic: 5,879; python: 4,342; sh: 451; makefile: 83
file content (116 lines) | stat: -rw-r--r-- 3,873 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Time-stamp: <2020-11-24 17:00:16 Tao Liu>

"""Description: Random sample certain number/percentage of tags.

This code is free software; you can redistribute it and/or modify it
under the terms of the BSD License (see the file LICENSE included with
the distribution).
"""

# ------------------------------------
# python modules
# ------------------------------------

import os
import sys

# ------------------------------------
# own python modules
# ------------------------------------
from MACS3.Utilities.Constants import *
from MACS3.Utilities.OptValidator import opt_validate_randsample

# ------------------------------------
# Main function
# ------------------------------------
def run( options0 ):
    options = opt_validate_randsample( options0 )
    # end of parsing commandline options
    info = options.info
    warn = options.warn
    debug = options.debug
    error = options.error

    options.PE_MODE = options.format in ('BAMPE','BEDPE')

    #0 check output file
    if options.outputfile:
        outfhd = open( os.path.join( options.outdir, options.outputfile ), "w" )
    else:
        outfhd = sys.stdout

    #1 Read tag files
    if options.PE_MODE:
        info("# read input file in Paired-end mode.")
        treat = load_frag_files_options ( options ) # return PETrackI object
        t0 = treat.total # total fragments
        info("# total fragments/pairs in alignment file: %d" % (t0) )
    else:
        info("read tag files...")
        treat = load_tag_files_options (options)

        info("tag size = %d" % options.tsize)
        treat.fw = options.tsize

        t0 = treat.total
        info(" total tags in alignment file: %d" % (t0))

    if options.number:
        if options.number > t0:
            error(" Number you want is bigger than total number of tags in alignment file! Please specify a smaller number and try again!")
            error(" %.2e > %.2e" % (options.number, t0))
            sys.exit(1)
        info(" Number of tags you want to keep: %.2e" % (options.number))
        options.percentage = float(options.number)/t0*100
    info(" Percentage of tags you want to keep: %.2f%%" % (options.percentage))

    if options.seed >= 0:
        info(" Random seed has been set as: %d" % options.seed )

    treat.sample_percent(options.percentage/100.0, options.seed )

    info(" tags after random sampling in alignment file: %d" % (treat.total))

    info("Write to BED file")
    treat.print_to_bed(fhd=outfhd)
    info("finished! Check %s." % options.outputfile)

def load_tag_files_options ( options ):
    """From the options, load alignment tags.

    """
    options.info("# read treatment tags...")
    tp = options.parser(options.ifile[0], buffer_size=options.buffer_size)
    if not options.tsize:           # override tsize if user specified --tsize
        ttsize = tp.tsize()
        options.tsize = ttsize
    treat = tp.build_fwtrack()
    #treat.sort()
    if len(options.ifile) > 1:
        # multiple input
        for ifile in options.ifile[1:]:
            tp = options.parser(ifile, buffer_size=options.buffer_size)
            treat = tp.append_fwtrack( treat )
            #treat.sort()
    treat.finalize()

    options.info("tag size is determined as %d bps" % options.tsize)
    return treat

def load_frag_files_options ( options ):
    """From the options, load treatment fragments and control fragments (if available).

    """
    options.info("# read treatment fragments...")

    tp = options.parser(options.ifile[0], buffer_size=options.buffer_size)
    treat = tp.build_petrack()
    #treat.sort()
    if len(options.ifile) > 1:
        # multiple input
        for ifile in options.ifile[1:]:
            tp = options.parser(ifile, buffer_size=options.buffer_size)
            treat = tp.append_petrack( treat )
            #treat.sort()
    treat.finalize()
    return treat