File: TIDDIT.py

package info (click to toggle)
tiddit 2.12.0%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 10,816 kB
  • sloc: cpp: 1,353; python: 1,004; sh: 71; makefile: 18
file content (133 lines) | stat: -rwxr-xr-x 7,094 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
#! /usr/bin/python3
import argparse
import os
import sys
import time

wd=os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, '{}/src/'.format(wd))
import TIDDIT_calling

version = "2.12.0"
parser = argparse.ArgumentParser("""TIDDIT-{}""".format(version),add_help=False)
parser.add_argument('--sv'       , help="call structural variation", required=False, action="store_true")
parser.add_argument('--cov'        , help="generate a coverage bed file", required=False, action="store_true")
args, unknown = parser.parse_known_args()

if args.sv:
	parser = argparse.ArgumentParser("""TIDDIT --sv --bam inputfile [-o prefix] --ref ref.fasta""")
	parser.add_argument('--sv'       , help="call structural variation", required=False, action="store_true")
	parser.add_argument('--bam', type=str,required=True, help="coordinate sorted bam file(required)")
	parser.add_argument('-o', type=str,default="output", help="output prefix(default=output)")
	parser.add_argument('-i', type=int, help="paired reads maximum allowed insert size. Pairs aligning on the same chr at a distance higher than this are considered candidates for SV (default= 99.9th percentile of insert size)")
	parser.add_argument('-d', type=str,help="expected reads orientations, possible values \"innie\" (-> <-) or \"outtie\" (<- ->). Default: major orientation within the dataset")
	parser.add_argument('-p', type=int,default=3, help="Minimum number of supporting pairs in order to call a variation event (default 3)")
	parser.add_argument('-r', type=int,default=3, help="Minimum number of supporting split reads to call a small variant (default 3)")
	parser.add_argument('-q', type=int,default=5, help="Minimum mapping quality to consider an alignment (default 5)")
	parser.add_argument('-Q', type=int,default=20, help="Minimum regional mapping quality (default 20)")
	parser.add_argument('-n', type=int,default=2, help="the ploidy of the organism,(default = 2)")
	parser.add_argument('-e', type=int, help="clustering distance  parameter, discordant pairs closer than this distance are considered to belong to the same variant(default = sqrt(insert-size*2)*12)")
	parser.add_argument('-l', type=int,default=3, help="min-pts parameter (default=3),must be set >= 2")
	parser.add_argument('-s', type=int,default=25000000, help="Number of reads to sample when computing library statistics(default=25000000)")
	parser.add_argument('-z', type=int,default=100, help="minimum variant size (default=100), variants smaller than this will not be printed ( z < 10 is not recomended)")
	parser.add_argument('--force_ploidy',action="store_true", help="force the ploidy to be set to -n across the entire genome (i.e skip coverage normalisation of chromosomes)")
	parser.add_argument('--no_cluster',action="store_true", help="Run only the TIDDIT signal extraction")
	parser.add_argument('--debug',action="store_true", help="rerun the tiddit clustering procedure")
	parser.add_argument('--n_mask',type=float,default=0.5, help="exclude regions from coverage calculation if they contain more than this fraction of N (default = 0.5)")
	parser.add_argument('--ref', type=str, help="reference fasta, used for GC correction and for reading cram")
	parser.add_argument('--p_ratio', type=float,default=0.2, help="minimum discordant pair/normal pair ratio at the breakpoint junction(default=20%)")
	parser.add_argument('--r_ratio', type=float,default=0.1, help="minimum split read/coverage ratio at the breakpoint junction(default=10%)")

	args= parser.parse_args()
	args.wd=os.path.dirname(os.path.realpath(__file__))
	if args.l < 2:
		print ("error, too low --l value!")
		quit()
	if args.ref:
		if not os.path.isfile(args.ref):
			print ("error,  could not find the reference file")
			quit()

	if not args.ref and args.bam.endswith(".cram"):
		print("error, reference fasta is required when using cram input")
		quit()

	if not (args.bam.endswith(".bam") or args.bam.endswith(".cram")) and not "/dev/" in args.bam:
		print ("error, the input file is not a bam file, make sure that the file extension is .bam or .cram")
		quit()

	if not os.path.isfile(args.bam) and not "/dev/" in args.bam:
		print ("error,  could not find the bam file")
		quit()

	if not os.path.isfile("{}/TIDDIT".format(args.wd)):
		print ("error,  could not find the TIDDIT executable file, try rerun the INSTALL.sh script")
		quit()

	if not args.bam.endswith(".cram"):
		command_str="{}/TIDDIT --sv -b {} -o {} -p {} -r {} -q {} -n {} -s {}".format(args.wd,args.bam,args.o,args.p,args.r,args.q,args.n,args.s)
	else:
		command_str="samtools view -hbu {} -T {} | {}/TIDDIT --sv -b /dev/stdin -o {} -p {} -r {} -q {} -n {} -s {}".format(args.bam,args.ref,args.wd,args.o,args.p,args.r,args.q,args.n,args.s)
	

	if args.i:
		command_str += " -i {}".format(args.i)
	if args.d:
		command_str += " -d {}".format(args.d)

	if not args.debug:
		os.system(command_str)

		if args.ref:
			t=time.time()
			print ("Generating GC wig file")
			if args.ref.endswith(".gz"):
				os.system("zcat {} | {}/TIDDIT --gc -z 50 -o {}".format(args.ref,args.wd,args.o))
			else:
				os.system("cat {} | {}/TIDDIT --gc -z 50 -o {}".format(args.ref,args.wd,args.o))
			print ("Constructed GC wig in {} sec".format(time.time()-t))

	if not args.no_cluster:
		TIDDIT_calling.cluster(args)

elif args.cov:
	parser = argparse.ArgumentParser("""TIDDIT --cov --bam inputfile [-o prefix]""")
	parser.add_argument('--cov'        , help="generate a coverage bed/wig file", required=False, action="store_true")
	parser.add_argument('--bam', type=str,required=True, help="coordinate sorted bam file(required)")
	parser.add_argument('-o', type=str,default="output", help="output prefix(default=output)")
	parser.add_argument('-z', type=int,default=500, help="use bins of specified size(default = 500bp) to measure the coverage of the entire bam file, set output to stdout to print to stdout")
	parser.add_argument('-w'        , help="generate wig instead of bed", required=False, action="store_true")
	parser.add_argument('-u'        , help="skip per bin mapping quality", required=False, action="store_true")
	parser.add_argument('--ref', type=str, help="reference fasta, used for GC correction and for reading cram")
	args= parser.parse_args()
	args.wd=os.path.dirname(os.path.realpath(__file__))

	if args.ref:
		if not os.path.isfile(args.ref):
			print ("error,  could not find the reference file")
			quit()

	if not args.bam.endswith(".cram"):
		command="{}/TIDDIT --cov -b {} -o {} -z {}".format(args.wd,args.bam,args.o,args.z)
	else:
		if not args.ref:
			print("error, missing reference sequence!")
			quit()
		command="samtools view -hbu {} -T {} | {}/TIDDIT --cov -b /dev/stdin -o {} -z {}".format(args.bam,args.ref,args.wd,args.o,args.z)

	if args.w:
		command += " -w"

	if args.u:
		command += " -u"

	if not os.path.isfile(args.bam):
		print ("error,  could not find the bam file")
		quit()
	if not os.path.isfile("{}/TIDDIT".format(args.wd)):
		print ("error,  could not find the TIDDIT executable file, try rerun the INSTALL.sh script")
		quit()
	os.system(command)

else:
	parser.print_help()