1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
|
#! /usr/bin/python3
import argparse
import os
import sys
import time
wd=os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, '{}/src/'.format(wd))
import TIDDIT_calling
version = "2.12.0"
parser = argparse.ArgumentParser("""TIDDIT-{}""".format(version),add_help=False)
parser.add_argument('--sv' , help="call structural variation", required=False, action="store_true")
parser.add_argument('--cov' , help="generate a coverage bed file", required=False, action="store_true")
args, unknown = parser.parse_known_args()
if args.sv:
parser = argparse.ArgumentParser("""TIDDIT --sv --bam inputfile [-o prefix] --ref ref.fasta""")
parser.add_argument('--sv' , help="call structural variation", required=False, action="store_true")
parser.add_argument('--bam', type=str,required=True, help="coordinate sorted bam file(required)")
parser.add_argument('-o', type=str,default="output", help="output prefix(default=output)")
parser.add_argument('-i', type=int, help="paired reads maximum allowed insert size. Pairs aligning on the same chr at a distance higher than this are considered candidates for SV (default= 99.9th percentile of insert size)")
parser.add_argument('-d', type=str,help="expected reads orientations, possible values \"innie\" (-> <-) or \"outtie\" (<- ->). Default: major orientation within the dataset")
parser.add_argument('-p', type=int,default=3, help="Minimum number of supporting pairs in order to call a variation event (default 3)")
parser.add_argument('-r', type=int,default=3, help="Minimum number of supporting split reads to call a small variant (default 3)")
parser.add_argument('-q', type=int,default=5, help="Minimum mapping quality to consider an alignment (default 5)")
parser.add_argument('-Q', type=int,default=20, help="Minimum regional mapping quality (default 20)")
parser.add_argument('-n', type=int,default=2, help="the ploidy of the organism,(default = 2)")
parser.add_argument('-e', type=int, help="clustering distance parameter, discordant pairs closer than this distance are considered to belong to the same variant(default = sqrt(insert-size*2)*12)")
parser.add_argument('-l', type=int,default=3, help="min-pts parameter (default=3),must be set >= 2")
parser.add_argument('-s', type=int,default=25000000, help="Number of reads to sample when computing library statistics(default=25000000)")
parser.add_argument('-z', type=int,default=100, help="minimum variant size (default=100), variants smaller than this will not be printed ( z < 10 is not recomended)")
parser.add_argument('--force_ploidy',action="store_true", help="force the ploidy to be set to -n across the entire genome (i.e skip coverage normalisation of chromosomes)")
parser.add_argument('--no_cluster',action="store_true", help="Run only the TIDDIT signal extraction")
parser.add_argument('--debug',action="store_true", help="rerun the tiddit clustering procedure")
parser.add_argument('--n_mask',type=float,default=0.5, help="exclude regions from coverage calculation if they contain more than this fraction of N (default = 0.5)")
parser.add_argument('--ref', type=str, help="reference fasta, used for GC correction and for reading cram")
parser.add_argument('--p_ratio', type=float,default=0.2, help="minimum discordant pair/normal pair ratio at the breakpoint junction(default=20%)")
parser.add_argument('--r_ratio', type=float,default=0.1, help="minimum split read/coverage ratio at the breakpoint junction(default=10%)")
args= parser.parse_args()
args.wd=os.path.dirname(os.path.realpath(__file__))
if args.l < 2:
print ("error, too low --l value!")
quit()
if args.ref:
if not os.path.isfile(args.ref):
print ("error, could not find the reference file")
quit()
if not args.ref and args.bam.endswith(".cram"):
print("error, reference fasta is required when using cram input")
quit()
if not (args.bam.endswith(".bam") or args.bam.endswith(".cram")) and not "/dev/" in args.bam:
print ("error, the input file is not a bam file, make sure that the file extension is .bam or .cram")
quit()
if not os.path.isfile(args.bam) and not "/dev/" in args.bam:
print ("error, could not find the bam file")
quit()
if not os.path.isfile("{}/TIDDIT".format(args.wd)):
print ("error, could not find the TIDDIT executable file, try rerun the INSTALL.sh script")
quit()
if not args.bam.endswith(".cram"):
command_str="{}/TIDDIT --sv -b {} -o {} -p {} -r {} -q {} -n {} -s {}".format(args.wd,args.bam,args.o,args.p,args.r,args.q,args.n,args.s)
else:
command_str="samtools view -hbu {} -T {} | {}/TIDDIT --sv -b /dev/stdin -o {} -p {} -r {} -q {} -n {} -s {}".format(args.bam,args.ref,args.wd,args.o,args.p,args.r,args.q,args.n,args.s)
if args.i:
command_str += " -i {}".format(args.i)
if args.d:
command_str += " -d {}".format(args.d)
if not args.debug:
os.system(command_str)
if args.ref:
t=time.time()
print ("Generating GC wig file")
if args.ref.endswith(".gz"):
os.system("zcat {} | {}/TIDDIT --gc -z 50 -o {}".format(args.ref,args.wd,args.o))
else:
os.system("cat {} | {}/TIDDIT --gc -z 50 -o {}".format(args.ref,args.wd,args.o))
print ("Constructed GC wig in {} sec".format(time.time()-t))
if not args.no_cluster:
TIDDIT_calling.cluster(args)
elif args.cov:
parser = argparse.ArgumentParser("""TIDDIT --cov --bam inputfile [-o prefix]""")
parser.add_argument('--cov' , help="generate a coverage bed/wig file", required=False, action="store_true")
parser.add_argument('--bam', type=str,required=True, help="coordinate sorted bam file(required)")
parser.add_argument('-o', type=str,default="output", help="output prefix(default=output)")
parser.add_argument('-z', type=int,default=500, help="use bins of specified size(default = 500bp) to measure the coverage of the entire bam file, set output to stdout to print to stdout")
parser.add_argument('-w' , help="generate wig instead of bed", required=False, action="store_true")
parser.add_argument('-u' , help="skip per bin mapping quality", required=False, action="store_true")
parser.add_argument('--ref', type=str, help="reference fasta, used for GC correction and for reading cram")
args= parser.parse_args()
args.wd=os.path.dirname(os.path.realpath(__file__))
if args.ref:
if not os.path.isfile(args.ref):
print ("error, could not find the reference file")
quit()
if not args.bam.endswith(".cram"):
command="{}/TIDDIT --cov -b {} -o {} -z {}".format(args.wd,args.bam,args.o,args.z)
else:
if not args.ref:
print("error, missing reference sequence!")
quit()
command="samtools view -hbu {} -T {} | {}/TIDDIT --cov -b /dev/stdin -o {} -z {}".format(args.bam,args.ref,args.wd,args.o,args.z)
if args.w:
command += " -w"
if args.u:
command += " -u"
if not os.path.isfile(args.bam):
print ("error, could not find the bam file")
quit()
if not os.path.isfile("{}/TIDDIT".format(args.wd)):
print ("error, could not find the TIDDIT executable file, try rerun the INSTALL.sh script")
quit()
os.system(command)
else:
parser.print_help()
|