1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
|
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse # to parse command line arguments
import sys
import multiprocessing
import os
from deeptools import parserCommon
from deeptools.getRatio import getRatio
from deeptools import writeBedGraph_bam_and_bw
import deeptools.deepBlue as db
debug = 0
def parse_arguments(args=None):
parentParser = parserCommon.getParentArgParse()
outputParser = parserCommon.output()
dbParser = parserCommon.deepBlueOptionalArgs()
parser = argparse.ArgumentParser(
parents=[parentParser, outputParser, dbParser],
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description='This tool compares two bigWig files based on the number '
'of mapped reads. To compare the bigWig files, the genome is '
'partitioned into bins of equal size, then the number of reads found '
'in each BAM file are counted per bin and finally a summary '
'value is reported. This value can be the ratio of the number of reads'
'per bin, the log2 of the ratio, the sum or the difference.')
# define the arguments
parser.add_argument('--bigwig1', '-b1',
metavar='Bigwig file',
help='Bigwig file 1. Usually the file for the '
'treatment.',
required=True)
parser.add_argument('--bigwig2', '-b2',
metavar='Bigwig file',
help='Bigwig file 2. Usually the file for the '
'control.',
required=True)
parser.add_argument('--scaleFactors',
help='Set this parameter to multipy the bigwig values '
'by a constant. The format is '
'scaleFactor1:scaleFactor2. '
'For example 0.7:1 to scale the first bigwig file '
'by 0.7 while not scaling the second bigwig file',
default=None,
required=False)
parser.add_argument('--pseudocount',
help='A small number to avoid x/0. Only useful '
'together with --operation log2 or --operation ratio. '
'You can specify different values as pseudocounts for '
'the numerator and the denominator by providing two '
'values (the first value is used as the numerator '
'pseudocount and the second the denominator pseudocount). (Default: %(default)s)',
default=1,
nargs='+',
action=parserCommon.requiredLength(1, 2),
type=float,
required=False)
parser.add_argument('--skipZeroOverZero',
help='Skip bins where BOTH BAM files lack coverage. '
'This is determined BEFORE any applicable pseudocount '
'is added.',
action='store_true')
parser.add_argument('--operation',
help='The default is to output the log2ratio of the '
'two samples. The reciprocal ratio returns the '
'the negative of the inverse of the ratio '
'if the ratio is less than 0. The resulting '
'values are interpreted as negative fold changes. '
'Instead of performing a '
'computation using both files, the scaled signal can '
'alternatively be output for the first or second file using '
'the \'--operation first\' or \'--operation second\' (Default: %(default)s)',
default='log2',
choices=['log2', 'ratio', 'subtract', 'add', 'mean',
'reciprocal_ratio', 'first', 'second'],
required=False)
parser.add_argument('--skipNonCoveredRegions', '--skipNAs',
help='This parameter determines if non-covered regions (regions without a score) '
'in the bigWig files should be skipped. The default is to treat those '
'regions as having a value of zero. '
'The decision to skip non-covered regions '
'depends on the interpretation of the data. Non-covered regions '
'in a bigWig file may represent repetitive regions that should '
'be skipped. Alternatively, the interpretation of non-covered regions as '
'zeros may be wrong and this option should be used ',
action='store_true')
return parser
def getType(fname):
"""
Tries to determine if a file is a wiggle file from deepBlue or a bigWig file.
Returns 'wiggle' if the file name ends with .wig, otherwise 'bigwig'
"""
if fname.endswith(".wig") or fname.endswith(".wiggle"):
return "wiggle"
elif fname.endswith(".bedgraph"):
return "bedgraph"
else:
return "bigwig"
def main(args=None):
args = parse_arguments().parse_args(args)
if args.scaleFactors:
scaleFactors = [float(x) for x in args.scaleFactors.split(":")]
else:
scaleFactors = [1, 1]
if not isinstance(args.pseudocount, list):
args.pseudocount = [args.pseudocount]
if len(args.pseudocount) == 1:
args.pseudocount *= 2
# the getRatio function is called and receives
# the function_args per each tile that is considered
FUNC = getRatio
function_args = {'valueType': args.operation,
'scaleFactors': scaleFactors,
'pseudocount': args.pseudocount}
# Preload deepBlue files, which need to then be deleted
deepBlueFiles = []
for idx, fname in enumerate([args.bigwig1, args.bigwig2]):
if db.isDeepBlue(fname):
deepBlueFiles.append([fname, idx])
if len(deepBlueFiles) > 0:
sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles])))
foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey)
regs = db.makeChromTiles(foo)
for x in deepBlueFiles:
x.extend([args, regs])
if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1:
pool = multiprocessing.Pool(args.numberOfProcessors)
res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999)
else:
res = list(map(db.preloadWrapper, deepBlueFiles))
# substitute the file names with the temp files
for (ftuple, r) in zip(deepBlueFiles, res):
if ftuple[1] == 0:
args.bigwig1 = r
else:
args.bigwig2 = r
deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles]
del regs
writeBedGraph_bam_and_bw.writeBedGraph(
[(args.bigwig1, getType(args.bigwig1)),
(args.bigwig2, getType(args.bigwig2))],
args.outFileName, 0, FUNC,
function_args, tileSize=args.binSize, region=args.region,
blackListFileName=args.blackListFileName,
verbose=args.verbose,
numberOfProcessors=args.numberOfProcessors,
skipZeroOverZero=args.skipZeroOverZero,
format=args.outFileFormat,
smoothLength=False,
missingDataAsZero=not args.skipNonCoveredRegions,
extendPairedEnds=False)
# Clean up temporary bigWig files, if applicable
if not args.deepBlueKeepTemp:
for k, v in deepBlueFiles:
if v == 0:
os.remove(args.bigwig1)
else:
os.remove(args.bigwig2)
else:
for k, v in deepBlueFiles:
foo = args.bigwig1
if v == 1:
foo = args.bigwig2
print("{} is stored in {}".format(k, foo))
|