1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
|
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import argparse
import sys
import numpy as np
from deeptools import parserCommon
from deeptools import writeBedGraph_bam_and_bw
debug = 0
def parse_arguments(args=None):
parentParser = parserCommon.getParentArgParse()
outputParser = parserCommon.output()
parser = argparse.ArgumentParser(
parents=[parentParser, outputParser],
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description='This tool average multiple bigWig files based on the number '
'of mapped reads. To average the bigWig files, the genome is '
'partitioned into bins of equal size, then the scores '
'in each bigwig file are computed per bin.'
'These scores are averaged and scaleFactors can be applied before the average.',
usage='bigwigAverage -b sample1.bw sample2.bw -o outfile.bw\n'
'help: bigwigAverage -h / bigwigAverage --help')
# define the arguments
parser.add_argument('--bigwigs', '-b',
metavar='Bigwig files',
help='Bigwig files separated by space.',
nargs='+',
required=True)
parser.add_argument('--scaleFactors',
help='Set this parameter to multipy the bigwig values '
'by a constant. The format is '
'scaleFactor1:scaleFactor2:scaleFactor3 etc. '
'For example 0.7:1 to scale the first bigwig file '
'by 0.7 while not scaling the second bigwig file',
default=None,
required=False)
parser.add_argument('--skipNonCoveredRegions', '--skipNAs',
help='This parameter determines if non-covered regions (regions without a score) '
'in the bigWig files should be skipped. The default is to treat those '
'regions as having a value of zero. '
'The decision to skip non-covered regions '
'depends on the interpretation of the data. Non-covered regions '
'in a bigWig file may represent repetitive regions that should '
'be skipped. Alternatively, the interpretation of non-covered regions as '
'zeros may be wrong and this option should be used ',
action='store_true')
return parser
def getType(fname):
"""
Tries to determine if a file is a wiggle, a bedgraph, or a bigWig file.
"""
if fname.endswith(".wig") or fname.endswith(".wiggle"):
return "wiggle"
elif fname.lower().endswith(".bedgraph") or fname.endswith(".bdg"):
return "bedgraph"
else:
return "bigwig"
def average(tileCoverage, args):
r"""
The mapreduce method calls this function
for each tile. The parameters (args) are fixed
in the main method.
>>> funcArgs= {'scaleFactors': (1,1)}
>>> average([1, 2], funcArgs)
1.5
>>> funcArgs= {'scaleFactors': (1,0.5)}
>>> average([1, 2], funcArgs)
1.0
>>> funcArgs= {'scaleFactors': (1,0.5,0.1,0.2)}
>>> average([1, 2, 3, 12], funcArgs)
1.175
>>> average([1, 2, 3, np.nan], funcArgs)
nan
"""
norm_values = [args['scaleFactors'][i] * cov for i, cov in enumerate(tileCoverage)]
return np.mean(norm_values)
def main(args=None):
args = parse_arguments().parse_args(args)
if len(sys.argv) == 1:
parse_arguments().print_help()
sys.exit()
nFiles = len(args.bigwigs)
if args.scaleFactors:
scaleFactors = [float(x) for x in args.scaleFactors.split(":")]
if len(scaleFactors) == 1:
scaleFactors = scaleFactors * nFiles
elif len(scaleFactors) != nFiles:
raise argparse.ArgumentTypeError(
"Format of scaleFactors is factor or factor1:factor2... as many as bigwig files. "
"There are {} bigwigs and {} factors."
"The value given ( {} ) is not valid".format(nFiles, len(scaleFactors), args.scaleFactors))
else:
scaleFactors = [1] * nFiles
# the average function is called and receives
# the function_args per each tile that is considered
FUNC = average
function_args = {'scaleFactors': scaleFactors}
writeBedGraph_bam_and_bw.writeBedGraph(
[(b, getType(b)) for b in args.bigwigs],
args.outFileName, 0, FUNC,
function_args, tileSize=args.binSize, region=args.region,
blackListFileName=args.blackListFileName,
verbose=args.verbose,
numberOfProcessors=args.numberOfProcessors,
skipZeroOverZero=False,
format=args.outFileFormat,
smoothLength=False,
missingDataAsZero=not args.skipNonCoveredRegions,
extendPairedEnds=False)
|