<tool id="deeptools_compute_matrix" name="computeMatrix" version="@TOOL_VERSION@+galaxy0" profile="@GALAXY_VERSION@">
<!-- Short human-readable summary of what the tool does. -->
<description>prepares data for plotting a heatmap or a profile of given regions</description>
<macros>
<!-- The BINARY token is substituted into the command template below as the executable name. -->
<token name="@BINARY@">computeMatrix</token>
<!-- Shared definitions. The macros expanded in this file (requirements, multiple_input_bigwigs, citations, ...)
and the other tokens used below are not defined here, so they presumably come from this import. -->
<import>deepTools_macros.xml</import>
</macros>
<!-- Expands the "requirements" macro (definition not visible in this file). -->
<expand macro="requirements" />
<command>
<![CDATA[
## The token on the next line is defined outside this file; the template below reads $files and $labels,
## which are presumably set up by it (TODO confirm against deepTools_macros.xml).
@multiple_input_bigwigs@
## Symlink every selected regions file to "<sanitized identifier>_<index>.bed" in the current directory
## and collect those link names for the regions-file argument.
#set bed_files=[]
#for $counter, $rf, in enumerate($regionsFiles):
## Every character that is not whitespace, a word character or '-' becomes '_'.
#set identifier = re.sub('[^\s\w\-]', '_', str($rf.regionsFile.element_identifier))
ln -f -s '${rf.regionsFile}' '${identifier}_${counter}.bed' &&
#silent $bed_files.append('%s_%s.bed' % ($identifier, $counter))
#end for
## Executable followed by the selected mode as its first argument.
@BINARY@
$mode.mode_select
## The join below yields one single-quoted argument per symlinked regions file.
--regionsFileName '#echo "' '".join($bed_files)#'
--scoreFileName #echo ' '.join($files)#
--outFileName '$outFileName'
--samplesLabel #echo ' '.join($labels)#
@THREADS@
## Optional extra outputs; only requested when the output settings section is switched on.
## ($output is not declared in this file; presumably it comes from the input_graphic_output_settings macro.)
#if $output.showOutputSettings == "yes"
#if $output.saveMatrix:
--outFileNameMatrix '$outFileNameMatrix'
#end if
#if $output.saveSortedRegions:
--outFileSortedRegions '$outFileSortedRegions'
#end if
#end if
## Mode-specific options.
#if $mode.mode_select == "reference-point":
--referencePoint $mode.referencePoint
## Boolean param: expands to its flag when checked, to nothing otherwise.
$mode.nanAfterEnd
--beforeRegionStartLength $mode.beforeRegionStartLength
--afterRegionStartLength $mode.afterRegionStartLength
#else
## scale-regions: the body length is always passed; flank lengths and unscaled ends only when the user opted in.
--regionBodyLength $mode.regionBodyLength
#if $mode.regionStartLength.regionStartLength_select == "yes":
--beforeRegionStartLength $mode.regionStartLength.beforeRegionStartLength
--afterRegionStartLength $mode.regionStartLength.afterRegionStartLength
--unscaled5prime $mode.regionStartLength.unscaled5prime
--unscaled3prime $mode.regionStartLength.unscaled3prime
#end if
#end if
## Advanced options reach the command line only when the advanced section is switched on.
#if $advancedOpt.showAdvancedOpt == "yes":
--sortRegions '$advancedOpt.sortRegions'
--sortUsing '$advancedOpt.sortUsing'
--averageTypeBins '$advancedOpt.averageTypeBins'
$advancedOpt.skipZeros
$advancedOpt.missingDataAsZero
--binSize $advancedOpt.binSize
## Optional float inputs: the flag is left out entirely when the field is None or empty.
#if $advancedOpt.minThreshold is not None and str($advancedOpt.minThreshold) != '':
--minThreshold $advancedOpt.minThreshold
#end if
#if $advancedOpt.maxThreshold is not None and str($advancedOpt.maxThreshold) != '':
--maxThreshold $advancedOpt.maxThreshold
#end if
#if $advancedOpt.scale is not None and str($advancedOpt.scale) != '':
--scale $advancedOpt.scale
#end if
## The two tokens below are defined outside this file.
@ADVANCED_OPTS_GTF@
@blacklist@
#end if
]]>
</command>
<inputs>
<!-- One or more region files (at least one). The command template symlinks each entry and passes
all of them to the regions-file option. Both BED and GTF datasets are accepted. -->
<repeat name="regionsFiles" title="Select regions" min="1">
<param name="regionsFile" format="bed,gtf" type="data" label="Regions to plot"
help="File, in BED or GTF format, containing the regions to plot."/>
</repeat>
<!-- Score file inputs and custom sample labels, both defined by macros outside this file.
The command template reads $files and $labels; presumably they are derived from these inputs (TODO confirm). -->
<expand macro="multiple_input_bigwigs" MIN="1" LABEL="Score file" TITLE="Score files"/>
<expand macro="custom_sample_labels" />
<!-- Mode selector. The chosen value is emitted verbatim as the first argument after the executable
in the command template, and decides which of the two option sets below is used. -->
<conditional name="mode" >
<param name="mode_select" type="select"
label="computeMatrix has two main output options"
help="In the scale-regions mode, all regions in the BED file are
stretched or shrunken to the same length (in bases) that is indicated
by the user. Reference-point refers to a position within the BED
regions (start or end of each region). In the reference-point mode only
those genomic positions before (upstream) and/or after (downstream)
the reference point will be considered.">
<option value="scale-regions" selected="true">scale-regions</option>
<option value="reference-point">reference-point</option>
</param>
<!-- scale-regions: the body length is always passed on the command line. -->
<when value="scale-regions" >
<param argument="--regionBodyLength" type="integer" value="500"
label="Distance in bases to which all regions are going to be fit" help=""/>
<!-- Flank lengths and unscaled ends are passed only when this nested selector is "yes". -->
<conditional name="regionStartLength">
<param name="regionStartLength_select" type="select" label="Set distance up- and downstream of the given regions">
<option value="no" selected="true">no</option>
<option value="yes">yes</option>
</param>
<when value="no" />
<when value="yes">
<param argument="--beforeRegionStartLength" type="integer" value="1000" min="1"
label="Distance upstream of the region start position"
help="If the regions are genes, this would be the
distance upstream of the transcription start site."/>
<param argument="--afterRegionStartLength" type="integer" value="1000" min="1"
label="Distance downstream of the region end position"
help="If the regions are genes, this would be the
distance downstream of the transcription end site."/>
<param argument="--unscaled5prime" type="integer" min="0" value="0"
label="Number of bases within the gene body at the 5-prime end to exclude from scaling."
help="By default, each region is scaled to a given length (see the --regionBodyLength
option). In some cases it is useful to look at unscaled signals around region boundaries, so this
setting specifies the number of unscaled bases on the 5-prime end of each boundary." />
<param argument="--unscaled3prime" type="integer" min="0" value="0"
label="Number of bases within the gene body at the 3-prime end to exclude from scaling."
help="As with --unscaled5prime, but for the 3-prime end." />
</when>
</conditional>
</when>
<!-- reference-point: the reference point and both flank lengths are always passed;
nanAfterEnd contributes its flag only when checked. -->
<when value="reference-point">
<param name="referencePoint" type="select" label="The reference point for the plotting">
<option value="TSS" selected="true">beginning of region (e.g. TSS)</option>
<option value="TES">end of region (e.g. TES)</option>
<option value="center">center of region</option>
</param>
<param name="nanAfterEnd" type="boolean" truevalue="--nanAfterEnd" falsevalue=""
label="Discard any values after the region end"
help="This is useful to visualize the region end when not using the
scale-regions mode and when the reference-point is set to the TSS. (--nanAfterEnd)"/>
<param name="beforeRegionStartLength" type="integer" value="1000" min="1"
label="Distance upstream of the start site of the regions defined in the region file"
help="If the regions are genes, this would be the distance upstream of the transcription start site. (--beforeRegionStartLength)"/>
<param name="afterRegionStartLength" type="integer" value="1000" min="1"
label="Distance downstream of the end site of the given regions"
help="If the regions are genes, this would be the distance downstream of the transcription end site. (--afterRegionStartLength)"/>
</when>
</conditional>
<!-- Output settings section, defined by macros outside this file. The command template reads
output.showOutputSettings, output.saveMatrix and output.saveSortedRegions, which are presumably
declared by these macros (TODO confirm). -->
<expand macro="input_graphic_output_settings">
<expand macro="input_save_matrix_values" />
</expand>
<!-- Advanced tuning. None of these values reach the command line unless "yes" is selected. -->
<conditional name="advancedOpt" >
<param name="showAdvancedOpt" type="select" label="Show advanced options" >
<option value="no" selected="true">no</option>
<option value="yes">yes</option>
</param>
<when value="no" />
<when value="yes">
<param name="binSize" type="integer" value="50" min="1"
label="Length, in bases, of non-overlapping bins used for averaging the score over the regions length"
help="(--binSize)"/>
<!-- Sorting controls are defined by macros outside this file. -->
<expand macro="sortRegionsComputeMatrix" />
<expand macro="sortUsing" />
<param name="averageTypeBins" type="select"
label="Define the type of statistic that should be displayed."
help="The value is computed for each bin. (--averageTypeBins)">
<option value="mean" selected="true">mean</option>
<option value="median">median</option>
<option value="min">min</option>
<option value="max">max</option>
<option value="sum">sum</option>
<option value="std">std</option>
</param>
<param name="missingDataAsZero" type="boolean" truevalue="--missingDataAsZero" falsevalue="" checked="False"
label="Convert missing values to 0?"
help="If set to 'yes', missing values (NAs) are converted
to 0. If you want to use clustering with plotHeatmap
or plotProfile, set this to 'yes'.
The default is to ignore missing values, which will be
depicted as black areas once a heatmap is created.
(--missingDataAsZero)" />
<expand macro="skipZeros" />
<!-- Optional floats: the command template omits the corresponding flag when a field is left empty. -->
<param name="minThreshold" type="float" optional="True"
label="Minimum threshold"
help="Any region containing a value that is equal to or less than this numeric
value will be skipped. This is useful to skip, for example, genes where the
read count is zero for any of the bins which could be the result of
unmappable areas and can bias the overall results. (--minThreshold)"/>
<param name="maxThreshold" type="float" optional="True"
label="Maximum threshold"
help="Any region containing a value that is equal to or higher than this
numeric value will be skipped. The max threshold is useful to skip those
few regions with very high read counts (e.g. major satellites) that may
bias the average values. (--maxThreshold)"/>
<param name="scale" type="float" optional="True" label="Scaling factor"
help="If set, all values are multiplied by this number. (--scale)"/>
<!-- GTF handling and blacklist inputs are defined by macros outside this file. -->
<expand macro="gtf_options" />
<expand macro="blacklist" />
</when>
</conditional>
</inputs>
<outputs>
<!-- Main result: the matrix archive whose path is passed as the output file name in the command template. -->
<data format="deeptools_compute_matrix_archive" name="outFileName" label="${tool.name} on ${on_string}: Matrix" />
<!-- Macro-defined outputs (definitions not visible in this file). The command template references
outFileNameMatrix and outFileSortedRegions, which are presumably declared by these macros. -->
<expand macro="output_graphic_outputs" />
<expand macro="output_save_matrix_values" />
</outputs>
<tests>
<!-- Test 1: reference-point mode with advanced options on; bin size 10, flanks of 10 bases,
"sum" for both sortUsing and averageTypeBins. -->
<test>
<param name="regionsFile" value="computeMatrix1.bed" ftype="bed" />
<param name="bigwigfiles" value="bamCoverage_result4.bw" ftype="bigwig" />
<param name="showAdvancedOpt" value="yes" />
<param name="mode_select" value="reference-point" />
<param name="binSize" value="10" />
<param name="sortUsing" value="sum" />
<param name="averageTypeBins" value="sum" />
<param name="beforeRegionStartLength" value="10" />
<param name="afterRegionStartLength" value="10" />
<output name="outFileName" file="computeMatrix_result1.gz" ftype="deeptools_compute_matrix_archive" compare="sim_size" />
</test>
<!-- Test 2: reference-point mode with advanced options on; bin size 10, flanks of 10 bases,
sorting and bin statistic left at their defaults. -->
<test>
<param name="regionsFile" value="computeMatrix2.bed" ftype="bed" />
<param name="bigwigfiles" value="computeMatrix2.bw" ftype="bigwig" />
<param name="showAdvancedOpt" value="yes" />
<param name="mode_select" value="reference-point" />
<param name="binSize" value="10" />
<param name="beforeRegionStartLength" value="10" />
<param name="afterRegionStartLength" value="10" />
<output name="outFileName" file="computeMatrix_result2.gz" ftype="deeptools_compute_matrix_archive" compare="sim_size" />
</test>
<!-- Test 3: scale-regions mode with advanced options on.
NOTE(review): no input named "endLabel" is visible in this file, and "regionStartLength" is the name of a
conditional whose selector is "regionStartLength_select". Confirm whether these two params take effect
as intended before relying on this test to cover the flank options. -->
<test>
<param name="regionsFile" value="computeMatrix2.bed" ftype="bed" />
<param name="bigwigfiles" value="computeMatrix2.bw" ftype="bigwig" />
<param name="showAdvancedOpt" value="yes" />
<param name="mode_select" value="scale-regions" />
<param name="endLabel" value="END" />
<param name="regionStartLength" value="yes" />
<output name="outFileName" file="computeMatrix_result3.gz" ftype="deeptools_compute_matrix_archive" compare="sim_size" />
</test>
</tests>
<help>
<![CDATA[

What it does
----------------

This tool prepares an intermediate file (a gzipped table of values)
that contains scores associated with genomic regions.
The regions can either be scaled to the same size (using the ``scale-regions`` mode) or you can choose the start, end, or center of each region as the focus point for the score calculations.
For more details, check out the explanation `here <https://deeptools.readthedocs.io/en/latest/content/tools/computeMatrix.html#details>`_.

The intermediate file produced by ``computeMatrix`` is meant to be used with ``plotHeatmap`` and ``plotProfile``.
See the descriptions of ``plotHeatmap`` and ``plotProfile`` for example plots.

.. image:: $PATH_TO_IMAGES/computeMatrix_overview.png
   :alt: Relationship between computeMatrix, heatmapper and profiler
   :width: 600
   :height: 418

=======

Usage hints
-------------

The supplied genomic regions can really be anything - genes, parts of genes, ChIP-seq peaks, favorite genome regions... as long as you provide a proper file
in BED or GTF format. If you would like to compare different groups of regions (e.g., genes from chromosome 2 and 3), you can supply more than 1 regions file, one for each group by selecting "Insert Select regions".

.. image:: $PATH_TO_IMAGES/computeMatrix_selectRegions.png
   :width: 600
   :height: 150

You can select as many score (bigWig) files as you like. Simply use the Shift and/or Command key while clicking on the files of interest.

.. image:: $PATH_TO_IMAGES/computeMatrix_selectScores.png
   :width: 600
   :height: 136

The multitude of parameters can seem daunting at first - here are the options that we tend to tune most often:

* ``bin Size`` -- The default value works well most of the time, but if you want to have a more finely grained image, decrease the default value (but not smaller than your bigWig file(s)' bin size). If you want to reduce the computation time, increase it.
* ``Skip zeros`` -- useful to avoid completely blank lines in the heatmap.
* ``Convert missing values to 0?`` -- If you want to identify clusters of similar regions in an unsupervised fashion using ``plotHeatmap`` and/or ``plotProfile``, you should definitely set this to 'yes'.

Output files
---------------

The default output is a **gzipped table of values** that is used by both ``plotHeatmap`` and ``plotProfile``.

The optional output files include a) the **regions after sorting and filtering (if selected)** as they were used to calculate the values for the plotting, and b) the uncompressed table that **underlies the heatmap**.

**TIP:** ``computeMatrix`` can also be used to filter and sort regions according to their score by making use of the "advanced output settings".

.. image:: $PATH_TO_IMAGES/computeMatrix_advancedOutput.png
   :width: 600
   :height: 189

.. image:: $PATH_TO_IMAGES/computeMatrix_output.png
   :width: 600
   :height: 297

Note that these advanced output options are available for ``plotHeatmap`` and ``plotProfile``, too.

See the following table for the optional output options:

+-----------------------------------+--------------------+-----------------+-----------------+
| **optional output type**          | **computeMatrix**  | **plotHeatmap** | **plotProfile** |
+-----------------------------------+--------------------+-----------------+-----------------+
| values underlying the heatmap     | yes                | yes             | no              |
+-----------------------------------+--------------------+-----------------+-----------------+
| values underlying the profile     | no                 | no              | yes             |
+-----------------------------------+--------------------+-----------------+-----------------+
| sorted and/or filtered regions    | yes                | yes             | yes             |
+-----------------------------------+--------------------+-----------------+-----------------+

**More examples** can be found in our `Gallery <https://deeptools.readthedocs.io/en/latest/content/example_gallery.html#normalized-chip-seq-signals-and-peak-regions>`_.

-----

@REFERENCES@
]]>
</help>
<expand macro="citations" />
</tool>