1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
|
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<!-- Version: Multiflex-5.4 / About -->
<!-- Type: Design with sidebar -->
<!-- Date: March 13, 2008 -->
<!-- Design: www.1234.info -->
<!-- License: Fully open source without restrictions. -->
<!-- Please keep footer credits with the words -->
<!-- "Design by 1234.info". Thank you! -->
<head>
<script type="text/javascript">
// Google Analytics command queue: reuse an existing _gaq array if one is already defined, otherwise start a new one.
var _gaq = _gaq || [];
// Queue the account ID and a page-view hit; the download links on this page push '_trackEvent' entries onto the same queue.
_gaq.push(['_setAccount', 'UA-25486066-1']);
_gaq.push(['_trackPageview']);
// NOTE(review): this immediately-invoked function has an empty body, so nothing in this block
// loads a tracking script that would consume the queue. Presumably the loader was removed; confirm.
(function() {
})();
</script>
<meta http-equiv="content-type" content="text/html; charset=utf-8" />
<meta http-equiv="cache-control" content="no-cache" />
<meta http-equiv="expires" content="3600" />
<meta name="revisit-after" content="2 days" />
<meta name="robots" content="index,follow" />
<meta name="publisher" content="Your publisher infos here" />
<meta name="copyright" content="Copyright (c) 2011 Adam Roberts" />
<meta name="author" content="Designed by www.1234.info / Modified: Adam Roberts" />
<meta name="distribution" content="global" />
<meta name="image" content="http://bio.math.berkeley.edu/eXpress/img/logo.png" />
<meta name="description" content="eXpress is a general quantification tool for target DNA/RNA sequences. While its primary use currently is RNA-Seq, it has the potential for applications in many other areas including allele-specific expression and metagenomics. What makes eXpress different is that it is an online (or streaming) algorithm, meaning it only makes one pass through the data. This allows it to be very light-weight and efficient using a constant amount of memory and time linear in the number of sequenced fragments being processed. Furthermore, it accepts piped SAM/BAM input, allowing users to avoid storing extremely large alignment files. eXpress models fragment biases, fragment lengths, and errors, allowing it to also be one of the most accurate quantification methods available." />
<meta name="keywords" content="RNA-Seq, Genomics, transcript,quantification" />
<link rel="stylesheet" type="text/css" media="screen,projection,print" href="./css/mf54_reset.css" />
<link rel="stylesheet" type="text/css" media="screen,projection,print" href="./css/mf54_grid.css" />
<link rel="stylesheet" type="text/css" media="screen,projection,print" href="./css/mf54_content.css" />
<link rel="icon" type="image/x-icon" href="./img/favicon.ico" />
<title>eXpress • Manual</title>
<!-- Creates two Image objects for the header logo rollover; the inline handlers on the logo link read logo1.src and logo2.src. -->
<script type="text/javascript">
// Declared with var at the top level of the script so both names stay global for those inline handlers.
var logo1 = new Image();
logo1.src = "img/logo.png";
var logo2 = new Image();
logo2.src = "img/logo_yellow.png";
</script>
</head>
<!-- Global IE fix to avoid layout crash when single word size wider than column width -->
<!-- Following line MUST remain as a comment to have the proper effect -->
<!--[if IE]><style type="text/css"> body {word-wrap: break-word;}</style><![endif]-->
<body>
<!-- CONTAINER FOR ENTIRE PAGE -->
<div class="container">
<!-- A. HEADER -->
<div class="corner-page-top"></div>
<div class="header">
<div class="header-top">
<!-- A.1 SITENAME -->
<div class="sitelogo">
<ul>
<!-- Logo rollover: hovering swaps the image named "logo" between the sources held by the global logo1 and logo2 Image objects. -->
<!-- Event attribute names are lowercase because the XHTML doctype is case-sensitive; alt gives the image-only link a text alternative. -->
<li><a href="#" onmouseover="document.logo.src=logo2.src" onmouseout="document.logo.src=logo1.src"><img name="logo" src="img/logo.png" alt="eXpress logo" /></a></li>
</ul>
</div>
<div class="sitename">
<h1><a href="#">eXpress</a></h1>
<h2><i>Streaming</i> quantification for high-throughput sequencing</h2>
</div>
<!-- A.2 BUTTON NAVIGATION -->
<div class="navbutton">
<ul>
<!-- The seal image is the link's only content, so its alt text names the link destination. -->
<li><a href="http://www.berkeley.edu"><img src="img/berkeley_seal.gif" alt="University of California, Berkeley" /></a></li>
</ul>
</div>
</div>
<!-- A.4 BREADCRUMB and SEARCHFORM -->
<div class="header-bottom">
<!-- Search form -->
<div class="searchform" id="cse-search-form" style="padding-top:4px; width:30%;">Loading</div>
<!-- NOTE(review): the script below contains only the tail of a call ("}, true);") with no matching opening, so it is not valid JavaScript as written. Presumably the code that populates the cse-search-form element was truncated; confirm against the original page. -->
<script type="text/javascript">
}, true);
</script>
<link rel="stylesheet" href="css/googlesearch.css" type="text/css" />
</div>
</div>
<div class="corner-page-bottom"></div>
<!-- B. NAVIGATION BAR -->
<div class="corner-page-top"></div>
<div class="navbar">
<!-- Navigation item -->
<ul>
<li><a href="index.html">Home</a></li>
</ul>
<!-- Navigation item -->
<ul>
<li><a href="overview.html">About</a></li>
</ul>
<!-- Navigation item -->
<ul>
<li><a href="#">Download<!--[if IE 7]><!--></a><!--<![endif]-->
<!--[if lte IE 6]><table><tr><td><![endif]-->
<!-- Download submenu: each link pushes a '_trackEvent' entry onto the _gaq queue when clicked. Attribute values are quoted and event attribute names lowercase, as XHTML requires. -->
<ul>
<li><a href="downloads/express-1.5.1/express-1.5.1-macosx_x86_64.tgz" onclick="_gaq.push(['_trackEvent', 'Downloads', 'Mac', 'Manual']);" target="_blank">Mac OS X (64-bit)</a></li>
<li><a href="downloads/express-1.5.1/express-1.5.1-linux_x86_64.tgz" onclick="_gaq.push(['_trackEvent', 'Downloads', 'Linux', 'Manual']);" target="_blank">Linux (64-bit)</a></li>
<li><a href="downloads/express-1.5.1/express-1.5.1-win32_x86_64.zip" onclick="_gaq.push(['_trackEvent', 'Downloads', 'Windows', 'Manual']);" target="_blank">Windows (64-bit)</a></li>
<li><a href="downloads/express-1.5.1/express-1.5.1-src.tgz" onclick="_gaq.push(['_trackEvent', 'Downloads', 'Source', 'Manual']);" target="_blank">Source Code</a></li>
<li><a href="downloads" onclick="_gaq.push(['_trackEvent', 'Downloads', 'Previous','Manual']);">Previous Versions</a></li>
</ul>
<!--[if lte IE 6]></td></tr></table></a><![endif]-->
</li>
</ul>
<ul>
<li><a href="tutorial.html">Getting Started</a></li>
</ul>
<ul>
<li><a href="https://github.com/adarob/eXpress">Source</a></li>
</ul>
<ul>
<li><a href="manual.html">Manual</a></li>
</ul>
<ul>
<li><a href="faq.html">FAQ</a></li>
</ul>
</div>
<!-- C. MAIN SECTION -->
<div class="main">
<h1 class="pagetitle">Manual</h1>
<!-- C.1 CONTENT -->
<div class="content">
<!-- CONTENT CELL -->
<div class="corner-content-1col-top"></div>
<div class="content-1col-nobox">
<h1 id="doc">Documentation</h1>
<p>Complete documentation for the source code is available in both <a href=doc/index.html>html</a> and <a href=doc/express-doc.pdf>pdf</a> formats.</p>
<p>→ <a href="#top">Back to top.</a></p>
</div>
<div class="corner-content-1col-bottom"></div>
<!-- CONTENT CELL -->
<div class="corner-content-1col-top"></div>
<div class="content-1col-nobox">
<h1 id="usage">Usage</h1>
<h2 id="prereq">Prerequisites</h2>
<p>eXpress runs on intel-based computers running Linux, Mac OS X, or Windows. You can install pre-compiled binaries or build eXpress from the source code. If you wish to build eXpress yourself, you must have a C++ compiler installed (for example, <a href=http://developer.apple.com/tools/xcode>XCode</a> for Mac OS X, <a href=http://www.microsoft.com/visualstudio/en-us/products/2010-editions/visual-cpp-express>Visual C++ Express</a> for Windows 7) as well as <a href=http://www.cmake.org>CMake</a>, <a href=https://github.com/pezmaster31/bamtools>BamTools</a>, and the <a href="http://www.boost.org">Boost C++ libraries</a>. See the <a href="tutorial.html#install">Installation</a> section on the Getting Started page for detailed instructions.</p>
<h2 id="running">Running eXpress</h2>
<p>Run <tt>eXpress</tt> from the command line as follows:
<ul><pre class="sc"><code>$ express [options]* &lt;target_seqs.fasta&gt; &lt;aligned_reads.(sam/bam)&gt;</code></pre></ul></p>
<p id="options">The following is a detailed description of the options used to control eXpress:
<table>
<tr><td WIDTH=40% VALIGN=top><b>Arguments:</b></td><td WIDTH=60% VALIGN=top></td></tr>
<tr><td VALIGN=top nowrap><tt>&lt;target_seqs.fasta&gt;</tt></td>
<td VALIGN=top>A file of target sequences in <a href=http://en.wikipedia.org/wiki/FASTA_format>multi-FASTA</a> format. See <a href="#fasta">Input Files</a> for more details.</td></tr>
<tr><td VALIGN=top nowrap><tt>&lt;lib_1.sam,lib_2.sam,...,lib_N.sam&gt;</tt></td>
<td VALIGN=top>A comma-separated list of filenames for reads aligned to the target sequences in <a href="http://samtools.sourceforge.net">
SAM format</a>. See <a href="#sam">Input Files</a> for more details.</td></tr>
<tr><td VALIGN=top><b>Standard Options:</b></td><td VALIGN=top></td></tr>
<tr><td VALIGN=top nowrap><tt>-h/--help</tt> </td>
<td VALIGN=top>Prints the help message and exits</td></tr>
<tr><td VALIGN=top nowrap>
<tt>-o/--output-dir &lt;string&gt;</tt>
</td><td VALIGN=top>
Sets the name of the directory in which eXpress will write all of its
output. The default is "./".
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>-B/--additional-batch &lt;int&gt;</tt>
</td><td VALIGN=top>
Specifies the number of additional batch EM rounds to perform on the data using the initial results from the online EM as a seed. Can improve accuracy at the cost of time.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>-O/--additional-online &lt;int&gt;</tt>
</td><td VALIGN=top>
Specifies the number of additional online EM rounds to perform on the data after the initial online round. Can improve accuracy at the cost of time.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>-m/--frag-len-mean &lt;int&gt;</tt>
</td><td VALIGN=top>
Specifies the mean fragment length. While the empirical distribution is estimated from paired-end reads on-the-fly, this value parameterizes the prior distribution. If only single-end reads are available, this prior distribution is also used to determine the effective length. Default is 200.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>-s/--frag-len-stddev &lt;int&gt;</tt>
</td><td VALIGN=top>
Specifies the fragment length standard deviation. While the empirical distribution is estimated from paired-end reads on-the-fly, this value parameterizes the prior distribution. If only single-end reads are available, this prior distribution is also used to determine the effective length. Default is 60.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>-H/--haplotype-file &lt;string&gt;</tt>
</td><td VALIGN=top>
Specifies the location of a comma-separated file of sets of target IDs (one set per line) specifying which targets represent multiple haplotypes of a single feature (ie, transcript). Useful for allele-specific expression.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--output-align-prob</tt>
</td><td VALIGN=top>
With this option, eXpress outputs an additional file called <tt>hits.prob.(sam/bam)</tt> containing identical copies of all input alignments with an additional <tt>XP</tt> tag that contains the estimated probability that each alignment of the read (pair) is the "correct" one. The <tt>XP</tt> values for all alignments of the same read (pair) will sum to 1.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--output-align-samp</tt>
</td><td VALIGN=top>
With this option, eXpress outputs an additional file called <tt>hits.samp.(sam/bam)</tt> containing a single alignment for each fragment sampled at random based on the alignment likelihoods calculated by eXpress.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--fr-stranded</tt>
</td><td VALIGN=top>
With this option, eXpress only accepts alignments (single-end or paired) where the first (or only) read is aligned to the forward target sequence and the second read is aligned to the reverse-complemented target sequence. In directional sequencing, this is equivalent to second-strand only. If all reads are single-end, <tt>--f-stranded</tt> should be used instead. Disabled by default.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--rf-stranded</tt>
</td><td VALIGN=top>
With this option, eXpress only accepts alignments (single-end or paired) where the first (or only) read is aligned to the reverse-complemented target sequence and the second read is aligned to the forward target sequence. In directional sequencing, this is equivalent to first-strand only. If all reads are single-end, <tt>--r-stranded</tt> should be used instead. Disabled by default.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--f-stranded</tt>
</td><td VALIGN=top>
With this option, eXpress only accepts single-end alignments to the forward target sequence. In directional sequencing, this is equivalent to second-strand only. Disabled by default.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--r-stranded</tt>
</td><td VALIGN=top>
With this option, eXpress only accepts single-end alignments to the reverse target sequence. In directional sequencing, this is equivalent to first-strand only. Disabled by default.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--no-update-check</tt>
</td><td VALIGN=top>
With this option, eXpress will not ping our server to see if a newer version is available.
</td></tr>
<tr><td VALIGN=top><b>Advanced Options:</b></td><td VALIGN=top></td></tr>
<tr><td VALIGN=top nowrap>
<tt>-f/--forget-param &lt;float&gt;</tt>
</td><td VALIGN=top>
A parameter specifying the rate at which the prior is "forgotten" by increasing the mass of fragments during online processing. Larger numbers (maximum of 1) mean a slower rate, which slows convergence but improves stability. Smaller numbers (minimum of 0.5) increase the rate, which may lead to faster convergence but can also lead to instability.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--library-size &lt;int&gt;</tt>
</td><td VALIGN=top>
Specifies the number of fragments in the library to be used in the FPKM calculation. If left unspecified, this number will be computed from the input.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--max-indel-size &lt;int&gt;</tt>
</td><td VALIGN=top>
A parameter specifying the maximum allowed size of a single indel. Alignments with larger indels will be ignored. A geometric prior for indel length is fit so that all but 10e-6 of the probability mass lies within the allowed region. The default is 10.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--calc-covar</tt>
</td><td VALIGN=top>
With this option, eXpress calculates the covariance between targets and outputs them for use in differential expression analysis. This calculation requires slightly more time and memory.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--expr-alpha &lt;float&gt;</tt>
</td><td VALIGN=top>
A parameter specifying the weight of the uniform target abundance prior, in pseudo-counts per bp. The default is 0.01.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--stop-at &lt;int&gt;</tt>
</td><td VALIGN=top>
A parameter specifying the number of fragments to process before quitting.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--burn-out &lt;int&gt;</tt>
</td><td VALIGN=top>
A parameter specifying the number of fragments after which to stop learning the auxiliary parameters (fragment length, bias, error).
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--no-bias-correct</tt>
</td><td VALIGN=top>
With this option, eXpress will not measure and account for sequence-specific biases. Will lead to a slight initial increase in speed at the expense of accuracy.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--no-error-model</tt>
</td><td VALIGN=top>
With this option, eXpress will not measure and account for errors in alignments. Will lead to an increase in speed, but may greatly decrease accuracy.
</td></tr>
<tr><td VALIGN=top nowrap>
<tt>--aux-param-file &lt;string&gt;</tt>
</td><td VALIGN=top>
Specifies an auxiliary parameter file output by a different run of eXpress to be used as the auxiliary parameters for this round. Greatly improves speed and should be used when a subset of the targets or fragments are being used in a second estimation.
</td></tr>
</table></p>
<p>→ <a href="#top">Back to top.</a></p>
</div>
<div class="corner-content-1col-bottom"></div>
<!-- CONTENT CELL -->
<div class="corner-content-1col-top"></div>
<div class="content-1col-nobox">
<h1 id="input">Input Files</h1>
<h2 id="fasta">Target Sequences (FASTA)</h2>
<p>eXpress requires a <a href=http://en.wikipedia.org/wiki/FASTA_format>multi-FASTA</a> file of target sequences for which the abundances will be measured. In the case of RNA-Seq, these are the transcript sequences. If
the transcriptome of your organism is not
annotated, you can generate this file from your sequencing
reads using a <i>de novo</i> transcriptome assembler such as
<a href=http://trinityrnaseq.sourceforge.net/>Trinity</a>, <a
href=http://www.ebi.ac.uk/~zerbino/oases/>Oases</a>, or <a
href=http://www.bcgsc.ca/platform/bioinfo/software/trans-abyss>Trans-ABySS</a>.
If your organism has a reference genome, you can assemble
transcripts directly from mapped reads using <a
href="http://cufflinks.cbcb.umd.edu/">Cufflinks</a>. If your genome is already annotated (in
GTF/GFF), you can generate a multi-FASTA file using the <a href=http://genome.ucsc.edu/>UCSC Genome Browser</a> by uploading your annotation as a track and downloading the sequences under the "Tables" tab.</p>
<h2 id="sam">Read Alignments (SAM/BAM)</h2>
<p>eXpress also requires a file, multiple files, or a piped stream of SAM or binary SAM (BAM) alignments as input. The SAM alignments should be generated by mapping your sequencing reads to the target sequences specified in the multi-FASTA input file described above. For more details on the SAM format, see the <a href="http://samtools.sourceforge.net/SAM1.pdf">specification</a>. Many short read mappers including <a href="http://bowtie-bio.sourceforge.net">Bowtie</a>, <a href="http://bowtie-bio.sourceforge.net/bowtie2">Bowtie2</a>, <a href="http://bio-bwa.sourceforge.net/">BWA</a>, and <a href=http://maq.sourceforge.net/>MAQ</a> can produce output in this format. It is important that you allow many multi-mappings (preferably unlimited) in order to allow eXpress to select the correct alignment instead of the mapper. See <a href="tutorial.html#example">Getting Started</a> for an example using Bowtie in both streaming and file input modes.</p>
<p>
If using paired-end reads, the read names must match for each pair, excluding '/1' and '/2' suffix identifiers. Also, the SAM file supplied to eXpress should be grouped by read id. If you aligned your reads with Bowtie, your alignments will be properly ordered already. If you used another tool, you
should ensure that they are properly sorted. You can sort your SAM using the following command:
<ol><pre class="sc"><code>sort -k 1 hits.sam > hits.sam.sorted</code></pre></ol></p>
<p>You can sort your BAM using this command:
<ol><pre class="sc"><code>samtools sort -n hits.bam hits.sorted</code></pre></ol></p>
<p>If multiple libraries were prepared for the same sample or multiple read lengths were used in different sequencing runs, the alignments for each should be grouped in separate SAM files so that auxiliary parameters can be estimated independently. The filenames can then be input into eXpress as a comma-separated (with no spaces) list of SAM files. See <a href="#running">above</a> for an example. When this feature is used, separate <a href="#param">parameter estimates</a> will be output for each library, but only a single <a href="#expr">abundance</a> file will be produced.</p>
<p>→ <a href="#top">Back to top.</a></p>
</div>
<div class="corner-content-1col-bottom"></div>
<!-- CONTENT CELL -->
<div class="corner-content-1col-top"></div>
<div class="content-1col-nobox">
<h1 id="output">Output Files</h1>
<h2 id="expr">Target Abundances (results.xprs)</h2>
<p>This file is always output and contains the target abundances and other values calculated based on the input sequences and read alignments. The file has 15 tab-delimited columns, sorted by the bundle_id (column 1). The columns are defined as follows:
<!--"bundle_id transcript_id length eff_length bundle_frac est_counts est_counts_var fpkm -->
<table CELLSPACING=15>
<tr><th class="top" scope="col" width=4%>#</th><th class="top" scope="col" width=20%>Column Name</th><th class="top" scope="col" width=16%>Example</th><th class="top" scope="col" width=60%>Description</th></tr>
<tr><th scope="row">1</th><td>bundle_id</td><td><tt>10</tt></td><td>ID of bundle the target belongs to. A bundle is defined as the transitive closure of targets that share multi-mapping reads.</td></tr>
<tr><th scope="row">2</th><td>target_id</td><td><tt>NM_016467</tt></td><td>The ID given to the target in the input multi-FASTA file.</td></tr>
<tr><th scope="row">3</th><td>length</td><td><tt>2182</tt></td><td>The number of base pairs in the target sequence given in the input multi-FASTA file.</td></tr>
<tr><th scope="row">4</th><td>eff_length</td><td><tt>783.136288</tt></td><td>The length of the target adjusted for fragment biases (length, sequence-specificity, and relative position). This number is what the fragment counts are normalized by to calculate FPKM, not the true length.</td></tr>
<tr><th scope="row">5</th><td>tot_counts</td><td><tt>99</tt></td><td>The number of fragments mapping (uniquely or ambiguously) to this target.</td></tr>
<tr><th scope="row">6</th><td>uniq_counts</td><td><tt>7</tt></td><td>The number of fragments uniquely mapping to this target.</td></tr>
<tr><th scope="row">7</th><td>est_counts</td><td><tt>26.702456</tt></td><td>The estimated number of fragments generated from this target in the sequencing experiment.</td></tr>
<tr><th scope="row">8</th><td>eff_counts</td><td><tt>74.399258</tt></td><td>The estimated number of fragments generated from this target in the sequencing experiment, adjusted for fragment and length biases. In other words, this is the expected number of reads from the experiment if these biases did not exist. This is the value recommended for input to count-biased differential expression tools.</td></tr>
<tr><th scope="row">9</th><td>ambig_distr_alpha</td><td><tt>3.154652</tt></td><td>The alpha parameter for the posterior beta-binomial distribution fit to the ambiguous reads.</td></tr>
<tr><th scope="row">10</th><td>ambig_distr_beta</td><td><tt>2.293653</tt></td><td>The beta parameter for the posterior beta-binomial distribution fit to the ambiguous reads.</td></tr>
<tr><th scope="row">11</th><td>fpkm</td><td><tt>3.514176</tt></td><td>The estimated relative abundance of this target in the sample in units of <b>f</b>ragments <b>p</b>er <b>k</b>ilobase per <b>m</b>illion mapped. This value is proportional to est_counts divided by eff_length.</td></tr>
<tr><th scope="row">12</th><td>fpkm_conf_low</td><td><tt>2.119151</tt></td><td>The lower bound of the 95% confidence interval for the FPKM.</td></tr>
<tr><th scope="row">13</th><td>fpkm_conf_high</td><td><tt>4.909200</tt></td><td>The upper bound of the 95% confidence interval for the FPKM.</td></tr>
<tr><th scope="row">14</th><td>solvable</td><td><tt>T</tt></td><td>A binary (T/F) value indicating whether the likelihood function has a unique maximum. If false (F), the reported posterior distribution is uniform.</td></tr>
<tr><th scope="row">15</th><td>tpm</td><td><tt>2.347222e+05</tt></td><td>Transcripts per million. See <a href=http://lynchlab.uchicago.edu/publications/Wagner,%20Kin,%20and%20Lynch%20(2012).pdf>description</a>.</td></tr>
</table></p>
<p>See the <a href="overview.html#methods">Methods</a> for more details on how these values are calculated.</p>
<h2 id="param">Parameter Estimates (params.xprs)</h2>
<p>This file contains the values of the other parameters (besides abundances and counts) estimated by eXpress. The file is separated into sections for each parameter type, beginning with a '>' symbol. Following this symbol is the section header containing a name for the parameter type followed by the values on subsequent lines. All values belong to this parameter field until the next '>' or the end of the file. The following parameter types are output to this file:
<table CELLSPACING=15>
<tr><th class="top" scope="col" width=4%>#</th><th class="top" scope="col" width=16%>Parameter Type</th><th class="top" scope="col" width=25%>Description</th><th class="top" scope="col" width=55%>Output Format</th></tr>
<tr><th scope="row">1</th><td>Fragment Length Distribution</td><td>The empirical distribution of fragment lengths.</td><td>The fragment length range is listed next to the section header in parentheses (0-800 by default). The next line contains a tab-delimited list of probabilities for these lengths in order.</td></tr>
<tr><th scope="row">2</th><td>First Read Mismatch</td><td>The first-order Markov model for mismatches between the reference and observed nucleotides for the first read sequenced in a pair.</td><td>Each line begins with the nucleotide position in the read followed by a colon (0-indexed). The column header denotes which "substitution" the probability is for. For example, a value in the column labeled "CG->*T" in the row labeled 10 is the conditional probability that a read has a 'T' at the 11th position given it is mapped to a reference having a 'C' in the 10th position and a 'G' in the 11th. Note that since this is a conditional probability, CG->*A, CG->*C, CG->*G, CG->*T will sum to 1.</td></tr>
<tr><th scope="row">3</th><td>Second Read Mismatch</td><td>The first-order Markov model for mismatches between the reference and observed nucleotides for the second read sequenced in a pair.</td><td>Same as above.</td></tr>
<tr><th scope="row">4</th><td>5' Sequence-Specific Bias</td><td>Parameters relating to the likelihood of the sequence surrounding the 5' end of a fragment in transcript coordinates. See <a href="http://genomebiology.com/2011/12/3/R22/abstract">Roberts, et al. (2010a)</a> for more details.</td><td>This section is divided into 3 subsections. First is a matrix of the empirical nucleotide distribution for observed fragments ("Observed Marginal Distribution") at each position in a window surrounding the 5' end of the fragment. The column headers give the 0-indexed position number with negatives being upstream in the target sequence. Each row gives the probability for a different nucleotide, which is specified in the first column followed by a colon. Note that since this is a probability distribution, each column will sum to 1. The second subsection contains the "Observed Conditional Probabilities". These are the conditional probabilities for the 3rd order Markov model, the columns specifying the conditional event in the observed fragments and the row specifying the window position. The third matrix is the "Expected Conditional Probabilities". This matrix is similar to the previous, except the probabilities are calculated assuming target sampling based only on fragment length and relative abundance, and fragment sampling within a target dependent only on length (no sequence biases). Bias weights in eXpress are calculated by taking the ratio of observed to expected probability.</td></tr>
<tr><th scope="row">5</th><td>3' Sequence-Specific Bias</td><td>Parameters relating to the likelihood of the sequence surrounding the 3' end of a fragment in transcript coordinates. See <a href="http://genomebiology.com/2011/12/3/R22/abstract">Roberts, et al. (2010a)</a> for more details.</td><td>Same as above, except for the 3' fragment end.</td></tr>
</table></p>
<p>If multiple alignment files were provided, a separate parameter output will be output for each with a unique index identifying its position in the command-line argument given by the user (ie, the second SAM file in the argument list will be named 'params.2.xprs').</p>
<h2 id="covar">Count Variance-Covariance (varcov.xprs)</h2>
<p>This file is produced only when the <tt><a href="#options">--calc-covar</a></tt> option flag is used as described <a href="#running">above</a>. The file contains the estimated variances and covariances on the counts between pairs of targets that shared multi-mapped reads, primarily to be used in differential expression analysis. Since the covariance between targets in different bundles is always 0, the full sparse matrix is broken up into smaller tab-delimited matrices for each bundle. An example of this output for the sample dataset used in the <a href="tutorial.html#example">Getting Started</a> tutorial is shown below:</p>
<ol><li><pre class="output"><code>&gt;1: NM_014212
0.000000e+00
>2: NM_001168316, NM_174914, NR_031764
3.234847e+02 -2.570762e+02 -6.640854e+01
-2.570762e+02 4.082292e+02 -0.000000e+00
-6.640854e+01 -0.000000e+00 2.175616e+02
>3: NM_022658
0.000000e+00
>4: NM_173860
0.000000e+00
>5: NM_014620, NM_153693, NR_003084, NM_153633, NM_018953, NM_004503
2.067753e+02 -0.000000e+00 -0.000000e+00 -0.000000e+00 -0.000000e+00 -0.000000e+00
-0.000000e+00 6.035824e+01 -0.000000e+00 -0.000000e+00 -0.000000e+00 -0.000000e+00
-0.000000e+00 -0.000000e+00 1.731434e+01 -0.000000e+00 -0.000000e+00 -0.000000e+00
-0.000000e+00 -0.000000e+00 -0.000000e+00 1.879961e+02 -0.000000e+00 -2.499948e-01
-0.000000e+00 -0.000000e+00 -0.000000e+00 -0.000000e+00 1.149211e+01 -0.000000e+00
-0.000000e+00 -0.000000e+00 -0.000000e+00 -2.499948e-01 -0.000000e+00 4.581855e+01
>6: NM_017409
0.000000e+00
>7: NM_017410
0.000000e+00
>8: NM_006897
0.000000e+00</code></pre></li></ol>
<p>Each bundle's matrix is headed by an identifier line that begins with a greater-than symbol (>) followed by the bundle id and a comma-separated list of targets in the bundle. The ordering of this list provides the indices for the matrix that is to follow. For example, in bundle 5 of the output above, the sixth value in the fourth row (-2.499948e-01) is the covariance between NM_153633 and NM_004503. Notice that an identical value is also in the fourth column of the sixth row, as the variance-covariance matrix will always be symmetric.</p>
<p>See the <a href="overview.html#methods">Methods</a> for more details on how these values are calculated.</p>
<p>→ <a href="#top">Back to top.</a></p>
</div>
<div class="corner-content-1col-bottom"></div>
</div>
<!-- C.2 SUBCONTENT -->
<div class="subcontent">
<!-- SUBCONTENT CELL -->
<div class="corner-subcontent-top"></div>
<div class="subcontent-box">
<h1 class="menu">Outline</h1>
<div class="sidemenu1">
<!-- CONTENT CELL -->
<ul>
<li><a href="#doc">Documentation</a></li>
<li><a href="#usage">Usage</a></li>
<li><a href="#prereq">→Prerequisites</a></li>
<li><a href="#running">→Running eXpress</a></li>
<li><a href="#input">Input Files</a></li>
<li><a href="#fasta">→Target Sequences (FASTA)</a></li>
<li><a href="#sam">→Read Alignments (SAM/BAM)</a></li>
<li><a href="#output">Output Files</a></li>
<li><a href="#expr">→Target Abundances (results.xprs)</a></li>
<li><a href="#covar">→Variance-Covariance (varcov.xprs)</a></li>
</ul>
</div>
</div>
<div class="corner-subcontent-bottom"></div>
</div>
</div>
<!-- D. FOOTER -->
<div class="footer">
<p>Copyright © 2011 Adam Roberts | All Rights Reserved</p>
<p class="credits">Design by <a href="http://1234.info/" title="Designer Homepage">1234.info</a> | Modified by <a href="http://cs.berkeley.edu/~adarob/">Adam Roberts</a> | <a href="http://validator.w3.org/check?uri=referer" title="Validate XHTML code">XHTML 1.0</a> | <a href="http://jigsaw.w3.org/css-validator/" title="Validate CSS code">CSS 2.0</a></p>
<br />
<p>The eXpress project was funded in part by an NSF graduate fellowship to Adam Roberts and NIH grant 1R01HG006129-01</p>
</div>
<div class="corner-page-bottom"></div>
</div>
</body>
</html>
|