File: plotComplexity.Rd

package info (click to toggle)
r-bioc-dada2 1.34.0%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 3,016 kB
  • sloc: cpp: 3,096; makefile: 5
file content (61 lines) | stat: -rw-r--r-- 1,848 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot-methods.R
\name{plotComplexity}
\alias{plotComplexity}
\title{Plot sequence complexity profile of a fastq file.}
\usage{
plotComplexity(
  fl,
  kmerSize = 2,
  window = NULL,
  by = 5,
  n = 1e+05,
  bins = 100,
  aggregate = FALSE,
  ...
)
}
\arguments{
\item{fl}{(Required). \code{character}.
File path(s) to fastq or fastq.gz file(s).}

\item{kmerSize}{(Optional). Default 2.
The size of the kmers (or "oligonucleotides" or "words") to use.}

\item{window}{(Optional). Default NULL.
The width in nucleotides of the moving window. If NULL, the whole sequence is used.}

\item{by}{(Optional). Default 5.
The step size in nucleotides between each moving window tested.}

\item{n}{(Optional). Default 100,000.
The number of records to sample from the fastq file.}

\item{bins}{(Optional). Default 100.
The number of bins to use for the histogram.}

\item{aggregate}{(Optional). Default FALSE.
If TRUE, compute an aggregate complexity profile for all fastq files provided.}

\item{...}{(Optional). Arguments passed on to \code{\link[ggplot2]{geom_histogram}}.}
}
\value{
A \code{\link{ggplot}2} object.
 Will be rendered to default device if \code{\link{print}ed},
 or can be stored and further modified.
 See \code{\link{ggsave}} for additional options.
}
\description{
This function plots a histogram of the distribution of sequence complexities
in the form of effective numbers of kmers as determined by \code{\link{seqComplexity}}.
By default, kmers of size 2 are used, in which case a perfectly random sequence
will approach an effective kmer number of 16 = 4 (nucleotides) ^ 2 (kmer size).
}
\examples{
plotComplexity(system.file("extdata", "sam1F.fastq.gz", package="dada2"))

}
\seealso{
\code{\link{seqComplexity}}
 \code{\link[Biostrings]{oligonucleotideFrequency}}
}