1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/plot-methods.R
\name{plotComplexity}
\alias{plotComplexity}
\title{Plot sequence complexity profile of a fastq file.}
\usage{
plotComplexity(
fl,
kmerSize = 2,
window = NULL,
by = 5,
n = 1e+05,
bins = 100,
aggregate = FALSE,
...
)
}
\arguments{
\item{fl}{(Required). \code{character}.
File path(s) to fastq or fastq.gz file(s).}
\item{kmerSize}{(Optional). Default 2.
The size of the kmers (or "oligonucleotides" or "words") to use.}
\item{window}{(Optional). Default NULL.
The width in nucleotides of the moving window. If NULL, the whole sequence is used.}
\item{by}{(Optional). Default 5.
The step size in nucleotides between each moving window tested.}
\item{n}{(Optional). Default 100,000.
The number of records to sample from the fastq file.}
\item{bins}{(Optional). Default 100.
The number of bins to use for the histogram.}
\item{aggregate}{(Optional). Default FALSE.
If TRUE, compute an aggregate complexity profile for all fastq files provided.}
\item{...}{(Optional). Arguments passed on to \code{\link[ggplot2]{geom_histogram}}.}
}
\value{
A \code{\link[ggplot2]{ggplot}2} object.
Will be rendered to default device if \code{\link{print}ed},
or can be stored and further modified.
See \code{\link[ggplot2]{ggsave}} for additional options.
}
\description{
This function plots a histogram of the distribution of sequence complexities
in the form of effective numbers of kmers as determined by \code{\link{seqComplexity}}.
By default, kmers of size 2 are used, in which case a perfectly random sequence
will approach an effective kmer number of 16 = 4 (nucleotides) ^ 2 (kmer size).
}
\examples{
plotComplexity(system.file("extdata", "sam1F.fastq.gz", package="dada2"))
}
\seealso{
\code{\link{seqComplexity}}
\code{\link[Biostrings]{oligonucleotideFrequency}}
}
|