File: dustyScore.Rd

package info (click to toggle)
r-bioc-shortread 1.32.0-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 8,384 kB
  • ctags: 293
  • sloc: ansic: 2,718; cpp: 202; sh: 3; makefile: 2
file content (88 lines) | stat: -rw-r--r-- 2,189 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
\name{dustyScore}

\alias{dustyScore}
\alias{dustyScore,DNAStringSet-method}
\alias{dustyScore,ShortRead-method}

\title{Summarize low-complexity sequences}

\description{

  \code{dustyScore} identifies low-complexity sequences, in a manner
  inspired by the \code{dust} implementation in \code{BLAST}.

}
\usage{

dustyScore(x, batchSize=NA, ...)

}

\arguments{

  \item{x}{A \code{DNAStringSet} object, or object derived from
    \code{ShortRead}, containing a collection of reads to be
    summarized.}

  \item{batchSize}{\code{NA} or an \code{integer(1)} vector indicating
    the maximum number of reads to be processed at any one time.}

  \item{...}{Additional arguments, not currently used.}

}

\details{

  The following methods are defined:
  \describe{

    \item{dustyScore}{\code{signature(x = "DNAStringSet")}: operating on
      an object derived from class \code{DNAStringSet}.}

    \item{dustyScore}{\code{signature(x = "ShortRead")}: operating on
      the \code{sread} of an object derived from class
      \code{ShortRead}.}

  }

  The dust-like calculations used here are as implemented at
  \url{https://stat.ethz.ch/pipermail/bioc-sig-sequencing/2009-February/000170.html}.
  Scores range from 0 (all triplets unique) to the square of the width
  of the longest sequence (reached when that sequence is a homopolymer,
  i.e., poly-A, -C, -G, or -T).

  The \code{batchSize} argument can be used to reduce the memory
  requirements of the algorithm by processing the \code{x} argument in
  batches of the specified size. Smaller batch sizes use less memory,
  but are computationally less efficient.

}

\value{

  A vector of numeric scores, with length equal to the length of
  \code{x}.

}

\references{

  Morgulis, Getz, Schaffer and Agarwala, 2006. WindowMasker: window-based
  masker for sequenced genomes, Bioinformatics 22: 134-141.

}

\seealso{

  The WindowMasker supplement defining \code{dust}:
  \url{ftp://ftp.ncbi.nlm.nih.gov/pub/agarwala/windowmasker/windowmasker_suppl.pdf}.

}

\author{Herve Pages (code); Martin Morgan}

\examples{
sp <- SolexaPath(system.file('extdata', package='ShortRead'))
rfq <- readFastq(analysisPath(sp), pattern="s_1_sequence.txt")
range(dustyScore(rfq))
}
\keyword{manip}