File: srdistance.Rd

package info (click to toggle)
r-bioc-shortread 1.32.0-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 8,384 kB
  • ctags: 293
  • sloc: ansic: 2,718; cpp: 202; sh: 3; makefile: 2
file content (73 lines) | stat: -rw-r--r-- 2,329 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
\name{srdistance}
\alias{srdistance}
%
\alias{srdistance,DNAStringSet,character-method}
\alias{srdistance,DNAStringSet,DNAString-method}
\alias{srdistance,DNAStringSet,DNAStringSet-method}

\title{Edit distances between reads and a small number of short references}

\description{

  \code{srdistance} calculates the edit distance from each read in
  \code{pattern} to each sequence in \code{subject}. The underlying
  algorithm, \code{\link[Biostrings]{pairwiseAlignment}}, is only efficient
  when both reads are short, and when the number of \code{subject} reads is
  small.

}

\usage{
srdistance(pattern, subject, ...)
}

\arguments{
  \item{pattern}{An object of class \code{DNAStringSet} containing reads
    whose edit distance is desired.}

  \item{subject}{A short \code{character} vector, \code{DNAString} or
    (small) \code{DNAStringSet} to serve as reference.}

  \item{\dots}{additional arguments, unused.}
}

\details{

  The underlying algorithm performs pairwise alignment from each read in
  \code{pattern} to each sequence in \code{subject}. The return value is
  a list of numeric vectors of distances, one list element for each
  sequence in \code{subject}. The vector in each list element contains
  for each read in \code{pattern} the edit distance from the read to the
  corresponding subject. The weight matrix and gap penalties used to
  calculate the distance are structured to weight base substitutions and
  single-base insertions/deletions equally. Edit distances between known and
  ambiguous (e.g., N) nucleotides, or between ambiguous nucleotides, are
  weighted as though each possible nucleotide in the ambiguity were
  equally likely.

}

\value{

  A list of length equal to that of \code{subject}. Each element is a
  numeric vector of length equal to that of \code{pattern}, with values
  corresponding to the minimum distance between the
  corresponding pattern and subject sequences.

}

\author{Martin Morgan <mtmorgan@fhcrc.org>}

\seealso{\code{\link[Biostrings]{pairwiseAlignment}}}

\examples{
sp <- SolexaPath(system.file("extdata", package="ShortRead"))
aln <- readAligned(sp, "s_2_export.txt")
polyA <- polyn("A", 35)
polyT <- polyn("T", 35)

d1 <- srdistance(clean(sread(aln)), polyA)
d2 <- srdistance(sread(aln), polyA)
d3 <- srdistance(sread(aln), c(polyA, polyT))
}

\keyword{manip}