File: nearest-methods.Rd

package info (click to toggle)
r-bioc-iranges 2.16.0-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,808 kB
  • sloc: ansic: 4,789; sh: 4; makefile: 2
file content (230 lines) | stat: -rw-r--r-- 8,406 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
\name{nearest-methods}

\alias{nearest-methods}

\alias{class:IntegerRanges_OR_missing}
\alias{IntegerRanges_OR_missing-class}
\alias{IntegerRanges_OR_missing}

\alias{nearest}
\alias{precede}
\alias{follow}
\alias{distance}
\alias{distanceToNearest}
\alias{selectNearest}
\alias{nearest,IntegerRanges,IntegerRanges_OR_missing-method}
\alias{precede,IntegerRanges,IntegerRanges_OR_missing-method}
\alias{follow,IntegerRanges,IntegerRanges_OR_missing-method}
\alias{distance,IntegerRanges,IntegerRanges-method}
\alias{distance,Pairs,missing-method}
\alias{distanceToNearest,IntegerRanges,IntegerRanges_OR_missing-method}

\title{Finding the nearest range neighbor}

\description{
  The \code{nearest}, \code{precede}, \code{follow}, \code{distance}
  and \code{distanceToNearest} methods for \code{\linkS4class{IntegerRanges}}
  objects and subclasses find the nearest neighboring ranges and compute
  the distances between ranges.
}

\usage{
\S4method{nearest}{IntegerRanges,IntegerRanges_OR_missing}(x, subject, select = c("arbitrary", "all"))

\S4method{precede}{IntegerRanges,IntegerRanges_OR_missing}(x, subject, select = c("first", "all"))

\S4method{follow}{IntegerRanges,IntegerRanges_OR_missing}(x, subject, select = c("last", "all"))

\S4method{distanceToNearest}{IntegerRanges,IntegerRanges_OR_missing}(x, subject, select = c("arbitrary", "all"))

\S4method{distance}{IntegerRanges,IntegerRanges}(x, y)
\S4method{distance}{Pairs,missing}(x, y)
}

\arguments{
  \item{x}{The query \code{\linkS4class{IntegerRanges}} object, or (for
    \code{distance()}) a \code{\linkS4class{Pairs}} containing both the
    query (first) and subject (second).
  }
  \item{subject}{The subject \code{IntegerRanges} object, within which the
    nearest neighbors are found. Can be missing, in which case
    \code{x} is also the subject.
  }
  \item{select}{Logic for handling ties. By default, all the methods
    select a single interval (arbitrary for \code{nearest}, the first
    by order in \code{subject} for \code{precede}, and the last for 
    \code{follow}). To get all matchings, as a \code{Hits} object, 
    use \dQuote{all}.
  }
  \item{y}{For the \code{distance} method, an \code{IntegerRanges} object.
    Cannot be missing. If \code{x} and \code{y} are not the same
    length, the shorter will be recycled to match the length of the
    longer.
  }
  \item{hits}{The hits between \code{x} and \code{subject}}
  \item{...}{Additional arguments for methods}
}

\details{
  \describe{
    \item{nearest: }{
      The conventional nearest neighbor finder. Returns an integer vector 
      containing the index of the nearest neighbor range in \code{subject} 
      for each range in \code{x}. If there is no nearest neighbor 
      (if \code{subject} is empty), NA's are returned.

      Here is roughly how it proceeds, for a range \code{xi} in \code{x}:
      \enumerate{
        \item Find the ranges in \code{subject} that overlap \code{xi}. If a
          single range \code{si} in \code{subject} overlaps \code{xi}, 
          \code{si} is returned as the nearest neighbor of \code{xi}. If there 
          are multiple overlaps, one of the overlapping ranges is chosen
          arbitrarily.
        \item If no ranges in \code{subject} overlap with \code{xi}, then
          the range in \code{subject} with the shortest distance from its end
          to the start of \code{xi} or from its start to the end of \code{xi}
          is returned.
      }
    }
    \item{precede: }{
      For each range in \code{x}, \code{precede} returns the index of the
      interval in \code{subject} that is directly preceded by the query
      range. Overlapping ranges are excluded. \code{NA} is returned when 
      there are no qualifying ranges in \code{subject}.
    }
    \item{follow: }{
      The opposite of \code{precede}, this function returns the index
      of the range in \code{subject} that a query range in \code{x} 
      directly follows. Overlapping ranges are excluded. \code{NA} is
      returned when there are no qualifying ranges in \code{subject}.
    }
    \item{distanceToNearest: }{
      Returns the distance for each range in \code{x} to its nearest 
      neighbor in \code{subject}.
    }
    \item{distance: }{
      Returns the distance for each range in \code{x} to the corresponding
      range in \code{y}.

      The \code{distance} method differs from others documented on this
      page in that it is symmetric; \code{y} cannot be missing. If \code{x}
      and \code{y} are not the same length, the shorter will be recycled to
      match the length of the longer. The \code{select} argument is not
      available for \code{distance} because comparisons are made in a
      pair-wise fashion. The return value has the length of the longer
      of \code{x} and \code{y}.

      The \code{distance} calculation changed in BioC 2.12 to accommodate
      zero-width ranges in a consistent and intuitive manner. The new distance
      can be explained by a \emph{block} model where a range is represented by
      a series of blocks of size 1. Blocks are adjacent to each other and there
      is no gap between them. A visual representation of \code{IRanges(4,7)}
      would be
 
      \preformatted{
        +-----+-----+-----+-----+
           4     5     6     7
      }

      The distance between two consecutive blocks is 0L (prior to 
      Bioconductor 2.12 it was 1L). The new distance calculation now returns 
      the size of the gap between two ranges.

      This change to distance affects the notion of overlaps in that
      we no longer say: 

      x and y overlap   <=>   distance(x, y) == 0

      Instead we say

      x and y overlap    =>   distance(x, y) == 0

      or

      x and y overlap or are adjacent   <=>   distance(x, y) == 0
    }
    \item{selectNearest: }{
      Selects the hits that have the minimum distance within those for
      each query range. Ties are possible and can be broken with
      \code{\link[S4Vectors]{breakTies}}.
    }
  }
}

\value{
  For \code{nearest}, \code{precede} and \code{follow}, an integer
  vector of indices in \code{subject}, or a \code{\linkS4class{Hits}} 
  if \code{select="all"}.

  For \code{distanceToNearest}, a \code{Hits} object with an elementMetadata
  column holding the \code{distance} between the ranges of each pair. Access
  the \code{distance} column with the \code{mcols} accessor.

  For \code{distance}, an integer vector of distances between the ranges
  in \code{x} and \code{y}.

  For \code{selectNearest}, a \code{\linkS4class{Hits}} object, sorted
  by query.
}

\author{M. Lawrence}

\seealso{
  \itemize{
    \item The \link{IntegerRanges} and \link{Hits} classes.
    \item The \link[GenomicRanges]{GenomicRanges} and 
          \link[GenomicRanges]{GRanges} classes in the GenomicRanges package.
    \item \code{\link{findOverlaps}} for finding just the overlapping ranges.
    \item GenomicRanges methods for
    \itemize{
      \item \code{precede}
      \item \code{follow}
      \item \code{nearest}
      \item \code{distance}
      \item \code{distanceToNearest}
    }
    are documented at
    ?\code{\link[GenomicRanges]{nearest-methods}} or
    ?\code{\link[GenomicRanges]{precede,GenomicRanges,GenomicRanges-method}}
  }
}

\examples{
  ## ------------------------------------------
  ## precede() and follow()
  ## ------------------------------------------
  query <- IRanges(c(1, 3, 9), c(3, 7, 10))
  subject <- IRanges(c(3, 2, 10), c(3, 13, 12))
 
  precede(query, subject)     # c(3L, 3L, NA)
  precede(IRanges(), subject) # integer()
  precede(query, IRanges())   # rep(NA_integer_, 3)
  precede(query)              # c(3L, 3L, NA)
 
  follow(query, subject)      # c(NA, NA, 1L)
  follow(IRanges(), subject)  # integer()
  follow(query, IRanges())    # rep(NA_integer_, 3)
  follow(query)               # c(NA, NA, 2L)

  ## ------------------------------------------
  ## nearest()
  ## ------------------------------------------
  query <- IRanges(c(1, 3, 9), c(2, 7, 10))
  subject <- IRanges(c(3, 5, 12), c(3, 6, 12))

  nearest(query, subject) # c(1L, 1L, 3L)
  nearest(query)          # c(2L, 1L, 2L)

  ## ------------------------------------------
  ## distance()
  ## ------------------------------------------
  ## adjacent
  distance(IRanges(1,5), IRanges(6,10)) # 0L
  ## overlap
  distance(IRanges(1,5), IRanges(3,7))  # 0L
  ## zero-width
  sapply(-3:3, function(i) distance(shift(IRanges(4,3), i), IRanges(4,3))) 
}

\keyword{utilities}