File: IPos-class.Rd

package info (click to toggle)
r-bioc-iranges 2.16.0-1
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,808 kB
  • sloc: ansic: 4,789; sh: 4; makefile: 2
file content (229 lines) | stat: -rw-r--r-- 7,555 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
\name{IPos-class}
\docType{class}

\alias{class:IPos}
\alias{IPos-class}
\alias{IPos}

\alias{pos}
\alias{pos,IPos-method}
\alias{length,IPos-method}

\alias{coerce,IntegerRanges,IPos-method}
\alias{coerce,ANY,IPos-method}
\alias{as.data.frame,IPos-method}
\alias{extractROWS,IPos-method}
\alias{show,IPos-method}
\alias{bindROWS,IPos-method}

\title{Memory-efficient representation of integer positions}

\description{
  The IPos class is a container for storing a set of \emph{integer positions}
  where most of the positions are typically (but not necessarily) adjacent.
  Because integer positions can be seen as integer ranges of width 1, the IPos
  class extends the \link{IntegerRanges} virtual class. Note that even though
  an \link{IRanges} object can be used for storing integer positions, using an
  IPos object will be much more memory-efficient, especially when the object
  contains long runs of adjacent positions in \emph{ascending order}.
}

\usage{
IPos(pos_runs)  # constructor function
}

\arguments{
  \item{pos_runs}{
    An \link{IRanges} object (or any other \link{IntegerRanges} derivative)
    where each range is interpreted as a run of adjacent ascending positions.
    If \code{pos_runs} is not an \link{IntegerRanges} derivative,
    \code{IPos()} first tries to coerce it to one with
    \code{as(pos_runs, "IntegerRanges", strict=FALSE)}.
  }
}

\value{
  An IPos object.
}

\section{Accessors}{

  \subsection{Getters}{
    IPos objects support the same set of getters as other \link{IntegerRanges}
    derivatives (i.e. \code{start()}, \code{end()}, \code{mcols()}, etc...),
    plus the \code{pos()} getter which is equivalent to \code{start()}
    or \code{end()}. See \code{?\link{IntegerRanges}} for the list of getters
    supported by \link{IntegerRanges} derivatives.

    IMPORTANT NOTE: An IPos object cannot hold names i.e. \code{names()}
    always returns \code{NULL} on it.
  }

  \subsection{Setters}{
    IPos objects support the \code{mcols()} and \code{metadata()} setters
    only.
  }
}

\section{Coercion}{
  From \link{IntegerRanges} to IPos:
  An \link{IntegerRanges} derivative \code{x} in which all the ranges have
  a width of 1 can be coerced to an IPos object with \code{as(x, "IPos")}.
  The names on \code{x} are not propagated (a warning is issued if \code{x}
  has names on it).

  From IPos to \link{IRanges}:
  An IPos object \code{x} can be coerced to an \link{IRanges} object
  with \code{as(x, "IRanges")}. However be aware that the resulting object
  can use thousands times (or more) memory than \code{x}!
  See "MEMORY USAGE" in the Examples section below.

  From IPos to ordinary R objects:
  Like with any other \link{IntegerRanges} derivative, \code{as.character()},
  \code{as.factor()}, and \code{as.data.frame()} work on an IPos object
  \code{x}. Note however that \code{as.data.frame(x)} returns a data frame
  with a \code{pos} column (containing \code{pos(x)}) instead of the
  \code{start}, \code{end}, and \code{width} columns that one gets with other
  \link{IntegerRanges} derivatives.
}

\section{Subsetting}{
  An IPos object can be subsetted exactly like an \link{IRanges} object.
}

\section{Concatenation}{
  IPos objects can be concatenated with \code{c()} or \code{append()}.
  See \code{?\link[S4Vectors]{c}} in the \pkg{S4Vectors} package for
  more information about concatenating Vector derivatives.
}

\section{Splitting and Relisting}{
  Like with an \link{IRanges} object, \code{split()} and \code{relist()} work
  on an IPos object.
}

\note{
  Like for any \link[S4Vectors]{Vector} derivative, the length of an
  IPos object cannot exceed \code{.Machine$integer.max} (i.e. 2^31 on
  most platforms). \code{IPos()} will return an error if \code{pos_runs}
  contains too many integer positions.
}

\author{
  Hervé Pagès; based on ideas borrowed from Georg Stricker
  \email{georg.stricker@in.tum.de} and Julien Gagneur
  \email{gagneur@in.tum.de}
}

\seealso{
  \itemize{
    \item The \link[GenomicRanges]{GPos} class in the \pkg{GenomicRanges}
          package for a memory-efficient representation of \emph{genomic
          positions} (i.e. genomic ranges of width 1).

    \item \link{IntegerRanges} and \link{IRanges} objects.

    \item \link{IPosRanges-comparison} for comparing and ordering integer
          ranges and/or positions.

    \item \link{findOverlaps-methods} for finding overlapping
          integer ranges and/or positions.

    \item \link{nearest-methods} for finding the nearest integer range
          and/or position.
  }
}

\examples{
## ---------------------------------------------------------------------
## BASIC EXAMPLES
## ---------------------------------------------------------------------

## Example 1:
ipos1 <- IPos(c("44-53", "5-10", "2-5"))
ipos1

length(ipos1)
pos(ipos1)  # same as 'start(ipos1)' and 'end(ipos1)'
as.character(ipos1)
as.data.frame(ipos1)
as(ipos1, "IRanges")
as.data.frame(as(ipos1, "IRanges"))
ipos1[9:17]

## Example 2:
pos_runs <- IRanges(c(1, 6, 12, 17), c(5, 10, 16, 20))
ipos2 <- IPos(pos_runs)
ipos2

## Example 3:
ipos3A <- ipos3B <- IPos(c("1-15000", "15400-88700"))
npos <- length(ipos3A)

mcols(ipos3A)$sample <- Rle("sA")
sA_counts <- sample(10, npos, replace=TRUE)
mcols(ipos3A)$counts <- sA_counts

mcols(ipos3B)$sample <- Rle("sB")
sB_counts <- sample(10, npos, replace=TRUE)
mcols(ipos3B)$counts <- sB_counts

ipos3 <- c(ipos3A, ipos3B)
ipos3

## ---------------------------------------------------------------------
## MEMORY USAGE
## ---------------------------------------------------------------------

## Coercion to IRanges works...
ipos4 <- IPos(c("1-125000", "135000-575000"))
ir4 <- as(ipos4, "IRanges")
ir4
## ... but is generally not a good idea:
object.size(ipos4)
object.size(ir4)  # 1739 times bigger than the IPos object!

## Shuffling the order of the positions impacts memory usage:
ipos4s <- sample(ipos4)
object.size(ipos4s)

## AN IMPORTANT NOTE: In the worst situations, IPos still performs as
## good as an IRanges object.
object.size(as(ipos4s, "IRanges"))  # same size as 'ipos4s'

## Best case scenario is when the object is strictly sorted (i.e.
## positions are in strict ascending order).
## This can be checked with:
is.unsorted(ipos4, strict=TRUE)  # 'ipos4' is strictly sorted

## ---------------------------------------------------------------------
## USING MEMORY-EFFICIENT METADATA COLUMNS
## ---------------------------------------------------------------------
## In order to keep memory usage as low as possible, it is recommended
## to use a memory-efficient representation of the metadata columns that
## we want to set on the object. Rle's are particularly well suited for
## this, especially if the metadata columns contain long runs of
## identical values. This is the case for example if we want to use an
## IPos object to represent the coverage of sequencing reads along a
## chromosome.

## Example 5:
library(pasillaBamSubset)
library(Rsamtools)  # for the BamFile() constructor function
bamfile1 <- BamFile(untreated1_chr4())
bamfile2 <- BamFile(untreated3_chr4())
ipos5 <- IPos(IRanges(1, seqlengths(bamfile1)[["chr4"]]))
library(GenomicAlignments)  # for "coverage" method for BamFile objects
cov1 <- coverage(bamfile1)$chr4
cov2 <- coverage(bamfile2)$chr4
mcols(ipos5) <- DataFrame(cov1, cov2)
ipos5

object.size(ipos5)  # lightweight

## Keep only the positions where coverage is at least 10 in one of the
## 2 samples:
ipos5[mcols(ipos5)$cov1 >= 10 | mcols(ipos5)$cov2 >= 10]
}
\keyword{methods}
\keyword{classes}