File: Sampler-class.Rd

package info (click to toggle)
r-bioc-shortread 1.32.0-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 8,384 kB
  • ctags: 293
  • sloc: ansic: 2,718; cpp: 202; sh: 3; makefile: 2
file content (226 lines) | stat: -rw-r--r-- 6,489 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
\name{FastqFile-class}
\docType{class}
% Classes
\alias{ShortReadFile-class}
\alias{FastqFile-class}
\alias{FastqFileReader-class}
\alias{FastqSampler-class}
\alias{FastqSamplerList-class}
\alias{FastqStreamer-class}
\alias{FastqStreamerList-class}
\alias{FastqFileList-class}

% ShortReadFile / FastqFile / FastqFileList
\alias{FastqFile}
\alias{FastqFileList}
\alias{open.ShortReadFile}
\alias{close.ShortReadFile}
\alias{readFastq,FastqFile-method}

% FastqFileList
\alias{FastqFileList,ANY-method}
\alias{FastqFileList,character-method}

% FastqFileReader
\alias{yield,FastqFileReader-method}

% FastqSampler
\alias{FastqSampler}
\alias{FastqSamplerList}
\alias{FastqSamplerList,ANY-method}
\alias{FastqSamplerList,character-method}
\alias{yield}
\alias{yield,FastqSampler-method}

% FastqStreamer
\alias{FastqStreamer}
\alias{FastqStreamer,ANY,missing-method}
\alias{FastqStreamer,ANY,numeric-method}
\alias{FastqStreamer,ANY,IRanges-method}
\alias{FastqStreamerList}
\alias{FastqStreamerList,ANY-method}
\alias{FastqStreamerList,character-method}
\alias{yield,FastqStreamer-method}

\title{Sampling and streaming records from fastq files}

\description{

  \code{FastqFile} represents a path and connection to a fastq
  file. \code{FastqFileList} is a list of such connections.

  \code{FastqSampler} draws a subsample from a fastq file. \code{yield}
  is the method used to extract the sample from the \code{FastqSampler}
  instance; a short illustration is in the example
  below. \code{FastqSamplerList} is a list of \code{FastqSampler}
  elements.

  \code{FastqStreamer} draws successive subsets from a fastq file; a
  short illustration is in the example below. \code{FastqStreamerList}
  is a list of \code{FastqStreamer} elements.

}

\usage{
## FastqFile and FastqFileList
FastqFile(con, ...)
FastqFileList(..., class="FastqFile")
\S3method{open}{ShortReadFile}(con, ...)
\S3method{close}{ShortReadFile}(con, ...)
\S4method{readFastq}{FastqFile}(dirPath, pattern=character(), ...)

## FastqSampler and FastqStreamer
FastqSampler(con, n=1e6, readerBlockSize=1e8, verbose=FALSE,
    ordered = FALSE)
FastqSamplerList(..., n=1e6, readerBlockSize=1e8, verbose=FALSE,
    ordered = FALSE)
FastqStreamer(con, n, readerBlockSize=1e8, verbose=FALSE)
FastqStreamerList(..., n, readerBlockSize=1e8, verbose=FALSE)
yield(x, ...)
}

\arguments{

  \item{con, dirPath}{A character string naming a connection, or (for
    \code{con}) an R connection (e.g., \code{file}, \code{gzfile}).}

  \item{n}{For \code{FastqSampler}, the size of the sample (number of
    records) to be drawn.  For \code{FastqStreamer}, a \code{numeric(1)}
    (set to 1e6 when \code{n} is missing) providing the number of
    successive records to be returned on each yield, or an
    \code{\linkS4class{IRanges}} instance delimiting the (1-based) indices
    of records returned by each yield; entries in \code{n} must have
    non-zero width and must not overlap.}

  \item{readerBlockSize}{The number of bytes or characters to be read at
    one time; smaller \code{readerBlockSize} reduces memory requirements
    but is less efficient.}

  \item{verbose}{Display progress.}

  \item{ordered}{A \code{logical(1)} indicating whether sampled reads
    should be returned in the same order as they were encountered in the
    file.}

  \item{x}{An instance from the \code{FastqSampler} or
    \code{FastqStreamer} class.}

  \item{...}{Additional arguments. For \code{FastqFileList},
    \code{FastqSamplerList}, or \code{FastqStreamerList}, this can
    either be a single character vector of paths to fastq files, or
    several instances of the corresponding \code{FastqFile},
    \code{FastqSampler}, or \code{FastqStreamer} objects.}

  \item{pattern}{Ignored.}

  \item{class}{For developer use, to specify the underlying class
    contained in the \code{FastqFileList}.}

}

\section{Objects from the class}{

  Available classes include:

  \describe{

    \item{\code{FastqFile}}{A file path and connection to a fastq file.}

    \item{\code{FastqFileList}}{A list of \code{FastqFile} instances.}

    \item{\code{FastqSampler}}{Uniformly sample records from a fastq
      file.}

    \item{\code{FastqStreamer}}{Iterate over a fastq file, returning
      successive parts of the file.}

  }
}

\section{Methods}{

  The following methods are available to users:

  \describe{

    \item{\code{readFastq,FastqFile-method}:}{see also
      \code{?\link{readFastq}}.}

    \item{\code{writeFastq,ShortReadQ,FastqFile-method}:}{see also
      \code{?\link{writeFastq}}, 
      \code{?"writeFastq,ShortReadQ,FastqFile-method"}.}

    \item{\code{yield}:}{Draw a single sample from the
      instance. Operationally this requires that the underlying data
      (e.g., file) represented by the \code{FastqSampler} instance be
      visited; this may be time-consuming.}

  }

}

\note{

  \code{FastqSampler} and \code{FastqStreamer} use OpenMP threads (when
  available) during creation of the return value. This may sometimes
  create problems when a process is already running on multiple threads,
  e.g., with an error message like \preformatted{
    libgomp: Thread creation failed: Resource temporarily unavailable
  } A solution is to precede problematic code with the following code
  snippet, to disable threading \preformatted{
    nthreads <- .Call(ShortRead:::.set_omp_threads, 1L)
    on.exit(.Call(ShortRead:::.set_omp_threads, nthreads))
  }

}

\seealso{

  \code{\link{readFastq}}, \code{\link{writeFastq}}, \code{\link{yield}}.

}

\examples{
sp <- SolexaPath(system.file('extdata', package='ShortRead'))
fl <- file.path(analysisPath(sp), "s_1_sequence.txt")

f <- FastqFile(fl)
rfq <- readFastq(f)
close(f)

f <- FastqSampler(fl, 50)
yield(f)    # sample of size n=50
yield(f)    # independent sample of size 50
close(f)

## Return sample as ordered in original file
f <- FastqSampler(fl, 50, ordered=TRUE)
yield(f)
close(f)

f <- FastqStreamer(fl, 50)
yield(f)    # records 1 to 50
yield(f)    # records 51 to 100
close(f)

## iterating over an entire file
f <- FastqStreamer(fl, 50)
while (length(fq <- yield(f))) {
    ## do work here
    print(length(fq))
}
close(f)

## iterating over IRanges
rng <- IRanges(c(50, 100, 200), width=10:8)
f <- FastqStreamer(fl, rng)
while (length(fq <- yield(f))) {
    print(length(fq))
}
close(f)

## Internal fields, methods, and help; for developers
ShortRead:::.FastqSampler_g$methods()
ShortRead:::.FastqSampler_g$fields()
ShortRead:::.FastqSampler_g$help("yield")

}