File: BatchtoolsParam-class.Rd

package info (click to toggle)
r-bioc-biocparallel 1.40.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,768 kB
  • sloc: cpp: 139; sh: 14; makefile: 8
file content (252 lines) | stat: -rw-r--r-- 8,945 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
\name{BatchtoolsParam-class}
\Rdversion{1.1}
\docType{class}

\alias{BatchtoolsParam-class}
\alias{BatchtoolsParam}
\alias{bpRNGseed,BatchtoolsParam-method}
\alias{bpRNGseed<-,BatchtoolsParam,numeric-method}
\alias{bpbackend,BatchtoolsParam-method}
\alias{bpisup,BatchtoolsParam-method}
\alias{bplapply,ANY,BatchtoolsParam-method}
\alias{bplogdir,BatchtoolsParam-method}
\alias{bplogdir<-,BatchtoolsParam,character-method}
\alias{bpschedule,BatchtoolsParam-method}
\alias{bpstart,BatchtoolsParam-method}
\alias{bpstop,BatchtoolsParam-method}
\alias{bpworkers,BatchtoolsParam-method}
\alias{show,BatchtoolsParam-method}
\alias{batchtoolsWorkers}
\alias{batchtoolsCluster}
\alias{batchtoolsTemplate}
\alias{batchtoolsRegistryargs}

\title{Enable parallelization on batch systems}

\description{

    This class is used to parameterize scheduler options on managed
    high-performance computing clusters using batchtools.

    \code{BatchtoolsParam()}: Construct a BatchtoolsParam-class object.

    \code{batchtoolsWorkers()}: Return the default number of workers for
    each backend.

    \code{batchtoolsTemplate()}: Return the default template for each
    backend.

    \code{batchtoolsCluster()}: Return the default cluster.

    \code{batchtoolsRegistryargs()}: Create a list of arguments to be
    used in batchtools' \code{makeRegistry}; see \code{registryargs}
    argument.

}

\usage{
BatchtoolsParam(
    workers = batchtoolsWorkers(cluster),
    cluster = batchtoolsCluster(),
    registryargs = batchtoolsRegistryargs(),
    saveregistry = FALSE,
    resources = list(),
    template = batchtoolsTemplate(cluster),
    stop.on.error = TRUE, progressbar = FALSE, RNGseed = NA_integer_,
    timeout = WORKER_TIMEOUT, exportglobals=TRUE,
    log = FALSE, logdir = NA_character_, resultdir=NA_character_,
    jobname = "BPJOB"
)
batchtoolsWorkers(cluster = batchtoolsCluster())
batchtoolsCluster(cluster)
batchtoolsTemplate(cluster)
batchtoolsRegistryargs(...)
}

\arguments{

    \item{workers}{\code{integer(1)}} Number of workers to divide tasks
	(e.g., elements in the first argument of \code{bplapply})
	between. On 'multicore' and 'socket' backends, this defaults to
	\code{multicoreWorkers()} and \code{snowWorkers()}.  On managed
	(e.g., slurm, SGE) clusters \code{workers} has no default,
	meaning that the number of workers needs to be provided by the
	user.

    \item{cluster}{\code{character(1)}} Cluster type being used as the
	backend by \code{BatchtoolsParam}. The available options are
	"socket", "multicore", "interactive", "sge", "slurm", "lsf",
	"torque" and "openlava". The cluster type, if available on the
	machine, registers as the backend. Cluster types which need
	a \code{template} are "sge", "slurm", "lsf", "openlava", and
	"torque". If the template is not given then a default is
	selected from the \code{batchtools} package.

    \item{registryargs}{\code{list()}} Arguments given to the registry
        created by \code{BatchtoolsParam} to configure the registry and
        where it's being stored. The \code{registryargs} can be
        specified by the function \code{batchtoolsRegistryargs()} which
        takes the arguments \code{file.dir}, \code{work.dir},
        \code{packages}, \code{namespaces}, \code{source}, \code{load},
        \code{make.default}. It's useful to configure these options,
        especially the \code{file.dir} to a location which is accessible
        to all the nodes on your job scheduler, i.e., master and
        workers. \code{file.dir} uses a default setting to make a
        registry in your working directory.

    \item{saveregistry}{\code{logical(1)}} Option given to store the
        entire registry for the job(s). This functionality should only
        be used when debugging. The storage of the entire registry can
        be time and space expensive on disk. The registry will be saved
        in the directory specified by \code{file.dir} in
        \code{registryargs}; the default location is the current working
        directory. The saved registry directories will have suffix "-1",
        "-2" and so on, for each time the \code{BatchtoolsParam} is
        used.

    \item{resources}{\code{named list()}} Arguments passed to the
        \code{resources} argument of \code{batchtools::submitJobs}
        during evaluation of \code{bplapply} and similar
        functions. These name-value pairs are used for substitution
        into the template file.

    \item{template}{\code{character(1)}} Path to a template for the
        backend in \code{BatchtoolsParam}. It is possible to check which
	template is being used by the object using the getter
	\code{bpbackend(BatchtoolsParam())}. The template needs to be
	written specific to each backend. Please check the list of available
	templates in the \code{batchtools} package.

    \item{stop.on.error}{\code{logical(1)}} Stop all jobs as soon as one
	job fails (\code{stop.on.error == TRUE}) or wait for all jobs
	to terminate.  Default is \code{TRUE}.

    \item{progressbar}{\code{logical(1)}} Display a progress bar while
        jobs run?  Default is \code{FALSE}, which suppresses the
        progress bar used in BatchtoolsParam and is less verbose.

    \item{RNGseed}{\code{integer(1)}} Set an initial seed for the RNG.
	Default is \code{NA_integer_}, in which case a random seed is
	chosen upon initialization.

    \item{timeout}{\code{numeric(1)}} Time (in seconds) allowed for a worker
        to complete a task.  If the computation exceeds \code{timeout}
        an error is thrown with message 'reached elapsed time limit'.

    \item{exportglobals}{\code{logical(1)}} Export
        \code{base::options()} from manager to workers? Default \code{TRUE}.

    \item{log}{\code{logical(1)}} Option given to save the logs which
        are produced by the jobs. If \code{log=TRUE} then the \code{logdir}
	option must be specified.

    \item{logdir}{\code{character(1)}} Path to location where logs are
    stored. The argument \code{log=TRUE} is required before using the
    logdir option.

    \item{resultdir}{\code{character(1)}} Path where results are stored.

    \item{jobname}{\code{character(1)}} Job name that is prepended
    to the output log and result files. Default is "BPJOB".

    \item{\dots}{name-value pairs}
    Names and values correspond to arguments from batchtools
    \code{\link[batchtools]{makeRegistry}}.

}

\section{BatchtoolsParam constructor}{

    Return an object with specified values. The object may be saved to
    disk or reused within a session.
}

\section{Methods}{

    The following generics are implemented and perform as documented on
    the corresponding help page: \code{\link{bpworkers}},
    \code{\link{bpnworkers}}, \code{\link{bpstart}},
    \code{\link{bpstop}}, \code{\link{bpisup}}, \code{\link{bpbackend}}.

    \code{\link{bplapply}} handles arguments \code{X} of classes derived
    from \code{S4Vectors::List} specially, coercing to \code{list}.
}

\author{Nitesh Turaga, \url{mailto:nitesh.turaga@roswellpark.org}}

\seealso{

    \code{getClass("BiocParallelParam")} for additional parameter classes.

    \code{register} for registering parameter classes for use in parallel
    evaluation.

    The batchtools package.
}

\examples{
## Pi approximation
piApprox = function(n) {
    ## Sample n points uniformly from the unit square: one point per
    ## row, with the two coordinates in the two columns.
    pts = matrix(runif(2 * n), ncol = 2)
    x = pts[, 1]
    y = pts[, 2]
    ## A point falls inside the quarter circle of radius 1 when its
    ## distance from the origin is at most 1.
    inside = sqrt(x^2 + y^2) <= 1
    ## The fraction of points inside estimates pi / 4.
    4 * mean(inside)
}

## Single estimate from 1000 random points
piApprox(1000)

## Calculate piApprox 10 times; rep(10e5, 10) is ten copies of 1e6, so
## each of the ten calls draws one million points.
param <- BatchtoolsParam()
result <- bplapply(rep(10e5, 10), piApprox, BPPARAM=param)

\dontrun{
## see vignette for additional explanation
library(BiocParallel)
## Request five workers on an sge cluster; sge is one of the cluster
## types that need a template file, given here by a relative path.
param = BatchtoolsParam(workers=5,
                        cluster="sge",
                        template="script/test-sge-template.tmpl")
## Run parallel job: piApprox is applied to each of 100 copies of 1e6
result = bplapply(rep(10e5, 100), piApprox, BPPARAM=param)

## bpmapply
## fun must accept the varying arguments x and y plus the constant z
## that is supplied through MoreArgs; define a minimal one so the
## example is self-contained.
fun = function(x, y, z) x + y + z
param = BatchtoolsParam()
result = bpmapply(fun, x = 1:3, y = 1:3, MoreArgs = list(z = 1),
                   SIMPLIFY = TRUE, BPPARAM = param)

## bpvec
## Two workers are requested explicitly through the workers argument.
param = BatchtoolsParam(workers=2)
result = bpvec(1:10, seq_along, BPPARAM=param)

## bpvectorize
param = BatchtoolsParam(workers=2)
## this returns a function, which is then called below in place of
## seq_along
bpseq_along = bpvectorize(seq_along, BPPARAM=param)
result = bpseq_along(1:10)

## bpiterate
ITER <- function(n=5) {
    ## The counter lives in the enclosing environment, so it persists
    ## between successive calls of the returned function.
    count <- 0L
    function() {
        count <<- count + 1L
        ## The first n calls yield the call number repeated n times;
        ## every later call yields NULL.
        if (count <= n) rep(count, n) else NULL
    }
}

## FUN sums each vector produced by the iterator and adds y; y=10 is
## presumably forwarded to FUN as an extra argument (confirm against
## the bpiterate help page).
param <- BatchtoolsParam()
res <- bpiterate(ITER=ITER(), FUN=function(x,y) sum(x) + y, y=10, BPPARAM=param)

## save logs
## log=TRUE needs a logdir, so create a fresh temporary directory first
logdir <- tempfile()
dir.create(logdir)
param <- BatchtoolsParam(log=TRUE, logdir=logdir)
res <- bplapply(rep(10e5, 10), piApprox, BPPARAM=param)

## save registry (should be used only for debugging)
## file.dir is only a path here; it is handed to the registry arguments
file.dir <- tempfile()
registryargs <- batchtoolsRegistryargs(file.dir = file.dir)
param <- BatchtoolsParam(saveregistry = TRUE, registryargs = registryargs)
res <- bplapply(rep(10e5, 10), piApprox, BPPARAM=param)
## list entries next to file.dir whose names match its basename; saved
## registries carry a numeric suffix such as -1, -2
dir(dirname(file.dir), basename(file.dir))
}
}