1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/waitForJobs.R
\name{waitForJobs}
\alias{waitForJobs}
\title{Wait for Termination of Jobs}
\usage{
waitForJobs(
ids = NULL,
sleep = NULL,
timeout = 604800,
expire.after = NULL,
stop.on.error = FALSE,
stop.on.expire = FALSE,
reg = getDefaultRegistry()
)
}
\arguments{
\item{ids}{[\code{\link[base]{data.frame}} or \code{integer}]\cr
A \code{\link[base]{data.frame}} (or \code{\link[data.table]{data.table}})
with a column named \dQuote{job.id}.
Alternatively, you may also pass a vector of integerish job ids.
If not set, defaults to the return value of \code{\link{findSubmitted}}.
Invalid ids are ignored.}
\item{sleep}{[\code{function(i)} | \code{numeric(1)}]\cr
Parameter to control the duration to sleep between queries.
You can pass an absolute numeric value in seconds or a \code{function(i)} which returns
the number of seconds to sleep in the \code{i}-th iteration.
If not provided (\code{NULL}), tries to read the value (number/function) from the configuration file
(stored in \code{reg$sleep}) or defaults to a function with exponential backoff between
5 and 120 seconds.}
\item{timeout}{[\code{numeric(1)}]\cr
After waiting \code{timeout} seconds, show a message and return
\code{FALSE}. This argument may be required on some systems where, e.g.,
expired jobs or jobs on hold are problematic to detect. If you don't want
a timeout, set this to \code{Inf}. Default is \code{604800} (one week).}
\item{expire.after}{[\code{integer(1)}]\cr
Jobs count as \dQuote{expired} if they are not found on the system but have not communicated back
their results (or error message). This frequently happens on managed systems if the scheduler kills
a job because the job has hit the walltime or requested more memory than reserved.
On the other hand, network file systems often require several seconds for new files to be found,
which can lead to false positives in the detection heuristic.
\code{waitForJobs} treats such jobs as expired after they have not been detected on the system
for \code{expire.after} iterations.
If not provided (\code{NULL}), tries to read the value from the configuration file (stored in \code{reg$expire.after}),
and finally defaults to \code{3}.}
\item{stop.on.error}{[\code{logical(1)}]\cr
Immediately cancel if a job terminates with an error? Default is
\code{FALSE}.}
\item{stop.on.expire}{[\code{logical(1)}]\cr
Immediately cancel if jobs are detected to be expired? Default is \code{FALSE}.
If \code{FALSE}, expired jobs are ignored for the remainder of \code{waitForJobs()}.}
\item{reg}{[\code{\link{Registry}}]\cr
Registry. If not explicitly passed, uses the default registry (see \code{\link{setDefaultRegistry}}).}
}
\value{
[\code{logical(1)}]. Returns \code{TRUE} if all jobs terminated
successfully and \code{FALSE} if either the timeout is reached or at least
one job terminated with an exception or expired.
}
\description{
This function simply waits until all jobs have terminated.
}
|