File: foreach.R

package info (click to toggle)
r-cran-foreach 1.5.2-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 648 kB
  • sloc: makefile: 2
file content (310 lines) | stat: -rw-r--r-- 12,820 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
#
# Copyright (c) Microsoft. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#' @title foreach
#' @description
#' `%do%` and `%dopar%` are binary operators that operate
#' on a `foreach` object and an `R` expression.
#' The expression, `ex`, is evaluated multiple times in an environment
#' that is created by the `foreach` object, and that environment is
#' modified for each evaluation as specified by the `foreach` object.
#' `%do%` evaluates the expression sequentially, while `%dopar%`
#' evaluates it in parallel.
#' The results of evaluating `ex` are returned as a list by default,
#' but this can be modified by means of the `.combine` argument.
#'
#' @param ... one or more arguments that control how `ex` is
#'    evaluated.  Named arguments specify the name and values of variables
#'    to be defined in the evaluation environment.
#'    An unnamed argument can be used to specify the number of times that
#'    `ex` should be evaluated.
#'    At least one argument must be specified in order to define the
#'    number of times `ex` should be executed.
#'
#'	If multiple arguments are supplied, the number of times `ex` is
#'	evaluated is equal to the smallest number of iterations among the supplied
#'	arguments. See the examples.
#' @param .combine function that is used to process the tasks results as
#'    they generated.  This can be specified as either a function or
#'    a non-empty character string naming the function.
#'    Specifying 'c' is useful for concatenating the results into
#'    a vector, for example.  The values 'cbind' and 'rbind' can combine
#'    vectors into a matrix.  The values '+' and '*' can be used to
#'    process numeric data.
#'    By default, the results are returned in a list.
#' @param .init initial value to pass as the first argument of the
#'    `.combine` function.
#'    This should not be specified unless `.combine` is also specified.
#' @param .final function of one argument that is called to return final result.
#' @param .inorder logical flag indicating whether the `.combine`
#'    function requires the task results to be combined in the same order
#'    that they were submitted.  If the order is not important, then it
#'    setting `.inorder` to `FALSE` can give improved performance.
#'    The default value is `TRUE.
#' @param .multicombine logical flag indicating whether the `.combine`
#'    function can accept more than two arguments.
#'    If an arbitrary `.combine` function is specified, by default,
#'    that function will always be called with two arguments.
#'    If it can take more than two arguments, then setting `.multicombine`
#'    to `TRUE` could improve the performance.
#'    The default value is `FALSE` unless the `.combine`
#'    function is `cbind`, `rbind`, or `c`, which are known
#'    to take more than two arguments.
#' @param .maxcombine maximum number of arguments to pass to the combine function.
#'    This is only relevant if `.multicombine` is `TRUE`.
#' @param .errorhandling specifies how a task evaluation error should be handled.
#'    If the value is "stop", then execution will be stopped via
#'    the `stop` function if an error occurs.
#'    If the value is "remove", the result for that task will not be
#'    returned, or passed to the `.combine` function.
#'    If it is "pass", then the error object generated by task evaluation
#'    will be included with the rest of the results.  It is assumed that
#'    the combine function (if specified) will be able to deal with the
#'    error object.
#'    The default value is "stop".
#' @param .packages character vector of packages that the tasks depend on.
#'    If `ex` requires a `R` package to be loaded, this option
#'    can be used to load that package on each of the workers.
#'    Ignored when used with `%do%`.
#' @param .export character vector of variables to export.
#'    This can be useful when accessing a variable that isn't defined in the
#'    current environment.
#'    The default value in `NULL`.
#' @param .noexport character vector of variables to exclude from exporting.
#'    This can be useful to prevent variables from being exported that aren't
#'    actually needed, perhaps because the symbol is used in a model formula.
#'    The default value in `NULL`.
#' @param .verbose logical flag enabling verbose messages.  This can be
#'    very useful for trouble shooting.
#' @param obj `foreach` object used to control the evaluation
#'    of `ex`.
#' @param e1 `foreach` object to merge.
#' @param e2 `foreach` object to merge.
#' @param ex the `R` expression to evaluate.
#' @param cond condition to evaluate.
#' @param n number of times to evaluate the `R` expression.
#'
#' @details
#' The `foreach` and `%do%`/`%dopar%` operators provide
#' a looping construct that can be viewed as a hybrid of the standard
#' `for` loop and `lapply` function.
#' It looks similar to the `for` loop, and it evaluates an expression,
#' rather than a function (as in `lapply`), but its purpose is to
#' return a value (a list, by default), rather than to cause side-effects.
#' This facilitates parallelization, but looks more natural to people that
#' prefer `for` loops to `lapply`.
#'
#' The `%:%` operator is the _nesting_ operator, used for creating
#' nested foreach loops. Type `vignette("nested")` at the R prompt for
#' more details.
#'
#' Parallel computation depends upon a _parallel backend_ that must be
#' registered before performing the computation. The parallel backends available
#' will be system-specific, but include `doParallel`, which uses R's built-in
#' \pkg{parallel} package. Each parallel backend has a specific registration function,
#' such as `registerDoParallel`.
#'
#' The `times` function is a simple convenience function that calls
#' `foreach`.  It is useful for evaluating an `R` expression multiple
#' times when there are no varying arguments.  This can be convenient for
#' resampling, for example.
#'
#' @seealso
#' [`iterators::iter`]
#' @examples
#' # equivalent to rnorm(3)
#' times(3) %do% rnorm(1)
#'
#' # equivalent to lapply(1:3, sqrt)
#' foreach(i=1:3) %do%
#'   sqrt(i)
#'
#' # multiple ... arguments
#' foreach(i=1:4, j=1:10) %do%
#' 	sqrt(i+j)
#'
#' # equivalent to colMeans(m)
#' m <- matrix(rnorm(9), 3, 3)
#' foreach(i=1:ncol(m), .combine=c) %do%
#'   mean(m[,i])
#'
#' # normalize the rows of a matrix in parallel, with parenthesis used to
#' # force proper operator precedence
#' # Need to register a parallel backend before this example will run
#' # in parallel
#' foreach(i=1:nrow(m), .combine=rbind) %dopar%
#'   (m[i,] / mean(m[i,]))
#'
#' # simple (and inefficient) parallel matrix multiply
#' library(iterators)
#' a <- matrix(1:16, 4, 4)
#' b <- t(a)
#' foreach(b=iter(b, by='col'), .combine=cbind) %dopar%
#'   (a %*% b)
#'
#' # split a data frame by row, and put them back together again without
#' # changing anything
#' d <- data.frame(x=1:10, y=rnorm(10))
#' s <- foreach(d=iter(d, by='row'), .combine=rbind) %dopar% d
#' identical(s, d)
#'
#' # a quick sort function
#' qsort <- function(x) {
#'   n <- length(x)
#'   if (n == 0) {
#'     x
#'   } else {
#'     p <- sample(n, 1)
#'     smaller <- foreach(y=x[-p], .combine=c) %:% when(y <= x[p]) %do% y
#'     larger  <- foreach(y=x[-p], .combine=c) %:% when(y >  x[p]) %do% y
#'     c(qsort(smaller), x[p], qsort(larger))
#'   }
#' }
#' qsort(runif(12))
#'
#' @keywords utilities
#' @export
#' @rdname foreach
foreach <- function(..., .combine, .init, .final=NULL, .inorder=TRUE,
                    .multicombine=FALSE,
                    .maxcombine=if (.multicombine) 100 else 2,
                    .errorhandling=c('stop', 'remove', 'pass'),
                    .packages=NULL, .export=NULL, .noexport=NULL,
                    .verbose=FALSE) {
  if (missing(.combine)) {
    if (!missing(.init))
      stop('if .init is specified, then .combine must also be specified')
    .combine <- defcombine
    hasInit <- TRUE
    init <- quote(list())
  } else {
    .combine <- match.fun(.combine)
    if (missing(.init)) {
      hasInit <- FALSE
      init <- NULL
    } else {
      hasInit <- TRUE
      init <- substitute(.init)
    }
  }

  # .multicombine defaults to TRUE if the .combine function is known to
  # take multiple arguments
  if (missing(.multicombine) &&
      (identical(.combine, cbind) ||
       identical(.combine, rbind) ||
       identical(.combine, c) ||
       identical(.combine, defcombine)))
    .multicombine <- TRUE

  # sanity check the arguments
  if (!is.null(.final) && !is.function(.final))
    stop('.final must be a function')
  if (!is.logical(.inorder) || length(.inorder) > 1)
    stop('.inorder must be a logical value')
  if (!is.logical(.multicombine) || length(.multicombine) > 1)
    stop('.multicombine must be a logical value')
  if (!is.numeric(.maxcombine) || length(.maxcombine) > 1 || .maxcombine < 2)
    stop('.maxcombine must be a numeric value >= 2')
  if (!is.character(.errorhandling))
    stop('.errorhandling must be a character string')
  if (!is.null(.packages) && !is.character(.packages))
    stop('.packages must be a character vector')
  if (!is.null(.export) && !is.character(.export))
    stop('.export must be a character vector')
  if (!is.null(.noexport) && !is.character(.noexport))
    stop('.noexport must be a character vector')
  if (!is.logical(.verbose) || length(.verbose) > 1)
    stop('.verbose must be a logical value')

  specified <- c('errorHandling', 'verbose')
  specified <- specified[c(!missing(.errorhandling), !missing(.verbose))]

  args <- substitute(list(...))[-1]

  if (length(args) == 0)
    stop('no iteration arguments specified')
  argnames <- names(args)
  if (is.null(argnames))
    argnames <- rep('', length(args))

  # check for backend-specific options
  options <- list()
  opts <- grep('^\\.options\\.[A-Za-z][A-Za-z]*$', argnames)
  if (length(opts) > 0) {
    # put the specified options objects into the options list
    for (i in opts) {
      bname <- substr(argnames[i], 10, 100)
      options[[bname]] <- list(...)[[i]]
    }

    # remove the specified options objects from args and argnames
    args <- args[-opts]
    argnames <- argnames[-opts]
  }

  # check for arguments that start with a '.', and issue an error,
  # assuming that these are misspelled options
  unrecog <- grep('^\\.', argnames)
  if (length(unrecog) > 0)
    stop(sprintf('unrecognized argument(s): %s',
                 paste(argnames[unrecog], collapse=', ')))

  # check for use of old-style arguments, and issue an error
  oldargs <- c('COMBINE', 'INIT', 'INORDER', 'MULTICOMBINE', 'MAXCOMBINE',
               'ERRORHANDLING', 'PACKAGES', 'VERBOSE', 'EXPORT', 'NOEXPORT',
               'LOADFACTOR', 'CHUNKSIZE')
  oldused <- argnames %in% oldargs
  if (any(oldused))
    stop(sprintf('old style argument(s) specified: %s',
                 paste(argnames[oldused], collapse=', ')))

  .errorhandling <- match.arg(.errorhandling)

  combineInfo <- list(fun=.combine, in.order=.inorder, has.init=hasInit,
                      init=init, final=.final, multi.combine=.multicombine,
                      max.combine=.maxcombine)
  iterable <- list(args=args, argnames=argnames, evalenv=parent.frame(),
                   specified=specified, combineInfo=combineInfo,
                   errorHandling=.errorhandling, packages=.packages,
                   export=.export, noexport=.noexport, options=options,
                   verbose=.verbose)
  class(iterable) <- 'foreach'
  iterable
}

#' @export
#' @rdname foreach
'%:%' <- function(e1, e2) {
  if (!inherits(e1, 'foreach'))
    stop('"%:%" was passed an illegal right operand')

  if (inherits(e2, 'foreach'))
    makeMerged(e1, e2)
  else if (inherits(e2, 'foreachCondition'))
    makeFiltered(e1, e2)
  else
    stop('"%:%" was passed an illegal right operand')
}

#' @export
#' @rdname foreach
when <- function(cond) {
  obj <- list(qcond=substitute(cond), evalenv=parent.frame())
  class(obj) <- 'foreachCondition'
  obj
}