File: generateGridDesign.R

package info (click to toggle)
r-cran-paramhelpers 1.14.2-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 992 kB
  • sloc: ansic: 102; sh: 13; makefile: 2
file content (141 lines) | stat: -rw-r--r-- 4,974 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
#' @title Generates a grid design for a parameter set.
#'
#' @description
#' The following types of columns are created:
#' \tabular{ll}{
#'  numeric(vector)   \tab  `numeric`  \cr
#'  integer(vector)   \tab  `integer`  \cr
#'  discrete(vector)  \tab  `factor` (names of values = levels) \cr
#'  logical(vector)   \tab  `logical`
#' }
#' If you want to convert these, look at [BBmisc::convertDataFrameCols()].
#' Dependent parameters whose constraints are unsatisfied generate `NA` entries
#' in their respective columns. For discrete vectors the levels and their order
#' will be preserved.
#'
#' The algorithm currently performs these steps:
#' \enumerate{
#'   \item{We create a grid. For numerics and integers we use the specified resolution. For discretes all values will be taken.}
#'   \item{Forbidden points are removed.}
#'   \item{Parameters are trafoed (potentially, depending on the setting of argument `trafo`);
#'   dependent parameters whose constraints are unsatisfied are set to `NA` entries.}
#'   \item{Duplicated points are removed. Duplicated points are not generated in a
#'    grid design, but the way parameter dependencies are handled makes this possible.}
#' }
#'
#' Note that if you have trafos attached to your params, the complete creation
#' of the design (except for the detection of invalid parameters w.r.t. their
#' `requires` setting) takes place on the UNTRANSFORMED scale. So this function
#' creates a regular grid over the param space on the UNTRANSFORMED scale, but
#' not necessarily the transformed scale.
#'
#' `generateDesign` will NOT work if there are dependencies over multiple levels
#' of parameters and the dependency is only given with respect to the
#' \dQuote{previous} parameter. A current workaround is to state all
#' dependencies on all parameters involved. (We are working on it.)
#'
#' @template arg_parset
#' @param resolution (`integer`)\cr
#'   Resolution of the grid for each numeric/integer parameter in `par.set`.
#'   For vector parameters, it is the resolution per dimension.
#'   Either pass one resolution for all parameters, or a named vector.
#' @template arg_trafo
#' @template ret_gendes_df
#' @export
#' @examples
#' ps = makeParamSet(
#'   makeNumericParam("x1", lower = -2, upper = 1),
#'   makeNumericParam("x2", lower = -2, upper = 2, trafo = function(x) x^2)
#' )
#' generateGridDesign(ps, resolution = c(x1 = 4, x2 = 5), trafo = TRUE)
generateGridDesign = function(par.set, resolution, trafo = FALSE) {

  doBasicGenDesignChecks(par.set)

  pars = par.set$pars
  n.pars = length(pars)
  # one design column per vector element of each param
  n.cols = sum(getParamLengths(par.set))
  # names of the design columns (used to name the grid below)
  col.ids = getParamIds(par.set, repeated = TRUE, with.nr = TRUE)
  # ids of the numeric / integer params; only these need a resolution
  pids.num = getParamIds(filterParamsNumeric(par.set, include.int = TRUE))

  # 'resolution' is only touched if there is at least one numeric / integer
  # param, so it may be left out for purely discrete / logical param sets
  if (hasNumeric(par.set, include.int = TRUE)) {
    # a single value is recycled to all numeric / integer params
    if (isScalarNumeric(resolution)) {
      resolution = setNames(rep(resolution, length(pids.num)), pids.num)
    }
    resolution = asInteger(resolution, lower = 1L, len = length(pids.num), names = "named")
    # Together with the length check above, set equality guarantees exactly one
    # entry per numeric / integer param. A plain subset test would let
    # duplicated names through and fail later, when the resolution of the
    # missing param is looked up, with an unhelpful subscript error.
    if (!setequal(names(resolution), pids.num)) {
      stop("'resolution' must be named with parameter ids!")
    }
  }

  assertFlag(trafo)

  vals.list = setNames(vector("list", n.cols), col.ids)
  el.counter = 1L

  # iterate over all params and discretize them
  for (i in seq_len(n.pars)) {
    p = pars[[i]]

    # iterate over the vector elements of the param and collect the grid
    # values of each element
    for (j in seq_len(p$len)) {
      if (isDiscrete(p, include.logical = FALSE)) {
        # discrete values are represented by their names
        newvals = names(p$values)
      } else if (isLogical(p)) {
        newvals = c(TRUE, FALSE)
      } else if (isNumeric(p, include.int = TRUE)) {
        # equidistant points between the bounds of the j-th element
        newvals = seq(from = p$lower[[j]], to = p$upper[[j]], length.out = resolution[[p$id]])
        # round for integer; rounding can collapse neighbouring grid points,
        # hence the unique()
        if (isInteger(p)) {
          newvals = as.integer(unique(round(newvals)))
        }
      } else {
        stopf("generateGridDesign cannot be used for param '%s' of type '%s'!", p$id, p$type)
      }
      vals.list[[el.counter]] = newvals
      el.counter = el.counter + 1L
    }
  }

  # full cross product of all per-column value sets
  res = expand.grid(vals.list, KEEP.OUT.ATTRS = FALSE, stringsAsFactors = FALSE)
  # data types here:
  # num(vec): numeric
  # int(vec): integer
  # log(vec): logical
  # dis(vec): character

  colnames(res) = col.ids

  # check each row if forbidden, then remove
  if (hasForbidden(par.set)) {
    # FIXME: this is pretty slow, but correct
    fb = rowSapply(res, isForbidden, par.set = par.set)
    res = res[!fb, , drop = FALSE]
  }

  # everything up to here happened on the untransformed scale
  if (trafo) {
    res = applyTrafos(res, pars)
  }
  # dependent params whose requirements are unsatisfied become NA
  if (hasRequires(par.set)) {
    res = setRequiresToNA(res, pars)
  }

  # remove duplicates; the grid itself has none, but setting entries to NA
  # above can make rows identical
  res = res[!duplicated(res), , drop = FALSE]

  # discrete columns are still character here, turn them into factors
  res = convertDataFrameCols(res, chars.as.factor = TRUE)

  # fix factors (presumably restores the levels and their order as defined in
  # 'par.set' -- see fixDesignFactors)
  res = fixDesignFactors(res, par.set)

  attr(res, "trafo") = trafo
  return(res)
}