File: RLearner_cluster_dbscan.R

package info (click to toggle)
r-cran-mlr 2.19.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 8,264 kB
  • sloc: ansic: 65; sh: 13; makefile: 5
file content (37 lines) | stat: -rw-r--r-- 1,490 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#' @export
makeRLearner.cluster.dbscan = function() {
  makeRLearnerCluster(
    cl = "cluster.dbscan",
    package = "fpc",
    par.set = makeParamSet(
      makeNumericLearnerParam(id = "eps", default = 1, lower = 0),
      # FIXME eps seems to have no default in dbscan(), if it has 1 par.vals is redundant
      makeIntegerLearnerParam(id = "MinPts", default = 5L, lower = 0L),
      makeLogicalLearnerParam(id = "scale", default = FALSE),
      makeLogicalLearnerParam(id = "showplot", default = FALSE, tunable = FALSE),
      makeDiscreteLearnerParam(id = "method", values = c("hybrid", "raw", "dist"), default = "hybrid")
    ),
    par.vals = list(eps = 1),
    properties = "numerics",
    name = "DBScan Clustering",
    note = "A cluster index of NA indicates noise points. Specify `method = 'dist'` if the data should be interpreted as dissimilarity matrix or object. Otherwise Euclidean distances will be used.",
    short.name = "dbscan",
    callees = "dbscan"
  )
}

#' @export
trainLearner.cluster.dbscan = function(.learner, .task, .subset, .weights = NULL, ...) {
  data = getTaskData(.task, .subset)
  model = fpc::dbscan(data, ...)
  # dbscan needs this in the prediction phase
  model$data = data
  return(model)
}

#' @export
predictLearner.cluster.dbscan = function(.learner, .model, .newdata, ...) {
  indices = as.integer(predict(.model$learner.model, .model$learner.model$data, newdata = .newdata, ...))
  indices[indices == 0L] = NA_integer_
  return(indices)
}