File: test_base_generatePartialDependence.R

package info (click to toggle)
r-cran-mlr 2.19.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 8,392 kB
  • sloc: ansic: 65; sh: 13; makefile: 5
file content (170 lines) | stat: -rwxr-xr-x 8,007 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170

test_that("generatePartialDependenceData", {
  m = c(4, 10)

  # test regression with interactions, centering, and mixed factor features
  fr = train("regr.rpart", regr.task)
  dr = generatePartialDependenceData(fr, input = regr.task,
    features = c("lstat", "chas"), interaction = TRUE, n = m)
  nfeat = length(dr$features)
  nfacet = length(unique(regr.df[["chas"]]))
  n = getTaskSize(regr.task)
  expect_equal(nrow(dr$data), m[1] * nfeat)
  expect_true(all(dr$data$medv >= min(regr.df$medv) | dr$data$medv <= max(regr.df$medv)))

  plotPartialDependence(dr, facet = "chas")
  dir = tempdir()
  path = file.path(dir, "test.svg")
  suppressMessages(ggsave(path))
  doc = XML::xmlParse(path)
  expect_equal(length(XML::getNodeSet(doc, grey.rect.xpath, ns.svg)), nfacet)
  expect_equal(length(XML::getNodeSet(doc, black.circle.xpath, ns.svg)), nfacet * m[1])

  # check that if the input is a data.frame things work
  dr.df = generatePartialDependenceData(fr, input = regr.df, features = "lstat")

  # check that the interactions and centering work with ICE
  dr = generatePartialDependenceData(fr, input = regr.task,
    features = c("lstat", "chas"), interaction = TRUE, individual = TRUE, n = m)
  expect_equal(nrow(dr$data), m[1] * nfeat * m[2])

  plotPartialDependence(dr, facet = "chas", data = regr.df, p = 1)
  suppressMessages(ggsave(path))
  doc = XML::xmlParse(path)
  expect_equal(length(XML::getNodeSet(doc, grey.rect.xpath, ns.svg)), nfacet)
  # black.circle.xpath counts points which are omitted when individual = TRUE
  expect_equal(length(XML::getNodeSet(doc, black.circle.xpath, ns.svg)), n + prod(m) * nfacet)

  # check that multiple features w/o interaction work with a label outputting classifier with
  # an appropriate aggregation function
  fc = train("classif.rpart", multiclass.task)
  dc = generatePartialDependenceData(fc, input = multiclass.task, features = c("Petal.Width", "Petal.Length"),
    fun = function(x) table(x) / length(x), n = m)
  nfeat = length(dc$features)
  n = getTaskSize(multiclass.task)
  plotPartialDependence(dc, data = multiclass.df)
  suppressMessages(ggsave(path))
  doc = XML::xmlParse(path)
  # minus one because the of the legend
  expect_equal(length(XML::getNodeSet(doc, grey.rect.xpath, ns.svg)), nfeat)
  expect_equal(length(XML::getNodeSet(doc, red.circle.xpath, ns.svg)) - 1, nfeat * m[1])
  expect_equal(length(XML::getNodeSet(doc, blue.circle.xpath, ns.svg)) - 1, nfeat * m[1])
  expect_equal(length(XML::getNodeSet(doc, green.circle.xpath, ns.svg)) - 1, nfeat * m[1])

  fcp = train(makeLearner("classif.svm", predict.type = "prob"), multiclass.task)

  # test that with classif but predict.type = "response" we can use fun which
  # return a vector
  dcp = generatePartialDependenceData(fcp, input = multiclass.task,
    features = "Petal.Width",
    fun = function(x) quantile(x, c(.025, .5, .975)), n = m)
  plotPartialDependence(dcp)

  # check that probability outputting classifiers work w/ interactions
  dcp = generatePartialDependenceData(fcp, input = multiclass.task, features = c("Petal.Width", "Petal.Length"),
    interaction = TRUE, n = m)
  nfacet = length(unique(dcp$data$Petal.Length))
  ntarget = length(dcp$target)
  plotPartialDependence(dcp, "tile")

  # check that probability outputting classifiers work with ICE
  dcp = generatePartialDependenceData(fcp, input = multiclass.task, features = c("Petal.Width", "Petal.Length"),
    interaction = TRUE, individual = TRUE, n = m)

  # check that survival tasks work with multiple features
  fs = train("surv.rpart", surv.task)
  ds = generatePartialDependenceData(fs, input = surv.task, features = c("x1", "x2"), n = m)
  nfeat = length(ds$features)
  n = getTaskSize(surv.task)
  plotPartialDependence(ds, data = surv.df)
  suppressMessages(ggsave(path))
  doc = XML::xmlParse(path)
  expect_equal(length(XML::getNodeSet(doc, grey.rect.xpath, ns.svg)), nfeat)
  expect_equal(length(XML::getNodeSet(doc, black.circle.xpath, ns.svg)), m[1] * nfeat)

  # issue 1180 test
  pd = generatePartialDependenceData(fr, input = regr.task,
    features = c("lstat", "chas"), n = m)
  plotPartialDependence(pd)

  # check that bounds work for regression
  db = generatePartialDependenceData(fr, input = regr.task, features = c("lstat", "chas"),
    interaction = TRUE,
    fun = function(x) quantile(x, c(.25, .5, .75)), n = m)
  nfacet = length(unique(regr.df[["chas"]]))
  n = getTaskSize(regr.task)
  expect_equal(colnames(db$data), c("medv", "Function", "lstat", "chas"))
  plotPartialDependence(db, facet = "chas", data = regr.df)
  suppressMessages(ggsave(path))
  doc = XML::xmlParse(path)
  expect_equal(length(XML::getNodeSet(doc, grey.rect.xpath, ns.svg)), nfacet * 3)
  expect_equal(length(XML::getNodeSet(doc, black.circle.xpath, ns.svg)), nfacet * 3 * m[1] + n * 3)

  # check derivative and factor feature failure
  expect_error(generatePartialDependenceData(fr, input = regr.task, features = c("lstat", "chas"),
    derivative = TRUE))

  # check interaction + derivative failure
  expect_error(generatePartialDependenceData(fr, input = regr.task, features = c("lstat", "chas"),
    interaction = TRUE, derivative = TRUE))

  fcpb = train(makeLearner("classif.rpart", predict.type = "prob"), binaryclass.task)
  bc = generatePartialDependenceData(fcpb, input = binaryclass.task, features = c("V11", "V12"),
    individual = TRUE, n = m)
  ## tests for binary classification plotting, discovered whilst trying to merge
  ## pr #142
  plotPartialDependence(bc)
  bc = generatePartialDependenceData(fcpb, input = binaryclass.task, features = c("V11", "V12"), n = m)
  plotPartialDependence(bc)

  # check that derivative estimation works for ICE and pd for classification and regression
  fr = train(makeLearner("regr.ksvm"), regr.task)
  pfr = generatePartialDependenceData(fr, input = regr.df, features = c("lstat", "crim"),
    derivative = TRUE, individual = FALSE, n = m)
  pfri = generatePartialDependenceData(fr, input = regr.df,
    features = c("lstat", "crim"),
    derivative = TRUE, individual = TRUE, n = m)

  fc = train(makeLearner("classif.ksvm", predict.type = "prob"), multiclass.task)
  pfc = generatePartialDependenceData(fc, input = multiclass.df,
    features = c("Petal.Width", "Petal.Length"),
    derivative = TRUE, n = m)
  fs = train(makeLearner("surv.coxph"), surv.task)
  pfs = generatePartialDependenceData(fs, input = surv.df,
    features = c("x1", "x2"),
    derivative = TRUE, n = m)

  # check that se estimation works
  fse = train(makeLearner("regr.lm", predict.type = "se"), regr.task)
  pfse = generatePartialDependenceData(fse, input = regr.task, features = c("lstat", "crim"),
    bounds = c(-2, 2), n = m)
  plotPartialDependence(pfse)

  # check that tile + contour plots work for two and three features with regression and survival
  expect_error(plotPartialDependence(ds, geom = "tile")) # interaction == FALSE
  tfr = generatePartialDependenceData(fr, regr.df, features = c("lstat", "crim", "chas"),
    interaction = TRUE, n = m)
  plotPartialDependence(tfr, geom = "tile", facet = "chas", data = regr.df)
  tfs = generatePartialDependenceData(fs, surv.df, c("x1", "x2"), interaction = TRUE)
  plotPartialDependence(tfs, geom = "tile", data = surv.df)

  # facetting works with plotPartialDependence:
  q = plotPartialDependence(dr, facet = "chas", data = regr.df,
    facet.wrap.nrow = 2L)
  testFacetting(q, 2L)
  q = plotPartialDependence(dr, facet = "chas", facet.wrap.ncol = 2L,
    data = regr.df)
  testFacetting(q, ncol = 2L)

  # bug found in pr #1206
  pd = generatePartialDependenceData(fcp, multiclass.task, "Petal.Width",
    individual = TRUE, n = m)

  # issue 63 in the tutorial
  pd = generatePartialDependenceData(fcp, multiclass.task, "Petal.Width",
    individual = TRUE, derivative = TRUE, n = m)

  # test that would have caught a bug that occurs when the jacobian is estimated
  pd.der.classif = generatePartialDependenceData(fcp, multiclass.task, "Petal.Width",
    derivative = TRUE, n = m)
})