File: test.gbm.Rout.save

package info (click to toggle)
r-cran-plotmo 3.7.0-1
links: PTS, VCS
area: main
in suites: forky, sid
size: 3,400 kB
sloc: sh: 13; makefile: 2
file content (806 lines) | stat: -rw-r--r-- 34,673 bytes
parent folder | download | duplicates (2)
> # test.gbm.R: gbm tests for plotmo and plotres
> 
> source("test.prolog.R")
> library(gbm)
Loaded gbm 2.2.2
This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
> library(rpart.plot) # for ptitanic, want data with NAs for testing
Loading required package: rpart
> library(plotmo)
Loading required package: Formula
Loading required package: plotrix
> data(ptitanic)
> 
> cat("--- distribution=\"gaussian\", formula interface ----------------------------------\n")
--- distribution="gaussian", formula interface ----------------------------------
> 
> set.seed(2016)
> ptit <- ptitanic[sample(1:nrow(ptitanic), size=70), ] # small data for fast test
> set.seed(2016)
> # # TODO bug in gbm: following causes error: survived is not of type numeric, ordered, or factor
> # ptit$survived <- ptit$survived == "survived"
> ptit <- ptit[!is.na(ptit$age), ]
> train.frac <- .8
> set.seed(2016)
> gbm.gaussian <- gbm(age~., data=ptit, train.frac=train.frac,
+                    distribution="gaussian",
+                    n.trees=50, shrinkage=.1, keep.data=FALSE)
> expect.err(try(plotres(gbm.gaussian)), "use keep.data=TRUE in the call to gbm")
Error : use keep.data=TRUE in the call to gbm (cannot determine the variable importances)
Got expected error from try(plotres(gbm.gaussian))
> set.seed(2016)
> gbm.gaussian <- gbm(age~., data=ptit, train.frac=train.frac,
+                    distribution="gaussian",
+                    n.trees=50, shrinkage=.1)
> par(mfrow=c(2,2), mar=c(3,3,4,1))
> w1 <- plotres(gbm.gaussian, which=1, do.par=FALSE, w1.smooth=TRUE,
+         w1.main="gbm.gaussian")
> cat("w1 plot for gbm.gaussian returned (w1.smooth=TRUE):\n")
w1 plot for gbm.gaussian returned (w1.smooth=TRUE):
> print(w1)
train  test    CV   OOB 
   50     1     0     1 
> plot(0, 0) # dummy plot
> w3 <- plotres(gbm.gaussian, which=3, do.par=FALSE, info=TRUE,
+         smooth.col=0, col=ptit$sex, # ylim=c(-40,40),
+         wmain="nresponse=1")
> # compare to manual residuals
> iused <- 1:(train.frac * nrow(ptit))
> y <- ptit$age[iused]
> n.trees <- plotmo:::gbm.n.trees(gbm.gaussian)
> # TODO following fails in the new version of gbm (version 2.2) (you have to provide newdata)
> # yhat <- predict(gbm.gaussian, type="response", n.trees=n.trees)
> yhat <- predict(gbm.gaussian, newdata=ptit, type="response", n.trees=n.trees)
> yhat <- yhat[iused]
> plot(yhat, y - yhat,
+      col=ptit$sex[iused], main="manual gaussian residuals",
+      pch=20, ylim=c(-40,40))
> abline(h=0, col="gray")
> stopifnot(all(yhat == w3$x))
> stopifnot(all(y - yhat == w3$y))
> par(org.par)
> 
> w1 <- plotres(gbm.gaussian, predict.n.trees=13, w1.grid.col=1, trace=1, SHOWCALL=TRUE,
+               w1.smooth=TRUE,
+               w1.main="predict.n.trees=13 w1.grid.col=1")
importance: survived pclass parch sibsp sex
stats::residuals(object=gbm.object, type="response")
residuals() was unsuccessful, will use predict() instead
stats::predict(gbm.object, data.frame[3,5], type="response", n.trees=13)
stats::fitted(object=gbm.object)
fitted() was unsuccessful, will use predict() instead
plot_gbm(gbm.object, main="predict.n.trees=13 w1.grid.col=1", n.trees=13,
         grid.col=1, smooth=TRUE)

training rsq 0.07
> cat("second w1 plot for gbm.gaussian returned (w1.smooth=TRUE):\n")
second w1 plot for gbm.gaussian returned (w1.smooth=TRUE):
> print(w1)
train  test    CV   OOB 
   50     1     0     1 
> plotmo(gbm.gaussian, trace=-1, SHOWCALL=TRUE)
> # plotmo(gbm.gaussian, trace=-1, all1=TRUE, SHOWCALL=TRUE)
> # plotmo(gbm.gaussian, trace=-1, all2=TRUE, SHOWCALL=TRUE)
> 
> # test color argument
> par(mfrow=c(2,2), mar=c(3,3,4,1))
> plotres(gbm.gaussian, which=1)
> title("test color argument")
> plotres(gbm.gaussian, which=1, w1.col=c(1,2,3,0))
> plotres(gbm.gaussian, which=1, w1.col=c(1,0,0,4), w1.legend.x=40, w1.legend.y=.3)
> plotres(gbm.gaussian, which=1, w1.col=c(2,3,4,1), w1.legend.x="topright")
> par(org.par)
> 
> par(mfrow=c(2,2), mar=c(3,3,4,1))
> plot_gbm(gbm.gaussian)
> title("test plot_gbm")
> w1 <- plot_gbm(gbm.gaussian, col=c(1,2,3,0), grid.col=1, smooth=TRUE,
+                main="col=c(1,2,3,0), grid.col=1")
> cat("third w1 plot for gbm.gaussian returned (smooth=TRUE):\n")
third w1 plot for gbm.gaussian returned (smooth=TRUE):
> print(w1)
train  test    CV   OOB 
   50     1     0     1 
> par(org.par)
> 
> # test xlim and ylim
> par(mfrow=c(2,3), mar=c(3,3,4,1))
> plot_gbm(gbm.gaussian,                  main="test xlim and ylim default")
> plot_gbm(gbm.gaussian, ylim=NULL,       main="ylim=NULL")
> plot_gbm(gbm.gaussian, xlim=c(5, 50),    main="xlim=c(5, 50)")
> plot_gbm(gbm.gaussian, ylim=c(100, 250), main="ylim=c(100, 250)")
> plot_gbm(gbm.gaussian, xlim=c(10, 25),
+                        ylim=c(150, 170),  main="xlim=c(10, 25), ylim=c(150, 170)")
> plot_gbm(gbm.gaussian, xlim=c(-10, 40), ylim=c(-10, 300), legend.x=NA,
+                                         main="xlim=c(-10, 40), ylim=c(-10, 300)\nlegend.x=NA")
> par(org.par)
> 
> # test the smooth argument
> par(mfrow=c(3,3), mar=c(3,3,4,1))
> imin <- plot_gbm(gbm.gaussian,                    main="smooth=default")
> imin.default <- imin
> cat("smooth=default imin=c(", imin[1], ",", imin[2], ",", imin[3], ",", imin[4], ")\n", sep="")
smooth=default imin=c(50,2,0,1)
> 
> imin <- plot_gbm(gbm.gaussian, smooth=c(1,0,0,0), main="smooth=c(1,0,0,0)")
> cat("smooth=c(1,0,0,0) imin=c(", imin[1], ",", imin[2], ",", imin[3], ",", imin[4], ")\n", sep="")
smooth=c(1,0,0,0) imin=c(50,2,0,6)
> 
> imin <- plot_gbm(gbm.gaussian, smooth=c(0,1,0,0), main="smooth=c(0,1,0,0)")
> cat("smooth=c(0,1,0,0) imin=c(", imin[1], ",", imin[2], ",", imin[3], ",", imin[4], ")\n", sep="")
smooth=c(0,1,0,0) imin=c(50,1,0,6)
> 
> imin <- plot_gbm(gbm.gaussian, smooth=c(0,0,1,0), main="smooth=c(0,0,1,0)")
> cat("smooth=c(0,0,1,0) imin=c(", imin[1], ",", imin[2], ",", imin[3], ",", imin[4], ")\n", sep="")
smooth=c(0,0,1,0) imin=c(50,2,0,6)
> 
> imin <- plot_gbm(gbm.gaussian, smooth=c(0,0,0,1), main="smooth=c(0,0,0,1)\nsame as default")
> cat("smooth=c(0,0,0,1) imin=c(", imin[1], ",", imin[2], ",", imin[3], ",", imin[4], ")\n", sep="")
smooth=c(0,0,0,1) imin=c(50,2,0,1)
> 
> imin <- plot_gbm(gbm.gaussian, smooth=c(0,0,0,0), main="smooth=c(0,0,0,0)")
> cat("smooth=c(0,0,0,0) imin=c(", imin[1], ",", imin[2], ",", imin[3], ",", imin[4], ")\n", sep="")
smooth=c(0,0,0,0) imin=c(50,2,0,6)
> 
> imin <- plot_gbm(gbm.gaussian, smooth=c(0,0,1,1), main="smooth=c(0,0,1,1)")
> cat("smooth=c(0,0,1,1) imin=c(", imin[1], ",", imin[2], ",", imin[3], ",", imin[4], ")\n", sep="")
smooth=c(0,0,1,1) imin=c(50,2,0,1)
> 
> imin <- plot_gbm(gbm.gaussian, smooth=1,          main="smooth=1") # gets recycled
> cat("smooth=1          imin=c(", imin[1], ",", imin[2], ",", imin[3], ",", imin[4], ")\n", sep="")
smooth=1          imin=c(50,1,0,1)
> imin.smooth <- imin
> 
> imin.noplot <- plot_gbm(gbm.gaussian, col=0) # will not be plotted
> print(imin.default)
train  test    CV   OOB 
   50     2     0     1 
> print(imin.noplot)
train  test    CV   OOB 
   50     2     0     1 
> stopifnot(identical(imin.default, imin.noplot))
> 
> imin.noplot <- plot_gbm(gbm.gaussian, col=0, smooth=1) # will not be plotted
> print(imin.smooth)
train  test    CV   OOB 
   50     1     0     1 
> print(imin.noplot)
train  test    CV   OOB 
   50     1     0     1 
> stopifnot(identical(imin.smooth, imin.noplot))
> 
> par(org.par)
> 
> cat("--- distribution=\"gaussian\", glm.fit interface ----------------------------------\n")
--- distribution="gaussian", glm.fit interface ----------------------------------
> 
> set.seed(2016)
> ptit <- ptitanic[sample(1:nrow(ptitanic), size=70), ]
> set.seed(2016)
> ptit <- ptit[!is.na(ptit$age), ]
> train.frac <- .8
> set.seed(2016)
> gbm.gaussian.fit <- gbm.fit(ptit[,-4], ptit[,4], nTrain=floor(train.frac * nrow(ptit)),
+                    distribution="gaussian", verbose=FALSE,
+                    n.trees=50, shrinkage=.1)
> par(mfrow=c(2,2), mar=c(3,3,4,1))
> w1 <- plotres(gbm.gaussian.fit, which=1, do.par=FALSE, w1.smooth=TRUE,
+         w1.main="gbm.gaussian.fit")
> 
> cat("w1 plot for gbm.gaussian.fit returned (w1.smooth=TRUE):\n")
w1 plot for gbm.gaussian.fit returned (w1.smooth=TRUE):
> print(w1)
train  test    CV   OOB 
   50     1     0     1 
> 
> plot(0, 0) # dummy plot
> 
> w3 <- plotres(gbm.gaussian.fit, which=3, do.par=FALSE, info=TRUE, trace=0,
+         smooth.col=0, col=ptit$sex, # ylim=c(-40,40),
+         wmain="nresponse=1")
> 
> # compare to manual residuals
> iused <- 1:(train.frac * nrow(ptit))
> y.fit <- ptit$age[iused]
> n.trees <- plotmo:::gbm.n.trees(gbm.gaussian.fit)
> # TODO following fails in the new version of gbm (version 2.2) (you have to provide newdata)
> # yhat.fit <- predict(gbm.gaussian.fit, type="response", n.trees=n.trees)
> yhat.fit <- predict(gbm.gaussian.fit, newdata=ptit[,-4], type="response", n.trees=n.trees)
> yhat.fit <- yhat.fit[iused]
> # plot(yhat.fit, y.fit - yhat.fit,
> #      col=ptit$sex[iused], main="manual gaussian residuals\n(TODO gbm.fit don't match)",
> #      pch=20, ylim=c(-40,40))
> # abline(h=0, col="gray")
> # --- TODO known issue, these fail ---
> # compare to formual interface
> # stopifnot(all(yhat.fit == yhat))
> stopifnot(all(y.fit == y))
> # # sanity check
> # stopifnot(all(yhat.fit == w3$x))
> # stopifnot(all(y.fit - yhat.fit == w3$y.fit))
> plotmo(gbm.gaussian.fit, trace=-1, SHOWCALL=TRUE)
> par(org.par)
> 
> cat("--- distribution=\"laplace\" ----------------------------------\n")
--- distribution="laplace" ----------------------------------
> 
> set.seed(2016)
> ptit <- ptitanic[sample(1:nrow(ptitanic), size=70), ]
> ptit <- ptit[!is.na(ptit$age), ]
> ptit$survived <- ptit$parch <- ptit$sex <- NULL
> train.frac <- .8
> set.seed(2016)
> gbm.laplace <- gbm(age~., data=ptit, train.frac=train.frac,
+                    distribution="laplace",
+                    n.trees=100, shrinkage=.1)
> par(mfrow=c(2,2), mar=c(3,3,4,1))
> w1 <- plotres(gbm.laplace, which=1:2, do.par=FALSE, w1.smooth=TRUE,
+         w1.main="gbm.laplace")
> 
> cat("w1 plot for gbm.laplace returned (w1.smooth=TRUE):\n")
w1 plot for gbm.laplace returned (w1.smooth=TRUE):
> print(w1)
train  test    CV   OOB 
   75   100     0     1 
> 
> w3 <- plotres(gbm.laplace, which=3, do.par=FALSE, info=TRUE)
> 
> # compare to manual residuals
> iused <- 1:(train.frac * nrow(ptit))
> y <- ptit$age[iused]
> n.trees <- plotmo:::gbm.n.trees(gbm.laplace)
> # TODO following fails in the new version of gbm (version 2.2) (you have to provide newdata)
> # yhat <- predict(gbm.laplace, type="response", n.trees=n.trees)
> yhat <- predict(gbm.laplace, newdata=ptit, type="response", n.trees=n.trees)
> yhat <- yhat[iused]
> plot(yhat, y - yhat,
+      main="manual laplace residuals",
+      pch=20, ylim=c(-40,40))
> abline(h=0, col="gray")
> stopifnot(all(yhat == w3$x))
> stopifnot(all(y - yhat == w3$y))
> plotmo(gbm.laplace, trace=-1, SHOWCALL=TRUE)
> par(org.par)
> 
> # # TODO commented out because gives random slightly different results per invocation
> # cat("--- distribution=\"tdist\" ----------------------------------\n")
> #
> # set.seed(2016)
> # ptit <- ptitanic[sample(1:nrow(ptitanic), size=70), ]
> # ptit <- ptit[!is.na(ptit$age), ]
> # ptit$survived <- ptit$parch <- ptit$sex <- NULL
> # train.frac <- .8
> # set.seed(2016)
> # gbm.tdist <- gbm(age~., data=ptit, train.frac=train.frac,
> #                    distribution="tdist",
> #                    n.trees=100, shrinkage=.1)
> # par(mfrow=c(2,2), mar=c(3,3,4,1))
> # set.seed(2016)
> # w1 <- plotres(gbm.tdist, which=1:2, do.par=FALSE,
> #         w1.main="gbm.tdist")
> #
> # cat("w1 plot for gbm.tdist returned (w1.smooth=default):\n")
> # print(w1)
> #
> # set.seed(2016)
> # w3 <- plotres(gbm.tdist, which=3, do.par=FALSE, info=TRUE)
> #
> # # compare to manual residuals
> # iused <- 1:(train.frac * nrow(ptit))
> # y <- ptit$age[iused]
> # n.trees <- plotmo:::gbm.n.trees(gbm.tdist)
> # # TODO following fails in the new version of gbm (version 2.2) (you have to provide newdata)
> # # yhat <- predict(gbm.tdist, type="response", n.trees=n.trees)
> # yhat <- predict(gbm.tdist, newdata=ptit, type="response", n.trees=n.trees)
> # yhat <- yhat[iused]
> # plot(yhat, y - yhat,
> #      main="manual tdist residuals",
> #      pch=20, ylim=c(-40,40))
> # abline(h=0, col="gray")
> # stopifnot(all(yhat == w3$x))
> # stopifnot(all(y - yhat == w3$y))
> # plotmo(gbm.tdist, trace=-1, SHOWCALL=TRUE)
> # par(org.par)
> 
> cat("--- distribution=\"bernoulli\" ----------------------------------\n")
--- distribution="bernoulli" ----------------------------------
> 
> set.seed(2016)
> ptit <- ptitanic[sample(1:nrow(ptitanic), size=80), ]
> ptit$survived <- as.numeric(ptit$survived == "survived")
> temp <- ptit$pclass # put pclass at the end so can check ordering of importances
> ptit$pclass <- NULL
> ptit$pclass <- factor(as.numeric(temp), labels=c("first", "second", "third"))
> train.frac <- .9
> set.seed(2016)
> gbm.bernoulli <- gbm(survived~., data=ptit, train.frac=train.frac,
+                      distribution="bernoulli",
+                      n.trees=100, shrinkage=.1, cv.folds=3)
> par(mfrow=c(2,2))
> par(mar=c(3.5, 3, 2, 0.5))  # small margins and text to pack figs in
> par(mgp=c(1.5, .4, 0))      # squash axis annotations
> w1 <- plotres(gbm.bernoulli, which=c(1,4),
+         col=ptit$survived+2, trace=0, do.par=FALSE,
+         w1.main="gbm.bernoulli")
> cat("w1 plot for gbm.bernoulli with cv.folds=3 returned:\n")
w1 plot for gbm.bernoulli with cv.folds=3 returned:
> print(w1)
train  test    CV   OOB 
  100    24    99    16 
> 
> w3 <- plotres(gbm.bernoulli, which=3, predict.n.trees=40,
+         ylim=c(-.6, 1), xlim=c(.1, .6),
+         col=ptit$sex, trace=0, do.par=FALSE, smooth.col=0)
> 
> # compare to manual residuals
> iused <- 1:(train.frac * nrow(ptit))
> y <- ptit$survived[iused]
> # TODO following fails in the new version of gbm (version 2.2) (you have to provide newdata)
> # yhat <- predict(gbm.bernoulli, type="response", n.trees=40)
> yhat <- predict(gbm.bernoulli, newdata=ptit, type="response", n.trees=40)
> yhat <- yhat[iused]
> plot(yhat, y - yhat, col=ptit$sex,
+      main="manual bernoulli residuals", pch=20, cex=1,
+      ylim=c(-.6, 1), xlim=c(.1, .6))
> abline(h=0, col="gray")
> stopifnot(all(yhat == w3$x))
> stopifnot(all(y - yhat == w3$y))
> par(org.par)
> 
> plotmo(gbm.bernoulli, do.par=2)
 plotmo grid:    sex age sibsp parch pclass
                male  27     0     0  third
> print(summary(gbm.bernoulli)) # will also plot
          var   rel.inf
age       age 32.307096
sex       sex 29.921593
pclass pclass 17.323084
parch   parch 13.277759
sibsp   sibsp  7.170468
> par(org.par)
> 
> cat("--- distribution=\"huberized\" ----------------------------------\n")
--- distribution="huberized" ----------------------------------
> 
> set.seed(2016)
> ptit <- ptitanic[sample(1:nrow(ptitanic), size=100), ]
> ptit$survived <- as.numeric(ptit$survived == "survived")
> ptit$sibsp <- ptit$parch <- ptit$pclass <- NULL
> train.frac <- 1
> set.seed(2016)
> gbm.huberized <- gbm(survived~., data=ptit, train.frac=train.frac,
+                      distribution="huberized",
+                      n.trees=200, shrinkage=.1)
> par(mfrow=c(2,2))
> par(mar=c(3.5, 3, 2, 0.5))  # small margins and text to pack figs in
> par(mgp=c(1.5, .4, 0))      # squash axis annotations
> w1 <- plotres(gbm.huberized, which=c(1,4),
+         col=ptit$survived+2, trace=0, do.par=FALSE,
+         w1.main="gbm.huberized")
Warning: plot_gbm: cannot plot OOB curve (it has some non-finite values)
> cat("w1 plot for gbm.huberized returned (smooth=default):\n")
w1 plot for gbm.huberized returned (smooth=default):
> print(w1)
train  test    CV   OOB 
  169     0     0     0 
> 
> # TODO huberized residuals look weird
> w3 <- plotres(gbm.huberized, which=3, predict.n.trees=40,
+         col=ptit$sex, trace=0, do.par=FALSE, smooth.col=0)
> 
> # compare to manual residuals
> iused <- 1:(train.frac * nrow(ptit))
> y <- ptit$survived[iused]
> # TODO following fails in the new version of gbm (version 2.2) (you have to provide newdata)
> # yhat <- predict(gbm.huberized, type="response", n.trees=40)
> yhat <- predict(gbm.huberized, newdata=ptit, type="response", n.trees=40)
> yhat <- yhat[iused]
> plot(yhat, y - yhat, col=ptit$sex, ylim=c(-2.5, 2.5),
+      main="manual huberized residuals", pch=20)
> abline(h=0, col="gray")
> stopifnot(all(yhat == w3$x))
> stopifnot(all(y - yhat == w3$y))
> par(org.par)
> 
> plotmo(gbm.huberized, do.par=2)
 plotmo grid:    sex age
                male  28
> print(summary(gbm.huberized)) # will also plot
    var  rel.inf
age age 68.12613
sex sex 31.87387
> par(org.par)
> 
> cat("--- distribution=\"adaboost\" ----------------------------------\n")
--- distribution="adaboost" ----------------------------------
> 
> set.seed(2016)
> ptit <- ptitanic[sample(1:nrow(ptitanic), size=100), ]
> ptit$survived <- as.numeric(ptit$survived == "survived")
> ptit$sibsp <- ptit$parch <- ptit$pclass <- NULL
> train.frac <- .8
> set.seed(2016)
> gbm.adaboost <- gbm(survived~., data=ptit, train.frac=train.frac,
+                      distribution="adaboost",
+                      n.trees=150, shrinkage=.01)
> par(mfrow=c(2,2))
> par(mar=c(3.5, 3, 2, 0.5))  # small margins and text to pack figs in
> par(mgp=c(1.5, .4, 0))      # squash axis annotations
> w1 <- plotres(gbm.adaboost, which=c(1,4),
+         col=ptit$survived+2, trace=0, do.par=FALSE,
+         w1.main="gbm.adaboost")
> cat("w1 plot for gbm.adaboost returned (smooth=default):\n")
w1 plot for gbm.adaboost returned (smooth=default):
> print(w1)
train  test    CV   OOB 
  150   150     0   117 
> 
> w3 <- plotres(gbm.adaboost, which=3, predict.n.trees=40,
+         col=ptit$sex, trace=0, do.par=FALSE, smooth.col=0)
> 
> # compare to manual residuals
> iused <- 1:(train.frac * nrow(ptit))
> y <- ptit$survived[iused]
> # TODO following fails in the new version of gbm (version 2.2) (you have to provide newdata)
> # yhat <- predict(gbm.adaboost, type="response", n.trees=40)
> yhat <- predict(gbm.adaboost, newdata=ptit, type="response", n.trees=40)
> yhat <- yhat[iused]
> plot(yhat, y - yhat, col=ptit$sex,
+      main="manual adaboost residuals", pch=20)
> abline(h=0, col="gray")
> stopifnot(all(yhat == w3$x))
> stopifnot(all(y - yhat == w3$y))
> par(org.par)
> 
> plotmo(gbm.adaboost, do.par=2)
 plotmo grid:    sex  age
                male 27.5
> print(summary(gbm.adaboost)) # will also plot
    var  rel.inf
sex sex 75.09661
age age 24.90339
> par(org.par)
> 
> # test gbm multinomial model, also test very small number of trees in plot_gbm
> 
> data(iris)
> set.seed(2016)
> gbm.iris <- gbm(Species~., data=iris, distribution="multinomial", n.tree=5)
Warning: Setting `distribution = "multinomial"` is ill-advised as it is currently broken. It exists only for backwards compatibility. Use at your own risk.
> expect.err(try(plotres(gbm.iris)),
+            "gbm distribution=\"multinomial\" is not yet supported")
Error : gbm distribution="multinomial" is not yet supported
       (A direct call to plot_gbm may work)
Got expected error from try(plotres(gbm.iris))
> expect.err(try(plotmo(gbm.iris)),
+            "gbm distribution=\"multinomial\" is not yet supported")
Error : gbm distribution="multinomial" is not yet supported
       (A direct call to plot_gbm may work)
Got expected error from try(plotmo(gbm.iris))
> plot_gbm(gbm.iris)
> 
> # TODO following fails in the new version of gbm (version 2.2)
> #   (distribution "multinomial" is no longer supported)
> #
> # cat("--- distribution=\"multinomial\" ----------------------------------\n")
> #
> # set.seed(2016)
> # ptit <- ptitanic[sample(1:nrow(ptitanic), size=500), ]
> # set.seed(2016)
> # gbm.multinomial <- gbm(pclass~.,
> #                        data=ptit, train.frac=.7,
> #                        distribution="multinomial",
> #                        n.trees=100, shrinkage=.1)
> #
> # w1 <- plot_gbm(gbm.multinomial, main="gbm.multinomial", smooth=T)
> # cat("plot_gbm for gbm.multinomial returned (smooth=TRUE):\n")
> # print(w1)
> #
> # expect.err(try(plotres(gbm.multinomial)),
> #            "gbm distribution=\"multinomial\" is not yet supported")
> #
> # expect.err(try(plotmo(gbm.multinomial)),
> #            "gbm distribution=\"multinomial\" is not yet supported")
> 
> # cat("--- gbmt distribution=\"Gaussian\", formula interface ----------------------------------\n")
> #
> # set.seed(2016)
> # ptit <- ptitanic[sample(1:nrow(ptitanic), size=70), ] # small data for fast test
> # set.seed(2016)
> # # # TODO bug in gbm: following causes error: survived is not of type numeric, ordered, or factor
> # # ptit$survived <- ptit$survived == "survived"
> # ptit <- ptit[!is.na(ptit$age), ]
> # # TODO change this to build same model as gbm.gaussian
> # train_params <-
> #      training_params(num_trees = 50,
> #                      shrinkage = 0.1,
> #                      bag_fraction = 0.5,
> #                      num_train = round(.8 * nrow(ptit)))
> # par(mfrow=c(2,2), mar=c(3,3,4,1))
> # set.seed(2016)
> # gbmt.gaussian <- gbmt(age~., data=ptit,
> #             distribution=gbm_dist("Gaussian"),
> #             train_params = train_params,
> #             is_verbose = FALSE)
> # expect.err(try(plotres(gbmt.gaussian)),
> #            "use keep.data=TRUE in the call to gbm")
> # set.seed(2016)
> # gbmt.gaussian <- gbmt(age~., data=ptit,
> #             distribution=gbm_dist("Gaussian"),
> #             train_params = train_params,
> #             is_verbose = FALSE,  keep_gbm_data=TRUE)
> # w1 <- plotres(gbmt.gaussian, which=1, do.par=FALSE, w1.smooth=TRUE,
> #               w1.main="gbmt.gaussian")
> # cat("w1 plot for gbmt.gaussian returned (w1.smooth=TRUE):\n")
> # print(w1)
> # plot(0, 0) # dummy plot
> # set.seed(2016)
> # w3 <- plotres(gbmt.gaussian, which=3, do.par=FALSE, info=TRUE,
> #         smooth.col=0, col=ptit$sex, # ylim=c(-40,40),
> #         wmain="nresponse=1")
> #
> # # compare to manual residuals
> # iused <- 1:(train.frac * nrow(ptit))
> # y <- ptit$age[iused]
> # n.trees <- plotmo:::gbm.n.trees(gbmt.gaussian)
> # # TODO following fails in the new version of gbm (version 2.2) (you have to provide newdata)
> # # yhat <- predict(gbmt.gaussian, type="response", n.trees=n.trees)
> # yhat <- predict(gbmt.gaussian, newdata=ptit, type="response", n.trees=n.trees)
> # yhat <- yhat[iused]
> # plot(yhat, y - yhat,
> #      col=ptit$sex[iused], main="manual gaussian residuals",
> #      pch=20, ylim=c(-40,40))
> # abline(h=0, col="gray")
> # stopifnot(all(yhat == w3$x))
> # stopifnot(all(y - yhat == w3$y))
> # par(org.par)
> #
> # w1 <- plotres(gbmt.gaussian, predict.n.trees=13, w1.grid.col=1, trace=1, SHOWCALL=TRUE,
> #               w1.smooth=TRUE,
> #               w1.main="predict.n.trees=13 w1.grid.col=1")
> # cat("second w1 plot for gbmt.gaussian returned (w1.smooth=TRUE):\n")
> # print(w1)
> # plotmo(gbmt.gaussian, trace=-1, SHOWCALL=TRUE)
> #
> # par(org.par)
> #
> # cat("--- distribution=\"bernoulli\" ----------------------------------\n")
> #
> # set.seed(2016)
> # ptit <- ptitanic[sample(1:nrow(ptitanic), size=80), ]
> # ptit$survived <- ptit$survived == "survived"
> # temp <- ptit$pclass # put pclass at the end so can check ordering of importances
> # ptit$pclass <- NULL
> # ptit$pclass <- factor(as.numeric(temp), labels=c("first", "second", "third"))
> # # TODO change this to build same model as gbm.bernoulli
> # train_params <-
> #      training_params(num_trees = 100,
> #                      shrinkage = 0.1,
> #                      bag_fraction = 0.5,
> #                      num_train = round(.8 * nrow(ptit)))
> # set.seed(2016)
> # gbmt.bernoulli <- gbmt(survived~., data=ptit,
> #             distribution=gbm_dist("Bernoulli"),
> #             train_params = train_params,
> #             cv_folds = 3,
> #             is_verbose = FALSE,  keep_gbm_data=TRUE)
> # par(mfrow=c(2,2))
> # par(mar=c(3.5, 3, 2, 0.5))  # small margins and text to pack figs in
> # par(mgp=c(1.5, .4, 0))      # squash axis annotations
> # w1 <- plotres(gbmt.bernoulli, which=c(1,4),
> #         col=ptit$survived+2, trace=0, do.par=FALSE,
> #         w1.main="gbmt.bernoulli")
> # cat("w1 plot for gbmt.bernoulli with cv.folds=3 returned:\n")
> # print(w1)
> #
> # w3 <- plotres(gbmt.bernoulli, which=3, predict.n.trees=40,
> #         ylim=c(-.6, 1), xlim=c(.1, .6),
> #         col=ptit$sex, trace=0, do.par=FALSE, smooth.col=0)
> #
> # # compare to manual residuals
> # iused <- 1:(train.frac * nrow(ptit))
> # y <- ptit$survived[iused]
> # # TODO following fails in the new version of gbm (version 2.2) (you have to provide newdata)
> # # yhat <- predict(gbmt.bernoulli, type="response", n.trees=40)
> # yhat <- predict(gbmt.bernoulli, newdata=ptit, type="response", n.trees=40)
> # yhat <- yhat[iused]
> # plot(yhat, y - yhat, col=ptit$sex,
> #      main="manual bernoulli residuals", pch=20, cex=1,
> #      ylim=c(-.6, 1), xlim=c(.1, .6))
> # abline(h=0, col="gray")
> # stopifnot(all(yhat == w3$x))
> # stopifnot(all(y - yhat == w3$y))
> # par(org.par)
> #
> # plotmo(gbmt.bernoulli, do.par=2)
> # print(summary(gbmt.bernoulli)) # will also plot
> # par(org.par)
> 
> cat("--- gbm3: distribution=\"gaussian\", formula interface ----------------------------------\n")
--- gbm3: distribution="gaussian", formula interface ----------------------------------
> 
> library(gbm3)

Attaching package: 'gbm3'

The following objects are masked from 'package:gbm':

    gbm, gbm.fit, gbm.perf

> 
> set.seed(2016)
> ptit <- ptitanic[sample(1:nrow(ptitanic), size=70), ] # small data for fast test
> set.seed(2016)
> # # TODO bug in gbm3: following causes error: survived is not of type numeric, ordered, or factor
> # ptit$survived <- ptit$survived == "survived"
> ptit <- ptit[!is.na(ptit$age), ]
> train.frac <- .8
> set.seed(2016)
> gbm3.gaussian <- gbm3::gbm(age~., data=ptit, train.frac=train.frac,
+                    distribution="gaussian",
+                    n.trees=50, shrinkage=.1, keep.data=FALSE)
> expect.err(try(plotres(gbm3.gaussian)), "use keep_gbm_data=TRUE in the call to gbm")
Error : use keep_gbm_data=TRUE in the call to gbmt (object$gbm_data_obj is NULL)
Got expected error from try(plotres(gbm3.gaussian))
> set.seed(2016)
> gbm3.gaussian <- gbm3::gbm(age~., data=ptit, train.frac=train.frac,
+                    distribution="gaussian",
+                    n.trees=50, shrinkage=.1)
> par(mfrow=c(2,2), mar=c(3,3,4,1))
> w1 <- plotres(gbm3.gaussian, which=1, do.par=FALSE, w1.smooth=TRUE,
+         w1.main="gbm3.gaussian")
> cat("w1 plot for gbm3.gaussian returned (w1.smooth=TRUE):\n")
w1 plot for gbm3.gaussian returned (w1.smooth=TRUE):
> print(w1)
train  test    CV   OOB 
   50     1     0    12 
> plot(0, 0) # dummy plot
> w3 <- plotres(gbm3.gaussian, which=3, do.par=FALSE, info=TRUE,
+         smooth.col=0, col=ptit$sex, # ylim=c(-40,40),
+         wmain="nresponse=1")
> 
> # compare to manual residuals
> iused <- 1:(train.frac * nrow(ptit))
> y <- ptit$age[iused]
> n.trees <- plotmo:::gbm.n.trees(gbm3.gaussian)
> # TODO following fails in the new version of gbm (version 2.2) (you have to provide newdata)
> # yhat <- predict(gbm3.gaussian, type="response", n.trees=n.trees)
> yhat <- predict(gbm3.gaussian, newdata=ptit, type="response", n.trees=n.trees)
> yhat <- yhat[iused]
> plot(yhat, y - yhat,
+      col=ptit$sex[iused], main="manual gaussian residuals",
+      pch=20, ylim=c(-40,40))
> abline(h=0, col="gray")
> stopifnot(all(yhat == w3$x))
> stopifnot(all(y - yhat == w3$y))
> par(org.par)
> 
> w1 <- plotres(gbm3.gaussian, predict.n.trees=13, w1.grid.col=1, trace=1, SHOWCALL=TRUE,
+               w1.smooth=TRUE,
+               w1.main="predict.n.trees=13 w1.grid.col=1")
stats::residuals(object=GBMFit.object, type="response")
residuals() was unsuccessful, will use predict() instead
stats::predict(GBMFit.object, data.frame[3,5], type="response", n.trees=13)
stats::fitted(object=GBMFit.object)
fitted() was unsuccessful, will use predict() instead
plot_gbm(GBMFit.object, main="predict.n.trees=13 w1.grid.col=1", n.trees=13,
         grid.col=1, smooth=TRUE)

training rsq 0.23
> cat("second w1 plot for gbm3.gaussian returned (w1.smooth=TRUE):\n")
second w1 plot for gbm3.gaussian returned (w1.smooth=TRUE):
> print(w1)
train  test    CV   OOB 
   50     1     0    12 
> plotmo(gbm3.gaussian, trace=-1, SHOWCALL=TRUE)
> # plotmo(gbm3.gaussian, trace=-1, all1=TRUE, SHOWCALL=TRUE)
> # plotmo(gbm3.gaussian, trace=-1, all2=TRUE, SHOWCALL=TRUE)
> 
> cat("--- gbm3: distribution=\"gaussian\", xy interface ----------------------------------\n")
--- gbm3: distribution="gaussian", xy interface ----------------------------------
> 
> y = ptit$age
> x = ptit[,c(1,2,3,5,6)]
> train_params=gbm3::training_params(num_trees=100,
+  interaction_depth=2,
+  min_num_obs_in_node=3,
+  shrinkage=0.1, bag_fraction=0.5,
+  id=seq_len(nrow(x)), num_train=round(0.5 * nrow(x)),
+  num_features=ncol(x))
> gbm3fit <- gbm3::gbmt_fit(x, y, train_params=train_params,
+                           keep_gbm_data=TRUE, dist=gbm_dist("Gaussian"))
> plotmo(gbm3fit, trace=-1, SHOWCALL=TRUE)
> plotres(gbm3fit, trace=-1, SHOWCALL=TRUE)
> 
> cat("--- gbm3: large number of variables ----------------------------------\n")
--- gbm3: large number of variables ----------------------------------
> 
> set.seed(2024)
> N <- 1000
> 
> X <- data.frame(X1=runif(N), X2=2*runif(N), X3=3*runif(N),
+                 X4=runif(N), X5=2*runif(N), X6=3*runif(N),
+                 X7=runif(N), X8=2*runif(N), X9=3*runif(N),
+                 X10=runif(N), X11=2*runif(N), X12=3*runif(N),
+                 X13=runif(N), X14=2*runif(N), X15=3*runif(N))
> 
> # Y <- sample(c(0, 1), N, replace = TRUE)
> set.seed(2024)
> Y <- sqrt(X[,1])  +
+      sqrt(X[,2])  +
+      sqrt(X[,3])  +
+      sqrt(X[,4])  +
+      sqrt(X[,5])  +
+      sqrt(X[,6])  +
+      .5 * sqrt(X[,8]) +
+      sqrt(X[,9])  +
+      sqrt(X[,10]) +
+      sqrt(X[,11]) +
+      sqrt(X[,12])
> 
> data <- data.frame(Y, X)
> set.seed(2024)
> gbm3.big <- gbm3::gbm(Y~., data=data, shrinkage=0.1, dist="gaussian")
> y = data[,1]
> x = data[,2:ncol(data)]
> train_params=gbm3::training_params(num_trees=100,
+  interaction_depth=3,
+  min_num_obs_in_node=10,
+  shrinkage=0.1, bag_fraction=0.5,
+  id=seq_len(nrow(x)), num_train=round(0.5 * nrow(x)),
+  num_features=ncol(x))
> gbm3fit.big <- gbm3::gbmt_fit(x, y, train_params=train_params, keep_gbm_data=TRUE, dist=gbm_dist("Gaussian"))
> 
> set.seed(2024)
> plotmo(gbm3.big, SHOWCALL=TRUE)
 plotmo grid:    X1        X2       X3        X4        X5      X6        X7
          0.5143486 0.9627323 1.474525 0.5117489 0.9737853 1.50762 0.4913198
       X8       X9       X10       X11      X12       X13       X14      X15
 1.009313 1.496057 0.5009011 0.9942378 1.568558 0.5078663 0.9544786 1.408739
> plotmo(gbm3.big, all1=TRUE, all2=TRUE, caption="all1=TRUE, all2=TRUE")
Warning: too many predictors to plot all pairs,
         so plotting degree2 plots for just the first 7 predictors.
         Call plotmo with all2=2 to plot degree2 plots for up to 20 predictors.
 plotmo grid:    X1        X2       X3        X4        X5      X6        X7
          0.5143486 0.9627323 1.474525 0.5117489 0.9737853 1.50762 0.4913198
       X8       X9       X10       X11      X12       X13       X14      X15
 1.009313 1.496057 0.5009011 0.9942378 1.568558 0.5078663 0.9544786 1.408739
> plotmo(gbm3.big, all1=TRUE, all2=2, caption="all1=TRUE, all2=2")
 plotmo grid:    X1        X2       X3        X4        X5      X6        X7
          0.5143486 0.9627323 1.474525 0.5117489 0.9737853 1.50762 0.4913198
       X8       X9       X10       X11      X12       X13       X14      X15
 1.009313 1.496057 0.5009011 0.9942378 1.568558 0.5078663 0.9544786 1.408739
> plotres(gbm3.big, trace=-1, SHOWCALL=TRUE)
> 
> set.seed(2024)
> plotmo(gbm3fit.big, SHOWCALL=TRUE)
 plotmo grid:    X1        X2       X3        X4      X5       X6        X7
          0.5223775 0.9345499 1.439748 0.5139633 0.98243 1.518506 0.4884308
       X8       X9       X10       X11     X12       X13       X14      X15
 1.005225 1.582743 0.5212554 0.9505451 1.60459 0.5136175 0.9552039 1.413134
> plotmo(gbm3fit.big, all1=TRUE, caption="all1=TRUE")
 plotmo grid:    X1        X2       X3        X4      X5       X6        X7
          0.5223775 0.9345499 1.439748 0.5139633 0.98243 1.518506 0.4884308
       X8       X9       X10       X11     X12       X13       X14      X15
 1.005225 1.582743 0.5212554 0.9505451 1.60459 0.5136175 0.9552039 1.413134
> plotmo(gbm3fit.big, all2=TRUE, caption="all2=TRUE")
Warning: too many predictors to plot all pairs,
         so plotting degree2 plots for just the first 7 predictors.
         Call plotmo with all2=2 to plot degree2 plots for up to 20 predictors.
 plotmo grid:    X1        X2       X3        X4      X5       X6        X7
          0.5223775 0.9345499 1.439748 0.5139633 0.98243 1.518506 0.4884308
       X8       X9       X10       X11     X12       X13       X14      X15
 1.005225 1.582743 0.5212554 0.9505451 1.60459 0.5136175 0.9552039 1.413134
> plotmo(gbm3fit.big, all2=2, caption="all2=2")
More than 64 degree2 plots.
Consider using plotmo's degree2 argument to limit the number of plots.
For example,  degree2=1:10  or  degree2="X1"
Call plotmo with trace=-1 to make this message go away.

 plotmo grid:    X1        X2       X3        X4      X5       X6        X7
          0.5223775 0.9345499 1.439748 0.5139633 0.98243 1.518506 0.4884308
       X8       X9       X10       X11     X12       X13       X14      X15
 1.005225 1.582743 0.5212554 0.9505451 1.60459 0.5136175 0.9552039 1.413134
> plotres(gbm3.big, trace=-1, SHOWCALL=TRUE)
> 
> source("test.epilog.R")