File: quickstart.R

package info (click to toggle)
r-cran-projpred 2.0.2%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 740 kB
  • sloc: cpp: 355; sh: 14; makefile: 2
file content (91 lines) | stat: -rw-r--r-- 3,782 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
params <-
list(EVAL = TRUE)

## ---- SETTINGS-knitr, include=FALSE-------------------------------------------
stopifnot(require(knitr))
knitr::opts_chunk$set(
    fig.width = 7, fig.height = 5,
    comment = NA,
    message = FALSE,
    warning = FALSE,
    eval = if (isTRUE(exists("params"))) params$EVAL else FALSE,
    dev = "png",
    dpi = 150,
    fig.align = "center"
)

## ---- results='hide', message=FALSE, warning=FALSE----------------------------
#  library(brms)
#  library(projpred)
#  library(dplyr)
#  library(ggplot2)
#  library(bayesplot)
#  theme_set(theme_classic())
#  #options(mc.cores = parallel::detectCores())

## -----------------------------------------------------------------------------
#  data('df_gaussian', package = 'projpred')

## ---- results='hide', message=FALSE, warning=FALSE----------------------------
#  split_structure <- break_up_matrix_term(y ~ x, data = df_gaussian)
#  df_gaussian <- split_structure$data
#  formula <- split_structure$formula
#  d <- df_gaussian
#  n <- nrow(df_gaussian) # 100
#  D <- ncol(df_gaussian[, -1]) # 20
#  p0 <- 5 # prior guess for the number of relevant variables
#  tau0 <- p0/(D-p0) * 1/sqrt(n) # scale for tau (notice that stan_glm will automatically scale this by sigma)
#  fit <- brm(formula, family=gaussian(), data=df_gaussian,
#             prior=prior(horseshoe(scale_global = tau0, scale_slab = 1), class=b),
#             ## To make this vignette build fast, we use only 2 chains and
#             ## 500 iterations. In practice, at least 4 chains should be
#             ## used and 2000 iterations might be required for reliable
#             ## inference.
#             seed=1, chains=2, iter=500)

## ---- results='hide', warning=FALSE, messages=FALSE---------------------------
#  refmodel <- get_refmodel(fit)

## ---- results='hide', messages=FALSE, warnings=FALSE--------------------------
#  vs <- cv_varsel(refmodel, method = "forward")

## ---- messages=FALSE, warnings=FALSE------------------------------------------
#  solution_terms(vs) # selection order of the variables

## ---- fig.width=5, fig.height=4-----------------------------------------------
#  plot(vs, stats = c('elpd', 'rmse'))

## ---- fig.width=5, fig.height=4-----------------------------------------------
#  # plot the validation results, this time relative to the full model
#  plot(vs, stats = c('elpd', 'rmse'), deltas = TRUE)

## ---- fig.width=6, fig.height=2-----------------------------------------------
#   # Visualise the three most relevant variables in the full model -->
#   mcmc_areas(as.matrix(refmodel$fit),
#              pars = c("b_Intercept", paste0("b_", solution_terms(vs)[1:3]),
#                       "sigma")) +
#     coord_cartesian(xlim = c(-2, 2))

## ---- fig.width=6, fig.height=2-----------------------------------------------
#  # Visualise the projected three most relevant variables
#  proj <- project(vs, nterms = 3, ns = 500)
#  mcmc_areas(as.matrix(proj)) +
#    coord_cartesian(xlim = c(-2, 2))

## -----------------------------------------------------------------------------
#  pred <- proj_linpred(vs, newdata = df_gaussian, nterms = 6, integrated = TRUE)

## ---- fig.width=5, fig.height=3-----------------------------------------------
#  ggplot() +
#    geom_point(aes(x = pred$pred,y = df_gaussian$y)) +
#    geom_abline(slope = 1, color = "red") +
#    labs(x = "prediction", y = "y")

## ---- fig.height=3, fig.width=5-----------------------------------------------
#  subset <- df_gaussian %>% dplyr::sample_n(1)
#  y_subset <- subset %>% dplyr::select(y)
#  y1_rep <- proj_predict(vs, newdata = subset, nterms = 6, seed = 7560)
#  qplot(as.vector(y1_rep), bins = 25) +
#    geom_vline(xintercept = as.numeric(y_subset), color = "red") +
#    xlab("y1_rep")