File: sjPlotPolynomials.R

package info (click to toggle)
r-cran-sjplot 2.8.17%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 1,596 kB
  • sloc: sh: 13; makefile: 2
file content (254 lines) | stat: -rw-r--r-- 11,228 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
#' @title Plot polynomials for (generalized) linear regression
#' @name sjp.poly
#'
#' @description This function plots a scatter plot of a term \code{poly.term}
#'                against a response variable \code{x} and adds - depending on
#'                the amount of numeric values in \code{poly.degree} - multiple
#'                polynomial curves. A loess-smoothed line can be added to see
#'                which of the polynomial curves fits best to the data.
#'
#' @param x A vector, representing the response variable of a linear (mixed) model; or
#'          a linear (mixed) model as returned by \code{\link{lm}} or \code{\link[lme4]{lmer}}.
#' @param poly.term If \code{x} is a vector, \code{poly.term} should also be a vector, representing
#'          the polynomial term (independent variabl) in the model; if \code{x} is a
#'          fitted model, \code{poly.term} should be the polynomial term's name as character string.
#'          See 'Examples'.
#' @param poly.degree Numeric, or numeric vector, indicating the degree of the polynomial.
#'          If \code{poly.degree} is a numeric vector, multiple polynomial curves for
#'          each degree are plotted. See 'Examples'.
#' @param poly.scale Logical, if \code{TRUE}, \code{poly.term} will be scaled before
#'          linear regression is computed. Default is \code{FALSE}. Scaling the polynomial
#'          term may have an impact on the resulting p-values.
#' @param fun Linear function when modelling polynomial terms. Use \code{fun = "lm"}
#'          for linear models, or \code{fun = "glm"} for generalized linear models.
#'          When \code{x} is not a vector, but a fitted model object, the function
#'          is detected automatically. If \code{x} is a vector, \code{fun} defaults
#'          to \code{"lm"}.
#' @param show.loess Logical, if \code{TRUE}, an additional loess-smoothed line is plotted.
#' @param show.loess.ci Logical, if \code{TRUE}, a confidence region for the loess-smoothed line
#'          will be plotted.
#' @param show.p Logical, if \code{TRUE} (default), p-values for polynomial terms are
#'          printed to the console.
#' @param loess.color Color of the loess-smoothed line. Only applies, if \code{show.loess = TRUE}.
#' @param show.scatter Logical, if TRUE (default), adds a scatter plot of data
#'    points to the plot.
#' @param point.alpha Alpha value of point-geoms in the scatter plots. Only
#'    applies, if \code{show.scatter = TRUE}.
#' @param point.color Color of of point-geoms in the scatter plots. Only applies,
#'    if \code{show.scatter = TRUE.}
#'
#' @return A ggplot-object.
#'
#'
#' @inheritParams plot_model
#' @inheritParams plot_scatter
#' @inheritParams plot_grpfrq
#'
#' @details For each polynomial degree, a simple linear regression on \code{x} (resp.
#'            the extracted response, if \code{x} is a fitted model) is performed,
#'            where only the polynomial term \code{poly.term} is included as independent variable.
#'            Thus, \code{lm(y ~ x + I(x^2) + ... + I(x^i))} is repeatedly computed
#'            for all values in \code{poly.degree}, and the predicted values of
#'            the reponse are plotted against the raw values of \code{poly.term}.
#'            If \code{x} is a fitted model, other covariates are ignored when
#'            finding the best fitting polynomial. \cr \cr
#'            This function evaluates raw polynomials, \emph{not orthogonal} polynomials.
#'            Polynomials are computed using the \code{\link{poly}} function,
#'            with argument \code{raw = TRUE}. \cr \cr
#'            To find out which polynomial degree fits best to the data, a loess-smoothed
#'            line (in dark grey) can be added (with \code{show.loess = TRUE}). The polynomial curves
#'            that comes closest to the loess-smoothed line should be the best
#'            fit to the data.
#'
#' @examples
#' library(sjmisc)
#' data(efc)
#' # linear fit. loess-smoothed line indicates a more
#' # or less cubic curve
#' sjp.poly(efc$c160age, efc$quol_5, 1)
#'
#' # quadratic fit
#' sjp.poly(efc$c160age, efc$quol_5, 2)
#'
#' # linear to cubic fit
#' sjp.poly(efc$c160age, efc$quol_5, 1:4, show.scatter = FALSE)
#'
#'
#' # fit sample model
#' fit <- lm(tot_sc_e ~ c12hour + e17age + e42dep, data = efc)
#' # inspect relationship between predictors and response
#' plot_model(fit, type = "slope")
#' # "e17age" does not seem to be linear correlated to response
#' # try to find appropiate polynomial. Grey line (loess smoothed)
#' # indicates best fit. Looks like x^4 has the best fit,
#' # however, only x^3 has significant p-values.
#' sjp.poly(fit, "e17age", 2:4, show.scatter = FALSE)
#'
#' \dontrun{
#' # fit new model
#' fit <- lm(tot_sc_e ~ c12hour + e42dep + e17age + I(e17age^2) + I(e17age^3),
#'           data = efc)
#' # plot marginal effects of polynomial term
#' plot_model(fit, type = "pred", terms = "e17age")}
#'
#' @import ggplot2
#' @importFrom scales grey_pal brewer_pal
#' @importFrom stats lm glm binomial predict poly
#' @importFrom graphics plot
#' @export
sjp.poly <- function(x,
                     poly.term,
                     poly.degree,
                     poly.scale = FALSE,
                     fun = NULL,
                     axis.title = NULL,
                     geom.colors = NULL,
                     geom.size = .8,
                     show.loess = TRUE,
                     show.loess.ci = TRUE,
                     show.p = TRUE,
                     show.scatter = TRUE,
                     point.alpha = .2,
                     point.color = "#404040",
                     loess.color = "#808080") {
  # --------------------------------------------
  # check color parameter
  # --------------------------------------------
  geom.colors <- col_check2(geom.colors, length(poly.degree))
  # --------------------------------------------
  # check poly.term parameter
  # --------------------------------------------
  if (is.character(poly.term))
    defv <- poly.term
  else
    defv <- get_var_name(deparse(substitute(poly.term)))
  # --------------------------------------------
  # parameter check: fitted model or variables?
  # --------------------------------------------
  if (!is.vector(x) && !is.numeric(x) && !is.factor(x)) {
    mf <- insight::get_data(x, verbose = FALSE)
    # retrieve response vector
    resp <- insight::get_response(x)
    # retrieve polynomial term
    poly.term <- mf[[poly.term]]
  } else {
    resp <- x
  }

  # --------------------------------------------
  # check for glm or lm
  # --------------------------------------------
  if (is.null(fun)) {
    if (inherits(x, c("glmerMod", "glm"))) {
      fun <- "glm"
    } else {
      fun <- "lm"
    }
  }
  # --------------------------------------------
  # retrieve labels
  # --------------------------------------------
  if (is.null(axis.title)) axis.title <- sjlabelled::get_label(poly.term, def.value = defv)
  axisTitle.y <- sjlabelled::get_label(resp, def.value = "Response")
  # --------------------------------------------
  # init data frame
  # --------------------------------------------
  plot.df <- data.frame()
  # scale polynomial term?
  if (poly.scale) poly.term <- scale(poly.term)
  # --------------------------------------------
  # get cutpoints for loess curve
  # --------------------------------------------
  # cutpoints <- get_loess_cutpoints(stats::na.omit(data.frame(x = poly.term, y = resp)))
  # --------------------------------------------
  # if user wants to plot multiple curves for
  # polynomials, create data frame for each curve here
  # --------------------------------------------
  for (i in poly.degree) {
    # poly-function can't cope with missings, so remove them here
    mydat <- stats::na.omit(data.frame(x = poly.term, y = resp))
    # fit model with polynomials
    if (fun == "lm")
      fit <- stats::lm(mydat$y ~ stats::poly(mydat$x, i, raw = TRUE))
    else
      fit <- stats::glm(mydat$y ~ stats::poly(mydat$x, i, raw = TRUE), family = stats::family(x))
    # check whether we have an integer poly.degree
    # or a float value
    poly.digit <- ifelse(i %% 1 == 0, 0, 1)
    # create data frame with raw data and the fitted poly-curve
    plot.df <- rbind(plot.df, cbind(mydat,
                                    stats::predict(fit),
                                    sprintf("x^%.*f", poly.digit, i)))
    # print p-values?
    if (show.p) {
      # get p-values
      pvals <- summary(fit)$coefficients[-1, 4]
      # prepare output string
      p.out <- sprintf("Polynomial degrees: %.*f\n---------------------\n", poly.digit, i)
      # iterate polynomial terms and print p-value for each polynom
      for (j in seq_len(i)) p.out <- paste0(p.out, sprintf("p(x^%i): %.3f\n", j, unname(pvals[j])))
      # add separator line after each model
      p.out <- paste0(p.out, "\n")
      # print p-values for fitted model
      cat(p.out)
    }
  }
  # name df
  colnames(plot.df) <- c("x","y", "pred", "grp")
  # create plot
  polyplot <- ggplot(plot.df, aes_string(x = "x", y = "y", colour = "grp"))
  # show scatter plot as well?
  if (show.scatter) polyplot <- polyplot +
    geom_jitter(colour = point.color, alpha = point.alpha, shape = 16)
  # show loess curve? this curve indicates the "perfect" curve through
  # the data
  if (show.loess) polyplot <- polyplot + stat_smooth(method = "loess",
                                                    color = loess.color,
                                                    se = show.loess.ci,
                                                    size = geom.size)
  # add curves for polynomials
  polyplot <- polyplot +
    geom_line(aes_string(y = "pred"), linewidth = geom.size) +
    scale_color_manual(values = geom.colors, labels = lapply(poly.degree, function(j) bquote(x^.(j)))) +
    labs(x = axis.title, y = axisTitle.y, colour = "Polynomial\ndegrees")

  polyplot
}


#' @importFrom stats loess predict
get_loess_cutpoints <- function(mydat) {
  # sort data frame by x-values
  mydat <- mydat[order(mydat$x), ]
  # fit loess
  fit <- stats::loess(y ~ x, mydat)
  # get predicted values
  preds <- unique(stats::predict(fit))
  xuni <- unique(mydat$x)
  # define counter
  cnt <- 1
  cutpoints <- c()
  xvals <- c()
  # initial direction for finding first cutpoint?
  direction <- ifelse(preds[cnt + 1] > preds[cnt], "up", "down")
  # "follow" path of loess line until cutpoint
  # then save value and change direction
  while (cnt < length(preds)) {
    if (direction == "up") {
      if (preds[cnt + 1] < preds[cnt]) {
        direction <- "down"
        cutpoints <- c(cutpoints, preds[cnt])
        xvals <- c(xvals, xuni[cnt])
      }
    } else {
      if (preds[cnt + 1] > preds[cnt]) {
        direction <- "up"
        cutpoints <- c(cutpoints, preds[cnt])
        xvals <- c(xvals, xuni[cnt])
      }
    }
    cnt <- cnt + 1
  }

  data.frame(cutpoint.x = xvals, cutpoint.y = cutpoints)
}