File: plot.statcheck.R

package info (click to toggle)
r-cran-statcheck 1.5.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 764 kB
  • sloc: makefile: 2
file content (260 lines) | stat: -rw-r--r-- 9,792 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
#' Plot method for statcheck
#' 
#' Function for plotting of \code{statcheck} objects. Reported p values are 
#' plotted against recalculated p values, which allows the user to easily spot 
#' if articles contain miscalculations of statistical results.
#' 
#' If APAstyle = FALSE, inconsistencies between the reported and the recalculated p value are indicated with an orange dot. Recalculations of the p value that render a previously non significant result (p >= .5) as significant (p < .05), and vice versa, are considered decision errors, and are indicated with a red dot. Exactly reported p values (i.e. p = ..., as opposed to p < ... or p > ...) are indicated with a diamond.
#' 
#' @section Acknowledgements:
#' Many thanks to John Sakaluk who adapted the plot code to create graphs in 
#' APA style.
#' 
#' @seealso \code{\link{statcheck}}
#' 
#' @param x A statcheck object. See \code{\link{statcheck}}.
#' @param alpha assumed level of significance in the scanned texts. Defaults to 
#' .05.
#' @param APAstyle If TRUE, prints plot in APA style.
#' @param group Indicate grouping variable to facet plot. Only works when 
#' \code{APAstyle==TRUE}
#' @param ... arguments to be passed to methods, such as graphical parameters 
#' (see \code{\link{par}}).
#' 
#' @examples 
#' # First we need a statcheck object
#' # Here, we create one by running statcheck on some raw text
#' 
#' txt <- "This test is consistent t(28) = 0.2, p = .84, but this one is 
#' inconsistent: F(2, 28) = 4.2, p = .01. This final test is even a
#' gross/decision inconsistency: z = 1.23, p = .03"
#' 
#' result <- statcheck(txt)
#' 
#' # We can then plot the statcheck object 'result' by simply calling plot() on 
#' # "result". R will know what kind of plot to make, because "result" is of 
#' # class "statcheck"
#' plot(result)
#' 
#' @importFrom ggplot2 theme theme_bw element_blank element_line ggplot aes 
#' geom_point geom_vline geom_hline geom_abline annotate scale_x_continuous
#' scale_y_continuous scale_color_manual facet_grid
#' @importFrom rlang .data
#' @importFrom graphics plot.default points abline text par legend
#' 
#' @export

plot.statcheck <- function(
  x,
  alpha = .05,
  APAstyle = TRUE,
  group = NULL,
  ...
){
  
  # replace 'ns' for > alpha
  ns <- x[[VAR_P_COMPARISON]] == "ns"
  x[[VAR_P_COMPARISON]][ns] <- ">"
  x[[VAR_REPORTED_P]][ns] <- alpha
  
  if (APAstyle == TRUE) {
    
    # Add vector "Type" to statcheck object, specifying whether observations are
    # correctly reported, reporting inconsistencies, or decision errors.
    # First create an empty variable for Type to avoid a NOTE in the R CMD Check
    # that there is "no visible binding for global variable"
    Type <- rep(NA, nrow(x))
    x <- cbind(x, Type)
    
    x$Type[x[[VAR_ERROR]] == "FALSE" &
             x[[VAR_DEC_ERROR]] == "FALSE"] <- "Correctly Reported"
    x$Type[x[[VAR_ERROR]] == "TRUE" &
             x[[VAR_DEC_ERROR]] == "FALSE"] <- "Reporting Inconsistency"
    x$Type[x[[VAR_ERROR]] == "TRUE" &
             x[[VAR_DEC_ERROR]] == "TRUE"] <- "Decision Error"
    
    #Create ggplot "APA format" theme
    apatheme <- theme_bw() +
      theme(
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        axis.line = element_line()
      )
    
    #If no grouping variable is specified, don't use faceting
    if (is.null(group)) {
      #Create plot "p"; map computed p-values to x-axis, reported p-values to y-axis, and
      #color to the Type variable created earlier. Environment command allows apatheme to
      #be applied later because of bug when creating functions with ggplot2
      p <- ggplot(x,
                  aes(y = .data[[VAR_COMPUTED_P]],
                      x = .data[[VAR_REPORTED_P]],
                      col = Type),
                  environment = environment())
      
      #Add data points to plot
      p + geom_point(size = 2.5) +
        #Add vertical grey dashed line, located at specified alpha level
        geom_vline(xintercept = alpha,
                   color = "grey60",
                   linetype = "dashed") +
        #Add horizontal grey dashed line, located at specified alpha level
        geom_hline(yintercept = alpha,
                   color = "grey60",
                   linetype = "dashed") +
        #Add a line showing where accurately reported p-values should fall
        geom_abline(intercept = 0,
                    slope = 1,
                    color = "grey60") +
        #Add text annotations demarcating over-/under-estimated areas of the plot
        annotate("text",
                 x = 0.5,
                 y = .10,
                 label = "overestimated") +
        annotate("text",
                 x = 0.5,
                 y = .90,
                 label = "underestimated") +
        #Rename the x- and y-axis, and manually specify breaks
        scale_x_continuous(
          name = "Reported p-values",
          breaks = c(0.00, 0.05, 0.10, 0.25, 0.50, 0.75, 1.0),
          limits = c(0, 1)
        ) +
        scale_y_continuous(
          name = "Computed p-values",
          breaks = c(0.00, 0.05, 0.10, 0.25, 0.50, 0.75, 1.0),
          limits = c(0, 1)
        ) +
        #Manually specify greyscale colors for different levels of Type
        scale_color_manual(
          breaks = c(
            "Correctly Reported",
            "Reporting Inconsistency",
            "Decision Error"
          ),
          values = c("grey80", "black", "grey50")
        ) +
        apatheme
      
    } else {
      #If grouping variable is specified, use for faceting
      
      #Create plot "p"; map computed p-values to x-axis, reported p-values to y-axis, and
      #color to the Type variable created earlier. Environment command allows apatheme to
      #be applied later because of bug when creating functions with ggplot2
      p <- ggplot(x,
                  aes(y = rlang::.data[[VAR_COMPUTED_P]],
                      x = rlang::.data[[VAR_REPORTED_P]],
                      col = Type),
                  environment = environment())
      
      #Add data points to plot
      p + geom_point(size = 2.5) +
        #Add vertical grey dashed line, located at specified alpha level
        geom_vline(xintercept = alpha,
                   color = "grey60",
                   linetype = "dashed") +
        #Add horizontal grey dashed line, located at specified alpha level
        geom_hline(yintercept = alpha,
                   color = "grey60",
                   linetype = "dashed") +
        #Add a line showing where accurately reported p-values should fall
        geom_abline(intercept = 0,
                    slope = 1,
                    color = "grey60") +
        #Add text annotations demarcating over-/under-estimated areas of the plot
        annotate("text",
                 x = 0.5,
                 y = .10,
                 label = "overestimated") +
        annotate("text",
                 x = 0.5,
                 y = .90,
                 label = "underestimated") +
        #Rename the x- and y-axis, and manually specify breaks
        scale_x_continuous(name = "Reported p-values",
                           breaks = c(0.00, 0.05, 0.10, 0.25, 0.50, 0.75, 1.0)) +
        scale_y_continuous(name = "Computed p-values",
                           breaks = c(0.00, 0.05, 0.10, 0.25, 0.50, 0.75, 1.0)) +
        #Manually specify greyscale colors for different levels of Type
        scale_color_manual(
          breaks = c(
            "Correctly Reported",
            "Reporting Inconsistency",
            "Decision Error"
          ),
          values = c("grey80", "black", "grey50")
        ) +
        facet_grid(stats::as.formula(paste(group, "~ ."))) +
        apatheme
    }
    
  } else {
    # Extract limit args:
    args <- list(...)
    if (is.null(args$xlim))
      args$xlim <- c(0, 1)
    if (is.null(args$ylim))
      args$ylim <- c(0, 1)
    
    reported <- x[[VAR_REPORTED_P]]
    computed <- x[[VAR_COMPUTED_P]]
    
    # replace 'ns' for > alpha
    reported[x[[VAR_P_COMPARISON]] == "ns"] <- alpha
    
    # scatterplot of reported and recalculated p values
    do.call(plot.default, c(
      list(
        x = reported,
        y = computed,
        xlab = "reported p value",
        ylab = "recalculated p value",
        pch = 20
      ),
      args
    ))
    
    # orange dot for error
    points(reported[x[[VAR_ERROR]]],
           computed[x[[VAR_ERROR]]],
           pch = 20, col = "orange")
    
    # red dot for gross error (non-sig reported as sig and vice versa)
    points(reported[x[[VAR_DEC_ERROR]]],
           computed[x[[VAR_DEC_ERROR]]],
           pch = 20, col = "red")
    
    # indicate exact p values with diamond
    points(x[[VAR_REPORTED_P]][x[[VAR_P_COMPARISON]] == "="],
           computed[x[[VAR_P_COMPARISON]] == "="],
           pch = 5)
    
    # general layout of figure:
    # lines & text to indicate under- and overestimates
    abline(h = .05)
    abline(v = .05)
    abline(0, 1)
    
    text(.8, .4, "overestimated")
    text(.4, .8, "underestimated")
    
    text(0, .53, "non-sig", cex = .7)
    text(0, .50, "reported", cex = .7)
    text(0, .47, "as sig", cex = .7)
    
    text(.5, 0, "sig reported as non-sig", cex = .7)
    
    par(xpd = TRUE)
    legend(
      .88,
      -.15,
      pch = c(20, 20, 5),
      col = c("orange", "red", "black"),
      legend = c("p inconsistency", "decision error", "exact (p = ...)"),
      cex = .8
    )
    par(xpd = FALSE)
  }
}