File: data_write.R

package info (click to toggle)
r-cran-datawizard 1.0.1%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,300 kB
  • sloc: sh: 13; makefile: 2
file content (276 lines) | stat: -rw-r--r-- 8,509 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
#' @param data The data frame that should be written to a file.
#' @param delimiter For CSV-files, specifies the delimiter. Defaults to `","`,
#'   but in particular in European regions, `";"` might be a useful alternative,
#'   especially when exported CSV-files should be opened in Excel.
#' @param save_labels Only applies to CSV files. If `TRUE`, value and variable
#'   labels (if any) will be saved as additional CSV file. This file has the same
#'   file name as the exported CSV file, but includes a `"_labels"` suffix (i.e.
#'   when the file name is `"mydat.csv"`, the additional file with value and
#'   variable labels is named `"mydat_labels.csv"`).
#' @rdname data_read
#' @export
data_write <- function(data,
                       path,
                       delimiter = ",",
                       convert_factors = FALSE,
                       save_labels = FALSE,
                       verbose = TRUE,
                       ...) {
  # check file type, so we know the target dta format
  file_type <- .file_ext(path)
  type <- switch(file_type,
    txt = ,
    csv = "csv",
    sav = ,
    por = "spss",
    zsav = "zspss",
    dta = "stata",
    xpt = "sas",
    "unknown"
  )

  # no file type provided?
  if (!is.character(file_type) || file_type == "") {
    insight::format_error(
      "Could not detect file type. The `path` argument has no file extension.",
      "Please provide a file path including extension, like \"myfile.csv\" or \"c:/Users/Default/myfile.sav\"."
    )
  }

  if (type %in% c("csv", "unknown")) {
    .write_csv_or_unknown(data, path, type, delimiter, convert_factors, save_labels, verbose, ...)
  } else {
    .write_haven(data, path, verbose, type, ...)
  }
}


# saving into CSV or unknown format -----

.write_csv_or_unknown <- function(data,
                                  path,
                                  type = "csv",
                                  delimiter = ",",
                                  convert_factors = FALSE,
                                  save_labels = FALSE,
                                  verbose = TRUE,
                                  ...) {
  # save labels
  if (save_labels && type == "csv") {
    data <- .save_labels_to_file(data, path, delimiter, verbose)
  }

  # this might make sense when writing labelled data to CSV
  if (convert_factors) {
    data <- .pre_process_exported_data(data, convert_factors)
  }

  if (type == "csv") {
    insight::check_if_installed("readr")
    if (delimiter == ",") {
      readr::write_csv(x = data, file = path, ...)
    } else {
      readr::write_csv2(x = data, file = path, ...)
    }
  } else {
    insight::check_if_installed("rio")
    rio::export(x = data, file = path, ...)
  }
}


# saving into haven format -----

.write_haven <- function(data, path, verbose = TRUE, type = "spss", ...) {
  insight::check_if_installed("haven")

  # check if user provided "compress" argument for SPSS files,
  # else, default to compression
  dots <- list(...)
  if (!is.null(dots$compress)) {
    compress <- dots$compress
  } else if (identical(type, "zspss")) {
    compress <- "zsav"
  } else {
    compress <- "byte"
  }

  # labelled data needs "labelled" class attributes
  data <- .set_haven_class_attributes(data, verbose)

  # fix invalid column names
  data <- .fix_column_names(data, verbose)

  if (type %in% c("spss", "zspss")) {
    # write to SPSS
    haven::write_sav(data = data, path = path, compress = compress)
  } else if (type == "stata") {
    # write to Stata
    haven::write_dta(data = data, path = path, ...)
  } else {
    # write to SAS
    haven::write_xpt(data = data, path = path, ...)
  }
}


# helper -------------------------------


# make sure we have the "labelled" class for labelled data
.set_haven_class_attributes <- function(x, verbose = TRUE) {
  insight::check_if_installed("haven")

  if (verbose) {
    insight::format_alert("Preparing data file: converting variable types.")
  }
  x[] <- lapply(x, function(i) {
    # make sure we have labelled class for labelled data
    value_labels <- attr(i, "labels", exact = TRUE)
    variable_label <- attr(i, "label", exact = TRUE)
    # factor requires special preparation to save levels as labels
    # haven:::vec_cast_named requires "x" and "labels" to be of same type
    if (is.factor(i)) {
      haven::labelled(
        x = as.numeric(i),
        labels = stats::setNames(seq_along(levels(i)), levels(i)),
        label = variable_label
      )
    } else if (!is.null(value_labels) || !is.null(variable_label)) {
      # character requires special preparation to save value labels
      # haven:::vec_cast_named requires "x" and "labels" to be of same type
      if (is.character(i)) {
        # only prepare value labels when these are not NULL
        if (!is.null(value_labels)) {
          value_labels <- stats::setNames(as.character(value_labels), names(value_labels))
        }
        haven::labelled(
          x = i,
          labels = value_labels,
          label = variable_label
        )
      } else {
        # this should work for the remaining types...
        haven::labelled(x = i, labels = value_labels, label = variable_label)
      }
    } else {
      # non labelled data can be saved "as is"
      i
    }
  })
  x
}


# packages like SPSS cannot deal with variable which names end with a dot
# fix column names here by added a "fix" suffix
.fix_column_names <- function(x, verbose = TRUE) {
  # check for correct column names
  dot_ends <- vapply(colnames(x), endsWith, FUN.VALUE = TRUE, suffix = ".")
  if (any(dot_ends)) {
    if (verbose) {
      insight::format_alert("Found and fixed invalid column names so they can be read by other software packages.")
    }
    colnames(x)[dot_ends] <- paste0(colnames(x)[dot_ends], "fix")
  }
  x
}


# save value and variable labels as addtional file
.save_labels_to_file <- function(x, path, delimiter, verbose = TRUE) {
  insight::check_if_installed("readr")

  # get file path information
  fpath <- dirname(path)
  fname <- sub("\\.csv$", "", basename(path))
  path <- paste0(fpath, .Platform$file.sep, fname, "_labels.csv")

  if (verbose) {
    insight::format_alert(
      paste0("Saving variable and value labels to \"", path, "\".")
    )
  }

  # extract labels
  var_labs <- vapply(x, function(i) {
    l <- attr(i, "label", exact = TRUE)
    if (is.null(l)) {
      l <- ""
    }
    l
  }, character(1))

  # extract value labels
  value_labs <- vapply(x, function(i) {
    l <- attr(i, "labels", exact = TRUE)
    if (is.null(l)) {
      ""
    } else {
      paste0(l, "=", names(l), collapse = "; ")
    }
  }, character(1))

  out <- data.frame(
    variable = colnames(x),
    label = var_labs,
    labels = value_labs,
    stringsAsFactors = FALSE
  )

  if (delimiter == ",") {
    readr::write_csv(x = out, file = path)
  } else {
    readr::write_csv2(x = out, file = path)
  }
}


# process data for export, use factor levels as data values -------------------

.pre_process_exported_data <- function(x, convert_factors) {
  # user may decide whether we automatically detect variable type or not
  if (isTRUE(convert_factors)) {
    x[] <- lapply(x, function(i) {
      # only proceed if not all missing
      if (!all(is.na(i))) {
        # save labels
        value_labels <- attr(i, "labels", exact = TRUE)
        variable_labels <- attr(i, "label", exact = TRUE)

        # filter, so only matching value labels remain
        value_labels <- value_labels[value_labels %in% unique(i)]

        # guess variable type
        if (is.character(i)) {
          # we need this to drop haven-specific class attributes
          i <- as.character(i)
        } else if (!is.null(value_labels) && length(value_labels) == insight::n_unique(i)) {
          # if all values are labelled, we assume factor. Use labels as levels
          if (is.numeric(i)) {
            i <- factor(i, labels = names(value_labels))
          } else {
            i <- factor(as.character(i), labels = names(value_labels))
          }
          i <- as.character(i)
        } else {
          # else, fall back to numeric
          i <- as.numeric(as.character(i))
        }
        # add back variable label
        attr(i, "label") <- variable_labels
      }
      i
    })
  } else {
    # drop haven class attributes
    x[] <- lapply(x, function(i) {
      # save labels
      class(i) <- setdiff(class(i), c("haven_labelled", "vctrs_vctr"))
      i
    })
  }

  class(x) <- "data.frame"
  x
}