1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
|
#' @param data The data frame that should be written to a file.
#' @param delimiter For CSV-files, specifies the delimiter. Defaults to `","`,
#' but in particular in European regions, `";"` might be a useful alternative,
#' especially when exported CSV-files should be opened in Excel.
#' @param save_labels Only applies to CSV files. If `TRUE`, value and variable
#' labels (if any) will be saved as additional CSV file. This file has the same
#' file name as the exported CSV file, but includes a `"_labels"` suffix (i.e.
#' when the file name is `"mydat.csv"`, the additional file with value and
#' variable labels is named `"mydat_labels.csv"`).
#' @rdname data_read
#' @export
data_write <- function(data,
path,
delimiter = ",",
convert_factors = FALSE,
save_labels = FALSE,
verbose = TRUE,
...) {
# check file type, so we know the target dta format
file_type <- .file_ext(path)
type <- switch(file_type,
txt = ,
csv = "csv",
sav = ,
por = "spss",
zsav = "zspss",
dta = "stata",
xpt = "sas",
"unknown"
)
# no file type provided?
if (!is.character(file_type) || file_type == "") {
insight::format_error(
"Could not detect file type. The `path` argument has no file extension.",
"Please provide a file path including extension, like \"myfile.csv\" or \"c:/Users/Default/myfile.sav\"."
)
}
if (type %in% c("csv", "unknown")) {
.write_csv_or_unknown(data, path, type, delimiter, convert_factors, save_labels, verbose, ...)
} else {
.write_haven(data, path, verbose, type, ...)
}
}
# saving into CSV or unknown format -----
.write_csv_or_unknown <- function(data,
path,
type = "csv",
delimiter = ",",
convert_factors = FALSE,
save_labels = FALSE,
verbose = TRUE,
...) {
# save labels
if (save_labels && type == "csv") {
data <- .save_labels_to_file(data, path, delimiter, verbose)
}
# this might make sense when writing labelled data to CSV
if (convert_factors) {
data <- .pre_process_exported_data(data, convert_factors)
}
if (type == "csv") {
insight::check_if_installed("readr")
if (delimiter == ",") {
readr::write_csv(x = data, file = path, ...)
} else {
readr::write_csv2(x = data, file = path, ...)
}
} else {
insight::check_if_installed("rio")
rio::export(x = data, file = path, ...)
}
}
# saving into haven format -----
.write_haven <- function(data, path, verbose = TRUE, type = "spss", ...) {
insight::check_if_installed("haven")
# check if user provided "compress" argument for SPSS files,
# else, default to compression
dots <- list(...)
if (!is.null(dots$compress)) {
compress <- dots$compress
} else if (identical(type, "zspss")) {
compress <- "zsav"
} else {
compress <- "byte"
}
# labelled data needs "labelled" class attributes
data <- .set_haven_class_attributes(data, verbose)
# fix invalid column names
data <- .fix_column_names(data, verbose)
if (type %in% c("spss", "zspss")) {
# write to SPSS
haven::write_sav(data = data, path = path, compress = compress)
} else if (type == "stata") {
# write to Stata
haven::write_dta(data = data, path = path, ...)
} else {
# write to SAS
haven::write_xpt(data = data, path = path, ...)
}
}
# helper -------------------------------
# make sure we have the "labelled" class for labelled data
.set_haven_class_attributes <- function(x, verbose = TRUE) {
insight::check_if_installed("haven")
if (verbose) {
insight::format_alert("Preparing data file: converting variable types.")
}
x[] <- lapply(x, function(i) {
# make sure we have labelled class for labelled data
value_labels <- attr(i, "labels", exact = TRUE)
variable_label <- attr(i, "label", exact = TRUE)
# factor requires special preparation to save levels as labels
# haven:::vec_cast_named requires "x" and "labels" to be of same type
if (is.factor(i)) {
haven::labelled(
x = as.numeric(i),
labels = stats::setNames(seq_along(levels(i)), levels(i)),
label = variable_label
)
} else if (!is.null(value_labels) || !is.null(variable_label)) {
# character requires special preparation to save value labels
# haven:::vec_cast_named requires "x" and "labels" to be of same type
if (is.character(i)) {
# only prepare value labels when these are not NULL
if (!is.null(value_labels)) {
value_labels <- stats::setNames(as.character(value_labels), names(value_labels))
}
haven::labelled(
x = i,
labels = value_labels,
label = variable_label
)
} else {
# this should work for the remaining types...
haven::labelled(x = i, labels = value_labels, label = variable_label)
}
} else {
# non labelled data can be saved "as is"
i
}
})
x
}
# packages like SPSS cannot deal with variable which names end with a dot
# fix column names here by added a "fix" suffix
.fix_column_names <- function(x, verbose = TRUE) {
# check for correct column names
dot_ends <- vapply(colnames(x), endsWith, FUN.VALUE = TRUE, suffix = ".")
if (any(dot_ends)) {
if (verbose) {
insight::format_alert("Found and fixed invalid column names so they can be read by other software packages.")
}
colnames(x)[dot_ends] <- paste0(colnames(x)[dot_ends], "fix")
}
x
}
# save value and variable labels as addtional file
.save_labels_to_file <- function(x, path, delimiter, verbose = TRUE) {
insight::check_if_installed("readr")
# get file path information
fpath <- dirname(path)
fname <- sub("\\.csv$", "", basename(path))
path <- paste0(fpath, .Platform$file.sep, fname, "_labels.csv")
if (verbose) {
insight::format_alert(
paste0("Saving variable and value labels to \"", path, "\".")
)
}
# extract labels
var_labs <- vapply(x, function(i) {
l <- attr(i, "label", exact = TRUE)
if (is.null(l)) {
l <- ""
}
l
}, character(1))
# extract value labels
value_labs <- vapply(x, function(i) {
l <- attr(i, "labels", exact = TRUE)
if (is.null(l)) {
""
} else {
paste0(l, "=", names(l), collapse = "; ")
}
}, character(1))
out <- data.frame(
variable = colnames(x),
label = var_labs,
labels = value_labs,
stringsAsFactors = FALSE
)
if (delimiter == ",") {
readr::write_csv(x = out, file = path)
} else {
readr::write_csv2(x = out, file = path)
}
}
# process data for export, use factor levels as data values -------------------
.pre_process_exported_data <- function(x, convert_factors) {
# user may decide whether we automatically detect variable type or not
if (isTRUE(convert_factors)) {
x[] <- lapply(x, function(i) {
# only proceed if not all missing
if (!all(is.na(i))) {
# save labels
value_labels <- attr(i, "labels", exact = TRUE)
variable_labels <- attr(i, "label", exact = TRUE)
# filter, so only matching value labels remain
value_labels <- value_labels[value_labels %in% unique(i)]
# guess variable type
if (is.character(i)) {
# we need this to drop haven-specific class attributes
i <- as.character(i)
} else if (!is.null(value_labels) && length(value_labels) == insight::n_unique(i)) {
# if all values are labelled, we assume factor. Use labels as levels
if (is.numeric(i)) {
i <- factor(i, labels = names(value_labels))
} else {
i <- factor(as.character(i), labels = names(value_labels))
}
i <- as.character(i)
} else {
# else, fall back to numeric
i <- as.numeric(as.character(i))
}
# add back variable label
attr(i, "label") <- variable_labels
}
i
})
} else {
# drop haven class attributes
x[] <- lapply(x, function(i) {
# save labels
class(i) <- setdiff(class(i), c("haven_labelled", "vctrs_vctr"))
i
})
}
class(x) <- "data.frame"
x
}
|