File: chooseMissingPlaceholderForHdf5.R

package info (click to toggle)
r-bioc-alabaster.base 1.6.1%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 1,652 kB
  • sloc: cpp: 11,377; sh: 29; makefile: 2
file content (82 lines) | stat: -rw-r--r-- 2,812 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#' Choose a missing value placeholder
#'
#' In the \pkg{alabaster.*} framework, we mark missing entries inside HDF5 datasets with placeholder values.
#' This function chooses a value for the placeholder that does not overlap with anything else in a vector.
#'
#' @param x An atomic vector to be saved to HDF5.
#' @param .version Internal use only.
#' 
#' @return
#' A placeholder value for missing values in \code{x},
#' guaranteed to not be equal to any non-missing value in \code{x}.
#'
#' @details
#' For floating-point datasets, the placeholder will not be NA if there are mixtures of NAs and NaNs.
#' We do not rely on the NaN payload to distinguish between these two values.
#'
#' Placeholder values are typically saved as scalar attributes on the HDF5 dataset that they are used in.
#' The usual name of this attribute is \code{"missing-value-placeholder"}, as encoded by \code{missingPlaceholderName}.
#'
#' @examples
#' chooseMissingPlaceholderForHdf5(c(TRUE, NA, FALSE))
#' chooseMissingPlaceholderForHdf5(c(1L, NA, 2L))
#' chooseMissingPlaceholderForHdf5(c("aaron", NA, "barry"))
#' chooseMissingPlaceholderForHdf5(c("aaron", NA, "barry", "NA"))
#' chooseMissingPlaceholderForHdf5(c(1.5, NA, 2.6))
#' chooseMissingPlaceholderForHdf5(c(1.5, NaN, NA, 2.6))
#'
#' @aliases
#' missingPlaceholderName
#' addMissingPlaceholderAttributeForHdf5
#' .addMissingStringPlaceholderAttribute
#' .chooseMissingStringPlaceholder
#'
#' @export
chooseMissingPlaceholderForHdf5 <- function(x, .version=3) {
    # Each supported type is handled by its own guard clause; anything that is
    # not logical, character or double falls through to the generic NA case.

    # Logical vectors always get the integer -1.
    if (is.logical(x)) {
        return(-1L)
    }

    # Strings start from "NA" and gain leading underscores until the candidate
    # no longer collides with any value present in 'x'.
    if (is.character(x)) {
        taken <- unique(x)
        candidate <- "NA"
        while (candidate %in% taken) {
            candidate <- paste0("_", candidate)
        }
        return(candidate)
    }

    # Remaining non-double types use an NA of the same storage mode as 'x'.
    if (!is.double(x)) {
        return(as(NA, storage.mode(x)))
    }

    # Doubles: versions before 3 always use NA; later versions defer the choice
    # to choose_numeric_missing_placeholder().
    if (.version < 3) {
        NA_real_
    } else {
        choose_numeric_missing_placeholder(x)
    }
}

# Name of the HDF5 dataset attribute under which the missing value placeholder
# is written by addMissingPlaceholderAttributeForHdf5().
#' @export
missingPlaceholderName <- "missing-value-placeholder"

# Soft-deprecated back-compatibility fixes.

#' @export
addMissingPlaceholderAttributeForHdf5 <- function(file, name, placeholder) {
    # Writes 'placeholder' as a scalar attribute, named by 'missingPlaceholderName',
    # on the dataset 'name' inside 'file'.
    #
    # file: if this is a character value, it is opened with H5Fopen() and closed
    #     again when the function exits. Anything else is passed to H5Dopen()
    #     as-is and is not closed here.
    # name: passed to H5Dopen() to locate the dataset within 'file'.
    # placeholder: the value to store in the attribute.
    #
    # Returns whatever h5writeAttribute() returns.
    if (is.character(file)) {
        file <- H5Fopen(file)
        on.exit(H5Fclose(file), add=TRUE)
    }

    dhandle <- H5Dopen(file, name)
    # Exit handlers added with add=TRUE run in registration order, which would
    # close the file before the dataset opened from it. Registering this one
    # with after=FALSE puts it first, so the dataset handle is released before
    # its parent file.
    on.exit(H5Dclose(dhandle), add=TRUE, after=FALSE)

    h5writeAttribute(placeholder, h5obj=dhandle, name=missingPlaceholderName, asScalar=TRUE)
}

#' @export
.chooseMissingStringPlaceholder <- function(...) {
    # Back-compatibility alias: forwards every argument unchanged to
    # chooseMissingPlaceholderForHdf5().
    chooseMissingPlaceholderForHdf5(...)
}

#' @export
.addMissingStringPlaceholderAttribute <- function(...) {
    # Back-compatibility alias: forwards every argument unchanged to
    # addMissingPlaceholderAttributeForHdf5().
    addMissingPlaceholderAttributeForHdf5(...)
}