File: makePerCellDF.R

package info (click to toggle)
r-bioc-scuttle 1.8.4%2Bdfsg-1
links: PTS, VCS
area: main
in suites: bookworm
size: 888 kB
sloc: cpp: 508; sh: 7; makefile: 2
file content (159 lines) | stat: -rw-r--r-- 7,993 bytes
parent folder | download | duplicates (2)
#' Create a per-cell data.frame
#'
#' Create a per-cell data.frame (i.e., where each row represents a cell) from a \linkS4class{SingleCellExperiment},
#' most typically for creating custom \pkg{ggplot2} plots.
#'
#' @param x A \linkS4class{SingleCellExperiment} object.
#' This is expected to have non-\code{NULL} row names.
#' @param features Character vector specifying the features for which to extract expression profiles across cells.
#' May also include features in alternative Experiments if permitted by \code{use.altexps}.
#' @param assay.type String or integer scalar indicating the assay to use to obtain expression values.
#' Must refer to a matrix-like object with integer or numeric values.
#' @param use.coldata Logical scalar indicating whether column metadata of \code{x} should be included.
#' Alternatively, a character or integer vector specifying the column metadata fields to use.
#' @param use.altexps Logical scalar indicating whether (meta)data should be extracted for alternative experiments in \code{x}.
#' Alternatively, a character or integer vector specifying the alternative experiments to use.
#' @param use.dimred Logical scalar indicating whether data should be extracted for dimensionality reduction results in \code{x}.
#' Alternatively, a character or integer vector specifying the dimensionality reduction results to use.
#' @param prefix.altexps Logical scalar indicating whether \code{\link{altExp}}-derived fields should be prefixed with the name of the alternative Experiment.
#' @param check.names Logical scalar indicating whether column names of the output should be made syntactically valid and unique.
#' @param swap.rownames String specifying the \code{\link{rowData}} column containing the \code{features}.
#' If \code{NULL}, \code{rownames(x)} is used.
#' @param exprs_values,use_dimred,use_altexps,prefix_altexps,check_names
#' Soft-deprecated equivalents of the arguments described above.
#'
#' @return A data.frame containing one field per aspect of data in \code{x} - see Details.
#' Each row corresponds to a cell (i.e., column) of \code{x}.
#'
#' @details
#' This function enables us to conveniently create a per-feature data.frame from a \linkS4class{SingleCellExperiment}.
#' Each row of the returned data.frame corresponds to a column in \code{x},
#' while each column of the data.frame corresponds to one aspect of the (meta)data in \code{x}.
#'
#' Columns are provided in the following order:
#' \enumerate{
#' \item Columns named according to the entries of \code{features} represent the expression values across cells for the specified feature in the \code{assay.type} assay.
#' \item Columns named according to the columns of \code{colData(x)} represent column metadata variables.
#' This consists of all variables if \code{use.coldata=TRUE}, no variables if \code{use.coldata=FALSE},
#' and only the specified variables if \code{use.coldata} is set to an integer or character vector.
#' \item Columns named in the format of \code{<DIM>.<NUM>} represent the \code{<NUM>}th dimension of the dimensionality reduction result \code{<DIM>}.
#' This is generated for all dimensionality reduction results if \code{use.dimred=TRUE}, none if \code{use.dimred=FALSE},
#' and only the specified results if \code{use.dimred}is set to an integer or character vector.
#' \item Columns named according to the row names of successive alternative Experiments,
#' representing the assay data in these objects.
#' These columns are only included if they are specified in \code{features} and if \code{use.altexps} is set.
#' Column names are prefixed with the name of the alternative Experiment if \code{prefix.altexps=TRUE}.
#' }
#'
#' By default, nothing is done to resolve syntactically invalid or duplicated column names.
#' \code{check_names=TRUE}, this is resolved by passing the column names through \code{\link{make.names}}.
#' Of course, as a result, some columns may not have the same names as the original fields in \code{x}.
#'
#' @author Aaron Lun
#'
#' @seealso
#' \code{\link{makePerFeatureDF}}, for the feature-level equivalent.
#'
#' @examples
#' sce <- mockSCE()
#' sce <- logNormCounts(sce)
#' reducedDim(sce, "PCA") <- matrix(rnorm(ncol(sce)*10), ncol=10) # made-up PCA.
#'
#' df <- makePerCellDF(sce, features="Gene_0001")
#' head(df)
#'
#' @export
#' @importFrom SingleCellExperiment colData reducedDims reducedDimNames altExps altExpNames
makePerCellDF <- function(x, features=NULL, assay.type="logcounts",
    use.coldata=TRUE, use.dimred=TRUE, use.altexps=TRUE, prefix.altexps=FALSE, check.names=FALSE,
    swap.rownames = NULL, exprs_values=NULL, use_dimred=NULL, use_altexps=NULL, prefix_altexps=NULL,
    check_names=NULL)
{
    use.dimred <- .replace(use.dimred, use_dimred)
    use.altexps <- .replace(use.altexps, use_altexps)
    assay.type <- .replace(assay.type, exprs_values)
    prefix.altexps <- .replace(prefix.altexps, prefix_altexps)
    check.names <- .replace(check.names, check_names)

    # Initialize output list
    output <- list()

    # Collecting the column metadata.
    use.coldata <- .use_names_to_integer_indices(use.coldata, x=x, nameFUN=function(x) colnames(colData(x)), msg="use.coldata")
    if (length(use.coldata)) {
        cd <- colData(x)[,use.coldata,drop=FALSE]
        output <- c(output, list(as.data.frame(cd)))
    }

    # Collecting the reduced dimensions.
    use.dimred <- .use_names_to_integer_indices(use.dimred, x=x, nameFUN=reducedDimNames, msg="use.dimred")
    if (length(use.dimred)) {
        all_reds <- reducedDims(x)[use.dimred]
        red_vals <- vector("list", length(all_reds))

        for (r in seq_along(red_vals)) {
            curred <- data.frame(all_reds[[r]])
            names(curred) <- sprintf("%s.%s", names(all_reds)[r], seq_len(ncol(curred)))
            red_vals[[r]] <- curred
        }

        red_vals <- do.call(cbind, red_vals)
        output <- c(output, list(red_vals))
    }

    # Collecting feature data from main and alternative Experiments.
    output <- c(output, .harvest_se_by_column(x, features=features, assay.type=assay.type, swap.rownames = swap.rownames))

    use.altexps <- .use_names_to_integer_indices(use.altexps, x=x, nameFUN=altExpNames, msg="use.altexps")
    if (length(use.altexps)) {
        all_alts <- altExps(x)[use.altexps]
        alt_vals <- vector("list", length(all_alts))

        for (a in seq_along(alt_vals)) {
            curalt <- .harvest_se_by_column(all_alts[[a]], features=features, assay.type=assay.type, swap.rownames = swap.rownames)
            if (prefix.altexps) {
                colnames(curalt) <- sprintf("%s.%s", names(all_alts)[a], colnames(curalt))
            }
            alt_vals[[a]] <- curalt
        }

        alt_vals <- do.call(cbind, alt_vals)
        output <- c(output, list(alt_vals))
    }

    # Checking the names.
    output <- do.call(cbind, output)
    if (check.names) {
        colnames(output) <- make.names(colnames(output), unique=TRUE)
    }
    output
}

.empty_df_from_se <- function(x) {
    data.frame(matrix(0, ncol(x), 0L), row.names=colnames(x))
}

#' @importFrom SummarizedExperiment assay rowData
#' @importFrom Matrix t
.harvest_se_by_column <- function(x, features, assay.type, swap.rownames) {
    if (is.null(swap.rownames)) {
        all.feats <- rownames(x)
    } else if (swap.rownames %in% colnames(rowData(x))) {
        all.feats <- rowData(x)[,swap.rownames]
    } else {
        # Avoid throwing an error for altexps if it doesn't have the swapped rowData.
        return(.empty_df_from_se(x))
    }

    keep <- all.feats %in% features
    if (any(keep)) {
        curmat <- assay(x, assay.type, withDimnames=FALSE)[keep,,drop=FALSE]
        curmat <- as.matrix(t(curmat))
        assay_vals <- data.frame(curmat, row.names=colnames(x))
        colnames(assay_vals) <- all.feats[keep]
        assay_vals
    } else {
        # Avoid throwing an error for altexps if the feature doesn't even match.
        .empty_df_from_se(x)
    }
}