1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
|
#' Create a per-cell data.frame
#'
#' Create a per-cell data.frame (i.e., where each row represents a cell) from a \linkS4class{SingleCellExperiment},
#' most typically for creating custom \pkg{ggplot2} plots.
#'
#' @param x A \linkS4class{SingleCellExperiment} object.
#' This is expected to have non-\code{NULL} row names.
#' @param features Character vector specifying the features for which to extract expression profiles across cells.
#' May also include features in alternative Experiments if permitted by \code{use.altexps}.
#' @param assay.type String or integer scalar indicating the assay to use to obtain expression values.
#' Must refer to a matrix-like object with integer or numeric values.
#' @param use.coldata Logical scalar indicating whether column metadata of \code{x} should be included.
#' Alternatively, a character or integer vector specifying the column metadata fields to use.
#' @param use.altexps Logical scalar indicating whether (meta)data should be extracted for alternative experiments in \code{x}.
#' Alternatively, a character or integer vector specifying the alternative experiments to use.
#' @param use.dimred Logical scalar indicating whether data should be extracted for dimensionality reduction results in \code{x}.
#' Alternatively, a character or integer vector specifying the dimensionality reduction results to use.
#' @param prefix.altexps Logical scalar indicating whether \code{\link{altExp}}-derived fields should be prefixed with the name of the alternative Experiment.
#' @param check.names Logical scalar indicating whether column names of the output should be made syntactically valid and unique.
#' @param swap.rownames String specifying the \code{\link{rowData}} column containing the \code{features}.
#' If \code{NULL}, \code{rownames(x)} is used.
#' @param exprs_values,use_dimred,use_altexps,prefix_altexps,check_names
#' Soft-deprecated equivalents of the arguments described above.
#'
#' @return A data.frame containing one field per aspect of data in \code{x} - see Details.
#' Each row corresponds to a cell (i.e., column) of \code{x}.
#'
#' @details
#' This function enables us to conveniently create a per-feature data.frame from a \linkS4class{SingleCellExperiment}.
#' Each row of the returned data.frame corresponds to a column in \code{x},
#' while each column of the data.frame corresponds to one aspect of the (meta)data in \code{x}.
#'
#' Columns are provided in the following order:
#' \enumerate{
#' \item Columns named according to the entries of \code{features} represent the expression values across cells for the specified feature in the \code{assay.type} assay.
#' \item Columns named according to the columns of \code{colData(x)} represent column metadata variables.
#' This consists of all variables if \code{use.coldata=TRUE}, no variables if \code{use.coldata=FALSE},
#' and only the specified variables if \code{use.coldata} is set to an integer or character vector.
#' \item Columns named in the format of \code{<DIM>.<NUM>} represent the \code{<NUM>}th dimension of the dimensionality reduction result \code{<DIM>}.
#' This is generated for all dimensionality reduction results if \code{use.dimred=TRUE}, none if \code{use.dimred=FALSE},
#' and only the specified results if \code{use.dimred}is set to an integer or character vector.
#' \item Columns named according to the row names of successive alternative Experiments,
#' representing the assay data in these objects.
#' These columns are only included if they are specified in \code{features} and if \code{use.altexps} is set.
#' Column names are prefixed with the name of the alternative Experiment if \code{prefix.altexps=TRUE}.
#' }
#'
#' By default, nothing is done to resolve syntactically invalid or duplicated column names.
#' \code{check_names=TRUE}, this is resolved by passing the column names through \code{\link{make.names}}.
#' Of course, as a result, some columns may not have the same names as the original fields in \code{x}.
#'
#' @author Aaron Lun
#'
#' @seealso
#' \code{\link{makePerFeatureDF}}, for the feature-level equivalent.
#'
#' @examples
#' sce <- mockSCE()
#' sce <- logNormCounts(sce)
#' reducedDim(sce, "PCA") <- matrix(rnorm(ncol(sce)*10), ncol=10) # made-up PCA.
#'
#' df <- makePerCellDF(sce, features="Gene_0001")
#' head(df)
#'
#' @export
#' @importFrom SingleCellExperiment colData reducedDims reducedDimNames altExps altExpNames
makePerCellDF <- function(x, features=NULL, assay.type="logcounts",
use.coldata=TRUE, use.dimred=TRUE, use.altexps=TRUE, prefix.altexps=FALSE, check.names=FALSE,
swap.rownames = NULL, exprs_values=NULL, use_dimred=NULL, use_altexps=NULL, prefix_altexps=NULL,
check_names=NULL)
{
use.dimred <- .replace(use.dimred, use_dimred)
use.altexps <- .replace(use.altexps, use_altexps)
assay.type <- .replace(assay.type, exprs_values)
prefix.altexps <- .replace(prefix.altexps, prefix_altexps)
check.names <- .replace(check.names, check_names)
# Initialize output list
output <- list()
# Collecting the column metadata.
use.coldata <- .use_names_to_integer_indices(use.coldata, x=x, nameFUN=function(x) colnames(colData(x)), msg="use.coldata")
if (length(use.coldata)) {
cd <- colData(x)[,use.coldata,drop=FALSE]
output <- c(output, list(as.data.frame(cd)))
}
# Collecting the reduced dimensions.
use.dimred <- .use_names_to_integer_indices(use.dimred, x=x, nameFUN=reducedDimNames, msg="use.dimred")
if (length(use.dimred)) {
all_reds <- reducedDims(x)[use.dimred]
red_vals <- vector("list", length(all_reds))
for (r in seq_along(red_vals)) {
curred <- data.frame(all_reds[[r]])
names(curred) <- sprintf("%s.%s", names(all_reds)[r], seq_len(ncol(curred)))
red_vals[[r]] <- curred
}
red_vals <- do.call(cbind, red_vals)
output <- c(output, list(red_vals))
}
# Collecting feature data from main and alternative Experiments.
output <- c(output, .harvest_se_by_column(x, features=features, assay.type=assay.type, swap.rownames = swap.rownames))
use.altexps <- .use_names_to_integer_indices(use.altexps, x=x, nameFUN=altExpNames, msg="use.altexps")
if (length(use.altexps)) {
all_alts <- altExps(x)[use.altexps]
alt_vals <- vector("list", length(all_alts))
for (a in seq_along(alt_vals)) {
curalt <- .harvest_se_by_column(all_alts[[a]], features=features, assay.type=assay.type, swap.rownames = swap.rownames)
if (prefix.altexps) {
colnames(curalt) <- sprintf("%s.%s", names(all_alts)[a], colnames(curalt))
}
alt_vals[[a]] <- curalt
}
alt_vals <- do.call(cbind, alt_vals)
output <- c(output, list(alt_vals))
}
# Checking the names.
output <- do.call(cbind, output)
if (check.names) {
colnames(output) <- make.names(colnames(output), unique=TRUE)
}
output
}
.empty_df_from_se <- function(x) {
data.frame(matrix(0, ncol(x), 0L), row.names=colnames(x))
}
#' @importFrom SummarizedExperiment assay rowData
#' @importFrom Matrix t
.harvest_se_by_column <- function(x, features, assay.type, swap.rownames) {
if (is.null(swap.rownames)) {
all.feats <- rownames(x)
} else if (swap.rownames %in% colnames(rowData(x))) {
all.feats <- rowData(x)[,swap.rownames]
} else {
# Avoid throwing an error for altexps if it doesn't have the swapped rowData.
return(.empty_df_from_se(x))
}
keep <- all.feats %in% features
if (any(keep)) {
curmat <- assay(x, assay.type, withDimnames=FALSE)[keep,,drop=FALSE]
curmat <- as.matrix(t(curmat))
assay_vals <- data.frame(curmat, row.names=colnames(x))
colnames(assay_vals) <- all.feats[keep]
assay_vals
} else {
# Avoid throwing an error for altexps if the feature doesn't even match.
.empty_df_from_se(x)
}
}
|