File: makePerCellDF.R

package info (click to toggle)
r-bioc-scuttle 1.8.4%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 888 kB
  • sloc: cpp: 508; sh: 7; makefile: 2
file content (159 lines) | stat: -rw-r--r-- 7,993 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#' Create a per-cell data.frame
#'
#' Create a per-cell data.frame (i.e., where each row represents a cell) from a \linkS4class{SingleCellExperiment},
#' most typically for creating custom \pkg{ggplot2} plots.
#'
#' @param x A \linkS4class{SingleCellExperiment} object.
#' This is expected to have non-\code{NULL} row names.
#' @param features Character vector specifying the features for which to extract expression profiles across cells.
#' May also include features in alternative Experiments if permitted by \code{use.altexps}.
#' @param assay.type String or integer scalar indicating the assay to use to obtain expression values.
#' Must refer to a matrix-like object with integer or numeric values.
#' @param use.coldata Logical scalar indicating whether column metadata of \code{x} should be included.
#' Alternatively, a character or integer vector specifying the column metadata fields to use.
#' @param use.altexps Logical scalar indicating whether (meta)data should be extracted for alternative experiments in \code{x}.
#' Alternatively, a character or integer vector specifying the alternative experiments to use.
#' @param use.dimred Logical scalar indicating whether data should be extracted for dimensionality reduction results in \code{x}.
#' Alternatively, a character or integer vector specifying the dimensionality reduction results to use.
#' @param prefix.altexps Logical scalar indicating whether \code{\link{altExp}}-derived fields should be prefixed with the name of the alternative Experiment.
#' @param check.names Logical scalar indicating whether column names of the output should be made syntactically valid and unique.
#' @param swap.rownames String specifying the \code{\link{rowData}} column containing the \code{features}.
#' If \code{NULL}, \code{rownames(x)} is used.
#' @param exprs_values,use_dimred,use_altexps,prefix_altexps,check_names
#' Soft-deprecated equivalents of the arguments described above.
#'
#' @return A data.frame containing one field per aspect of data in \code{x} - see Details.
#' Each row corresponds to a cell (i.e., column) of \code{x}.
#'
#' @details
#' This function enables us to conveniently create a per-feature data.frame from a \linkS4class{SingleCellExperiment}.
#' Each row of the returned data.frame corresponds to a column in \code{x},
#' while each column of the data.frame corresponds to one aspect of the (meta)data in \code{x}.
#'
#' Columns are provided in the following order:
#' \enumerate{
#' \item Columns named according to the entries of \code{features} represent the expression values across cells for the specified feature in the \code{assay.type} assay.
#' \item Columns named according to the columns of \code{colData(x)} represent column metadata variables.
#' This consists of all variables if \code{use.coldata=TRUE}, no variables if \code{use.coldata=FALSE},
#' and only the specified variables if \code{use.coldata} is set to an integer or character vector.
#' \item Columns named in the format of \code{<DIM>.<NUM>} represent the \code{<NUM>}th dimension of the dimensionality reduction result \code{<DIM>}.
#' This is generated for all dimensionality reduction results if \code{use.dimred=TRUE}, none if \code{use.dimred=FALSE},
#' and only the specified results if \code{use.dimred}is set to an integer or character vector.
#' \item Columns named according to the row names of successive alternative Experiments,
#' representing the assay data in these objects.
#' These columns are only included if they are specified in \code{features} and if \code{use.altexps} is set.
#' Column names are prefixed with the name of the alternative Experiment if \code{prefix.altexps=TRUE}.
#' }
#'
#' By default, nothing is done to resolve syntactically invalid or duplicated column names.
#' \code{check_names=TRUE}, this is resolved by passing the column names through \code{\link{make.names}}.
#' Of course, as a result, some columns may not have the same names as the original fields in \code{x}.
#'
#' @author Aaron Lun
#'
#' @seealso
#' \code{\link{makePerFeatureDF}}, for the feature-level equivalent.
#'
#' @examples
#' sce <- mockSCE()
#' sce <- logNormCounts(sce)
#' reducedDim(sce, "PCA") <- matrix(rnorm(ncol(sce)*10), ncol=10) # made-up PCA.
#'
#' df <- makePerCellDF(sce, features="Gene_0001")
#' head(df)
#'
#' @export
#' @importFrom SingleCellExperiment colData reducedDims reducedDimNames altExps altExpNames
makePerCellDF <- function(x, features=NULL, assay.type="logcounts",
    use.coldata=TRUE, use.dimred=TRUE, use.altexps=TRUE, prefix.altexps=FALSE, check.names=FALSE,
    swap.rownames = NULL, exprs_values=NULL, use_dimred=NULL, use_altexps=NULL, prefix_altexps=NULL,
    check_names=NULL)
{
    use.dimred <- .replace(use.dimred, use_dimred)
    use.altexps <- .replace(use.altexps, use_altexps)
    assay.type <- .replace(assay.type, exprs_values)
    prefix.altexps <- .replace(prefix.altexps, prefix_altexps)
    check.names <- .replace(check.names, check_names)

    # Initialize output list
    output <- list()

    # Collecting the column metadata.
    use.coldata <- .use_names_to_integer_indices(use.coldata, x=x, nameFUN=function(x) colnames(colData(x)), msg="use.coldata")
    if (length(use.coldata)) {
        cd <- colData(x)[,use.coldata,drop=FALSE]
        output <- c(output, list(as.data.frame(cd)))
    }

    # Collecting the reduced dimensions.
    use.dimred <- .use_names_to_integer_indices(use.dimred, x=x, nameFUN=reducedDimNames, msg="use.dimred")
    if (length(use.dimred)) {
        all_reds <- reducedDims(x)[use.dimred]
        red_vals <- vector("list", length(all_reds))

        for (r in seq_along(red_vals)) {
            curred <- data.frame(all_reds[[r]])
            names(curred) <- sprintf("%s.%s", names(all_reds)[r], seq_len(ncol(curred)))
            red_vals[[r]] <- curred
        }

        red_vals <- do.call(cbind, red_vals)
        output <- c(output, list(red_vals))
    }

    # Collecting feature data from main and alternative Experiments.
    output <- c(output, .harvest_se_by_column(x, features=features, assay.type=assay.type, swap.rownames = swap.rownames))

    use.altexps <- .use_names_to_integer_indices(use.altexps, x=x, nameFUN=altExpNames, msg="use.altexps")
    if (length(use.altexps)) {
        all_alts <- altExps(x)[use.altexps]
        alt_vals <- vector("list", length(all_alts))

        for (a in seq_along(alt_vals)) {
            curalt <- .harvest_se_by_column(all_alts[[a]], features=features, assay.type=assay.type, swap.rownames = swap.rownames)
            if (prefix.altexps) {
                colnames(curalt) <- sprintf("%s.%s", names(all_alts)[a], colnames(curalt))
            }
            alt_vals[[a]] <- curalt
        }

        alt_vals <- do.call(cbind, alt_vals)
        output <- c(output, list(alt_vals))
    }

    # Checking the names.
    output <- do.call(cbind, output)
    if (check.names) {
        colnames(output) <- make.names(colnames(output), unique=TRUE)
    }
    output
}

.empty_df_from_se <- function(x) {
    data.frame(matrix(0, ncol(x), 0L), row.names=colnames(x))
}

#' @importFrom SummarizedExperiment assay rowData
#' @importFrom Matrix t
.harvest_se_by_column <- function(x, features, assay.type, swap.rownames) {
    if (is.null(swap.rownames)) {
        all.feats <- rownames(x)
    } else if (swap.rownames %in% colnames(rowData(x))) {
        all.feats <- rowData(x)[,swap.rownames]
    } else {
        # Avoid throwing an error for altexps if it doesn't have the swapped rowData.
        return(.empty_df_from_se(x))
    }

    keep <- all.feats %in% features
    if (any(keep)) {
        curmat <- assay(x, assay.type, withDimnames=FALSE)[keep,,drop=FALSE]
        curmat <- as.matrix(t(curmat))
        assay_vals <- data.frame(curmat, row.names=colnames(x))
        colnames(assay_vals) <- all.feats[keep]
        assay_vals
    } else {
        # Avoid throwing an error for altexps if the feature doesn't even match.
        .empty_df_from_se(x)
    }
}