File: aggregateAcrossFeatures.R

package info (click to toggle)
r-bioc-scuttle 1.0.4%2Bdfsg-5
links: PTS, VCS
area: main
in suites: bullseye
size: 728 kB
sloc: cpp: 356; sh: 17; makefile: 2
file content (66 lines) | stat: -rw-r--r-- 2,650 bytes
parent folder | download | duplicates (3)
#' Aggregate feature sets in a SummarizedExperiment
#' 
#' Sum together expression values (by default, counts) for each feature set 
#' in each cell of a \linkS4class{SummarizedExperiment} object.
#'
#' @param x A \linkS4class{SummarizedExperiment} containing an expression matrix.
#' @inheritParams sumCountsAcrossFeatures
#' @param ... Further arguments to be passed to \code{sumCountsAcrossFeatures}.
#' @param use.assay.type A character or integer vector specifying the assay(s) of \code{x} containing expression matrices.
#' @param use_exprs_values Soft-deprecated equivalent of \code{use.assay.type}.
#'
#' @return 
#' A SummarizedExperiment of the same class as \code{x} is returned,
#' containing summed matrices generated by \code{sumCountsAcrossFeatures} on all assays in \code{use.assay.type}.
#' 
#' If \code{ids} is a factor, row metadata is retained for the first instance of a feature from each set in \code{ids}.
#' This behavior assumes that \code{ids} specifies duplicates of the same gene, such that the first instance is a reasonable choice.
#'
#' If \code{ids} is a list, row metadata is simply discarded.
#' This behavior assumes that \code{ids} specifies gene sets such that any existing gene-level metadata is meaningless.
#'
#' @seealso
#' \code{\link{sumCountsAcrossFeatures}}, which does the heavy lifting.
#'
#' @author Aaron Lun
#'
#' @examples
#' example_sce <- mockSCE()
#' ids <- sample(LETTERS, nrow(example_sce), replace=TRUE)
#' aggr <- aggregateAcrossFeatures(example_sce, ids)
#' aggr
#'
#' @export
#' @importFrom SummarizedExperiment assays<- rowData rowData<- rowRanges<- assayNames
#' @importFrom GenomicRanges GRangesList GRanges
aggregateAcrossFeatures <- function(x, ids, ..., use.assay.type="counts", use_exprs_values=NULL) {
    use.assay.type <- .replace(use.assay.type, use_exprs_values)

    collected <- list()
    for (i in seq_along(use.assay.type)) {
        collected[[i]] <- sumCountsAcrossFeatures(x, ids=ids, ..., exprs_values=use.assay.type[i])
    }

    if (is.numeric(use.assay.type)) {
        names(collected) <- assayNames(x)[use.assay.type]
    } else {
        names(collected) <- use.assay.type
    }

    if (is.list(ids)) {
        x <- x[rep(1L, nrow(collected[[1]])),]

        # Wiping out the row metadata.
        if (is(x, "RangedSummarizedExperiment")) {
            rowRanges(x) <- rep(GRangesList(GRanges()), nrow(x))
        } else {
            rowData(x) <- rowData(x)[,0]
        }
    } else {
        x <- x[match(rownames(collected[[1]]), as.character(ids)),]
    }

    assays(x, withDimnames=FALSE) <- collected
    rownames(x) <- rownames(collected[[1]])
    x
}