File: bpaggregate.Rd

package info (click to toggle)
r-bioc-biocparallel 1.40.0-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,768 kB
  • sloc: cpp: 139; sh: 14; makefile: 8
file content (107 lines) | stat: -rw-r--r-- 3,235 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
\name{bpaggregate}
\alias{bpaggregate}

\alias{bpaggregate,formula,BiocParallelParam-method}
\alias{bpaggregate,matrix,BiocParallelParam-method}
\alias{bpaggregate,data.frame,BiocParallelParam-method}
\alias{bpaggregate,ANY,missing-method}

\title{Apply a function on subsets of data frames}

\description{

    This is a parallel version of \code{\link[stats]{aggregate}}.

}

\usage{

\S4method{bpaggregate}{formula,BiocParallelParam}(x, data, FUN, ...,
    BPREDO=list(), BPPARAM=bpparam(), BPOPTIONS = bpoptions())

\S4method{bpaggregate}{data.frame,BiocParallelParam}(x, by, FUN, ...,
    simplify=TRUE, BPREDO=list(), BPPARAM=bpparam(), BPOPTIONS = bpoptions())

\S4method{bpaggregate}{matrix,BiocParallelParam}(x, by, FUN, ...,
    simplify=TRUE, BPREDO=list(),
    BPPARAM=bpparam(), BPOPTIONS = bpoptions()
)

\S4method{bpaggregate}{ANY,missing}(x, ..., BPREDO=list(),
    BPPARAM=bpparam(), BPOPTIONS = bpoptions()
)

}

\arguments{

  \item{x}{A \code{data.frame}, \code{matrix} or \code{formula}.
  }
  \item{by}{A list of factors by which \code{x} is split;
    applicable when \code{x} is a \code{data.frame} or \code{matrix}.
  }
  \item{data}{A \code{data.frame}; applicable when \code{x} is a
    \code{formula}.
  }
  \item{FUN}{Function to apply.
  }
  \item{...}{Additional arguments for \code{FUN}.
  }
  \item{simplify}{If set to \code{TRUE}, the return values of \code{FUN}
    will be simplified using \code{\link{simplify2array}}.
  }
  \item{BPPARAM}{An optional \code{\link{BiocParallelParam}} instance
    determining the parallel back-end to be used during evaluation.
  }
  \item{BPREDO}{A \code{list} of output from \code{bpaggregate} with one
    or more failed elements. When a list is given in \code{BPREDO},
    \code{bpok} is used to identify the failed elements; those tasks are
    rerun and their results are inserted into the original results.
  }
  \item{BPOPTIONS}{
    Additional options to control the behavior of the parallel evaluation, see \code{\link{bpoptions}}.
  }
}

\details{

  \code{bpaggregate} is a generic with methods for \code{data.frame},
  \code{matrix} and \code{formula} objects. \code{x} is divided
  into subsets according to factors in \code{by}. Data chunks are
  sent to the workers, \code{FUN} is applied and results are returned
  as a \code{data.frame}.

  The function is similar in spirit to \code{\link[stats]{aggregate}}
  from the stats package but \code{\link[stats]{aggregate}} is not
  explicitly called. The \code{bpaggregate} \code{formula} method
  reformulates the call and dispatches to the \code{data.frame} method
  which in turn distributes data chunks to workers with \code{bplapply}.

}

\value{

  See \code{\link[stats]{aggregate}}.

}

\author{
  Martin Morgan \url{mailto:mtmorgan@fhcrc.org}.
}

\examples{

if (interactive() && require(Rsamtools) && require(GenomicAlignments)) {

  fl <- system.file("extdata", "ex1.bam", package="Rsamtools")
  param <- ScanBamParam(what = c("flag", "mapq"))
  gal <- readGAlignments(fl, param=param)

  ## Report the mean map quality by range cutoff:
  cutoff <- rep(0, length(gal))
  cutoff[start(gal) > 1000 & start(gal) < 1500] <- 1
  cutoff[start(gal) > 1500] <- 2
  bpaggregate(as.data.frame(mcols(gal)$mapq), list(cutoff = cutoff), mean)

}
}