1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
|
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/DeMixT.R
\name{DeMixT}
\alias{DeMixT}
\title{Deconvolution of heterogeneous tumor samples with two or three
components using expression data from RNAseq or microarray platforms}
\usage{
DeMixT(
data.Y,
data.N1,
data.N2 = NULL,
niter = 10,
nbin = 50,
if.filter = TRUE,
filter.sd = 0.5,
ngene.selected.for.pi = NA,
mean.diff.in.CM = 0.25,
nspikein = min(200, ceiling(ncol(data.Y) * 0.3)),
gene.selection.method = "GS",
ngene.Profile.selected = NA,
tol = 10^(-5),
output.more.info = FALSE,
pi01 = NULL,
pi02 = NULL,
nthread = parallel::detectCores() - 1
)
}
\arguments{
\item{data.Y}{A SummarizedExperiment object of expression data from mixed
tumor samples. It is a \eqn{G} by \eqn{My} matrix where \eqn{G} is the
number of genes and \eqn{My} is the number of mixed samples. Samples with
the same tissue type should be placed together in columns.}
\item{data.N1}{A SummarizedExperiment object of expression data from
reference component 1 (e.g., normal). It is a \eqn{G} by \eqn{M1} matrix
where \eqn{G} is the number of genes and \eqn{M1} is the number of samples
for component 1.}
\item{data.N2}{A SummarizedExperiment object of expression data from
additional reference samples. It is a \eqn{G} by \eqn{M2} matrix where
\eqn{G} is the number of genes and \eqn{M2} is the number of samples for
component 2. Component 2 is needed only for running a three-component model.}
\item{niter}{The maximum number of iterations used in the algorithm of
iterated conditional modes. A larger value better guarantees the convergence
in estimation but increases the running time. The default is 10.}
\item{nbin}{The number of bins used in numerical integration for computing
complete likelihood. A larger value increases accuracy in estimation but
increases the running time, especially in a three-component deconvolution
problem. The default is 50.}
\item{if.filter}{The logical flag indicating whether a predetermined filter
rule is used to select genes for proportion estimation. The default is TRUE.}
\item{filter.sd}{The cut-off for the standard deviation of lognormal
distribution. Genes whose log-transformed standard deviation is smaller than
the cut-off will be selected into the model. The default is 0.5.}
\item{ngene.selected.for.pi}{The percentage or the number of genes used for
proportion estimation. The differences between the expression levels from
mixed tumor samples and the known component(s) are evaluated, and the most
differentially expressed genes are selected, which is called DE. It is enabled
when if.filter = TRUE. The default is \eqn{min(1500, 0.3*G)}, where \eqn{G}
is the number of genes. Users can also try using more genes, ranging from
\eqn{0.3*G} to \eqn{0.5*G}, and evaluate the outcome.}
\item{mean.diff.in.CM}{Threshold of expression difference for selecting
genes in the component merging strategy. We merge three-component to
two-component by selecting genes with similar expressions for the two known
components. Genes with the mean differences less than the threshold will be
selected for component merging. It is used in the three-component setting,
and is enabled when if.filter = TRUE. The default is 0.25.}
\item{nspikein}{The number of spikes in normal reference used for proportion
estimation. The default value is \eqn{min(200, 0.3*My)}, where \eqn{My} is the
number of mixed samples. If it is set to 0, proportion estimation is
performed without any spike in normal reference.}
\item{gene.selection.method}{The method of gene selection used for
proportion estimation. The default method is 'GS', which applies a profile
likelihood based method for gene selection. If it is set to 'DE', the most
differentially expressed genes are selected.}
\item{ngene.Profile.selected}{The number of genes used for proportion
estimation ranked by profile likelihood. The default is
\eqn{min(1500,0.1*G)}, where \eqn{G} is the number of genes. This is
enabled only when gene.selection.method is set to 'GS'.}
\item{tol}{The convergence criterion. The default is 10^(-5).}
\item{output.more.info}{The logical flag indicating whether to show the
estimated proportions in each iteration in the output.}
\item{pi01}{Initialized proportion for the first known component. The default
is \code{NULL} and pi01 will be generated randomly from a uniform distribution.}
\item{pi02}{Initialized proportion for the second known component. pi02 is
needed only for running a three-component model. The default is \code{NULL}
and pi02 will be generated randomly from a uniform distribution.}
\item{nthread}{The number of threads used for deconvolution when OpenMP is
available in the system. The default is the number of whole threads minus
one. In our no-OpenMP version, it is set to 1.}
}
\value{
\item{pi}{A matrix of estimated proportions. The first row and second row
correspond to the proportion estimates for the known components and unknown
component, respectively, for two or three component settings, and each column
corresponds to one sample.}
\item{pi.iter}{Estimated proportions in each iteration. It is a \eqn{niter*
My*p} array, where \eqn{p} is the number of components. This is
enabled only when output.more.info = TRUE.}
\item{ExprT}{A matrix of deconvolved expression profiles corresponding to
T-component in mixed samples for a given subset of genes. Each row
corresponds to one gene and each column corresponds to one sample.}
\item{ExprN1}{A matrix of deconvolved expression profiles corresponding to
N1-component in mixed samples for a given subset of genes. Each row
corresponds to one gene and each column corresponds to one sample.}
\item{ExprN2}{A matrix of deconvolved expression profiles corresponding to
N2-component in mixed samples for a given subset of genes in a
three-component setting. Each row corresponds to one gene and each
column corresponds to one sample.}
\item{Mu}{A matrix of estimated \eqn{Mu} of log2-normal distribution for
both known (\eqn{MuN1, MuN2}) and unknown component (\eqn{MuT}). Each row
corresponds to one gene.}
\item{Sigma}{Estimated \eqn{Sigma} of log2-normal distribution for both
known (\eqn{SigmaN1, SigmaN2}) and unknown component (\eqn{SigmaT}). Each
row corresponds to one gene.}
\item{gene.name}{The names of genes used in estimating the proportions.
If no gene names are provided in the original data set, the genes will be
automatically indexed.}
\item{pi}{A matrix of estimated proportions. The first row and second row
correspond to the proportion estimates for the known components and unknown
component, respectively, for two or three component settings, and each column
corresponds to one sample.} \item{pi.iter}{Estimated proportions in each
iteration. It is a \eqn{niter* My*p} array, where \eqn{p} is the number of
components. This is enabled only when output.more.info = TRUE.}
\item{ExprT}{A matrix of deconvolved expression profiles corresponding to
T-component in mixed samples for a given subset of genes. Each row
corresponds to one gene and each column corresponds to one sample.}
\item{ExprN1}{A matrix of deconvolved expression profiles corresponding to
N1-component in mixed samples for a given subset of genes. Each row
corresponds to one gene and each column corresponds to one sample.}
\item{ExprN2}{A matrix of deconvolved expression profiles corresponding to
N2-component in mixed samples for a given subset of genes in a
three-component setting. Each row corresponds to one gene and each column
corresponds to one sample.} \item{Mu}{A matrix of estimated \eqn{Mu} of
log2-normal distribution for both known (\eqn{MuN1, MuN2}) and unknown
component (\eqn{MuT}). Each row corresponds to one gene.}
\item{Sigma}{Estimated \eqn{Sigma} of log2-normal distribution for both
known (\eqn{SigmaN1, SigmaN2}) and unknown component (\eqn{SigmaT}). Each
row corresponds to one gene.} \item{gene.name}{The names of genes used in
estimating the proportions. If no gene names are provided in the original
data set, the genes will be automatically indexed.}
}
\description{
DeMixT is a software that performs deconvolution on transcriptome
data from a mixture of two or three components.
}
\examples{
# Example 1: simulated two-component data by using GS(gene selection method)
data(test.data.2comp)
# res <- DeMixT(data.Y = test.data.2comp$data.Y,
# data.N1 = test.data.2comp$data.N1,
# data.N2 = NULL, nspikein = 50,
# gene.selection.method = 'GS',
# niter = 10, nbin = 50, if.filter = TRUE,
# ngene.selected.for.pi = 150,
# mean.diff.in.CM = 0.25, tol = 10^(-5))
# res$pi
# head(res$ExprT, 3)
# head(res$ExprN1, 3)
# head(res$Mu, 3)
# head(res$Sigma, 3)
#
# Example 2: simulated two-component data by using DE(gene selection method)
# data(test.data.2comp)
# res <- DeMixT(data.Y = test.data.2comp$data.Y,
# data.N1 = test.data.2comp$data.N1,
# data.N2 = NULL, nspikein = 50,
# gene.selection.method = 'DE',
# niter = 10, nbin = 50, if.filter = TRUE,
# ngene.selected.for.pi = 150,
# mean.diff.in.CM = 0.25, tol = 10^(-5))
#
# Example 3: three-component mixed cell line data applying
# component merging strategy
# data(test.data.3comp)
# res <- DeMixT(data.Y = test.data.3comp$data.Y,
# data.N1 = test.data.3comp$data.N1,
# data.N2 = test.data.3comp$data.N2,
# if.filter = TRUE)
#
# Example: convert a matrix into the SummarizedExperiment format
# library(SummarizedExperiment)
# example <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 2, ncol = 3, byrow = TRUE)
# example.se <- SummarizedExperiment(assays = list(counts = example))
# Example 1: simulated two-component data by using GS(gene selection method)
data(test.data.2comp)
# res <- DeMixT(data.Y = test.data.2comp$data.Y,
# data.N1 = test.data.2comp$data.N1,
# data.N2 = NULL, nspikein = 50,
# gene.selection.method = 'GS',
# niter = 10, nbin = 50, if.filter = TRUE,
# ngene.selected.for.pi = 150,
# mean.diff.in.CM = 0.25, tol = 10^(-5))
# res$pi
# head(res$ExprT, 3)
# head(res$ExprN1, 3)
# head(res$Mu, 3)
# head(res$Sigma, 3)
#
# Example 2: simulated two-component data by using DE(gene selection method)
# data(test.data.2comp)
# res <- DeMixT(data.Y = test.data.2comp$data.Y,
# data.N1 = test.data.2comp$data.N1,
# data.N2 = NULL, nspikein = 50,
# gene.selection.method = 'DE',
# niter = 10, nbin = 50, if.filter = TRUE,
# ngene.selected.for.pi = 150,
# mean.diff.in.CM = 0.25, tol = 10^(-5))
#
# Example 3: three-component mixed cell line data applying
# component merging strategy
# data(test.data.3comp)
# res <- DeMixT(data.Y = test.data.3comp$data.Y,
# data.N1 = test.data.3comp$data.N1,
# data.N2 = test.data.3comp$data.N2,
# if.filter = TRUE)
#
# Example: convert a matrix into the SummarizedExperiment format
# library(SummarizedExperiment)
# example <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 2, ncol = 3, byrow = TRUE)
# example.se <- SummarizedExperiment(assays = list(counts = example))
}
\references{
Wang Z, Cao S, Morris J S, et al. Transcriptome Deconvolution of
Heterogeneous Tumor Samples with Immune Infiltration. iScience, 2018, 9: 451-460.
Wang Z, Cao S, Morris J S, et al. Transcriptome Deconvolution of
Heterogeneous Tumor Samples with Immune Infiltration. iScience, 2018, 9:
451-460.
}
\seealso{
http://bioinformatics.mdanderson.org/main/DeMixT
http://bioinformatics.mdanderson.org/main/DeMixT
}
\author{
Zeya Wang, Wenyi Wang
Zeya Wang, Wenyi Wang
}
\keyword{DeMixT}
|