File: DeMixT.Rd

package info (click to toggle)
r-bioc-demixt 1.14.0%2Bdfsg-1
links: PTS, VCS
area: main
in suites: bookworm
size: 4,300 kB
sloc: ansic: 1,591; cpp: 1,209; makefile: 5; sh: 4
file content (260 lines) | stat: -rw-r--r-- 11,796 bytes
parent folder | download | duplicates (2)
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/DeMixT.R
\name{DeMixT}
\alias{DeMixT}
\title{Deconvolution of heterogeneous tumor samples with two or three 
components using expression data from RNAseq or microarray platforms}
\usage{
DeMixT(
  data.Y,
  data.N1,
  data.N2 = NULL,
  niter = 10,
  nbin = 50,
  if.filter = TRUE,
  filter.sd = 0.5,
  ngene.selected.for.pi = NA,
  mean.diff.in.CM = 0.25,
  nspikein = min(200, ceiling(ncol(data.Y) * 0.3)),
  gene.selection.method = "GS",
  ngene.Profile.selected = NA,
  tol = 10^(-5),
  output.more.info = FALSE,
  pi01 = NULL,
  pi02 = NULL,
  nthread = parallel::detectCores() - 1
)
}
\arguments{
\item{data.Y}{A SummarizedExperiment object of expression data from mixed
tumor samples. It is a \eqn{G} by \eqn{My} matrix where \eqn{G} is the
number of genes and \eqn{My} is the number of mixed samples. Samples with
the same tissue type should be placed together in columns.}

\item{data.N1}{A SummarizedExperiment object of expression data from
reference component 1 (e.g., normal). It is a \eqn{G} by \eqn{M1} matrix
where \eqn{G} is the number of genes and \eqn{M1} is the number of samples
for component 1.}

\item{data.N2}{A SummarizedExperiment object of expression data from
additional reference samples. It is a \eqn{G} by \eqn{M2} matrix where
\eqn{G} is the number of genes and \eqn{M2} is the number of samples for
component 2. Component 2 is needed only for running a three-component model.}

\item{niter}{The maximum number of iterations used in the algorithm of
iterated conditional modes. A larger value better guarantees the convergence
in estimation but increases the running time. The default is 10.}

\item{nbin}{The number of bins used in numerical integration for computing
complete likelihood. A larger value increases accuracy in estimation but
increases the running time, especially in a three-component deconvolution
problem. The default is 50.}

\item{if.filter}{The logical flag indicating whether a predetermined filter
rule is used to select genes for proportion estimation. The default is TRUE.}

\item{filter.sd}{The cut-off for the standard deviation of lognormal
distribution. Genes whose log transferred standard deviation smaller than
the cut-off will be selected into the model. The default is 0.5.}

\item{ngene.selected.for.pi}{The percentage or the number of genes used for
proportion estimation. The difference between the expression levels from
mixed tumor samples and the known component(s) are evaluated, and the most
differential expressed genes are selected, which is called DE. It is enabled
when if.filter = TRUE. The default is \eqn{min(1500, 0.3*G)}, where \eqn{G}
is the number of genes. Users can also try using more genes, ranging from
\eqn{0.3*G} to \eqn{0.5*G}, and evaluate the outcome.}

\item{mean.diff.in.CM}{Threshold of expression difference for selecting
genes in the component merging strategy. We merge three-component to
two-component by selecting genes with similar expressions for the two known
components. Genes with the mean differences less than the threshold will be
selected for component merging. It is used in the three-component setting,
and is enabled when if.filter = TRUE. The default is 0.25.}

\item{nspikein}{The number of spikes in normal reference used for proportion
estimation. The default value is \eqn{ min(200, 0.3*My)}, where \eqn{My} the
number of mixed samples. If it is set to 0, proportion estimation is
performed without any spike in normal reference.}

\item{gene.selection.method}{The method of gene selection used for
proportion estimation. The default method is 'GS', which applies a profile
likelihood based method for gene selection. If it is set to 'DE', the most
differential expressed genes are selected.}

\item{ngene.Profile.selected}{The number of genes used for proportion
estimation ranked by profile likelihood. The default is
\eqn{min(1500,0.1*G)}, where \eqn{G} is the number of genes.  This is
enabled only when gene.selection.method is set to 'GS'.}

\item{tol}{The convergence criterion. The default is 10^(-5).}

\item{output.more.info}{The logical flag indicating whether to show the
estimated proportions in each iteration in the output.}

\item{pi01}{Initialized proportion for first kown component. The default is
\eqn{Null} and pi01 will be generated randomly from uniform distribution.}

\item{pi02}{Initialized proportion for second kown component. pi02 is needed
only for running a three-component model. The default is \eqn{Null} and pi02
will be generated randomly from uniform distribution.}

\item{nthread}{The number of threads used for deconvolution when OpenMP is
available in the system. The default is the number of whole threads minus
one. In our no-OpenMP version, it is set to 1.}
}
\value{
\item{pi}{A matrix of estimated proportion. First row and second row 
corresponds to the proportion estimate for the known components and unkown 
component respectively for two or three component settings, and each column 
corresponds to one sample.}
\item{pi.iter}{Estimated proportions in each iteration. It is a \eqn{niter*
My*p} array, where \eqn{p} is the number of components. This is 
enabled only when output.more.info = TRUE.}
\item{ExprT}{A matrix of deconvolved expression profiles corresponding to 
T-component in mixed samples for a given subset of genes. Each row 
corresponds to one gene and each column corresponds to one sample.}  
\item{ExprN1}{A matrix of deconvolved expression profiles corresponding to 
N1-component in mixed samples for a given subset of genes. Each row 
corresponds to one gene and each column corresponds to one sample.} 
\item{ExprN2}{A matrix of deconvolved expression profiles corresponding to 
N2-component in mixed samples for a given subset of genes in a 
three-component setting. Each row corresponds to one gene and each 
column corresponds to one sample.}  
\item{Mu}{A matrix of estimated \eqn{Mu} of log2-normal distribution for 
both known (\eqn{MuN1, MuN2}) and unknown component (\eqn{MuT}). Each row 
corresponds to one gene.} 
\item{Sigma}{Estimated \eqn{Sigma} of log2-normal distribution for both 
known (\eqn{SigmaN1, SigmaN2}) and unknown component (\eqn{SigmaT}). Each 
row corresponds to one gene.}
\item{gene.name}{The names of genes used in estimating the proportions. 
If no gene names are provided in the original data set, the genes will be
automatically indexed.}

\item{pi}{A matrix of estimated proportion. First row and second row
corresponds to the proportion estimate for the known components and unkown
component respectively for two or three component settings, and each column
corresponds to one sample.} \item{pi.iter}{Estimated proportions in each
iteration. It is a \eqn{niter* My*p} array, where \eqn{p} is the number of
components. This is enabled only when output.more.info = TRUE.}
\item{ExprT}{A matrix of deconvolved expression profiles corresponding to
T-component in mixed samples for a given subset of genes. Each row
corresponds to one gene and each column corresponds to one sample.}
\item{ExprN1}{A matrix of deconvolved expression profiles corresponding to
N1-component in mixed samples for a given subset of genes. Each row
corresponds to one gene and each column corresponds to one sample.}
\item{ExprN2}{A matrix of deconvolved expression profiles corresponding to
N2-component in mixed samples for a given subset of genes in a
three-component setting. Each row corresponds to one gene and each column
corresponds to one sample.} \item{Mu}{A matrix of estimated \eqn{Mu} of
log2-normal distribution for both known (\eqn{MuN1, MuN2}) and unknown
component (\eqn{MuT}). Each row corresponds to one gene.}
\item{Sigma}{Estimated \eqn{Sigma} of log2-normal distribution for both
known (\eqn{SigmaN1, SigmaN2}) and unknown component (\eqn{SigmaT}). Each
row corresponds to one gene.} \item{gene.name}{The names of genes used in
estimating the proportions.  If no gene names are provided in the original
data set, the genes will be automatically indexed.}
}
\description{
DeMixT is a software that performs deconvolution on transcriptome
data from a mixture of two or three components.
}
\examples{
# Example 1: simulated two-component data by using GS(gene selection method)
  data(test.data.2comp)
# res <- DeMixT(data.Y = test.data.2comp$data.Y, 
#               data.N1 = test.data.2comp$data.N1, 
#               data.N2 = NULL, nspikein = 50, 
#               gene.selection.method = 'GS',
#               niter = 10, nbin = 50, if.filter = TRUE, 
#               ngene.selected.for.pi = 150,
#               mean.diff.in.CM = 0.25, tol = 10^(-5))
# res$pi
# head(res$ExprT, 3)
# head(res$ExprN1, 3)
# head(res$Mu, 3)
# head(res$Sigma, 3)
# 
# Example 2: simulated two-component data by using DE(gene selection method)
# data(test.data.2comp)
# res <- DeMixT(data.Y = test.data.2comp$data.Y,
#               data.N1 = test.data.2comp$data.N1, 
#               data.N2 = NULL, nspikein = 50, g
#               ene.selection.method = 'DE',
#               niter = 10, nbin = 50, if.filter = TRUE, 
#               ngene.selected.for.pi = 150,
#               mean.diff.in.CM = 0.25, tol = 10^(-5))
#
# Example 3: three-component mixed cell line data applying 
# component merging strategy
# data(test.data.3comp)
# res <- DeMixT(data.Y = test.data.3comp$data.Y, 
#               data.N1 = test.data.3comp$data.N1,
#               data.N2 = test.data.3comp$data.N2, 
#               if.filter = TRUE)
#
# Example: convert a matrix into the SummarizedExperiment format
# library(SummarizedExperiment)
# example <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 2, ncol = 3, byrow = TRUE)
# example.se <- SummarizedExperiment(assays = list(counts = example))


# Example 1: simulated two-component data by using GS(gene selection method)
  data(test.data.2comp)
# res <- DeMixT(data.Y = test.data.2comp$data.Y, 
#               data.N1 = test.data.2comp$data.N1, 
#               data.N2 = NULL, nspikein = 50, 
#               gene.selection.method = 'GS',
#               niter = 10, nbin = 50, if.filter = TRUE, 
#               ngene.selected.for.pi = 150,
#               mean.diff.in.CM = 0.25, tol = 10^(-5))
# res$pi
# head(res$ExprT, 3)
# head(res$ExprN1, 3)
# head(res$Mu, 3)
# head(res$Sigma, 3)
# 
# Example 2: simulated two-component data by using DE(gene selection method)
# data(test.data.2comp)
# res <- DeMixT(data.Y = test.data.2comp$data.Y,
#               data.N1 = test.data.2comp$data.N1, 
#               data.N2 = NULL, nspikein = 50, g
#               ene.selection.method = 'DE',
#               niter = 10, nbin = 50, if.filter = TRUE, 
#               ngene.selected.for.pi = 150,
#               mean.diff.in.CM = 0.25, tol = 10^(-5))
#
# Example 3: three-component mixed cell line data applying 
# component merging strategy
# data(test.data.3comp)
# res <- DeMixT(data.Y = test.data.3comp$data.Y, 
#               data.N1 = test.data.3comp$data.N1,
#               data.N2 = test.data.3comp$data.N2, 
#               if.filter = TRUE)
#
# Example: convert a matrix into the SummarizedExperiment format
# library(SummarizedExperiment)
# example <- matrix(c(1, 2, 3, 4, 5, 6), nrow = 2, ncol = 3, byrow = TRUE)
# example.se <- SummarizedExperiment(assays = list(counts = example))


}
\references{
Wang Z, Cao S, Morris J S, et al. Transcriptome Deconvolution of 
Heterogeneous Tumor Samples with Immune Infiltration. iScience, 2018, 9: 451-460.

Wang Z, Cao S, Morris J S, et al. Transcriptome Deconvolution of
Heterogeneous Tumor Samples with Immune Infiltration. iScience, 2018, 9:
451-460.
}
\seealso{
http://bioinformatics.mdanderson.org/main/DeMixT

http://bioinformatics.mdanderson.org/main/DeMixT
}
\author{
Zeya Wang, Wenyi Wang

Zeya Wang, Wenyi Wang
}
\keyword{DeMixT}