1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
|
\name{ssea.analyze.simulate}
\alias{ssea.analyze.simulate}
\title{
Simulate scores for MSEA
}
\description{
\code{\link{ssea.analyze.simulate}} simulates enrichment scores by
randomly permuting database with respect to the specified permutation
type (either gene-level or marker-level).
}
\usage{
ssea.analyze.simulate(db, observ, nperm, permtype, trim_start, trim_end)
}
\arguments{
\item{db}{
database including the indexed identities for modules, genes and markers
(e.g. loci): \preformatted{
modulesizes: gene counts for modules.
modulelengths: distinct marker counts for modules.
moduledensities: ratio between distinct and
non-distinct markers.
genesizes: marker count for each gene.
module2genes: gene lists for each module.
gene2loci: marker lists for each gene.
locus2row: row indices in the marker data frame for
each marker.
observed: matrix of observed counts of values that
exceed each quantile point for each marker.
expected: 1.0 - quantile points.
}
}
\item{observ}{
observed enrichment scores
}
\item{nperm}{
maximum nubmer of permutations (for simulation)
}
\item{permtype}{
permutation type (either gene or locus)
}
\item{trim_start}{percentile taken from the beginning for
trimming away a defined proportion of genes with significant trait
association to avoid signal inflation of null background in gene permutation.
Default value is 0.002.
}
\item{trim_end}{percentile taken from the ending point for
trimming away a defined proportion of genes with significant trait
association to avoid signal inflation of null background in gene permutation.
Default value is 0.998.
}
}
\value{
\item{scoresets }{simulated score lists for the statistically significant
modules}
}
\examples{
job.msea <- list()
job.msea$label <- "hdlc"
job.msea$folder <- "Results"
job.msea$genfile <- system.file("extdata",
"genes.hdlc_040kb_ld70.human_eliminated.txt", package="Mergeomics")
job.msea$marfile <- system.file("extdata",
"marker.hdlc_040kb_ld70.human_eliminated.txt", package="Mergeomics")
job.msea$modfile <- system.file("extdata",
"modules.mousecoexpr.liver.human.txt", package="Mergeomics")
job.msea$inffile <- system.file("extdata",
"coexpr.info.txt", package="Mergeomics")
job.msea$nperm <- 100 ## default value is 20000
## ssea.start() process takes long time while merging the genes sharing high
## amounts of markers (e.g. loci). it is performed with full module list in
## the vignettes. Here, we used a very subset of the module list (1st 10 mods
## from the original module file) and we collected the corresponding genes
## and markers belonging to these modules:
moddata <- tool.read(job.msea$modfile)
gendata <- tool.read(job.msea$genfile)
mardata <- tool.read(job.msea$marfile)
mod.names <- unique(moddata$MODULE)[1:min(length(unique(moddata$MODULE)),
10)]
moddata <- moddata[which(!is.na(match(moddata$MODULE, mod.names))),]
gendata <- gendata[which(!is.na(match(gendata$GENE,
unique(moddata$GENE)))),]
mardata <- mardata[which(!is.na(match(mardata$MARKER,
unique(gendata$MARKER)))),]
## save this to a temporary file and set its path as new job.msea$modfile:
tool.save(moddata, "subsetof.coexpr.modules.txt")
tool.save(gendata, "subsetof.genfile.txt")
tool.save(mardata, "subsetof.marfile.txt")
job.msea$modfile <- "subsetof.coexpr.modules.txt"
job.msea$genfile <- "subsetof.genfile.txt"
job.msea$marfile <- "subsetof.marfile.txt"
## run ssea.start() and prepare for this small set: (due to the huge runtime)
job.msea <- ssea.start(job.msea)
job.msea <- ssea.prepare(job.msea)
job.msea <- ssea.control(job.msea)
## Observed enrichment scores.
db <- job.msea$database
scores <- ssea.analyze.observe(db)
nmods <- length(scores)
## Simulated scores.
nperm <- job.msea$nperm
trim_start=0.002 # default
trim_end=1-trim_start
nullsets <- ssea.analyze.simulate(db, scores, nperm, job.msea$permtype,
trim_start, trim_end)
## Remove the temporary files used for the test:
file.remove("subsetof.coexpr.modules.txt")
file.remove("subsetof.genfile.txt")
file.remove("subsetof.marfile.txt")
}
\references{
Shu L, Zhao Y, Kurt Z, Byars SG, Tukiainen T, Kettunen J, Orozco LD,
Pellegrini M, Lusis AJ, Ripatti S, Zhang B, Inouye M, Makinen V-P, Yang X.
Mergeomics: multidimensional data integration to identify pathogenic
perturbations to biological systems. BMC genomics. 2016;17(1):874.
}
\author{
Ville-Petteri Makinen
}
\seealso{
\code{\link{ssea.analyze}}
}
|