File: SAMseq.Rd

package info (click to toggle)
r-cran-samr 3.0-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye, forky, sid, trixie
  • size: 4,024 kB
  • sloc: fortran: 102; makefile: 7
file content (171 lines) | stat: -rwxr-xr-x 6,000 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
\name{SAMseq}
\alias{SAMseq}
\title{Significance analysis of sequencing data - simple user interface}
\description{
Correlates a large number of features (e.g. genes) with an outcome
variable, such as a group indicator, quantitative variable or survival time.
This is a simple user interface for the samr function applied to sequencing data.
For array data applications, see the function SAM.

}
\usage{
SAMseq(x, y, censoring.status = NULL, 
resp.type = c("Quantitative", "Two class unpaired", 
"Survival", "Multiclass", "Two class paired"), 
geneid = NULL, genenames = NULL, nperms = 100, 
random.seed = NULL, nresamp = 20, fdr.output = 0.20)
}
\arguments{
\item{x}{Feature matrix: p (number of features) by n (number of samples),
one observation per column (missing values allowed)}
\item{y}{n-vector of outcome measurements}
\item{censoring.status}{n-vector of censoring status (1 = died or event occurred, 0 = survived or event was censored), needed for a censored survival outcome}
\item{resp.type}{Problem type:
"Quantitative" for a continuous parameter;
"Two class unpaired" for two classes with unpaired observations; 
"Survival" for censored survival outcome; 
"Multiclass": more than 2 groups;  
"Two class paired" for two classes with paired observations.}
\item{geneid}{Optional character vector of geneids for output.}
\item{genenames}{Optional character vector of genenames for output.}
\item{nperms}{Number of permutations used to estimate false discovery rates}
\item{random.seed}{Optional initial seed for random number generator (integer)}
\item{nresamp}{Number of resamples used to construct test statistic. Default 20.}
\item{fdr.output}{(Approximate) False Discovery Rate cutoff for output in significant genes table}

}

\details{This is a simple, user-friendly interface to the samr package used on sequencing data.
It automatically disables arguments/features that do not apply to sequencing data.
It calls samr, samr.compute.delta.table and samr.compute.siggenes.table.
samr detects differential expression for microarray data, sequencing data,
and other data with a large number of features. samr is the R package
that is called by the "official" SAM Excel Addin.
The format of the response vector y and the calling sequence
is illustrated in the examples below. A more complete description
is given in the SAM manual
at \url{http://www-stat.stanford.edu/~tibs/SAM}}



\value{
A list with components
\item{samr.obj}{Output of samr. See documentation for samr for details }
\item{siggenes.table}{Table of significant genes, output of  samr.compute.siggenes.table.
This has components: genes.up---matrix of significant genes having positive correlation with the outcome and
genes.lo---matrix of significant genes having negative correlation with the outcome.
For survival data, genes.up are those genes having positive correlation with risk---
that is, increased expression corresponds to higher risk (shorter survival);
genes.lo are those whose increased expression corresponds to lower risk (longer survival).}
\item{delta.table}{Output of  samr.compute.delta.table.}
\item{del}{Value of delta (distance from 45 degree line in SAM plot)
used for creating delta.table and siggenes.table. Changing the input value fdr.output
will change the resulting del.}
\item{call}{The calling sequence}
}

\references{Tusher, V., Tibshirani, R. and Chu, G. (2001):
Significance analysis of microarrays applied to the ionizing radiation response. PNAS 2001 98: 5116-5121, (Apr 24).
\url{http://www-stat.stanford.edu/~tibs/SAM}

Li, Jun and Tibshirani, R. (2011). Finding consistent patterns: a nonparametric
approach for identifying differential expression in
RNA-Seq data. To appear, Statistical Methods in Medical Research.
}
\author{Jun Li and Balasubramanian Narasimhan and Robert Tibshirani}


\examples{

######### two class unpaired comparison
# Simulate a 1000 x 20 count matrix (features in rows, samples in columns)
# and compare samples 1-10 against samples 11-20.
set.seed(100)
# baseline Poisson mean of 100 for every feature/sample
mu <- matrix(100, 1000, 20)
# features 1-100 get mean 200 in samples 11-20 (the second class)
mu[1:100, 11:20] <- 200
# divide each column by its own factor drawn uniformly from [0.5, 1.5]
mu <- scale(mu, center=FALSE, scale=runif(20, 0.5, 1.5))
# draw Poisson counts with the means in mu
x <- matrix(rpois(length(mu), mu), 1000, 20)
# class labels: 1 for samples 1-10, 2 for samples 11-20
y <- c(rep(1, 10), rep(2, 10))

samfit <- SAMseq(x, y, resp.type = "Two class unpaired") 

# examine significant gene list
print(samfit)

# plot results
plot(samfit)

######### two class paired comparison
# Same simulated counts as the unpaired example, analysed as 10 pairs.
set.seed(100)
# baseline Poisson mean of 100 for every feature/sample
mu <- matrix(100, 1000, 20)
# features 1-100 get mean 200 in samples 11-20
mu[1:100, 11:20] <- 200
# divide each column by its own factor drawn uniformly from [0.5, 1.5]
mu <- scale(mu, center=FALSE, scale=runif(20, 0.5, 1.5))
# draw Poisson counts with the means in mu
x <- matrix(rpois(length(mu), mu), 1000, 20)
# paired labels: sample k is labelled -k and sample 10+k is labelled k,
# so the two members of a pair share the same absolute value
y <- c(-(1:10), 1:10)

samfit <- SAMseq(x, y, resp.type = "Two class paired") 

# examine significant gene list
print(samfit)

# plot results
plot(samfit)

######### Multiclass comparison
# Four classes of 5 samples each; a different block of features is
# shifted away from the baseline mean in each class.
set.seed(100)
# baseline Poisson mean of 100 for every feature/sample
mu <- matrix(100, 1000, 20)
# features 1-20: mean 120 in samples 1-5
mu[1:20, 1:5] <- 120
# features 21-50: mean 80 in samples 6-10
mu[21:50, 6:10] <- 80
# features 51-70: mean 150 in samples 11-15
mu[51:70, 11:15] <- 150
# features 71-100: mean 60 in samples 16-20
mu[71:100, 16:20] <- 60
# divide each column by its own factor drawn uniformly from [0.5, 1.5]
mu <- scale(mu, center=FALSE, scale=runif(20, 0.5, 1.5))
# draw Poisson counts with the means in mu
x <- matrix(rpois(length(mu), mu), 1000, 20)
# class labels 1,1,1,1,1,2,...,4: each of 1:4 repeated 5 times
y <- c(rep(1:4, rep(5, 4)))

samfit <- SAMseq(x, y, resp.type = "Multiclass") 

# examine significant gene list
print(samfit)

# plot results
plot(samfit)

######### Quantitative comparison
# Continuous outcome: the mean of the first 100 features is proportional
# to the outcome y of each sample.
set.seed(100)
# baseline Poisson mean of 100 for every feature/sample
mu <- matrix(100, 1000, 20)
# one outcome value per sample, uniform on [1, 3]
y <- runif(20, 1, 3)
# rows 1-100 are each set to 100 * y (filled by row, one value per sample)
mu[1 : 100, ] <- matrix(rep(100 * y, 100), ncol=20, byrow=TRUE)
# divide each column by its own factor drawn uniformly from [0.5, 1.5]
mu <- scale(mu, center=FALSE, scale=runif(20, 0.5, 1.5))
# draw Poisson counts with the means in mu
x <- matrix(rpois(length(mu), mu), 1000, 20)
samfit <- SAMseq(x, y, resp.type = "Quantitative")

# examine significant gene list
print(samfit)

# plot results
plot(samfit)

######### Survival comparison
# Censored survival outcome: counts are simulated as in the quantitative
# example, then y is perturbed and capped at 2.3 to create censoring.
set.seed(100)
# baseline Poisson mean of 100 for every feature/sample
mu <- matrix(100, 1000, 20)
# one value per sample, uniform on [1, 3]
y <- runif(20, 1, 3)
# rows 1-100 are each set to 100 * y (filled by row, one value per sample)
mu[1 : 100, ] <- matrix(rep(100 * y, 100), ncol=20, byrow=TRUE)
# divide each column by its own factor drawn uniformly from [0.5, 1.5]
mu <- scale(mu, center=FALSE, scale=runif(20, 0.5, 1.5))
# draw Poisson counts with the means in mu
x <- matrix(rpois(length(mu), mu), 1000, 20)
# add uniform noise on [-0.5, 0.5] to y
y <- y + runif(20, -0.5, 0.5)
# status is 1 (event occurred) when y is below 2.3, otherwise 0 (censored)
censoring.status <- as.numeric(y < 2.3)
# values at or above 2.3 are truncated to 2.3
y[y >= 2.3] <- 2.3
samfit <- SAMseq(x, y, censoring.status = censoring.status, 
resp.type = "Survival")

# examine significant gene list
print(samfit)

# plot results
plot(samfit)
}

\keyword{univar}% at least one, from doc/KEYWORDS
\keyword{survival}
\keyword{ts}
\keyword{nonparametric}