File: scran.R

package info (click to toggle)
r-bioc-scran 1.18.5%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 1,856 kB
  • sloc: cpp: 960; sh: 13; makefile: 2
file content (153 lines) | stat: -rw-r--r-- 5,849 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
## ---- echo=FALSE, results="hide", message=FALSE-------------------------------
require(knitr)
opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)

## ----setup, echo=FALSE, message=FALSE-----------------------------------------
library(scran)
library(BiocParallel)
register(SerialParam()) # avoid problems with fastMNN parallelization.
set.seed(100)

## -----------------------------------------------------------------------------
library(scRNAseq)
sce <- GrunPancreasData()
sce

## -----------------------------------------------------------------------------
library(scuttle)
qcstats <- perCellQCMetrics(sce)
qcfilter <- quickPerCellQC(qcstats, percent_subsets="altexps_ERCC_percent")
sce <- sce[,!qcfilter$discard]
summary(qcfilter$discard)

## -----------------------------------------------------------------------------
library(scran)
clusters <- quickCluster(sce)
sce <- computeSumFactors(sce, clusters=clusters)
summary(sizeFactors(sce))

## -----------------------------------------------------------------------------
sce2 <- computeSpikeFactors(sce, "ERCC")
summary(sizeFactors(sce2))

## -----------------------------------------------------------------------------
sce <- logNormCounts(sce)

## -----------------------------------------------------------------------------
dec <- modelGeneVar(sce)
plot(dec$mean, dec$total, xlab="Mean log-expression", ylab="Variance")
curve(metadata(dec)$trend(x), col="blue", add=TRUE)

## -----------------------------------------------------------------------------
dec2 <- modelGeneVarWithSpikes(sce, 'ERCC')
plot(dec2$mean, dec2$total, xlab="Mean log-expression", ylab="Variance")
points(metadata(dec2)$mean, metadata(dec2)$var, col="red")
curve(metadata(dec2)$trend(x), col="blue", add=TRUE)

## ---- fig.wide=TRUE, fig.asp=1.5----------------------------------------------
dec3 <- modelGeneVar(sce, block=sce$donor)
per.block <- dec3$per.block
par(mfrow=c(3, 2))
for (i in seq_along(per.block)) {
    decX <- per.block[[i]]
    plot(decX$mean, decX$total, xlab="Mean log-expression", 
        ylab="Variance", main=names(per.block)[i])
    curve(metadata(decX)$trend(x), col="blue", add=TRUE)
}

## -----------------------------------------------------------------------------
# Get the top 10% of genes.
top.hvgs <- getTopHVGs(dec, prop=0.1)

# Get the top 2000 genes.
top.hvgs2 <- getTopHVGs(dec, n=2000)

# Get all genes with positive biological components.
top.hvgs3 <- getTopHVGs(dec, var.threshold=0)

# Get all genes with FDR below 5%.
top.hvgs4 <- getTopHVGs(dec, fdr.threshold=0.05)

## -----------------------------------------------------------------------------
# Running the PCA with the 10% of HVGs.
library(scater)
sce <- runPCA(sce, subset_row=top.hvgs)
reducedDimNames(sce)

## -----------------------------------------------------------------------------
sced <- denoisePCA(sce, dec2, subset.row=getTopHVGs(dec2, prop=0.1))
ncol(reducedDim(sced, "PCA"))

## -----------------------------------------------------------------------------
output <- getClusteredPCs(reducedDim(sce))
npcs <- metadata(output)$chosen
reducedDim(sce, "PCAsub") <- reducedDim(sce, "PCA")[,1:npcs,drop=FALSE]
npcs

## -----------------------------------------------------------------------------
# In this case, using the PCs that we chose from getClusteredPCs().
g <- buildSNNGraph(sce, use.dimred="PCAsub")
cluster <- igraph::cluster_walktrap(g)$membership

# Assigning to the 'colLabels' of the 'sce'.
colLabels(sce) <- factor(cluster)
table(colLabels(sce))

## -----------------------------------------------------------------------------
sce <- runTSNE(sce, dimred="PCAsub")
plotTSNE(sce, colour_by="label", text_by="label")

## -----------------------------------------------------------------------------
ratio <- clusterModularity(g, cluster, as.ratio=TRUE)

library(pheatmap)
pheatmap(log10(ratio+1), cluster_cols=FALSE, cluster_rows=FALSE,
    col=rev(heat.colors(100)))

## -----------------------------------------------------------------------------
ass.prob <- bootstrapCluster(sce, FUN=function(x) {
    g <- buildSNNGraph(x, use.dimred="PCAsub")
    igraph::cluster_walktrap(g)$membership
}, clusters=sce$cluster)

pheatmap(ass.prob, cluster_cols=FALSE, cluster_rows=FALSE,
    col=colorRampPalette(c("white", "blue"))(100))

## -----------------------------------------------------------------------------
subout <- quickSubCluster(sce, groups=colLabels(sce))
table(metadata(subout)$subcluster) # formatted as '<parent>.<subcluster>'

## -----------------------------------------------------------------------------
# Uses clustering information from 'colLabels(sce)' by default:
markers <- findMarkers(sce)
markers[[1]][,1:3]

## -----------------------------------------------------------------------------
wmarkers <- findMarkers(sce, test.type="wilcox", direction="up", lfc=1)
wmarkers[[1]][,1:3]

## -----------------------------------------------------------------------------
markers <- findMarkers(sce, pval.type="all")
markers[[1]][,1:2]

## -----------------------------------------------------------------------------
# Using the first 200 HVs, which are the most interesting anyway.
# Also turning down the number of iterations for speed.
of.interest <- top.hvgs[1:200]
cor.pairs <- correlatePairs(sce, subset.row=of.interest, iters=1e5)
cor.pairs

## -----------------------------------------------------------------------------
cor.pairs2 <- correlatePairs(sce, subset.row=of.interest, 
    block=sce$donor, iters=1e5)

## -----------------------------------------------------------------------------
cor.genes <- correlateGenes(cor.pairs)
cor.genes

## -----------------------------------------------------------------------------
y <- convertTo(sce, type="edgeR")

## -----------------------------------------------------------------------------
sessionInfo()