File: rd.R

package info (click to toggle)
delly 1.7.2-2
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 4,728 kB
  • sloc: cpp: 12,571; python: 133; makefile: 57; sh: 23
file content (77 lines) | stat: -rw-r--r-- 3,255 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
library(DNAcopy)
library(ggplot2)
library(scales)
library(gtable)
library(grid)

chrNamesLong = c("chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20", "chr21", "chr22", "chrX")
#chrNamesLong = c("chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20", "chr21", "chr22", "chrX", "chrY")
chrNamesShort = c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","X")
#chrNamesShort = c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","X","Y")

# Params
minCN = 0
sdUNDO = 1.5

# Parse coverage table
args = commandArgs(trailingOnly=TRUE)
x = read.table(args[1], header=T)

# Fix chromosome ordering
if (sum(x$chr %in% chrNamesLong) > sum(x$chr %in% chrNamesShort)) { chrs = chrNamesLong; } else { chrs = chrNamesShort; }
x = x[x$chr %in% chrs,]
x$chr = factor(x$chr, levels=chrs)

# Log-ratio values
x$pos = (x$start + x$end) / 2
baseCN = median(x[,6])
x$logratio = log2(x[,6] / baseCN)

# Segmentation
if (length(args)>1) {
 # Provided on the command-line
 seg = read.table(args[2], header=F, sep="\t")
 colnames(seg) = c("chr", "start", "end", "id", "cn")
} else {
 # Segment logR
 cnaData = CNA(x$logratio, maploc=x$pos, chrom=x$chr, sampleid="Sample", presorted=T)
 cnaSegments = segment(smooth.CNA(cnaData), undo.splits="sdundo", undo.SD=sdUNDO)
 seg = data.frame(segments.summary(cnaSegments))
 colnames(seg) = c("ID", "chr", "start", "end", "num.mark", "seg.mean", "seg.sd", "seg.median", "seg.mad")
 seg$cn = 2^seg$seg.median * baseCN
 seg$chr = factor(seg$chr, levels=chrs)
}

# Whole genome
maxCN = as.integer(max(x[,6])+1)
#maxCN = 8
p = ggplot(data=x, aes(x=start, y=x[,6]))
p = p + geom_point(pch=21, color="black", fill="black", size=0.5)
p = p + xlab("Chromosome")
p = p + ylab("Copy-number")
p = p + scale_x_continuous(labels=comma)
if (nrow(seg)) { p = p + geom_segment(data=seg, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); }
p = p + facet_grid(. ~ chr, scales="free_x", space="free_x")
p = p + scale_y_continuous(labels=comma, breaks = c(minCN:maxCN), limits=c(minCN, maxCN))
p = p + theme(axis.text.x = element_text(angle=45, hjust=1))
p = p + ggtitle(args[1])
ggsave(p, file="plot.wholegenome.png", width=24, height=6)
print(warnings())

# By chromosome
for(chrname in unique(x$chr)) {
 print(chrname)
 sub = x[x$chr == chrname,]
 sl = seg[seg$chr == chrname,]
 maxCN = as.integer(max(sub[,6])+1)
 p = ggplot(data=sub, aes(x=start, y=sub[,6]))
 p = p + geom_point(pch=21, color="black", fill="black", size=0.5)
 p = p + ylab("Copy-number") + xlab(chrname)
 p = p + scale_x_continuous(labels=comma, breaks = scales::pretty_breaks(n=20))
 p = p + scale_y_continuous(labels=comma, breaks = c(minCN:maxCN), limits=c(minCN, maxCN))
 if (nrow(sl)) { p = p + geom_segment(data=sl, aes(x=start, y=cn, xend=end, yend=cn), color="#31a354", size=1.2); }
 p = p + theme(axis.text.x = element_text(angle=45, hjust=1))
 p = p + ggtitle(args[1])
 ggsave(p, file=paste0("plot.", chrname, ".png"), width=24, height=6)
 print(warnings())
}