File: expression_analysis_lib.R

package info (click to toggle)
trinityrnaseq 2.11.0%2Bdfsg-6
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 417,528 kB
  • sloc: perl: 48,420; cpp: 17,749; java: 12,695; python: 3,124; sh: 1,030; ansic: 983; makefile: 688; xml: 62
file content (118 lines) | stat: -rw-r--r-- 2,541 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

plot_log2_fpkm_dist = function (fpkm_files) {

    num_files = length(fpkm_files);

	data_list = list();
	
	max_y = 0
	xlim = c(0,0)

	for (i in 1:num_files) {

		file = fpkm_files[i]
      	data = read.table(file, header=F)
		data = data[,6];
		data = log2(data+1)
		
		den = density(data);
	
		data_list[[i]] = den

		y = max(den$y);
		if (y > max_y) {
			max_y = y;
		}
	
		x = min(den$x)
		if (x < xlim[1]) {
			xlim[2] = x
		}	

		x = max(den$x);
		if (x > xlim[2]) {
			xlim[2] = x
		}


	}
	
	colors = rainbow(num_files);

	for (i in 1:num_files) {

		if (i == 1) {

			plot(data_list[[1]], col=colors[1], xlim=xlim, ylim=c(0,max_y), xlab="log2(fpkm+1)")
		}
		else {
			points(data_list[[i]], col=colors[i], type='l')
		}
	}

	return;
}





plot_expressed_gene_counts = function(fpkm_file, 
                                      title="expressed transcript counts vs. min fpkm", 
                                      fpkm_range=seq(0,10,0.2), 
                                      total=0, 
                                      outfile="count_summary.txt") {

    data = read.table(fpkm_file, header=T, row.names=1);
	data = data[,5]
	
	
	counts_expressed = c();
    counts_not_expressed = c();

    print_not_expressed_flag = 1;
    if (total == 0) {
        total = length(data);
        print_not_expressed_flag = 0;
    }

    count_expressed = total;
	for (i in fpkm_range) {
   
        if (i > 0) {
       		count_expressed = sum(data>=i)
		}
        count_not_expressed = total - count_expressed;
	
		counts_expressed[length(counts_expressed)+1] = count_expressed;
		counts_not_expressed[length(counts_not_expressed)+1] = count_not_expressed;
		
	}


    orig_settings = par(mfrow=c(1,2))

	plot(fpkm_range, counts_expressed, type='o', col='black', xlab="min(fpkm)", 
        main=title, ylab="count of transcripts", ylim=c(0,max(counts_expressed, counts_not_expressed)))
    
    if (print_not_expressed_flag) {
    	points(fpkm_range, counts_not_expressed, type='o', col='blue')
        legend('topright', c('expressed', 'not expressed'), col=c('black', 'blue'), pch=15);
    }   
	else {
		legend('bottomright', c('expressed transcripts'), col=c('black'), pch=15);
	}
   
	data_table = data.frame(fpkm_range=fpkm_range, expressed=counts_expressed, not_expressed=counts_not_expressed);
	
	write.table(data_table, file=outfile, quote=F, sep='\t', row.names=F);
	

    ## make density plot for non-zero FPKM values
    data = data[data>0]
    plot(density(log2(data)), xlab="log2(fpkm)")


    par(orig_settings)
    
}