File: run_UpperQuartileNormalization_matrix.Rscript

package info (click to toggle)
trinityrnaseq 2.15.2%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 468,004 kB
  • sloc: perl: 49,905; cpp: 17,993; java: 12,489; python: 3,282; sh: 1,989; ansic: 985; makefile: 717; xml: 62
file content (35 lines) | stat: -rwxr-xr-x 1,087 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env Rscript

################################################
# for info on upper quartile normalization, see:
# http://vinaykmittal.blogspot.com/2013/10/fpkmrpkm-normalization-caveat-and-upper.html
################################################

suppressPackageStartupMessages(library("argparse"))

parser = ArgumentParser()
parser$add_argument("--matrix", help="input data file", required=TRUE, nargs=1)
parser$add_argument("--output", help="output data file", required=TRUE, nargs=1)    

args = parser$parse_args()
matrix_filename = args$matrix
output_filename = args$output
    
data = read.table(gzfile(matrix_filename), header=T, sep="\t", row.names=1, check.names=F, com='')


get_upper_quartile = function(vec) {
     vec = vec[vec > 0]
     quantile(vec, 0.75)
}

data = as.matrix(data)
upp_quartiles = apply(data, 2, get_upper_quartile)
m = sweep(data, MARGIN=2, upp_quartiles, '/')
mean_upp_quart = mean(upp_quartiles)
m = m * mean_upp_quart
write.table(m, file=output_filename, quote=F, sep="\t", col.names=NA)


quit(save = "no", status = 0, runLast = FALSE)