1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
|
#!/usr/bin/env bds
#-------------------------------------------------------------------------------
# Varialbes, files and options
#-------------------------------------------------------------------------------
string dir = "$HOME/snpEff/"
string dirGtex = "$dir/db/GTEx"
string dirReactome = "$dir/db/reactome"
string dirSim = "$dirReactome/simulations"
string gtexNorm = "$dirGtex/gtex_norm.txt.gz"
#-------------------------------------------------------------------------------
# Main
#-------------------------------------------------------------------------------
#---
# Split file by columns
#---
# Count columns
string countId = sys zcat $gtexNorm | head -n 1 | tr "\t" "\n" | wc -l
int numCols = countId.stdout().parseInt();
int colsPerProc = numCols / cpusLocal + 1;
print("Number of columns in GTEx : $numCols, using $colsPerProc columns per Process\n")
for( int cnum=0 ; cnum < cpusLocal ; cnum++ ) {
string gtexSplit = "$dirGtex/gtex_norm.split_$cnum.txt"
int minCol = colsPerProc * cnum;
int mmaxCol = colsPerProc * (cnum+1) - 1;
minCol = max(3, minCol)
print("\t$gtexSplit\t[$minCol, $mmaxCol]\n");
task( gtexSplit <- gtexNorm ) {
sys zcat $gtexNorm | cut -f 1,2,$minCol-$mmaxCol > $gtexSplit
}
}
wait
#---
# Calculate ncircuit for each parition
#---
print("Run circuit on each partition file\n")
string[] circSplits
for( int cnum=0 ; cnum < cpusLocal ; cnum++ ) {
string gtexSplit = "$dirGtex/gtex_norm.split_$cnum.txt"
string circSplit = "$dirSim/circuits.split_$cnum.txt"
print("\t$circSplit\n")
task( circSplit <- gtexSplit ) {
sys java -Xmx1G -jar snpEff.jar \
test \
$dirReactome/txt/ \
$dirReactome/gene_ids/biomart_query_uniq.txt \
$dirGtex \
$dirGtex/GTEx_Analysis_Annotations_Sample_DS__Pilot_2013_01_31.txt \
$gtexSplit \
> $circSplit
}
circSplits.add(circSplit)
}
wait
#---
# Join circuit result files
#---
# TODO: cut the first 2 columns of each file
# CHECK: First two columns must be equal for all files
string pasteCmd = "paste " + circSplits.join() + " > $dirSim/circuits.txt";
print("Paste commnad: $pasteCmd\n");
# TODO: Remove non-zero elements
# cd $HOME/snpEff/db/reactome/simulations
# ./filterAllZero.sh circuits.txt
# TODO: Run R analysis script
# Rscript --vanilla simmulationResults.r
# evince Rplots.pdf
|