File: run_mapsembler2_pipeline.sh

package info (click to toggle)
mapsembler2 2.2.4%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 8,208 kB
  • sloc: cpp: 51,300; ansic: 13,434; sh: 483; makefile: 394; asm: 271; python: 28
file content (255 lines) | stat: -rwxr-xr-x 9,585 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
#!/bin/bash

#######################################################################
#################### HEADER, CHANGE VALUES HERE #######################
#######################################################################
starters_file=""
read_sets="" # FOR instance: "read_set1.fa.gz read_set2.fq.gz"
prefix_index="index" # all files used for indexing will be written will start with this prefix
prefix_results="res" # all final files will be written will start with this prefix
extremities_filename="starter_extremities.fa"
k=31 # size of kmers
c=5 # minimal coverage
d=1 # estimated number of error per read
g=3000000000 # estimated genome size. Used only to control memory usage. e.g. 3 billion (3000000000) uses 4Gb of RAM.
t=0
f=1 #kind of process (search mode 1=Breadth and 2=Depth)
x=40 #node length limit
y=10000 #graph depth limit
PATH_TOOLS="" # path were executables mapsembler and phaser are. Leave blank if they are located in a directory located in the PATH environnement variable
#######################################################################
#################### END HEADER                 #######################
#######################################################################


function help {
echo "run_mapsembler_pipeline.sh, a pipelining mapsembler2_extremities, mapsembler2_extend and kissread_g"
echo "Usage: ./run_mapsembler_and_phaser.sh -s <starter.fasta> -r <reads.faste> -t [1/2/3/4]<options>"
echo -e "\t \t -s: file containing starters (fasta)"
echo -e "\t \t -r list of reads separated by space, surrounded by the '\"' character. Note that reads may be in fasta or fastq format, gzipped or not. Example: -r \"data_sample/reads_sequence1.fasta   data_sample/reads_sequence2.fasta.gz\"."
echo -e "\t \t -t: kind of assembly: 1=unitig (fasta), 2=contig (fasta), 3=unitig (graph), 4=contig(graph)"
echo -e "<options>:"
echo -e "\t\t -p prefix. All out files will start with this prefix. Example: -p my_prefix"
echo -e "\t\t -k value. Set the length of used kmers. Must fit the compiled value. Default=31. Example -k 31"
echo -e "\t\t -c value. Set the minimal coverage. Default=5. Example -c 5"
echo -e "\t\t -d value. Set the number of authorized substitutions used while mapping reads on found SNPs. Default=1. Example: -d 1"
echo -e "\t\t -g value. Estimated genome size. Used only to control memory usage. e.g. 3 billion (3000000000) uses 4Gb of RAM. Default=10 million. Example: -d 10000000"
echo -e "\t\t -f value. Set the process of search in the graph (1=Breadth  and 2=Depth). Default=1. Example: -f 1"
echo -e "\t\t -x value. Set the maximal nodes length . Default=40. Example: -x 40"
echo -e "\t\t -y value. Set the maximal graph depth . Default=10000. Example: -y 10000"
echo -e "\t\t -h Prints this message and exist"
echo "Any further question: read the readme file or contact us: pierre.peterlongo@inria.fr"
}


#######################################################################
#################### GET OPTIONS                #######################
#######################################################################
while getopts "s:r:t:p:k:c:d:g:f:x:y:h" opt; do
case $opt in
	h)
help
exit
;;
s)
echo "use starter file: $OPTARG" >&2
starters_file=$OPTARG
;;
r)
echo "use read set: $OPTARG" >&2
read_sets=$OPTARG
;;

p)
echo "use prefix=$OPTARG" >&2
prefix_index=$OPTARG
prefix_results=$OPTARG
;;

k)
echo "use k=$OPTARG" >&2
k=$OPTARG
;;

t)
echo "use t=$OPTARG" >&2
t=$OPTARG
;;

c)
echo "use c=$OPTARG" >&2
c=$OPTARG
;;

d)
echo "use d=$OPTARG" >&2
d=$OPTARG
;;

g)
echo "use g=$OPTARG" >&2
g=$OPTARG
;;

f)
echo "use f=$OPTARG" >&2
f=$OPTARG
;;

x)
echo "use x=$OPTARG" >&2
x=$OPTARG
;;

y)
echo "use y=$OPTARG" >&2
y=$OPTARG
;;

\?)
echo "Invalid option: -$OPTARG" >&2
exit 1
;;

:)
echo "Option -$OPTARG requires an argument." >&2
exit 1
;;
esac
done
#######################################################################
#################### END GET OPTIONS            #######################
#######################################################################

if [ -z "$starters_file" ] && [ -z "$read_sets" ];
then
help
exit
fi

if [ $t -eq 0 ]
then
echo "-t is mandatory, please fix t in value 1, 2, 3, or 4"
exit
fi

######### CHECK THE k PARITY ##########
rest=$(( $k % 2 ))
if [ $rest -eq 0 ]
then
echo "k=$k is even number, to avoid palindromes we set it to $(($k-1))"
k=$(($k-1))
fi
#######################################

#######################################################################
#################### OPTIONS SUMMARY            #######################
#######################################################################
MY_PATH="`( cd \"$MY_PATH\" && pwd )`"  # absolutized and normalized
if [ -z "$MY_PATH" ] ; then
# error; for some reason, the path is not accessible
# to the script (e.g. permissions re-evaled after suid)
exit 1  # fail
fi
echo -e "\tRunning pipeline Mapsembler2_extremities, Mapsembler2_extend and Kissread_g, in directory "$MY_PATH" with following parameters:"
echo -e "\t\t fasta file containing starters="$starters_file
echo -e "\t\t fasta files containing reads="$read_sets
echo -e "\t\t fasta files generating and containing starter extremities ="$extremities_filename
echo -e "\t\t prefix for index files="$prefix_index
echo -e "\t\t prefix for results files="$prefix_results


echo -e "\t\t k="$k
echo -e "\t\t t="$t
echo -e "\t\t c="$c
echo -e "\t\t d="$d
echo -e "\t\t g="$g
echo -e "\t\t f="$f
echo -e "\t\t x="$x
echo -e "\t\t y="$y
echo -e -n "\t\t starting date="
date
echo
#######################################################################
#################### END OPTIONS SUMMARY        #######################
#######################################################################


#######################################################################
#################### mapsembler_extremeities     #######################
#######################################################################
echo -e "\n"
echo -e "\033[31m running $PATH_TOOLS\mapsembler2_extremities --k $k --min-solid-subkmer $c --starters $starters_file --reads \"$read_sets\" --output $extremities_filename \033[0m"
$PATH_TOOLS\mapsembler2_extremities --k $k --min-solid-subkmer $c --starters $starters_file --reads "$read_sets" --output $extremities_filename
if [ $? -ne 0 ]
then
echo "there was a problem with mapsembler, command line: $PATH_TOOLS""mapsembler2_extremities --k $k --starters $starters_file --reads $read_sets --output $extremities_filename"
exit
fi


#######################################################################
#################### mapsembler                 #######################
#######################################################################
if [ -f "$extremities_filename" ]
then
echo -e "\n"
echo -e "\033[31m running $PATH_TOOLS""mapsembler2_extend $extremities_filename $read_sets -t $t -k $k -c $c -g $g -x $x -y $y -f $f -i $prefix_index -o $prefix_results\033[0m"
$PATH_TOOLS\mapsembler2_extend $extremities_filename $read_sets -t $t -k $k -c $c -g $g -x $x -y $y -f $f -i $prefix_index -o $prefix_results
if [ $? -ne 0 ]
then
echo "there was a problem with mapsembler, command line: $PATH_TOOLS""mapsembler_extend $extremities_filename $read_sets -t $t -k $k -c $c -g $g -x $x -y $y -f $f -i $prefix_index -o $prefix_results"
exit
fi
fi


#######################################################################
#################### kissreads_g and kissreads  #######################
#######################################################################

	if [ $t -gt 2 ]
	then
		echo -e "\n"
		echo -e "\033[31m running $PATH_TOOLS""mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t.json $read_sets -k $k -c $c -t m -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json\033[0m"
$PATH_TOOLS\mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t.json $read_sets -k $k -c $c -t m -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json
		if [ $? -ne 0 ]
		then
			echo "there was a problem with mapsembler2_kissreads_graph"
			exit
		fi

		echo -e "\n"
		echo -e "\033[31m $PATH_TOOLS""mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json $read_sets -k $k -c $c  -t c -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified_and_covered.json\033[0m"
$PATH_TOOLS\mapsembler2_kissreads_graph $prefix_results\_k_$k\_c_$c\_t_$t\_modified.json $read_sets -k $k -c $c -t c -M -o $prefix_results\_k_$k\_c_$c\_t_$t\_modified_and_covered.json
		if [ $? -ne 0 ]
		then
			echo "there was a problem with mapsembler2_kissreads_graph"
			exit
		fi

		res_file=$prefix_results\_k_$k\_c_$c\_t_$t\_modified_and_covered.json

	else
		echo -e "\n"
		echo -e "\033[31m running $PATH_TOOLS""mapsembler2_kissreads $prefix_results\_k_$k\_c_$c\_t_$t.fasta $read_sets -f -o $prefix_results\_coherent_k_$k\_c_$c\_t_$t.fasta -u $prefix_results\_uncoherent_k_$k\_c_$c\_t_$t.fasta\033[0m"
$PATH_TOOLS\mapsembler2_kissreads $prefix_results\_k_$k\_c_$c\_t_$t.fasta $read_sets -k $k -c $c -d $d -f -o $prefix_results\_coherent_k_$k\_c_$c\_t_$t.fasta -u $prefix_results\_uncoherent_k_$k\_c_$c\_t_$t.fasta
		if [ $? -ne 0 ]
		then
			echo "there was a problem with mapsembler2_kissreads"
			exit
		fi

		res_file=$prefix_results\_coherent_k_$k\_c_$c\_t_$t.fasta
	fi

		
#/Users/grizk/gassst  -d test_k_31_c_4_coherent.fa  -i /Users/grizk/kissnp2/kissnp2_tool/snp_list_readsnp  -p 100 -w 15 -m 8 -l 0 -r 1 -o temp;

echo "mapsembler done, results are in $res_file"
echo -e -n "\t ending date="
date
echo -e "\t Thanks for using mapsembler - http://http://colibread.inria.fr/mapsembler/"