File: 2to3.patch

package info (click to toggle)
discosnp 1%3A2.6.2-5
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 3,656 kB
  • sloc: python: 5,893; sh: 2,966; cpp: 2,692; makefile: 14
file content (516 lines) | stat: -rw-r--r-- 24,758 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
Author: Andreas Tille <tille@debian.org>
Last-Update: Mon, 21 Jan 2019 09:01:19 +0100
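Description: Port scripts to Python 3
 Result of running 2to3 on the Python scripts. Additionally switches
 shebangs to /usr/bin/python3 and replaces "python" with "python3" in
 shell wrappers, usage messages and documentation.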

--- a/scripts/filter_out_using_MAF.py
+++ b/scripts/filter_out_using_MAF.py
@@ -2,8 +2,8 @@ import sys
 import gzip
 
 if len(sys.argv)<3:
-    print "This tool filters out discoSnp prediction having a minor allele frequency lower than a provided threshold for ALL datasets."
-    print "python filter_out_using_MAF.py \".fa from discoSnp\" \"MAF threshold\""
+    print("This tool filters out discoSnp prediction having a minor allele frequency lower than a provided threshold for ALL datasets.")
+    print("python3 filter_out_using_MAF.py \".fa from discoSnp\" \"MAF threshold\"")
     sys.exit()
 
 
@@ -52,7 +52,7 @@ while True:
             break
     
     if to_output:
-        print (comment1,path1,comment2,path2,)
+        print((comment1,path1,comment2,path2,))
     
       
             
--- a/scripts/ClassVCF_creator.py
+++ b/scripts/ClassVCF_creator.py
@@ -1,4 +1,4 @@
-#!/bin/python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 ###############################################
 #Dresscode : class : uppercase
@@ -197,7 +197,7 @@ class VARIANT():
 #---------------------------------------------------------------------------------------------------------------------------                                                   
         def RetrievePolymorphismFromHeader(self):
                 '''Gets from the dicoAllele all the positions, and the nucleotides for each variant '''
-                for key,(posD,ntUp,ntLow) in self.dicoAllele.items(): #Goes through the dictionary of parsed header
+                for key,(posD,ntUp,ntLow) in list(self.dicoAllele.items()): #Goes through the dictionary of parsed header
                         self.upper_path.listPosForward.append(int(posD)+1)
                         self.lower_path.listPosForward.append(int(posD)+1)
                         self.upper_path.listPosReverse.append(len(self.upper_path.seq)-int(posD))
@@ -894,7 +894,7 @@ class INDEL(VARIANT):
                         else:
                                 self.longestSequenceForward=ReverseComplement(self.upper_path.seq)
                                 self.longestSequenceReverse = self.upper_path.seq
-                for key,(posD,ind,amb) in self.dicoAllele.items():#Goes through the dictionary of parsed header
+                for key,(posD,ind,amb) in list(self.dicoAllele.items()):#Goes through the dictionary of parsed header
                         #In case of forward strand mapped
                         #we return the disco indel + the lefmost nucleotide before the indel (by taking into account the ambiguity
                         self.upper_path.listPosForward.append(int(posD)-int(amb))
--- a/scripts/discoSnp++_to_csv.py
+++ b/scripts/discoSnp++_to_csv.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 import sys
 if len(sys.argv) !=2:
-    sys.stdout.write("Mandatory: python discoSnp_to_csv.py prefix_coherent_k_kval_c_cval.fa\n")
+    sys.stdout.write("Mandatory: python3 discoSnp_to_csv.py prefix_coherent_k_kval_c_cval.fa\n")
     sys.stdout.write("This program formats the .fa to .csv format by puting each couple of .fa sequence (4 lines = 2 comments + 2 nucleotide sequences) into one line, replacing the '|' character by spaces and removing the CX_ formating")
     sys.exit(1)
 
@@ -64,7 +64,7 @@ while 1:
         i+=1
     sys.stdout.write( com2_tab[i][:-1]+",")
     
-    print (data1_2,)
+    print((data1_2,))
     
     
     
--- a/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
+++ b/scripts/filterOnBestDP_multiple_variant_at_same_pos.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 #To apply on sorted vcf by position
-#Usage : python filterOnBestDP_multiple_variant_at_same_pos.py <vcf_for_igv> > output.vcf
+#Usage : python3 filterOnBestDP_multiple_variant_at_same_pos.py <vcf_for_igv> > output.vcf
 import sys
 import gzip
 import re
@@ -25,7 +25,7 @@ while True:
                 line = vcf_for_igv.readline()
                 if not line: break
         if line.startswith("#"): #We just print the header
-                print (line.rstrip())
+                print((line.rstrip()))
                 continue
         list_line_same_pos.append(line) #list with all the lines 
         pos=int(line.split("\t")[1])  
@@ -61,9 +61,9 @@ while True:
                                                 DP_max=DP
         compt+=1
         if to_output:
-                print (line_to_print.rstrip())
+                print((line_to_print.rstrip()))
         elif line_to_print :
-                print (line_to_print.rstrip())
+                print((line_to_print.rstrip()))
         if out:break         
 vcf_for_igv.close() 
 
--- a/scripts/filter_out_using_ratio_of_covered_files.py
+++ b/scripts/filter_out_using_ratio_of_covered_files.py
@@ -3,7 +3,7 @@ import gzip
 
 if len(sys.argv)<4:
     print ("This tool filters out discoSnp prediction whose number of read sets covering it is lower than a user defined threshold. A set covers a prediction if its coverage in at least one of the two alleles is higher than a user defined threshold")
-    print ("python filter_out_using_ratio_of_covered_files.py \".fa from discoSnp\" \"number of sets threshold\" \"minimal coverage\"")
+    print ("python3 filter_out_using_ratio_of_covered_files.py \".fa from discoSnp\" \"number of sets threshold\" \"minimal coverage\"")
     sys.exit()
 
 
@@ -53,7 +53,7 @@ while True:
         if coverage_high[i]>=minimal_coverage or coverage_low[i]>=minimal_coverage: number_of_covered_sets+=1
     
     if 100*number_of_covered_sets/float(number_of_read_sets)>=ratio_threshold:
-        print (comment1,path1,comment2,path2,)
+        print((comment1,path1,comment2,path2,))
     
       
             
--- a/scripts/functionObjectVCF_creator.py
+++ b/scripts/functionObjectVCF_creator.py
@@ -1,4 +1,4 @@
-#!/bin/python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 ###############################################
 import os
@@ -199,25 +199,25 @@ def CheckAtDistanceXBestHits(upper_path,
         best_up=1024
         if int(upper_path.mappingPosition)==0 and int(lower_path.mappingPosition)==0:#Checks if paths are unmappped
                 return(".")
-        for position,(nbMismatch,cigarcode) in posUp.items(): 
+        for position,(nbMismatch,cigarcode) in list(posUp.items()): 
                 if nbMismatch<best_up:
                         best_up=nbMismatch
 
         # get the best mapping distance for lower path 
         best_low=1024
-        for position,(nbMismatch,cigarcode) in posLow.items(): 
+        for position,(nbMismatch,cigarcode) in list(posLow.items()): 
                 if nbMismatch<best_low:
                         best_low=nbMismatch
 
         # get the union of the mapping position at the best mapping positions
         position_set = set()
-        for position,(nbMismatch,cigarcode) in posUp.items():
+        for position,(nbMismatch,cigarcode) in list(posUp.items()):
                 if nbMismatch == best_up:
                         position_set.add(position)
                 if len(position_set) > 1: 
                         return("MULTIPLE")
 
-        for position,(nbMismatch,cigarcode) in posLow.items():
+        for position,(nbMismatch,cigarcode) in list(posLow.items()):
                 if nbMismatch == best_low:
                         position_set.add(position)
                 if len(position_set) > 1: 
--- a/scripts/VCF_creator.py
+++ b/scripts/VCF_creator.py
@@ -1,4 +1,4 @@
-#!/bin/python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 #*****************************************************************************
 #   VCF_Creator: mapping and VCF creation feature in DiscoSnp++
--- a/scripts/format_phased_variants_for_haplotyping.py
+++ b/scripts/format_phased_variants_for_haplotyping.py
@@ -3,11 +3,11 @@ import sys
 ### first create connected components from disco (-A option)
 #sh from_phased_alleles_to_clusters.sh phased_alleles_read_set_id_1.txt # creates file connected_components_phased_alleles_read_set_id_1.txt
 ### them from the .fa file, the id of the set your interested in (e.g. 1 for phased_alleles_read_set_id_1.txt, this will correspond to C1 coverage in the fa file), the file containing the connected components, and the phased_alleles_read_set_id_X.txt file, generate the fact file
-#python format_phased_variants_for_haplotyping.py mapping_k_31_c_auto_D_100_P_10_b_0_coherent.fa 1 connected_components_phased_alleles_read_set_id_1.txt phased_alleles_read_set_id_1.txt  > phased_alles_read_set_1_facts.txt
+#python3 format_phased_variants_for_haplotyping.py mapping_k_31_c_auto_D_100_P_10_b_0_coherent.fa 1 connected_components_phased_alleles_read_set_id_1.txt phased_alleles_read_set_id_1.txt  > phased_alles_read_set_1_facts.txt
 
 
 if not len(sys.argv)==5:
-    print ("usage: python format_phased_variants_for_haplotyping.py <file coherent.fa> <id number><connected_component_file><phased_allele_file>")
+    print ("usage: python3 format_phased_variants_for_haplotyping.py <file coherent.fa> <id number><connected_component_file><phased_allele_file>")
     print (" * coherent.fa file: the file generated by discoSnp")
     print (" * id number is the id of the read set, for which variants are phased. With i, this corresponds to Ci in the .fa file headers.")
     print (" * connected_component_file: file obtained from \"from_phased_alleles_to_clusters.sh phased_alleles_read_set_id_1.txt\" continaing connected component of phased alleles")
--- a/scripts/remove_non_covered_genotypes.py
+++ b/scripts/remove_non_covered_genotypes.py
@@ -3,7 +3,7 @@ import gzip
 
 if len(sys.argv)<3:
     print ("This tool replaces discoSnp VCF genotypes with DP lower or equal to a threshold to \"./.\"")
-    print ("python remove_non_covered_genotypes.py \".vcf from discoSnp\" \"DP threshold\"")
+    print ("python3 remove_non_covered_genotypes.py \".vcf from discoSnp\" \"DP threshold\"")
     sys.exit()
 
 
@@ -37,4 +37,4 @@ while True:
             toprint+= splitted_geno[j]
             if j<len(splitted_geno)-1: toprint+= ":"
         if i<len(splitted_line): toprint+='\t'
-    print (toprint)
\ No newline at end of file
+    print (toprint)
--- a/scripts/simulations/multiple_samples_simulator.sh
+++ b/scripts/simulations/multiple_samples_simulator.sh
@@ -68,7 +68,7 @@ fi
 
 for p in `seq 1 $num_pop`
 	do
-	python ./random_mut_fasta.py $genome $div_pop > ERASEME_pos_mut_pop"$p"
+	python3 ./random_mut_fasta.py $genome $div_pop > ERASEME_pos_mut_pop"$p"
 
 	#pos random ordering
 	sort -R ERASEME_pos_mut_pop"$p" > ERASEME_pos_mut_random_pop"$p"
@@ -90,7 +90,7 @@ for p in `seq 1 $num_pop`
 		#homozygotes mutations
 		cat ERASEME_pos_mut_random_shared_allpop"$p" ERASEME_pos_mut_random_shared_pop"$p"_s"$i" > ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"
 		#mutation inducing
-		python ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"
+		python3 ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"
 		mv "$genome"_mut ERASEME_"$genome"_pop"$p"_s"$i"_withhetero.fasta
 		#homozygote and heterozygote mutations
 		nb_line2=`grep "." -c ERASEME_pos_mut_random_shared_pop"$p"_s"$i"`
@@ -99,7 +99,7 @@ for p in `seq 1 $num_pop`
 		head -n +"$nb_homo" ERASEME_pos_mut_random_shared_pop"$p"_s"$i" > ERASEME_pos_mut_random_shared_pop"$p"_s"$i"_homo
 		#population mutationq
 		cat ERASEME_pos_mut_random_shared_allpop"$p" ERASEME_pos_mut_random_shared_pop"$p"_s"$i"_homo > ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"_homo
-		python ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"_homo
+		python3 ./targeted_mut_fasta_corrected.py "$genome" ERASEME_pos_mut_random_spe_pop"$p"_and_s"$i"_homo
 		mv "$genome"_mut ERASEME_"$genome"_pop"$p"_s"$i"_homo.fasta
 		#READS SIMULATION
 		mutareads_forward ERASEME_"$genome"_pop"$p"_s"$i"_withhetero.fasta pop"$p"_ind"$i"_allele1_err_reads $read_s $read_l 0.01 0 0
--- a/scripts/simulations/targeted_mut_fasta_corrected.py
+++ b/scripts/simulations/targeted_mut_fasta_corrected.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 # 
 
--- a/scripts/from_phased_alleles_to_clusters.sh
+++ b/scripts/from_phased_alleles_to_clusters.sh
@@ -13,7 +13,7 @@ filename=$(basename -- "$file")
 path=$(dirname "${file}")
 
 # FIND THE PATH CONTAINING THE SCRIPT: 
-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) 
+EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) 
 echo $EDIR
 
 edge_coverage_threshold=0
@@ -77,4 +77,4 @@ then
     exit 1
 fi
 
-echo "Connected components (clusters of variants) from file $file are in $path/connected_components_${filename}"
\ No newline at end of file
+echo "Connected components (clusters of variants) from file $file are in $path/connected_components_${filename}"
--- a/scripts/redundancy_removal_discosnp.py
+++ b/scripts/redundancy_removal_discosnp.py
@@ -1,4 +1,4 @@
-#!/bin/python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 ###################################
 # from kissnp output: 
--- a/scripts/simulations/random_mut_fasta.py
+++ b/scripts/simulations/random_mut_fasta.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 # 
 
--- a/scripts/create_IGV_compatible_VCF.sh
+++ b/scripts/create_IGV_compatible_VCF.sh
@@ -30,7 +30,7 @@ igvfile=$(basename $vcffile .vcf)"_for_I
 cat $vcffile|grep "#">$igvfile
 #cat $vcffile|grep -v  "#"|sort -k 2n,2n -n|grep -v "^SNP"|grep -v "^INDEL">>$igvfile
 cat $vcffile|grep -v  "#"|sort -k 1,1 -k 2,2n |grep -v "^SNP"|grep -v "^INDEL">>$igvfile # from 2 2 6
-#python $DIR/tools/one2zeroBased_vcf.py $igvfiletemp 
+#python3 $DIR/tools/one2zeroBased_vcf.py $igvfiletemp 
 #cat VCFone2zeroBAsed.vcf >> $igvfile
 #rm -f $igvfiletemp VCFone2zeroBAsed.vcf
 echo -e "... Creation of the vcf file for IGV: done ...==> $igvfile"
--- a/scripts/k3000/K3000_gfa_to_dat.py
+++ b/scripts/k3000/K3000_gfa_to_dat.py
@@ -461,7 +461,7 @@ def main(gfa_file_name):
     '''
     Creation of a DAT file from the graph_plus.gfa GFA file 
     Usage: 
-        python ~/workspace/gatb-discosnp/scripts/k3000/K3000_gfa_to_dat.py graph_plus.gfa > graph_diploid.dat
+        python3 ~/workspace/gatb-discosnp/scripts/k3000/K3000_gfa_to_dat.py graph_plus.gfa > graph_diploid.dat
     '''
     warnings.warn(f"{sys.argv[0]} is not maintenained anymore (May 2020).", DeprecationWarning)
     
--- a/scripts/keep_extensions_disco_file.py
+++ b/scripts/keep_extensions_disco_file.py
@@ -1,4 +1,4 @@
-#!/bin/python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 ###################################
 # change extensions in uppercase and replace relative positions of SNPs in the header
--- a/scripts/remove_extensions_disco_file.py
+++ b/scripts/remove_extensions_disco_file.py
@@ -1,4 +1,4 @@
-#!/bin/python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 ###################################
 #Removes extensions in lowercase :
--- a/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py
+++ b/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 # 
 
--- a/scripts/validation_scripts/eval_disco_one_snp_per_locus.py
+++ b/scripts/validation_scripts/eval_disco_one_snp_per_locus.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 # 
 
--- a/scripts/k3000/K3000.py
+++ b/scripts/k3000/K3000.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 #
 '''
--- a/scripts/k3000/K3000_node_ids_to_node_sequences.py
+++ b/scripts/k3000/K3000_node_ids_to_node_sequences.py
@@ -163,7 +163,7 @@ def main():
     Produces a gfa file replacing the node content from int ids of alleles to their sequence
     '''
     if len(sys.argv) !=3:
-        sys.stderr.write("Usage: python K3000_node_ids_to_node_sequences.py graph_plus.gfa compacted_facts.fa > graph_final.gfa\n")
+        sys.stderr.write("Usage: python3 K3000_node_ids_to_node_sequences.py graph_plus.gfa compacted_facts.fa > graph_final.gfa\n")
         sys.exit(0)
     sys.stderr.write("Indexing sequence positions\n")
     header_to_file_position = index_sequences_seek(sys.argv[2])
--- a/scripts/remove_non_biallelic.py
+++ b/scripts/remove_non_biallelic.py
@@ -3,7 +3,7 @@ import gzip
 
 if len(sys.argv)<2:
     print ("This tool removes from discoSnp sorted VCF the locus which are tri-allelic or more")
-    print ("python remove_non_diploids.py \".vcf from discoSnp\" ")
+    print ("python3 remove_non_diploids.py \".vcf from discoSnp\" ")
     sys.exit()
 
 
--- a/discoSnpRAD/post-processing_scripts/README.md
+++ b/discoSnpRAD/post-processing_scripts/README.md
@@ -7,7 +7,7 @@
        * removes variants belonging to a cluster (locus) whose size (nb of variants) is outside the given size range (options `-m` and `-M`)
        * removes variants with rank lower than a given threshold given by option `-r`
        * Usage :  
-       `python filter_by_cluster_size_and_rank.py -i vcf_file [-o new_vcf_file -m 0 -M 150 -r 0.4]`
+       `python3 filter_by_cluster_size_and_rank.py -i vcf_file [-o new_vcf_file -m 0 -M 150 -r 0.4]`
 
    3. **script** `filter_vcf_by_indiv_cov_max_missing_and_maf.py`:
        * replaces individual genotypes that have DP less than the value given by option `-c` by missing genotype `./.`
@@ -15,14 +15,14 @@
        * removes variants (vcf lines) that have a minor allele frequency smaller than the value given by option `-f` 
        * outputs only SNP variants if option `-s`. 
        * Usage :    
-       `python filter_vcf_by_indiv_cov_max_missing_and_maf.py -i vcf_file -o new_vcf_file [-c min_cov -m max_missing -f maf -s] `
+       `python3 filter_vcf_by_indiv_cov_max_missing_and_maf.py -i vcf_file -o new_vcf_file [-c min_cov -m max_missing -f maf -s] `
 
   3. **script** `filter_paralogs.py`:
         * identifies variants (vcf lines) that have a fraction of heterozygous genotypes greater than `x` (not counting missing genotypes)
         * removes variants (vcf lines) that belong to a cluster having a fraction of such variants greater than `y`
         * Example : `x=0.1` and `y= 0.5` and if we consider a cluster to represent a locus. This filter removes loci that have more than 50% of the SNPs that have each more than 10% of heterozygous genotypes.
         * Usage :     
-        `python filter_paralogs.py -i vcf_file -o new_vcf_file [-x 0.1 -y 0.5]`
+        `python3 filter_paralogs.py -i vcf_file -o new_vcf_file [-x 0.1 -y 0.5]`
 
 
 ## Scripts for STRUCTURE analyses :
@@ -30,7 +30,7 @@
    4. **script** `1SNP_per_cluster.py`
         * selects one SNP per cluster (the one with less missing genotypes)
         * Usage :   
-        `python  1SNP_per_cluster.py -i vcf_file -o new_vcf_file`
+        `python3  1SNP_per_cluster.py -i vcf_file -o new_vcf_file`
 
    5. **script** `vcf2structure.sh`    
         * changes the vcf format to a Structure format (input of the software Structure)
@@ -56,13 +56,13 @@ Here is the full pipeline to map the res
 sh [DISCO_DIR]/scripts/run_VCF_creator.sh  -G dm6_masked.fa -p myDiscoSnpRADResult_raw_filtered.fa -e -o temp.vcf
 
 # Adding clustering information (and minimal filtering on cluster size)
-python add_cluster_info_to_mapped_vcf.py -m temp.vcf -u myDiscoSnpRADResult_clustered.vcf -o myDiscoSnpRADResult_mapped.vcf
+python3 add_cluster_info_to_mapped_vcf.py -m temp.vcf -u myDiscoSnpRADResult_clustered.vcf -o myDiscoSnpRADResult_mapped.vcf
 # final vcf is myDiscoSnpRADResult_mapped.vcf
 ```
 
 Additionnally, in a validation context, if one wants to compare variant positions between two such vcf files, the following command will output recall and precision metrics:
 ```
-python [DISCO_DIR]/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py truth.vcf myDiscoSnpRADResult_mapped.vcf
+python3 [DISCO_DIR]/scripts/validation_scripts/compare_vcf_disco_pos_allele_only.py truth.vcf myDiscoSnpRADResult_mapped.vcf
 ```
 
 
--- a/discoSnpRAD/post-processing_scripts/filter_by_cluster_size_and_rank.py
+++ b/discoSnpRAD/post-processing_scripts/filter_by_cluster_size_and_rank.py
@@ -1,5 +1,4 @@
-
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 
 
--- a/doc/discoSnp_user_guide.txt
+++ b/doc/discoSnp_user_guide.txt
@@ -299,7 +299,7 @@ See documentation specific to VCF_creato
 Output Analyze
 	From a fasta format to a csv format: If you wish to analyze the results in a tabulated 
 format:
-?	# python output_analyses/discoSnp++_to_csv.py discoSnp++_output.fa
+?	# python3 output_analyses/discoSnp++_to_csv.py discoSnp++_output.fa
 ?	will output a .csv tabulated file containing on each line the content of 4 lines of the .fa, 
 replacing the '|' character by comma ',' and removing the CX_
 Exemples of close SNPs and indels
--- a/run_discoSnp++.sh
+++ b/run_discoSnp++.sh
@@ -66,7 +66,7 @@ e="" # if set to -e: Map variant predict
 graph_reused="Egg62hdS7knSFvF3" # with -g option, we use a previously created graph. 
 
 #EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
-#EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
+#EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
 EDIR="/usr"
 
 
--- a/discoSnpRAD/post-processing_scripts/add_cluster_info_to_mapped_vcf.py
+++ b/discoSnpRAD/post-processing_scripts/add_cluster_info_to_mapped_vcf.py
@@ -1,5 +1,4 @@
-
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 
 import sys
--- a/discoSnpRAD/post-processing_scripts/filter_paralogs.py
+++ b/discoSnpRAD/post-processing_scripts/filter_paralogs.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 
 
--- a/discoSnpRAD/post-processing_scripts/filter_vcf_by_indiv_cov_max_missing_and_maf.py
+++ b/discoSnpRAD/post-processing_scripts/filter_vcf_by_indiv_cov_max_missing_and_maf.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 
 ''' ***********************************************
--- a/discoSnpRAD/post-processing_scripts/1SNP_per_cluster.py
+++ b/discoSnpRAD/post-processing_scripts/1SNP_per_cluster.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
 # -*- coding: utf-8 -*-
 
 
--- a/discoSnpRAD/run_discoSnpRad.sh
+++ b/discoSnpRAD/run_discoSnpRad.sh
@@ -82,7 +82,7 @@ max_missing=0.95
 min_rank=0.4
 
 #EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
+EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
 
 if [ -d "$EDIR/../build/" ] ; then # VERSION SOURCE COMPILED
     read_file_names_bin=$EDIR/../build/bin/read_file_names
@@ -640,7 +640,7 @@ fi
 echo "${yellow}     ############################################################"
 echo "     #################### REDUNDANCY REMOVAL  ###################"
 echo "     ############################################################$reset"
-redundancy_removal_cmd="python $EDIR/../scripts/redundancy_removal_discosnp.py ${kissprefix}_r.fa $k $kissprefix.fa"
+redundancy_removal_cmd="python3 $EDIR/../scripts/redundancy_removal_discosnp.py ${kissprefix}_r.fa $k $kissprefix.fa"
 echo $green${redundancy_removal_cmd}$cyan
 if [[ "$wraith" == "false" ]]; then
    eval ${redundancy_removal_cmd}
--- a/run_discoSnp++_ML.sh
+++ b/run_discoSnp++_ML.sh
@@ -67,7 +67,7 @@ stop_after_kissnp=0
 e=""
 prefix_trash=`head /dev/urandom | tr -dc A-Za-z0-9 | head -c 13 ; echo ''`
 #EDIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
-EDIR=$( python -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
+EDIR=$( python3 -c "import os.path; print(os.path.dirname(os.path.realpath(\"${BASH_SOURCE[0]}\")))" ) # as suggested by Philippe Bordron 
 
 
 if [ -d "$EDIR/build/" ] ; then # VERSION SOURCE COMPILED
@@ -839,4 +839,4 @@ if [[ "$wraith" == "false" ]]; then
     fi
     echo -e " Thanks for using discoSnp++ - http://colibread.inria.fr/discoSnp/ - Forum: http://www.biostars.org/t/discoSnp/"
     echo -e "################################################################################################################${reset}"
-fi
\ No newline at end of file
+fi