File: pipeline_consensus.py

package info (click to toggle)
hinge 0.5.0-8
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 2,972 kB
  • sloc: cpp: 9,480; ansic: 8,826; python: 5,023; sh: 340; makefile: 10
file content (102 lines) | stat: -rwxr-xr-x 3,373 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/python3

import sys
import os
import subprocess


def parse_args(argv):
    """Extract the pipeline settings from a ``sys.argv``-style list.

    Positional arguments (only the first one is mandatory):
        argv[1]  bact_id         identifier the commands' file names are built from
        argv[2]  ini_path        config file passed to the tools (default 'nominal.ini')
        argv[3]  run_identifier  appended to bact_id to name the graphml file (default 'A')
        argv[4]  st_point        lowest step number to run (default 0)
        argv[5]  end_point       highest step number to run (default 20)

    Any further arguments are ignored.

    Returns:
        The tuple (bact_id, ini_path, run_identifier, st_point, end_point).

    Raises:
        SystemExit: with a usage message when bact_id is missing.
        ValueError: when st_point or end_point is not an integer literal.
    """
    if len(argv) < 2:
        # Without this check the script died later with a NameError on bact_id.
        raise SystemExit(
            'usage: ' + (argv[0] if argv else 'pipeline_consensus.py')
            + ' bact_id [ini_path [run_identifier [st_point [end_point]]]]'
        )

    bact_id = argv[1]
    ini_path = argv[2] if len(argv) >= 3 else 'nominal.ini'
    run_identifier = argv[3] if len(argv) >= 4 else 'A'
    # This is used to start the pipeline in the middle
    st_point = int(argv[4]) if len(argv) >= 5 else 0
    # This is used to stop the pipeline in the middle
    end_point = int(argv[5]) if len(argv) >= 6 else 20
    return bact_id, ini_path, run_identifier, st_point, end_point


def run_to_file(cmd, out_path, cwd):
    """Run *cmd* (an argument list) in *cwd* with its stdout written to *out_path*.

    The output file is closed before returning, so a later step can safely
    read it.  A non-zero exit status is reported on stderr but, as before,
    does not raise.

    Returns:
        The exit status of the command.
    """
    with open(out_path, 'w') as out_file:
        status = subprocess.call(cmd, stdout=out_file, cwd=cwd)
    if status != 0:
        sys.stderr.write(
            'warning: ' + cmd[0] + ' exited with status ' + str(status) + '\n'
        )
    return status


def main(argv=None):
    """Run the numbered pipeline steps selected on the command line.

    Steps 1 to 10 are executed in ascending order; a step k runs only when
    st_point <= k <= end_point.  Each step prints its command prefixed by the
    step number and then executes it in the current directory.  Steps run
    through ``subprocess.check_output`` raise ``CalledProcessError`` when the
    command fails.

    Args:
        argv: ``sys.argv``-style list; defaults to ``sys.argv``.

    Returns:
        0 once every selected step has been run.
    """
    if argv is None:
        argv = sys.argv
    bact_id, ini_path, run_identifier, st_point, end_point = parse_args(argv)

    graphml_file = bact_id + run_identifier + '.G2.graphml'
    base_path = './'

    def selected(step):
        return st_point <= step <= end_point

    if selected(1):
        draft_path_cmd = 'get_draft_path.py ' + base_path + ' ' + bact_id + ' ' + graphml_file
        print('1: ' + draft_path_cmd)
        subprocess.check_output(draft_path_cmd, cwd=base_path, shell=True)

    if selected(2):
        draft_assembly_cmd = (
            'draft_assembly --db ' + bact_id
            + ' --las ' + bact_id + '.las'
            + ' --prefix ' + bact_id
            + ' --config ' + ini_path
            + ' --out ' + bact_id + '.draft'
        )
        print('2: ' + draft_assembly_cmd)
        subprocess.check_output(draft_assembly_cmd, cwd=base_path, shell=True)

    if selected(3):
        corr_head_cmd = (
            'correct_head.py ' + bact_id + '.draft.fasta '
            + bact_id + '.draft.pb.fasta draft_map.txt'
        )
        print('3: ' + corr_head_cmd)
        subprocess.check_output(corr_head_cmd, cwd=base_path, shell=True)

    if selected(4):
        # Best-effort removal of a previous database; the status is ignored on purpose.
        subprocess.call("rm -f draft.db", shell=True, cwd=base_path)
        fasta2DB_cmd = "fasta2DB draft " + base_path + bact_id + '.draft.pb.fasta'
        print('4: ' + fasta2DB_cmd)
        subprocess.check_output(fasta2DB_cmd.split(), cwd=base_path)

    if selected(5):
        # Best-effort removal of earlier alignment files; the status is ignored on purpose.
        subprocess.call("rm -f draft.*.las", shell=True, cwd=base_path)
        mapper_cmd = "HPCmapper draft " + bact_id
        print('5: ' + mapper_cmd)
        # The stdout of HPCmapper is captured as the script draft_consensus.sh.
        run_to_file(mapper_cmd.split(), base_path + 'draft_consensus.sh', base_path)

    if selected(6):
        # modify_cmd = """awk '{gsub("daligner -A -k20 -h50 -e.85","daligner -A",$0); print $0}' draft_consensus.sh"""
        modify_cmd = [
            'awk',
            '{gsub("daligner -A -k20 -h50 -e.85","daligner -A",$0); print $0}',
            'draft_consensus.sh',
        ]
        print('6: ' + """awk '{gsub("daligner -A -k20 -h50 -e.85","daligner -A",$0); print $0}' draft_consensus.sh""")
        run_to_file(modify_cmd, base_path + 'draft_consensus2.sh', base_path)

    if selected(7):
        # NOTE(review): step 6 writes draft_consensus2.sh, yet the unmodified
        # draft_consensus.sh is what gets executed here -- confirm which one
        # is intended before changing it.
        mapper_shell_cmd = "csh -v draft_consensus.sh"
        print('7: ' + mapper_shell_cmd)
        subprocess.check_output(mapper_shell_cmd.split(), cwd=base_path)

    if selected(8):
        # remove_cmd = 'rm -f nonrevcompdraft.'+bact_id+'.*.las'
        # subprocess.call(remove_cmd,shell=True,cwd=base_path)
        # NOTE(review): the shell pattern [0-9] matches exactly one character,
        # so only files with a single-digit component are merged -- verify
        # this is sufficient for the expected inputs.
        LAmerge_cmd = "LAmerge draft." + bact_id + ".las " + 'draft.' + bact_id + '.[0-9].las'
        print('8: ' + LAmerge_cmd)
        subprocess.check_output(LAmerge_cmd, cwd=base_path, shell=True)

    if selected(9):
        consensus_cmd = (
            'consensus draft ' + bact_id
            + ' draft.' + bact_id + '.las '
            + bact_id + '.consensus.fasta '
            + ini_path
        )
        print('9: ' + consensus_cmd)
        subprocess.check_output(consensus_cmd, cwd=base_path, shell=True)

    if selected(10):
        gfa_cmd = 'get_consensus_gfa.py ' + base_path + ' ' + bact_id + ' ' + bact_id + '.consensus.fasta'
        print('10: ' + gfa_cmd)
        subprocess.check_output(gfa_cmd, cwd=base_path, shell=True)

    return 0


if __name__ == '__main__':
    sys.exit(main())