1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
|
import json
import os
import sys
import subprocess
# Root directory holding NCTC.json and one sub-directory per bacterium.
base_dir = '/data/pacbio_assembly/pb_data/NCTC/'
# Fixed prefix of every download command; the remote file path is appended
# directly to the trailing "vol1/" and the destination directory follows.
cmd_base = 'ascp -QT -l 1000m -i /data/pacbio_assembly/pb_data/asperaweb_id_dsa.openssh era-fasp@fasp.ega.ebi.ac.uk:vol1/'


def resolve_metadata_path(argv, data_dir=base_dir):
    """Return the JSON metadata path selected by the command line.

    The optional second argument names an alternative JSON file; without it
    the default ``NCTC.json`` inside *data_dir* is used.
    """
    if len(argv) > 2:
        return argv[2]
    return data_dir + 'NCTC.json'


def load_bact_dict(json_path):
    """Parse the JSON metadata file at *json_path* and return its content."""
    with open(json_path) as handle:
        return json.load(handle)


def species_slug(entry):
    """Return the first 'Species' value of *entry* with whitespace runs
    replaced by single underscores."""
    return "_".join(entry['Species'][0].split())


def build_download_command(file_path, dest_dir):
    """Return the ascp argument list that fetches *file_path* into *dest_dir*."""
    args = cmd_base.split()
    # The remote path is glued onto the "...:vol1/" source prefix.
    args[-1] += file_path
    args.append(dest_dir)
    return args


def build_dextract_command(dest_fasta_name, bax_paths):
    """Return the dextract argument list for *bax_paths*.

    The output name is attached to ``-o`` without a separating space, exactly
    as the command was assembled before.
    """
    return ['dextract', '-o' + dest_fasta_name] + list(bax_paths)


def run_download(cmd):
    """Run one download command; report failures but keep going."""
    try:
        status = subprocess.call(cmd)
    except OSError as err:
        print('could not start ' + cmd[0] + ': ' + str(err))
        return
    if status != 0:
        print(cmd[0] + ' exited with status ' + str(status))


def remove_files(paths):
    """Delete every file in *paths*, reporting (not raising) failures."""
    for path in paths:
        try:
            os.remove(path)
        except OSError as err:
            print('could not remove ' + path + ': ' + str(err))


def main(argv, data_dir=base_dir):
    """Download all files of one bacterium and run dextract on them.

    argv[1] is the bacterium key in the metadata (e.g. NCTC7972); the
    optional argv[2] is the path of an alternative metadata JSON file.
    Files are downloaded into ``data_dir + <key> + '/'``.  After a successful
    dextract run the .bax.h5 and .quiva files in that directory are deleted.
    """
    if len(argv) < 2:
        sys.exit('usage: python <script> BACTERIUM_ID [METADATA_JSON]')
    bacterium_of_interest = argv[1]

    # Previously argv[2] replaced the parsed dict with a plain string, which
    # made the lookup below fail; it is now loaded as a JSON file.
    bact_dict = load_bact_dict(resolve_metadata_path(argv, data_dir))
    entry = bact_dict[bacterium_of_interest]
    bact_name = species_slug(entry)

    dest_dir = data_dir + bacterium_of_interest + '/'
    os.makedirs(dest_dir, exist_ok=True)

    # 'file_paths' maps each run to its list of remote file paths.
    for file_list in entry['file_paths'].values():
        for file_path in file_list:
            cmd = build_download_command(file_path, dest_dir)
            print(' '.join(cmd))
            run_download(cmd)

    dest_fasta_name = dest_dir + bact_name
    # Sorted so the dextract input order does not depend on listdir order.
    bax_paths = [dest_dir + name for name in sorted(os.listdir(dest_dir))
                 if name.endswith('.bax.h5')]
    if not bax_paths:
        print('no .bax.h5 files found in ' + dest_dir + '; skipping dextract')
        return

    dextract_cmd = build_dextract_command(dest_fasta_name, bax_paths)
    print(' '.join(dextract_cmd))
    try:
        subprocess.check_output(dextract_cmd)
    except (subprocess.CalledProcessError, OSError) as err:
        # Keep the downloaded inputs when extraction did not succeed.
        print('error: ' + str(err))
        return

    print('dextract done. deleting .bax.h5 files')
    remove_files(bax_paths)
    print('removing .quiva files')
    remove_files([dest_dir + name for name in os.listdir(dest_dir)
                  if name.endswith('.quiva')])


if __name__ == '__main__':
    main(sys.argv)
|