1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
|
#!/usr/bin/env python3
import unittest
from subprocess import PIPE, run
import os
import shutil
import sys
import argparse
# Append the directory one level above this file to the import path.
# Extending sys.path like this is not best practice, but it is the simplest
# approach found for these tests.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# TODO: Species specific acquired genes only pheno results, not spec specific?

# Names of the individual test runs; each one gets its own output folder.
test_names = ["test1", "test2", "test3", "test4"]

# Input file(s) for every test run, in the same order as ``test_names``.
# Paired-end reads are given as two space separated paths.
test_data = dict(zip(test_names, (
    # Test published acquired resistance
    "data/test_isolate_01.fa",
    "data/test_isolate_01_1.fq data/test_isolate_01_2.fq",
    # Test published point mut resistance
    "data/test_isolate_05.fa",
    "data/test_isolate_05_1.fq data/test_isolate_05_2.fq",
)))

# Scratch folder (relative to the test folder) that receives all output.
run_test_dir = "running_test"

# Folder containing this test script, with symlinks resolved.
working_dir = os.path.dirname(os.path.realpath(__file__))
class ResFinderRunTest(unittest.TestCase):
    """Functional tests that run ``run_resfinder.py`` as a subprocess.

    Every test runs the command line tool on one of the inputs listed in
    ``test_data`` and checks selected lines of the output files written to
    a per-test folder below ``run_test_dir``.

    Database and program locations are read from the module level ``args``
    object, which the ``__main__`` section assigns before the tests start.
    """

    @staticmethod
    def _remove_run_dir():
        """Delete the scratch folder, falling back to ``rm -r`` on OSError."""
        try:
            shutil.rmtree(run_test_dir)
        # The following error has occurred using VirtualBox under Windows 10
        # with ResFinder installed in a shared folder:
        # OSError [Errno: 26] Text file busy: 'tmp'
        except OSError:
            run(["rm", "-r", run_test_dir])

    @staticmethod
    def _line(path, line_no):
        """Return line ``line_no`` (1-based) of ``path``.

        An empty string is returned when the file has fewer lines, which is
        what repeated ``readline()`` calls would yield as well.
        """
        with open(path, "r") as fh:
            for current, line in enumerate(fh, start=1):
                if current == line_no:
                    return line
        return ""

    @staticmethod
    def _first_line_starting_with(path, prefix):
        """Return the first line of ``path`` starting with ``prefix``.

        Returns an empty string when no line matches, so that a following
        assertion fails instead of silently checking a value left over from
        a previously inspected file.
        """
        with open(path, "r") as fh:
            for line in fh:
                if line.startswith(prefix):
                    return line
        return ""

    def _run_resfinder(self, *options):
        """Run ``run_resfinder.py`` with ``options`` and return the result.

        The command is passed as an argument list (no shell), so paths that
        contain spaces or shell metacharacters are handed over unchanged.

        Raises:
            subprocess.CalledProcessError: if the exit status is non-zero.
        """
        return run(["run_resfinder.py", *options], stdout=PIPE, stderr=PIPE,
                   check=True)

    def _check_acquired_output(self, out_dir, hit_name):
        """Check the ResFinder output files found in ``out_dir``.

        ``hit_name`` is the text expected in the lines describing the hit;
        the reference gene header is expected to contain "blaB-2_AF189300".
        """
        fsa_hit = out_dir + "/ResFinder_Hit_in_genome_seq.fsa"
        fsa_res = out_dir + "/ResFinder_Resistance_gene_seq.fsa"
        res_table = out_dir + "/ResFinder_results_table.txt"
        res_tab = out_dir + "/ResFinder_results_tab.txt"
        results = out_dir + "/ResFinder_results.txt"

        self.assertIn(hit_name, self._line(fsa_hit, 1))
        self.assertIn("blaB-2_AF189300", self._line(fsa_res, 1))
        self.assertIn(hit_name,
                      self._first_line_starting_with(res_table, "blaB-2"))
        # Line 1 of the tab file is skipped; the hit is checked on line 2.
        self.assertIn(hit_name, self._line(res_tab, 2))
        # The first five lines of the results file are skipped.
        self.assertIn(hit_name, self._line(results, 6))

    def _check_point_output(self, out_dir):
        """Check the PointFinder output files found in ``out_dir``."""
        pf_res = out_dir + "/PointFinder_results.txt"
        pf_table = out_dir + "/PointFinder_table.txt"

        # Line 1 of the results file is skipped; the hit is on line 2.
        check_result = self._line(pf_res, 2)
        self.assertIn("gyrA", check_result)
        self.assertIn("p.S83A", check_result)

        self.assertTrue(
            self._first_line_starting_with(pf_table, "gyrA p.S83A"))

    @classmethod
    def setUpClass(cls):
        """Resolve all paths and create an empty scratch folder."""
        # Set absolute path for database folders and external programs.
        # This has to happen before the working directory is changed, as
        # relative paths are resolved against the current directory.
        cls.db_path_res = os.path.abspath(args.db_path_res)
        cls.blastPath = os.path.abspath(args.blast_path)
        cls.kmaPath = os.path.abspath(args.kma_path)
        cls.db_path_point = os.path.abspath(args.db_path_point)
        cls.dir_res = os.path.join(os.path.dirname(__file__), '../', )
        cls.dir_res = os.path.abspath(cls.dir_res)

        # Change working dir to test dir
        os.chdir(working_dir)

        # Delete "running_test" folder from previous tests if still exists.
        # Done after the chdir so the folder inside the test dir is the one
        # that gets removed, not a same-named folder in the caller's cwd.
        if os.path.isdir(run_test_dir):
            cls._remove_run_dir()

        # Does not allow running two tests in parallel
        os.makedirs(run_test_dir, exist_ok=False)

    @classmethod
    def tearDownClass(cls):
        """Remove the scratch folder and everything the tests wrote to it."""
        cls._remove_run_dir()

    def test_on_data_with_just_acquired_resgene_using_blast(self):
        # Maria has an E. coli isolate, with unknown resistance.
        # At first, she just wants to know which acquired resistance genes are
        # found in the genome.
        # She therefore runs resfinder cmd line.

        # First Maria checks out the documentation.
        procs = run(["run_resfinder.py", "-h"], stdout=PIPE, check=True)
        output = procs.stdout.decode()
        self.assertIn("--help", output)

        # Maria goes on to run ResFinder for acquired genes with her E. coli
        # isolate.

        # First she creates a few directories to store her output.
        test1_dir = run_test_dir + "/" + test_names[0]
        os.makedirs(test1_dir)

        # Then she runs run_resfinder with her first isolate.
        self._run_resfinder(
            "-ifa", *test_data[test_names[0]].split(),
            "-o", test1_dir,
            "-s", "Escherichia coli",
            "--min_cov", "0.6",
            "-t", "0.8",
            "--acquired",
            "--db_path_res", self.db_path_res,
            "--blastPath", self.blastPath)

        self._check_acquired_output(test1_dir, "blaB-2_1_AF189300")

    def test_on_data_with_just_acquired_resgene_using_kma(self):
        # Maria has another E. coli isolate, with unknown resistance.
        # This time she does not have an assembly, but only raw data.
        # She therefore runs resfinder cmd line using KMA.

        # First she creates a few directories to store her output.
        test2_dir = run_test_dir + "/" + test_names[1]
        os.makedirs(test2_dir, exist_ok=False)

        # Then she runs run_resfinder with her first isolate.
        self._run_resfinder(
            "-ifq", *test_data[test_names[1]].split(),
            "-o", test2_dir,
            "-s", "Escherichia coli",
            "--min_cov", "0.6",
            "-t", "0.8",
            "--acquired",
            "--db_path_res", self.db_path_res,
            "--kmaPath", self.kmaPath)

        self._check_acquired_output(test2_dir, "blaB-2")

    def test_on_data_with_just_point_mut_using_blast(self):
        # Maria also wants to check her assembled E. coli isolate for
        # resistance caused by point mutations.

        # First she creates a few directories to store her output.
        test3_dir = run_test_dir + "/" + test_names[2]
        os.makedirs(test3_dir)

        # Then she runs run_resfinder with her first isolate.
        self._run_resfinder(
            "-ifa", *test_data[test_names[2]].split(),
            "-o", test3_dir,
            "-s", "Escherichia coli",
            "--min_cov", "0.6",
            "--threshold", "0.8",
            "--point",
            "--db_path_point", self.db_path_point,
            "--db_path_res", self.db_path_res,
            "--blastPath", self.blastPath)

        self._check_point_output(test3_dir)

    def test_on_data_with_just_point_mut_using_kma(self):
        # Maria has another E. coli isolate, with unknown resistance.
        # This time she does not have an assembly, but only raw data.
        # She therefore runs resfinder cmd line using KMA.

        # First she creates a few directories to store her output.
        test4_dir = run_test_dir + "/" + test_names[3]
        os.makedirs(test4_dir, exist_ok=False)

        # Then she runs run_resfinder with her first isolate.
        self._run_resfinder(
            "-ifq", *test_data[test_names[3]].split(),
            "-o", test4_dir,
            "-s", "Escherichia coli",
            "--min_cov", "0.6",
            "--threshold", "0.8",
            "--point",
            "--db_path_point", self.db_path_point,
            "--db_path_res", self.db_path_res,
            "--kmaPath", self.kmaPath)

        self._check_point_output(test4_dir)
def parse_args():
    """Parse the ResFinder test options and separate them from the rest.

    Only the options defined here are consumed; every other command line
    argument is left for ``unittest.main()``. The built-in ``-h/--help``
    flag is disabled (``add_help=False``) and help for these options is
    exposed as ``-res_help/--resfinder_help`` instead.

    The options are stored in a fresh ``argparse.Namespace`` rather than
    being written as attributes onto the ``unittest`` module.

    Returns:
        tuple: ``(namespace, argv)`` where ``namespace`` has the attributes
        ``db_path_res``, ``blast_path``, ``kma_path`` and ``db_path_point``
        and ``argv`` is the program name followed by all arguments that
        were not recognised here.
    """
    parser = argparse.ArgumentParser(add_help=False, allow_abbrev=False)

    group = parser.add_argument_group("Options")
    group.add_argument('-res_help', "--resfinder_help",
                       action="help")
    group.add_argument("-db_res", "--db_path_res",
                       help="Path to the databases for ResFinder",
                       default="./db_resfinder")
    group.add_argument("-b", "--blastPath",
                       dest="blast_path",
                       help="Path to blastn",
                       default="./cge/blastn")
    group.add_argument("-k", "--kmaPath",
                       dest="kma_path",
                       help="Path to KMA",
                       default="./cge/kma/kma")
    group.add_argument("-db_point", "--db_path_point",
                       help="Path to the databases for PointFinder",
                       default="./db_pointfinder")

    # parse_known_args() reads sys.argv[1:] and hands back what it did not
    # recognise, so those arguments can be forwarded unchanged.
    ns, remaining = parser.parse_known_args()
    return ns, sys.argv[:1] + remaining
if __name__ == "__main__":
    # Our own options have to be parsed before unittest looks at the
    # command line; ``args`` is the module level object the tests read.
    args, argv = parse_args()
    # Replace sys.argv in place with the cleaned argument list so that
    # unittest.main() only sees the arguments parse_args() did not consume.
    sys.argv[:] = argv
    unittest.main()
|