1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
|
import sys, os, shutil, glob, gzip
# Shell redirection appended to every simka command line to silence its output.
suffix = " > /dev/null 2>&1"
# Results directory, relative to the directory this script lives in.
dir = "__results__"

# Run from the script's own directory so the relative paths used below resolve.
os.chdir(os.path.dirname(os.path.realpath(__file__)))
def clear():
    """Remove leftovers of a previous run and create an empty results directory.

    Deletes ``temp_output`` and ``__results__`` when they exist, then creates
    the directory named by the module-level ``dir``.
    """
    for stale_dir in ("temp_output", "__results__"):
        if os.path.exists(stale_dir):
            shutil.rmtree(stale_dir)
    os.mkdir(dir)
def decompress_simka_results(dir):
    """Decompress every ``*.csv.gz`` file found directly in ``dir``.

    Each ``name.csv.gz`` is expanded to ``name.csv`` in the same directory and
    the compressed file is then deleted. A directory with no matching file is
    left untouched.

    Args:
        dir: Path of the directory to scan (sub-directories are not visited).
    """
    for filename_gz in glob.glob(os.path.join(dir, "*.csv.gz")):
        # Dropping the trailing ".gz" gives the destination path.
        filename_csv = filename_gz[:-3]
        # Both handles are context-managed so neither leaks if the copy fails.
        # copyfileobj streams in chunks rather than loading the whole file.
        with gzip.open(filename_gz, "rb") as compressed:
            with open(filename_csv, "wb") as plain:
                shutil.copyfileobj(compressed, plain)
        # Delete the archive only once both files have been closed.
        os.remove(filename_gz)
def __test_matrices(simka_vs_truth, result_dir, truth_dir):
    """Compare the ``.csv`` matrices in ``result_dir`` with those in ``truth_dir``.

    Compressed results in ``result_dir`` are decompressed first. Files are
    paired by base name and their contents compared as text. A result file
    with no same-named file in ``truth_dir`` is skipped, not reported.

    Args:
        simka_vs_truth: True when ``truth_dir`` already holds plain ``.csv``
            files. False when it holds the output of another simka run, whose
            ``.csv.gz`` files are decompressed before the comparison.
        result_dir: Directory holding the simka output under test.
        truth_dir: Directory holding the matrices to compare against.

    Returns:
        True if every compared pair is identical, False otherwise. One
        ``TEST ERROR`` line is printed per differing matrix.

    Raises:
        SystemExit: With status 1 when ``result_dir`` contains no ``.csv`` file.
    """
    decompress_simka_results(result_dir)
    result_filenames = sorted(glob.glob(os.path.join(result_dir, "*.csv")))
    if not result_filenames:
        print("Error: no results")
        # sys.exit, not the bare exit() helper: the latter is injected by the
        # site module for interactive use and is not guaranteed to exist.
        sys.exit(1)

    if not simka_vs_truth:
        # simka vs simka: the reference directory is itself raw simka output.
        decompress_simka_results(truth_dir)

    # Index reference files by base name for direct lookup.
    truth_by_name = {}
    for truth_filename in glob.glob(os.path.join(truth_dir, "*.csv")):
        truth_by_name[os.path.basename(truth_filename)] = truth_filename

    ok = True
    for result_filename in result_filenames:
        distance_name = os.path.basename(result_filename)
        truth_filename = truth_by_name.get(distance_name)
        if truth_filename is None:
            continue

        with open(result_filename, "r") as res_file:
            res_str = res_file.read()
        with open(truth_filename, "r") as truth_file:
            truth_str = truth_file.read()

        if res_str != truth_str:
            print("\t- TEST ERROR: " + distance_name)
            ok = False

    return ok
def test_dists(dir):
    """Check the run named ``dir`` against its reference matrices.

    Compares ``__results__/<dir>`` with ``truth/<dir>``. Prints ``OK`` on
    success; on any mismatch prints ``FAILED`` and exits with status 1.

    Args:
        dir: Name of the result sub-directory, also used under ``truth/``.
    """
    matches = __test_matrices(True, "__results__/" + dir, "truth/" + dir)
    if not matches:
        print("\tFAILED")
        sys.exit(1)
    print("\tOK")
def test_parallelization():
    """Check that the two resource-configuration runs produced equal matrices.

    Compares ``__results__/results_resources1`` with
    ``__results__/results_resources2``. Prints ``OK`` on success; on any
    mismatch prints ``FAILED`` and exits with status 1.
    """
    first_run = "__results__/results_resources1"
    second_run = "__results__/results_resources2"
    if not __test_matrices(False, first_run, second_run):
        print("\tFAILED")
        sys.exit(1)
    print("\tOK")
# ----------------------------------------------------------------
# Reference comparisons: one single-core simka run per
# (k-mer size, minimum abundance) pair, each checked by test_dists().
# ----------------------------------------------------------------
for kmer_size, abundance_min in ((31, 0), (21, 0), (31, 2), (21, 2)):
    run_name = "results_k{}_t{}".format(kmer_size, abundance_min)
    clear()
    print("TESTING k={} t={}".format(kmer_size, abundance_min), flush=True)
    command = (
        "simka -in simka_input.txt -out ./__results__/" + run_name
        + " -out-tmp ./temp_output -simple-dist -complex-dist"
        + " -kmer-size {} -abundance-min {}".format(kmer_size, abundance_min)
        + " -verbose 0 -nb-cores 1"
    )
    print(command, flush=True)
    os.system(command + suffix)
    test_dists(run_name)

# ----------------------------------------------------------------
# Parallelization: the same input is processed under two different
# core/memory budgets and the two outputs are compared to each other.
# ----------------------------------------------------------------
clear()
print("TESTING parallelization", flush=True)
for run_name, resources in (
    ("results_resources1", "-nb-cores 20 -max-memory 4000"),
    ("results_resources2", "-nb-cores 2 -max-memory 2000"),
):
    command = (
        "simka -in simka_input.txt -out ./__results__/" + run_name
        + " -out-tmp ./temp_output -simple-dist -complex-dist"
        + " -kmer-size 21 -abundance-min 0 " + resources + " -verbose 0"
    )
    os.system(command + suffix)
test_parallelization()

# ----------------------------------------------------------------
# Final cleanup of the files produced by the last run.
# ----------------------------------------------------------------
clear()
|