File: simple_test.py

package info (click to toggle)
simka 1.5.3-10
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 2,200 kB
  • sloc: cpp: 5,321; python: 672; sh: 386; makefile: 26
file content (135 lines) | stat: -rwxr-xr-x 4,508 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135

import sys, os, shutil, glob, gzip
# Work from the directory that contains this script so that the relative
# paths used further down (simka_input.txt, truth/, __results__/) resolve
# no matter where the script is launched from.
os.chdir(os.path.dirname(os.path.realpath(__file__)))

# Appended to every simka command line to discard its stdout and stderr.
suffix = " > /dev/null 2>&1"
# Directory (re)created by clear(); the simka runs write their output under it.
dir = "__results__"

def clear():
	"""Reset the working directories used by a test run.

	Deletes the ``temp_output`` and ``__results__`` trees when they exist,
	then creates the directory named by the module-level ``dir``.
	"""
	for stale_dir in ("temp_output", "__results__"):
		if os.path.exists(stale_dir):
			shutil.rmtree(stale_dir)
	os.mkdir(dir)


def decompress_simka_results(dir):
	"""Expand every ``*.csv.gz`` file located directly inside *dir*.

	Each archive ``<name>.csv.gz`` is decompressed to ``<name>.csv`` in the
	same directory, after which the archive itself is deleted. Files that do
	not match ``*.csv.gz`` are left untouched.

	Args:
		dir: Path of the directory to scan (sub-directories are not visited).
	"""
	for filename_gz in glob.glob(os.path.join(dir, '*.csv.gz')):
		filename_csv = filename_gz[:-len(".gz")]
		# Both handles are owned by the ``with`` statement so neither leaks if
		# the copy fails; copyfileobj streams the data chunk by chunk rather
		# than loading the whole matrix in memory.
		with gzip.open(filename_gz, 'rb') as compressed, open(filename_csv, "wb") as plain:
			shutil.copyfileobj(compressed, plain)
		# Remove the archive only after it has been closed and fully expanded.
		os.remove(filename_gz)

def __test_matrices(simka_vs_truth, result_dir, truth_dir):
	"""Compare the CSV matrices in *result_dir* with those in *truth_dir*.

	The ``*.csv.gz`` files of *result_dir* are decompressed first. Every
	resulting ``*.csv`` file is then compared, as text, with the file of the
	same name in *truth_dir*. Result files that have no same-named reference
	are skipped.

	Args:
		simka_vs_truth: When true, *truth_dir* is read as-is. When false
			(simka vs simka), the ``*.csv.gz`` files of *truth_dir* are
			decompressed before the comparison.
		result_dir: Directory holding the matrices produced by the run.
		truth_dir: Directory holding the reference matrices.

	Returns:
		True when at least one pair of files was compared and all compared
		pairs are identical; False otherwise. A "TEST ERROR" line is printed
		for each differing matrix.

	Raises:
		SystemExit: With status 1 when *result_dir* contains no CSV result.
	"""
	decompress_simka_results(result_dir)
	result_filenames = sorted(glob.glob(os.path.join(result_dir, '*.csv')))
	if not result_filenames:
		print("Error: no results")
		# sys.exit rather than the interactive helper exit() injected by the
		# site module, which is not guaranteed to be available.
		sys.exit(1)

	if not simka_vs_truth:
		# simka vs simka: the reference is itself a fresh run, still gzipped.
		decompress_simka_results(truth_dir)

	# Index the references by file name so each result is matched with a
	# single lookup instead of a scan over every reference file.
	truth_by_name = {
		os.path.basename(path): path
		for path in glob.glob(os.path.join(truth_dir, '*.csv'))
	}

	ok = True
	compared = 0
	for result_filename in result_filenames:
		distanceName = os.path.basename(result_filename)
		truth_filename = truth_by_name.get(distanceName)
		if truth_filename is None:
			# No reference for this distance: nothing to compare against.
			continue

		with open(result_filename, "r") as res_file:
			res_str = res_file.read()
		with open(truth_filename, "r") as truth_file:
			truth_str = truth_file.read()

		compared += 1
		if res_str != truth_str:
			print("\t- TEST ERROR:    " + distanceName)
			ok = False

	if compared == 0:
		# Without this check a missing or empty reference directory would
		# make the test pass without having verified anything.
		print("Error: no reference matrix to compare with in " + truth_dir)
		ok = False

	return ok


def test_dists(dir):
	"""Check the matrices of run *dir* against the stored reference.

	Compares ``__results__/<dir>`` with ``truth/<dir>``. Prints "OK" on
	success; prints "FAILED" and exits with status 1 otherwise.

	Args:
		dir: Name of the run sub-directory, shared by both trees.
	"""
	result_dir = "__results__/" + dir
	truth_dir = "truth/" + dir
	if not __test_matrices(True, result_dir, truth_dir):
		print("\tFAILED")
		sys.exit(1)
	print("\tOK")


def test_parallelization():
	"""Check that the two resource configurations give the same matrices.

	Compares ``__results__/results_resources1`` with
	``__results__/results_resources2`` in simka-vs-simka mode. Prints "OK"
	on success; prints "FAILED" and exits with status 1 otherwise.
	"""
	identical = __test_matrices(
		False,
		"__results__/results_resources1",
		"__results__/results_resources2",
	)
	print("\tOK" if identical else "\tFAILED")
	if not identical:
		sys.exit(1)


#----------------------------------------------------------------
#----------------------------------------------------------------
#----------------------------------------------------------------


# Single-core runs: one per (k-mer size, minimum abundance) pair, each
# checked against the reference matrices stored under truth/.
for kmer_size, abundance_min in ((31, 0), (21, 0), (31, 2), (21, 2)):
	run_name = "results_k{}_t{}".format(kmer_size, abundance_min)
	clear()
	print("TESTING k={} t={}".format(kmer_size, abundance_min), flush=True)
	command = (
		"simka -in simka_input.txt -out ./__results__/" + run_name
		+ " -out-tmp ./temp_output -simple-dist -complex-dist"
		+ " -kmer-size {} -abundance-min {}".format(kmer_size, abundance_min)
		+ " -verbose 0 -nb-cores 1"
	)
	print(command, flush=True)
	os.system(command + suffix)
	test_dists(run_name)

# Parallelization: the same input is processed with two different core and
# memory settings, and the two outputs are compared with each other.
clear()
print("TESTING parallelization", flush=True)
for run_name, nb_cores, max_memory in (
	("results_resources1", 20, 4000),
	("results_resources2", 2, 2000),
):
	command = (
		"simka -in simka_input.txt -out ./__results__/" + run_name
		+ " -out-tmp ./temp_output -simple-dist -complex-dist"
		+ " -kmer-size 21 -abundance-min 0"
		+ " -nb-cores {} -max-memory {}".format(nb_cores, max_memory)
		+ "  -verbose 0"
	)
	os.system(command + suffix)
test_parallelization()

#----------------------------------------------------------------
#----------------------------------------------------------------
#----------------------------------------------------------------
# Leave no output from the last run behind.
clear()