File: benchmark_fasta_random_access.sh

package info (click to toggle)
pyfastx 2.2.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,336 kB
  • sloc: ansic: 4,820; python: 1,817; sh: 505; perl: 66; makefile: 31
file content (99 lines) | stat: -rw-r--r-- 2,116 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/bin/bash
#
# Benchmark random access to FASTA files with several tools.
# Usage: benchmark_fasta_random_access.sh REPEATS FASTA [FASTA...]

# Scratch file that the time command writes each measurement into
tempfile=time_mem.tmp

# Running total of the memory figure (time's %M field), one slot per program
memorys=()

# Running total of the elapsed time (time's %e field), one slot per program
times=()

# Index of the most recently measured program; -1 means "none yet"
num=-1

# Number of repeat tests (first command-line argument)
repeats=$1

# All command-line arguments, one array element per argument.
# Index 0 is the repeat count and the input FASTA files start at index 1,
# so ${gfiles[@]:1} yields exactly the files. A plain `gfiles=$@` would
# flatten everything into one string, and the same slice would then cut
# off a single character instead of the first argument (which breaks as
# soon as the repeat count has two digits).
gfiles=("$@")

#######################################
# Run one command under the time utility and add its elapsed time and
# memory figure to the running totals for the current program slot.
# Globals:
#   tempfile (read)    - file the time utility writes its report to
#   num      (written) - incremented to select the slot for this program
#   times    (written) - times[num] += elapsed seconds (%e)
#   memorys  (written) - memorys[num] += memory figure (%M)
#   TIME_BIN (read)    - optional path of a GNU-compatible time binary;
#                        defaults to /usr/bin/time
# Arguments:
#   $1 - the complete command line as ONE string; it is word-split
#        before execution
# Returns:
#   1 if no "<elapsed> <memory>" report could be read, otherwise the
#   status of the final bc pipeline
#######################################
measure_memory_time(){
	local time_bin=${TIME_BIN:-/usr/bin/time}
	local line last="" elapsed="" peak=""

	# $1 is intentionally unquoted: callers hand over the whole command
	# line as a single string and rely on the shell to split it.
	# shellcheck disable=SC2086
	"$time_bin" -f "%e %M" -o "$tempfile" $1 > /dev/null 2>&1

	num=$((num + 1))

	# First measurement for this slot: start both totals at zero.
	# (A string test is used here; `[ ... > 0 ]` would be a redirection
	# that creates a file called "0".)
	if [[ -z "${memorys[$num]:-}" ]]; then
		memorys[$num]=0
		times[$num]=0
	fi

	# Keep only the last non-empty line of the report. When the measured
	# command fails, GNU time writes an extra "Command exited with
	# non-zero status N" line before the formatted figures.
	if [[ -r "$tempfile" ]]; then
		while IFS= read -r line || [[ -n "$line" ]]; do
			if [[ -n "$line" ]]; then
				last=$line
			fi
		done < "$tempfile"

		#clear temp file
		rm -f -- "$tempfile"
	fi

	read -r elapsed peak _ <<< "$last"

	if [[ -z "$elapsed" || -z "$peak" ]]; then
		printf 'warning: no timing data recorded for: %s\n' "$1" >&2
		return 1
	fi

	# bc is used because the elapsed time is a decimal number
	times[$num]=$(echo "${elapsed}+${times[$num]}" | bc)
	memorys[$num]=$(echo "${peak}+${memorys[$num]}" | bc)
}

#print header
# Each program contributes two result columns (mean memory, mean elapsed time)
printf "genome\tsize\tcount\tbioperl\t\tbiopython\t\tpyfaidx\t\tpyfasta\t\tpysam\t\tsamtools\t\tseqkit\t\tpyfastx\t\tpyfastx_gzip\t\n"

# The means below divide by the repeat count, so refuse anything that is
# not a positive integer as soon as there is at least one file to process.
if (( $# >= 2 )) && ! [[ $repeats =~ ^[1-9][0-9]*$ ]]; then
	printf 'error: repeat count must be a positive integer, got "%s"\n' "$repeats" >&2
	exit 1
fi

# Iterate over the positional parameters directly: everything after the
# repeat count ($1) is an input file. This keeps each path as one word and
# does not depend on how gfiles was assembled.
for gfile in "${@:2}"; do
	# Fresh totals for every genome
	memorys=()
	times=()

	# Genome label: file name without directory and without its last extension
	filename=$(basename -- "$gfile")
	filename="${filename%.*}"

	#get genome information
	# The helper's output is split on whitespace (no globbing): the first
	# field is used as the genome size, the second as the sequence count.
	array=()
	read -r -d '' -a array < <(python3 get_fasta_info.py "$gfile")

	#genome size
	gsize=${array[0]}

	#sequence counts in genome
	seqcounts=${array[1]}

	for ((i = 1; i <= repeats; i++)); do
		# measure_memory_time advances num once per program, so restart
		# the slot counter at the beginning of every round.
		num=-1

		# NOTE: each command is passed as one string and word-split by
		# measure_memory_time, so paths containing whitespace are not
		# supported here.

		#bioperl
		measure_memory_time "perl bioperl_fasta_random_access.pl $gfile.list $gfile"

		#biopython
		measure_memory_time "python3 biopython_fasta_random_access.py $gfile.list $gfile"

		#pyfaidx
		measure_memory_time "python3 pyfaidx_fasta_random_access.py $gfile.list $gfile"

		#pyfasta
		measure_memory_time "python3 pyfasta_fasta_random_access.py $gfile.list $gfile"

		#pysam
		measure_memory_time "python3 pysam_fasta_random_access.py $gfile.list $gfile"

		#samtools
		measure_memory_time "samtools faidx -r $gfile.list $gfile"

		#seqkit
		measure_memory_time "seqkit faidx --infile-list $gfile.list $gfile"

		#pyfastx
		measure_memory_time "python3 pyfastx_fasta_random_access.py $gfile.list $gfile"

		#pyfastx gzip
		measure_memory_time "python3 pyfastx_fasta_random_access.py $gfile.list $gfile.gz"
	done

	#print result
	printf "%s\t%s\t%s" "$filename" "$gsize" "$seqcounts"
	for ((i = 0; i <= num; i++)); do
		# Mean memory stays an integer (bc's default scale is 0)
		mm=$(echo "${memorys[$i]:-0}/$repeats" | bc)
		# Mean elapsed time keeps two decimals; with the default scale
		# every sub-second average would be reported as 0.
		et=$(echo "scale=2; ${times[$i]:-0}/$repeats" | bc)
		# LC_ALL=C so %.2f always accepts and prints a '.' decimal point
		LC_ALL=C printf "\t%d\t%.2f" "$mm" "$et"
	done
	printf "\n"
done