File: perf_tests2.sh

package info (click to toggle)
libedlib 1.2.7-7
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 14,532 kB
  • sloc: cpp: 2,002; sh: 304; python: 131; makefile: 89; ansic: 7
file content (142 lines) | stat: -rwxr-xr-x 4,610 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/env bash

# Runs performance tests.
# Here we run Edlib with different sizes of queries, with special focus on short queries,
# since that is where Edlib needs to improve.

# Aligner executable that is benchmarked.
# May be overridden from the environment, e.g.
#   EDLIB=/usr/bin/edlib-aligner ./perf_tests2.sh
# When unset or empty, the historical default below is used.
EDLIB=${EDLIB:-~/git/edlib/build/bin/edlib-aligner}

# Directory under which the test data is looked up (E_coli_DH1/ and
# Chromosome_2890043_3890042_0/ are referenced further down).
# May be overridden from the environment; defaults to the current directory.
TEST_DATA=${TEST_DATA:-.}

########################## TEST RUNNERS ########################

# SGR escape sequences. They are stored with a literal `\e`, so they only take
# effect when printed with something that interprets backslash escapes
# (the script uses `echo -e`).
F_YELLOW='\e[93m'
F_NONE='\e[0m'
F_UNDERLINE='\e[4m'
F_NO_UNDERLINE='\e[24m'
F_BOLD='\e[1m'
# SGR 22 ("normal intensity") is the code that switches bold off. SGR 21, used
# here previously, is "doubly underlined" in ECMA-48 and is rendered as such by
# many terminals, which left the text bold and underlined it as well.
F_NO_BOLD='\e[22m'

# Prints its first argument wrapped in the underline on/off sequences,
# followed by a newline. Backslash escapes in the argument are interpreted
# because the text is emitted with `echo -e`. Further arguments are ignored.
# Globals read: F_UNDERLINE, F_NO_UNDERLINE.
echo_underlined() {
    local text=$1
    local decorated="${F_UNDERLINE}${text}${F_NO_UNDERLINE}"
    echo -e "$decorated"
}

# Prints its first argument wrapped in the bold on/off sequences, followed by
# a newline. Backslash escapes in the argument (e.g. a leading "\n") are
# interpreted because the text is emitted with `echo -e`.
# Only $1 is used; any further arguments are ignored.
# Globals read: F_BOLD, F_NO_BOLD.
echo_bolded() {
    local text=$1
    local decorated="${F_BOLD}${text}${F_NO_BOLD}"
    echo -e "$decorated"
}

# Runs the aligner several times without the path options and prints one
# summary line with the average reported search time and the score.
#
# Globals:
#   EDLIB             (read)    aligner executable to run.
#   F_YELLOW, F_NONE  (read)    escape sequences wrapped around the time.
#   score             (written) second space-separated field of the "#0:" line
#                               of the last run. Deliberately NOT local:
#                               edlib_path prints this variable in its summary.
# Arguments:
#   $1  mode, passed to the aligner as -m.
#   $2  query file.
#   $3  target file.
#   $4  how many times the aligner is run; the reported times are averaged.
#   $5  value passed to the aligner as -k.
#   $6  value passed to the aligner as -r (number of repetitions), default 1.
# Outputs:
#   Summary line on stdout, diagnostics on stderr.
# Returns:
#   0 on success; 1 if $4 is not a positive integer or a run did not report a
#   parsable "Cpu time of searching" value.
function edlib {
    local mode=$1
    local query=$2
    local target=$3
    local num_tests=$4
    local k=$5
    local r=${6-1}  # Number of repetitions, default is 1.

    # Zero runs would mean dividing by zero when averaging.
    if ! [[ $num_tests =~ ^[1-9][0-9]*$ ]]; then
        echo "edlib: number of tests must be a positive integer, got '$num_tests'" >&2
        return 1
    fi

    local number_re='^[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?$'
    local -a times=()
    local i output cpu_time avg_time
    for ((i = 0; i < num_tests; i++)); do
        # Pause between runs, kept from the original script
        # (presumably to let the machine settle -- confirm).
        sleep 0.25
        output=$("$EDLIB" -m "$mode" -k "$k" -r "$r" "$query" "$target")
        # Fifth field of "Cpu time of searching: <seconds>".
        cpu_time=$(grep "Cpu time of searching" <<< "$output" | cut -d " " -f5)
        if ! [[ $cpu_time =~ $number_re ]]; then
            echo "edlib: no search time reported for query '$query' and target '$target'" >&2
            return 1
        fi
        score=$(grep "#0:" <<< "$output" | cut -d " " -f2)
        times+=("$cpu_time")
        #echo ">" "#"$i $score $cpu_time
    done

    # Average with awk (POSIX) rather than an external Python interpreter.
    # LC_ALL=C keeps "." as the decimal separator regardless of the locale.
    avg_time=$(printf '%s\n' "${times[@]}" \
        | LC_ALL=C awk '{ sum += $1 } END { printf "%.9g", sum / NR }')
    echo -e "    => Edlib, $r repetition(s): time=${F_YELLOW}${avg_time}${F_NONE}s, score=$score"
}

# Runs the aligner several times with the -p and -s options and prints one
# summary line with the average reported search time.
#
# Globals:
#   EDLIB             (read)  aligner executable to run.
#   F_YELLOW, F_NONE  (read)  escape sequences wrapped around the time.
#   score             (read)  printed as-is in the summary line. This function
#                             never sets it; the value is whatever an earlier
#                             call to edlib left behind (empty if there was
#                             none). NOTE(review): presumably -s suppresses the
#                             score output, which is why it is not parsed here
#                             -- confirm against the aligner's usage text.
# Arguments:
#   $1  mode, passed to the aligner as -m.
#   $2  query file.
#   $3  target file.
#   $4  how many times the aligner is run; the reported times are averaged.
#   $5  value passed to the aligner as -r (number of repetitions), default 1.
# Outputs:
#   Summary line on stdout, diagnostics on stderr.
# Returns:
#   0 on success; 1 if $4 is not a positive integer or a run did not report a
#   parsable "Cpu time of searching" value.
function edlib_path {
    local mode=$1
    local query=$2
    local target=$3
    local num_tests=$4
    local r=${5-1}  # Number of repetitions, default is 1.

    # Zero runs would mean dividing by zero when averaging.
    if ! [[ $num_tests =~ ^[1-9][0-9]*$ ]]; then
        echo "edlib_path: number of tests must be a positive integer, got '$num_tests'" >&2
        return 1
    fi

    local number_re='^[0-9]+([.][0-9]+)?([eE][-+]?[0-9]+)?$'
    local -a times=()
    local i output cpu_time avg_time
    for ((i = 0; i < num_tests; i++)); do
        # Pause between runs, kept from the original script
        # (presumably to let the machine settle -- confirm).
        sleep 0.1
        output=$("$EDLIB" -m "$mode" -r "$r" -p -s "$query" "$target")
        # Fifth field of "Cpu time of searching: <seconds>".
        cpu_time=$(grep "Cpu time of searching" <<< "$output" | cut -d " " -f5)
        if ! [[ $cpu_time =~ $number_re ]]; then
            echo "edlib_path: no search time reported for query '$query' and target '$target'" >&2
            return 1
        fi
        times+=("$cpu_time")
        #echo ">" "#"$i $cpu_time
    done

    # Average with awk (POSIX) rather than an external Python interpreter.
    # LC_ALL=C keeps "." as the decimal separator regardless of the locale.
    avg_time=$(printf '%s\n' "${times[@]}" \
        | LC_ALL=C awk '{ sum += $1 } END { printf "%.9g", sum / NR }')
    echo -e "    => Edlib (path), $r repetition(s): time=${F_YELLOW}${avg_time}${F_NONE}s, score=${score-}"
}


############################ TESTS #############################



# ---------------------- Short reads ----------------------- #

# SHW (prefix).
# For each query length, runs both benchmarks in SHW mode on every
# mutated_*_perc.fasta file of that length against e_coli_DH1.fasta.
echo_bolded "\nSHW, short query (<= 500bp) and long target."
target=$TEST_DATA/E_coli_DH1/e_coli_DH1.fasta
for query_length in 50 100 250 500; do
    echo_underlined "Query length: $query_length"
    # Glob directly instead of parsing `ls`, so paths containing whitespace stay intact.
    for query in "$TEST_DATA"/E_coli_DH1/prefixes/"${query_length}bp"/mutated_*_perc.fasta; do
        if [[ ! -e $query ]]; then
            # An unmatched glob is left as the literal pattern; report it and move on.
            echo "No query files found: $query" >&2
            continue
        fi
        echo "  Query: $query"
        edlib      SHW "$query" "$target" 3 -1 100  # Last number here is num repetitions, tweak it if times are too small.
        edlib_path SHW "$query" "$target" 3    100
    done
done

# HW (infix).
# For each query length, runs both benchmarks in HW mode on every .fasta file
# in the matching mason_illumina_reads directory against e_coli_DH1.fasta,
# using the default number of repetitions.
echo_bolded "\nHW, short query (<= 500bp) and long target."
target=$TEST_DATA/E_coli_DH1/e_coli_DH1.fasta
for query_length in 50 100 250 500; do
    echo_underlined "Query length: $query_length"
    # Glob directly instead of parsing `ls`, so paths containing whitespace stay intact.
    for query in "$TEST_DATA"/E_coli_DH1/mason_illumina_reads/"${query_length}bp"/*.fasta; do
        if [[ ! -e $query ]]; then
            # An unmatched glob is left as the literal pattern; report it and move on.
            echo "No query files found: $query" >&2
            continue
        fi
        echo "  Query: $query"
        edlib      HW "$query" "$target" 3 -1
        edlib_path HW "$query" "$target" 3
    done
done

# NW (global).
# For each sequence length, uses e_coli_DH1_illumina_1x<length>.fasta from the
# reads directory as target and every .fasta file of that directory (the
# pattern also matches the target file itself) as query.
# The mode passed to the aligner is NW, matching the section heading; it used
# to be HW here, so this section did not measure the mode it announced.
echo_bolded "\nNW, query and target of equal size, both short (<= 500bp)."
for seq_length in 50 100 250 500; do
    reads_dir=$TEST_DATA/E_coli_DH1/mason_illumina_reads/${seq_length}bp
    target=$reads_dir/e_coli_DH1_illumina_1x${seq_length}.fasta
    echo_underlined "Query and target length: $seq_length"
    echo_underlined "Target: $target"
    # Glob directly instead of parsing `ls`, so paths containing whitespace stay intact.
    for query in "$reads_dir"/*.fasta; do
        if [[ ! -e $query ]]; then
            # An unmatched glob is left as the literal pattern; report it and move on.
            echo "No query files found: $query" >&2
            continue
        fi
        echo "  Query: $query"
        edlib      NW "$query" "$target" 3 -1 100000  # Last number here is num repetitions, tweak it if times are too small.
        edlib_path NW "$query" "$target" 3    10000
    done
done

# ----------------------- Long reads ----------------------- #

# HW (infix).
# Runs both benchmarks in HW mode on every .fasta file in the 10kbp reads
# directory against e_coli_DH1.fasta, using the default number of repetitions.
# echo_bolded only prints its first argument, so the heading must be that first
# argument; a leading "-e" made it print "-e" instead of the heading.
echo_bolded "\nHW, long query and long target."
target=$TEST_DATA/E_coli_DH1/e_coli_DH1.fasta
# Glob directly instead of parsing `ls`, so paths containing whitespace stay intact.
for query in "$TEST_DATA"/E_coli_DH1/mason_illumina_reads/10kbp/*.fasta; do
    if [[ ! -e $query ]]; then
        # An unmatched glob is left as the literal pattern; report it and move on.
        echo "No query files found: $query" >&2
        continue
    fi
    echo_underlined "Query: $query"
    edlib      HW "$query" "$target" 3 -1
    edlib_path HW "$query" "$target" 3
done

# SHW (prefix).
# Runs both benchmarks in SHW mode on every mutated_*_perc.fasta file in the
# 10kbp prefixes directory against e_coli_DH1.fasta, using the default number
# of repetitions.
# echo_bolded only prints its first argument, so the heading must be that first
# argument; a leading "-e" made it print "-e" instead of the heading.
echo_bolded "\nSHW, long query and long target."
target=$TEST_DATA/E_coli_DH1/e_coli_DH1.fasta
# Glob directly instead of parsing `ls`, so paths containing whitespace stay intact.
for query in "$TEST_DATA"/E_coli_DH1/prefixes/10kbp/mutated_*_perc.fasta; do
    if [[ ! -e $query ]]; then
        # An unmatched glob is left as the literal pattern; report it and move on.
        echo "No query files found: $query" >&2
        continue
    fi
    echo_underlined "Query: $query"
    edlib      SHW "$query" "$target" 3 -1
    edlib_path SHW "$query" "$target" 3
done

# NW (global).
# Runs both benchmarks in NW mode on every mutated_*_perc.fasta file of the
# Chromosome_2890043_3890042_0 data set against the data set's main .fasta
# file, using the default number of repetitions.
echo_bolded "\nNW, big query and target."
target=$TEST_DATA/Chromosome_2890043_3890042_0/Chromosome_2890043_3890042_0.fasta
# Glob directly instead of parsing `ls`, so paths containing whitespace stay intact.
for query in "$TEST_DATA"/Chromosome_2890043_3890042_0/mutated_*_perc.fasta; do
    if [[ ! -e $query ]]; then
        # An unmatched glob is left as the literal pattern; report it and move on.
        echo "No query files found: $query" >&2
        continue
    fi
    echo_underlined "Query: $query"
    edlib      NW "$query" "$target" 3 -1
    edlib_path NW "$query" "$target" 3
done