File: performance.py

package info (click to toggle)
libedlib 1.2.7-7
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 14,532 kB
  • sloc: cpp: 2,002; sh: 304; python: 131; makefile: 89; ansic: 7
file content (38 lines) | stat: -rwxr-xr-x 1,453 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/env python

import timeit

import edlib
import editdistance
import Levenshtein

# Load the query sequence. Only the first line of the file is read. The
# trailing line terminator that readline() keeps is stripped so that it is
# neither counted in the reported length nor handed to the aligners as if it
# were part of the sequence.
with open('../../test_data/Enterobacteria_Phage_1/mutated_90_perc_oneline.fasta', 'r') as f:
    queryFull = f.readline().rstrip('\r\n')
print('Read query: ', len(queryFull) ,' characters.')

# Load the target sequence the same way (first line only, terminator removed).
with open('../../test_data/Enterobacteria_Phage_1/Enterobacteria_phage_1_oneline.fa', 'r') as f:
    targetFull = f.readline().rstrip('\r\n')
print('Read target: ', len(targetFull) ,' characters.')

def _mean_runtime(stmt, runs):
    """Return the average time in seconds that timeit reports per run of *stmt*.

    The statement is evaluated against this module's globals, so it can refer
    to the current ``query`` and ``target`` as well as the imported libraries.
    """
    return timeit.timeit(stmt=stmt, number=runs, globals=globals()) / runs


# Time each library on prefixes of increasing length and report how the other
# two compare with edlib.
for seqLen in (30, 100, 1000, 10000, 50000):
    query, target = queryFull[:seqLen], targetFull[:seqLen]
    # Repetitions shrink with the square of the prefix length, but never
    # drop below a single run.
    numRuns = max(1000000000 // (seqLen * seqLen), 1)

    print('Sequence length: ', seqLen)

    edlibTime = _mean_runtime("edlib.align(query, target)", numRuns)
    print('Edlib: ', edlibTime)
    # Also show the alignment result itself for this prefix pair.
    print(edlib.align(query, target))

    editdistanceTime = _mean_runtime("editdistance.eval(query, target)", numRuns)
    print('editdistance: ', editdistanceTime)

    levenshteinTime = _mean_runtime("Levenshtein.distance(query, target)", numRuns)
    print('levenshtein: ', levenshteinTime)

    # Ratios of the per-run averages, with edlib as the baseline.
    print('edlib is %f times faster than editdistance.' % (editdistanceTime / edlibTime))
    print('edlib is %f times faster than Levenshtein.' % (levenshteinTime / edlibTime))