File: test_performance.py

package info (click to toggle)
kalign 1%3A3.5.1-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 4,060 kB
  • sloc: ansic: 16,129; python: 10,759; cpp: 636; sh: 65; makefile: 57
file content (129 lines) | stat: -rw-r--r-- 4,909 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
"""
Performance and benchmark tests for the Kalign Python package.
"""

import pytest
import time
import kalign


class TestPerformance:
    """Performance checks and benchmarks for ``kalign.align``.

    Tests that take a ``benchmark`` argument use it both as a plain callable
    and through ``benchmark.pedantic`` (presumably the pytest-benchmark
    fixture -- confirm it is installed in the test environment).  The
    ``dna_simple``, ``dna_with_gaps`` and ``protein_simple`` fixtures are
    defined outside this class.
    """

    @staticmethod
    def _assert_consistent_alignment(result, sequences):
        """Assert *result* has one row per input and all rows share a length.

        Args:
            result: Aligned sequences returned by ``kalign.align``.
            sequences: The input sequences that were aligned.
        """
        assert len(result) == len(sequences)
        # An empty result yields an empty set, so the check passes trivially,
        # matching the previous ``if result:`` guard.
        row_lengths = {len(row) for row in result}
        assert len(row_lengths) <= 1, f"Inconsistent row lengths: {row_lengths}"

    @pytest.mark.performance
    def test_basic_alignment_speed(self, dna_simple, benchmark):
        """Benchmark basic alignment speed."""
        result = benchmark(kalign.align, dna_simple, seq_type="dna")
        assert len(result) == len(dna_simple)

    @pytest.mark.performance
    def test_threading_speedup(self, dna_with_gaps):
        """Check single- and multi-threaded runs agree; report their timings.

        No speedup is asserted: timing comparisons are unreliable in CI, so
        the elapsed times are only printed for manual inspection.
        """
        # perf_counter() is monotonic and high-resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        start_time = time.perf_counter()
        aligned_single = kalign.align(dna_with_gaps, seq_type="dna", n_threads=1)
        single_time = time.perf_counter() - start_time

        start_time = time.perf_counter()
        aligned_multi = kalign.align(dna_with_gaps, seq_type="dna", n_threads=2)
        multi_time = time.perf_counter() - start_time

        # The thread count must not change the alignment itself.
        assert aligned_single == aligned_multi

        print(
            f"Single-thread time: {single_time:.6f}s, Multi-thread time: {multi_time:.6f}s"
        )

    @pytest.mark.performance
    @pytest.mark.slow
    def test_large_sequence_performance(self):
        """Test that ten identical 400bp sequences align in under 10 seconds."""
        large_seqs = ["ATCG" * 100] * 10  # 10 sequences of 400bp each

        # Use the monotonic clock so the measured interval cannot be skewed
        # by wall-clock adjustments.
        start_time = time.perf_counter()
        aligned = kalign.align(large_seqs, seq_type="dna")
        elapsed = time.perf_counter() - start_time

        assert len(aligned) == len(large_seqs)
        assert elapsed < 10.0  # Should complete in reasonable time

    @pytest.mark.performance
    def test_memory_usage_reasonable(self, dna_simple):
        """Smoke-test aligning a tenfold repetition of ``dna_simple``.

        This only checks that the alignment completes and returns one row
        per input; it could be enhanced with real memory profiling.
        """
        sequences = dna_simple * 10
        aligned = kalign.align(sequences)
        # Compare against the actual input size rather than a hard-coded
        # count, so the test does not depend on the fixture's length.
        assert len(aligned) == len(sequences)

    @pytest.mark.performance
    def test_dna_alignment_speed(self, dna_simple, benchmark):
        """Benchmark DNA alignment speed over three rounds."""
        result = benchmark.pedantic(
            kalign.align, args=[dna_simple], kwargs={"seq_type": "dna"}, rounds=3
        )
        assert len(result) == len(dna_simple)

    @pytest.mark.performance
    def test_protein_alignment_speed(self, protein_simple, benchmark):
        """Benchmark protein alignment speed over three rounds."""
        result = benchmark.pedantic(
            kalign.align,
            args=[protein_simple],
            kwargs={"seq_type": "protein"},
            rounds=3,
        )
        assert len(result) == len(protein_simple)

    @pytest.mark.performance
    @pytest.mark.parametrize("n_threads", [1, 2, 4, 8, 16])
    def test_dna_threading_performance(self, n_threads, benchmark):
        """Test DNA alignment performance with different thread counts."""
        # Generate a larger problem to really test threading
        sequences = kalign.generate_test_sequences(
            n_seq=200,  # 200 sequences - much more demanding
            n_obs=30,  # 30 observed sequences for HMM training
            dna=True,  # DNA sequences
            length=1000,  # 1000bp sequences - much longer
            seed=42,  # Reproducible results
        )

        result = benchmark.pedantic(
            kalign.align,
            args=[sequences],
            kwargs={"seq_type": "dna", "n_threads": n_threads},
            rounds=1,  # Reduced rounds due to longer runtime
        )
        self._assert_consistent_alignment(result, sequences)

    @pytest.mark.performance
    @pytest.mark.parametrize("n_threads", [1, 2, 4, 8, 16])
    def test_protein_threading_performance(self, n_threads, benchmark):
        """Test protein alignment performance with different thread counts."""
        # Generate a larger problem to really test threading
        sequences = kalign.generate_test_sequences(
            n_seq=150,  # 150 sequences - more demanding
            n_obs=25,  # 25 observed sequences for HMM training
            dna=False,  # Protein sequences
            length=320,  # 320aa sequences - much longer
            seed=123,  # Different seed for variety
        )

        result = benchmark.pedantic(
            kalign.align,
            args=[sequences],
            kwargs={"seq_type": "protein", "n_threads": n_threads},
            rounds=1,  # Reduced rounds due to longer runtime
        )
        self._assert_consistent_alignment(result, sequences)