File: test_alignment_tools.py

package info (click to toggle)
ont-fast5-api 4.1.3%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: sid, trixie
  • size: 3,556 kB
  • sloc: python: 3,799; makefile: 152; sh: 13
file content (138 lines) | stat: -rwxr-xr-x 7,050 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import os
import numpy as np
from ont_fast5_api.fast5_file import Fast5File
from ont_fast5_api.analysis_tools.alignment import AlignmentTools
from test.helpers import TestFast5ApiHelper


class TestAlignmentTools(TestFast5ApiHelper):

    def test_001_put_and_retrieve(self):
        fname = self.generate_temp_filename()
        summary_temp = {'genome': 'Lambda',
                        'genome_start': 100,
                        'genome_end': 200,
                        'strand_start': 1,
                        'strand_end': 101,
                        'num_events': 125,
                        'num_aligned': 92,
                        'num_correct': 87,
                        'num_insertions': 8,
                        'num_deletions': 8,
                        'identity': 0.9457,
                        'accuracy': 0.8056}
        summary_comp = {'genome': 'Lambda_rc',
                        'genome_start': 100,
                        'genome_end': 200,
                        'strand_start': 0,
                        'strand_end': 96,
                        'num_events': 120,
                        'num_aligned': 90,
                        'num_correct': 88,
                        'num_insertions': 6,
                        'num_deletions': 10,
                        'identity': 0.9778,
                        'accuracy': 0.8302}
        summary_2d = {'genome': 'Lambda',
                      'genome_start': 100,
                      'genome_end': 200,
                      'strand_start': 0,
                      'strand_end': 100,
                      'num_events': 125,
                      'num_aligned': 98,
                      'num_correct': 96,
                      'num_insertions': 4,
                      'num_deletions': 4,
                      'identity': 0.9796,
                      'accuracy': 0.9057}
        sam1 = 'Dummy string for template SAM.'
        sam2 = 'Dummy string for complement SAM.'
        sam3 = 'Dummy string for 2D SAM.'
        sequence1 = ''.join(np.random.choice(['A', 'C', 'G', 'T'], 100))
        bc = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
        sequence2 = ''.join([bc[letter] for letter in sequence1[::-1]])
        with Fast5File(fname, mode='w') as fh:
            fh.add_channel_info({'channel_number': 1,
                                 'sampling_rate': 4000,
                                 'digitisation': 8192,
                                 'range': 819.2,
                                 'offset': 0})
            fh.add_read(12, 'unique_snowflake', 12345, 4000, 0, 120.75)
            attrs = {'name': 'test', 'version': 0, 'time_stamp': 'just now', 'component': 'segmentation'}
            fh.add_analysis('segmentation', 'Segmentation_000', attrs)
            seg_data = {'has_template': 1,
                        'has_complement': 1,
                        'first_sample_template': 0,
                        'duration_template': 2000,
                        'first_sample_complement': 2000,
                        'duration_complement': 2000}
            fh.set_summary_data('Segmentation_000', 'segmentation', seg_data)
            attrs['component'] = 'alignment'
            attrs['segmentation'] = 'Analyses/Segmentation_000'
            fh.add_analysis('alignment', 'Alignment_000', attrs)
            fh.set_summary_data('Alignment_000', 'genome_mapping_template', summary_temp)
            fh.set_summary_data('Alignment_000', 'genome_mapping_complement', summary_comp)
            fh.set_summary_data('Alignment_000', 'genome_mapping_2d', summary_2d)
            with AlignmentTools(fh, group_name='Alignment_000') as align:
                align.add_alignment_data('template', sam1, sequence1)
                align.add_alignment_data('complement', sam2, sequence2)
                align.add_alignment_data('2d', sam3, sequence1)
        with Fast5File(fname, mode='r') as fh:
            with AlignmentTools(fh, group_name='Alignment_000') as align:
                sam, seq = align.get_alignment_data('template')
                self.assertEqual(sam1, sam)
                self.assertEqual(sequence1, seq)
                sam, seq = align.get_alignment_data('complement')
                self.assertEqual(sam2, sam)
                self.assertEqual(sequence2, seq)
                sam, seq = align.get_alignment_data('2d')
                self.assertEqual(sam3, sam)
                self.assertEqual(sequence1, seq)
                results = align.get_results()
                speed_temp = align.calculate_speed('template')
                speed_comp = align.calculate_speed('complement')
                # Make sure we can calculate speed using only what's in the
                # summary
                summary = fh.get_summary_data('Alignment_000')
                template_summary = summary['genome_mapping_template']
                summary_speed_temp = align.calculate_speed('template',
                                                           template_summary)
        self.assertEqual(250, speed_temp)
        self.assertEqual(250, speed_comp)
        self.assertEqual(speed_temp, summary_speed_temp)
        self.assertDictEqual({'status': 'match found',
                              'direction': 'forward',
                              'ref_name': 'Lambda',
                              'ref_span': (100, 200),
                              'seq_span': (1, 101),
                              'seq_len': 125,
                              'num_aligned': 92,
                              'num_correct': 87,
                              'num_insertions': 8,
                              'num_deletions': 8,
                              'identity': 0.9457,
                              'accuracy': 0.8056}, results['template'])
        self.assertDictEqual({'status': 'match found',
                              'direction': 'reverse',
                              'ref_name': 'Lambda',
                              'ref_span': (100, 200),
                              'seq_span': (0, 96),
                              'seq_len': 120,
                              'num_aligned': 90,
                              'num_correct': 88,
                              'num_insertions': 6,
                              'num_deletions': 10,
                              'identity': 0.9778,
                              'accuracy': 0.8302}, results['complement'])
        self.assertDictEqual({'status': 'match found',
                              'direction': 'forward',
                              'ref_name': 'Lambda',
                              'ref_span': (100, 200),
                              'seq_span': (0, 100),
                              'seq_len': 125,
                              'num_aligned': 98,
                              'num_correct': 96,
                              'num_insertions': 4,
                              'num_deletions': 4,
                              'identity': 0.9796,
                              'accuracy': 0.9057}, results['2d'])