File: basecall_1d.py

package info (click to toggle)
ont-fast5-api 4.1.1%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 3,548 kB
  • sloc: python: 3,799; makefile: 153; sh: 13
file content (68 lines) | stat: -rw-r--r-- 2,904 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
""" Helper class for working with 1D basecall type analyses.
"""
import numpy as np

from ont_fast5_api.analysis_tools.base_tool import BaseTool


class Basecall1DTools(BaseTool):
    """ Provides helper methods specific to 1D basecall analyses.
    """
    group_id = 'Basecall_1D'
    analysis_id = 'basecall_1d'


    def get_event_data(self, section):
        """ Return either the template or complement event data, if present.
        
        :param section: Either template or complement.
        :return: Event data table.
        """
        event_group = '{}/BaseCalled_{}'.format(self.group_name, section)
        data = self.handle.get_analysis_dataset(event_group, 'Events')
        return data

    def add_event_data(self, section, data):
        """ Add template or complement basecalled event data.
        
        :param section: Either template or complement.
        :param data: Event data table to be written.
        """
        event_group = 'BaseCalled_{}'.format(section)
        if not event_group in self.handle.handle['Analyses/{}'.format(self.group_name)]:
            self.handle.add_analysis_subgroup(self.group_name, event_group)
        self.handle.add_analysis_dataset('{}/{}'.format(self.group_name, event_group), 'Events', data)

    def get_called_sequence(self, section, fastq=False):
        """ Return either the called sequence data, if present.
        
        :param section: ['template', 'complement' or '2D']
        :param fastq: If True, return a single, multiline fastq string. If
            False, return a tuple of (name, sequence, qstring).
        :return: Either the fastq string or the (name, sequence, qstring) tuple.
        """

        event_group = '{}/BaseCalled_{}'.format(self.group_name, section)
        data = self.handle.get_analysis_dataset(event_group, 'Fastq')
        if data is None:
            raise KeyError("No fastq data in: {} {}".format(event_group, self.filename))
        if fastq:
            return data
        name, sequence, _, qstring = data.strip().split('\n')
        name = name[1:]
        return name, sequence, qstring

    def add_called_sequence(self, section, name, sequence, qstring):
        """ Add basecalled sequence data
        
        :param section: ['template', 'complement' or '2D']
        :param name: The record ID to use for the fastq.
        :param sequence: The called sequence.
        :param qstring: The quality string.
        """
        event_group = 'BaseCalled_{}'.format(section)
        if not event_group in self.handle.handle['Analyses/{}'.format(self.group_name)]:
            self.handle.add_analysis_subgroup(self.group_name, event_group)
        fastq_text = '@{}\n{}\n+\n{}\n'.format(name, sequence, qstring)
        fastq_arr = np.array(fastq_text, dtype=str)
        self.handle.add_analysis_dataset('{}/{}'.format(self.group_name, event_group), 'Fastq', fastq_arr)