File: MetricField.hpp

package info (click to toggle)
pbseqlib 5.3.5%2Bdfsg-4
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 7,020 kB
  • sloc: cpp: 77,250; python: 331; sh: 103; makefile: 41
file content (121 lines) | stat: -rw-r--r-- 5,369 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#ifndef _LOADPULSES_METRICFIELD_HPP_
#define _LOADPULSES_METRICFIELD_HPP_

#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

#include <pbdata/Types.h>

// Tags a Field with the kind of file it is associated with.
// The numeric values are spelled out explicitly; they are the same values the
// compiler would assign implicitly (0, 1).
enum FieldType
{
    BasField = 0,  // NOTE(review): presumably a field read from a bas.h5 file (BaseCalls) -- confirm
    PlsField = 1   // NOTE(review): presumably a field read from a pls.h5 file (PulseCalls) -- confirm
};

// A named field paired with the kind of file (FieldType) it is associated
// with. Only declarations live here; the member functions are defined out of
// line.
class Field
{
public:
    // Constructs a field from a name `n` and a file kind `t`.
    // NOTE(review): presumably stored into `name` and `type` respectively;
    // the definition is not part of this header -- confirm.
    Field(std::string n, FieldType t);

    // Equality comparison against `another`.
    // NOTE(review): presumably compares both `name` and `type`; the
    // definition is not part of this header -- confirm.
    bool operator==(const Field &another) const;

    // Name of the field.
    std::string name;

    // Kind of file (BasField or PlsField) this field is associated with.
    FieldType type;
};

// Describes the fields that are required for computing one metric.
//
// Supported metrics: eighteen in total, in four groups
// ====================================================
//
// Group A [1/18]: the metric requires only an attribute (not a field):
//
//     WhenStarted
//
// Group B [9/18]: the metric requires exactly one BaseCall field:
//
//     QualityValue   InsertionQV       MergeQV          DeletionQV
//     DeletionTag    SubstitutionTag   SubstitutionQV   PreBaseFrames
//     PulseIndex
//
// Group C [4/18]: the metric requires more than one field and can be
// computed using only one method:
//
//     Metric            | BaseCall     | PulseCall
//     ------------------+--------------+--------------------------------
//     ClassifierQV      | PulseIndex   | NumEvent, ClassifierQV
//     pkmid             | PulseIndex   | NumEvent, MidSignal
//     Light             | PulseIndex   | NumEvent, WidthInFrames,
//                       |              | MeanSignal
//     StartTimeOffset   | PulseIndex   | NumEvent, StartFrame
//
// Group D [4/18]: the metric can be computed from both BaseCalls and
// PulseCalls. The value computed from BaseCalls can sometimes be wrong,
// because the value of BaseCalls/PreBaseFrames may exceed 2^16-1.
//
//     Metric         | Method | BaseCall        | PulseCall
//     ---------------+--------+-----------------+-----------------------
//     PulseWidth     |  (1)   | WidthInFrames   |
//                    |  (2)   | PulseIndex      | NumEvent, WidthInFrames
//     ---------------+--------+-----------------+-----------------------
//     WidthInFrames  |  the same as PulseWidth
//     ---------------+--------+-----------------+-----------------------
//     StartFrame     |  (1)   | PreBaseFrames,  |
//                    |        | WidthInFrames   |
//                    |  (2)   | PulseIndex      | NumEvent, StartFrame
//     ---------------+--------+-----------------+-----------------------
//     IPD            |  (1)   | PreBaseFrames   |
//                    |  (2)   | PulseIndex      | NumEvent, StartFrame,
//                    |        |                 | WidthInFrames
//
// Notes
// =====
//
// PulseWidth / WidthInFrames
//     Both names have the same meaning and are computed in the same way.
//
// StartFrame
//     Can be loaded for both bas.h5 and pls.h5 files.
//
//     For bas.h5 it is computed from PreBaseFrames and WidthInFrames:
//         Let x = PreBaseFrames for bases 0 ... n-1, where x[0] is 0 and
//                 x[i] is the inter-pulse distance between start of pulse
//                 for base i and end of pulse for base i-1
//         Let y = WidthInFrames for bases 0 ... n-1, where y[i] is the
//                 number of pulses within base i
//         Then,
//             StartFrame[0] = x[0]
//             StartFrame[i] = sum(x[0] ... x[i]) + sum(y[0] ... y[i-1])
//                             for i in [1 ... n-1]
//
//     For pls.h5 it can be read directly from the dataset
//     /PulseData/PulseCalls/StartFrame
//
// StartTimeOffset
//     The StartFrame of the very first base of a read; it can only be
//     computed from PulseCalls.
//
// IPD
//     Has the same meaning as PreBaseFrames:
//         = the inter-pulse distance between this base and end of last base,
//         = the number of Frames between the ending pulse of the last base
//           and the starting pulse of this base.
//     However, PreBaseFrames can only be read directly from BaseCalls, while
//     IPD can also be computed from PulseCalls:
//         If use BaseCalls,
//             IPD[i] = PreBaseFrames[i]        for i in [0 ... n-1]
//         If use PulseCalls,
//             IPD[0] = 0
//             IPD[i] = StartFrame[i] - StartFrame[i-1] - WidthInFrames[i-1]
//                                              for i in [1 ... n-1]
class FieldsRequirement
{
public:
    // Constructs the requirement for the metric `m`; defined out of line.
    // NOTE(review): presumably `m` is one of the eighteen metric names listed
    // above and the constructor fills in the field lists below accordingly --
    // confirm against the implementation.
    FieldsRequirement(const std::string &m);

    // The metric this requirement is about.
    // NOTE(review): presumably the constructor argument -- confirm.
    std::string metric;

    // NOTE(review): judging by the member names, these are the fields needed
    // when the metric is computed using a bas file and using a pls file
    // respectively -- confirm against the implementation.
    std::vector<Field> fieldsUseBasFile;
    std::vector<Field> fieldsUsePlsFile;
};

#endif