File: st_quality.hxx

package info (click to toggle)
arb 6.0.2-1
  • links: PTS, VCS
  • area: non-free
  • in suites: jessie-kfreebsd
  • size: 65,852 kB
  • sloc: ansic: 394,903; cpp: 250,252; makefile: 19,635; sh: 15,878; perl: 10,461; fortran: 6,019; ruby: 683; xml: 503; python: 53; awk: 32
file content (121 lines) | stat: -rw-r--r-- 4,145 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
// =============================================================== //
//                                                                 //
//   File      : st_quality.hxx                                    //
//   Purpose   :                                                   //
//                                                                 //
//   Institute of Microbiology (Technical University Munich)       //
//   http://www.arb-home.de/                                       //
//                                                                 //
// =============================================================== //

#ifndef ST_QUALITY_HXX
#define ST_QUALITY_HXX

#ifndef ST_WINDOW_HXX
#include "st_window.hxx"
#endif

#define st_assert(cond) arb_assert(cond)

class Sampler {
    double sum;                                     //!< sum of added values
    size_t count;                                   //!< number of values added
public:
    Sampler() : sum(0.0) , count(0) {}

    void add(double val) {
        sum += val;
        count++;
    }
    void add(const Sampler& other) {
        sum   += other.sum;
        count += other.count;
    }

    size_t get_count() const { return count; }
    double get_sum() const { return sum; }
    double get_median() const { return sum / count; }
};

class VarianceSampler : public Sampler {
    /*! stores values such that variance can quickly be calculated
     * for algorithm see
     * http://de.wikipedia.org/wiki/Standardabweichung#Berechnung_f.C3.BCr_auflaufende_Messwerte or
     * http://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods
     */

    double square_sum;                              //!< sum of squares of added values

public:
    VarianceSampler() : square_sum(0.0) {}

    void add(double val) {
        Sampler::add(val);
        square_sum += val*val;
    }

    void add(const VarianceSampler& other) {
        Sampler::add(other);
        square_sum += other.square_sum;
    }

    double get_variance(double median) const { return square_sum/get_count() - median*median; }
    double get_variance() const { return get_variance(get_median()); }
};

class LikelihoodRanges : virtual Noncopyable {
    /*! The alignment is split into multiple, similar-sized column ranges.
     * For each range the likelihoods get summarized
     */
    size_t           ranges;
    VarianceSampler *likelihood;

public:

    LikelihoodRanges(size_t ranges);
    ~LikelihoodRanges() { delete [] likelihood; }

    size_t get_range_count() const { return ranges; }

    void add(size_t range, double probability) {
        st_assert(range < ranges);
        likelihood[range].add(probability);
    }
    void add_relative(double seq_rel_pos, double probability) {
        st_assert(seq_rel_pos>=0.0 && seq_rel_pos<1.0);
        add(seq_rel_pos*ranges, probability);
    }

    VarianceSampler summarize_all_ranges();
    char *generate_string(Sampler& t_values);

    // int is_bad();                                   // 0 == ok, 1 strange, 2 bad, 3 very bad
};

struct ColumnQualityInfo {
    LikelihoodRanges stat_half;                    // two ranges (one for each half)
    LikelihoodRanges stat_five;                    // five ranges
    LikelihoodRanges stat_user;                    // user defined range size
    LikelihoodRanges stat_cons;                    // two ranges (conserved + variable positions) -- @@@ unused

    ColumnQualityInfo(int seq_len, int bucket_size);

    size_t overall_range_count() {
        return
            stat_half.get_range_count() +
            stat_five.get_range_count() +
            stat_user.get_range_count() +
            stat_cons.get_range_count();
    }
};

class WeightedFilter;
class ColumnStat;

GB_ERROR st_ml_check_sequence_quality(GBDATA *gb_main, const char *tree_name,
                                      const char *alignment_name, ColumnStat *colstat, const WeightedFilter *weighted_filter, int bucket_size,
                                      int marked_only, st_report_enum report, const char *dest_field); 

#else
#error st_quality.hxx included twice
#endif // ST_QUALITY_HXX