File: VariantAssist.h

package info (click to toggle)
tvc 5.0.3%2Bgit20151221.80e144e%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 3,548 kB
  • sloc: cpp: 24,088; ansic: 3,933; python: 260; makefile: 16
file content (131 lines) | stat: -rw-r--r-- 3,869 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/* Copyright (C) 2013 Ion Torrent Systems, Inc. All Rights Reserved */

//! @file     VariantAssist.h
//! @ingroup  VariantCaller
//! @brief    Utilities for output of variants


#ifndef VARIANTASSIST_H
#define VARIANTASSIST_H

#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <vector>
#include <math.h>
#include <ctype.h>
#include <algorithm>
#include "api/api_global.h"
#include "api/BamAux.h"
#include "api/BamConstants.h"
#include "api/BamReader.h"
#include "api/SamHeader.h"
#include "api/BamAlignment.h"
#include "api/SamReadGroup.h"
#include "api/SamReadGroupDictionary.h"
#include "api/SamSequence.h"
#include "api/SamSequenceDictionary.h"

#include "sys/types.h"
#include "sys/stat.h"
#include <map>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>

#include <Variant.h>
#include "MiscUtil.h"
#include "RandSchrange.h"
// ugly, too many headers
#include "ExtendParameters.h"

using namespace std;
using namespace BamTools;

class MultiBook {
public:
  // strand = 0/1
  // alleles = 0 (ref), 1,2,... alts
  vector< vector<int> > my_book;
  int invalid_reads;

  MultiBook();
  void Allocate(int num_hyp);
  float GetFailedReadRatio();
  void SetCount(int i_strand, int i_hyp, int count);
  int GetDepth(int i_strand, int i_alt);
  float OldStrandBias(int i_alt, float tune_bias);
  float StrandBiasPval(int i_alt, float tune_bias);
  float GetXBias(int i_alt, float tune_bias);
  int GetAlleleCount(int strand_key, int i_hyp);
  int TotalCount(int strand_key);
  int NumAltAlleles();
  void ResetCounter();
  void AssignStrandToHardClassifiedReads(vector<bool> &strand_id, vector<int> &read_id);
  void AssignPositionFromEndToHardClassifiedReads(vector<int> &read_id, vector<int> &left, vector<int> &right);
  float PositionBias(int i_alt);
  float GetPositionBiasPval(int i_alt);
  float GetPositionBias(int i_alt);

 private:
  struct position_bias {
    float rho;
    float pval;
  };
  vector<position_bias> my_position_bias;

  // distance of allele from softclips in each read
  vector<int> allele_index; // alleles = 0 (ref), 1,2,... alts
  vector<int> to_left;
  vector<int> to_right;

  void ComputePositionBias(int i_allele);
};


class VariantOutputInfo {
  public:
    bool isFiltered;
    float variant_qual_score;
    float gt_quality_score;
    int genotype_call;
    vector<string>  filterReason; // make sure we can push back as many as we need
    string infoString;


    VariantOutputInfo() {
      isFiltered = false;
     variant_qual_score = 0.0;
      gt_quality_score = 0.0f;
      genotype_call = -1;
      //filterReason = "";
      //infoString = "";

    }
};


float ComputeXBias(long int plus_var, long int plus_depth, long int neg_var, long int neg_depth, float var_zero);
float ComputeTunedXBias(long int plus_var, long int plus_depth, long int neg_var, long int neg_depth, float proportion_zero);
float BootstrapStrandBias(long int plus_var, long int plus_depth, long int neg_var, long int neg_depth, float tune_fish);
float ComputeTransformStrandBias(long int plus_var, long int plus_depth, long int neg_var, long int neg_depth, float tune_fish);
inline float ComputeStrandBias(long int plus_var, long int plus_depth, long int neg_var, long int neg_depth);

namespace VariantAssist {
  // helper functions for ComputePositionBias
  // todo: move to Stats.h
  inline float median(std::vector<float>& values);
  inline void randperm(vector<unsigned int> &v, RandSchrange& rand_generator);
  inline double partial_sum(vector<double> &v, size_t n);
  void tiedrank(vector<double> &vals);
  double MannWhitneyURho(vector<float> &ref, vector<float> &var, bool debug);
  inline double MannWhitneyU(vector<float> &ref, vector<float> &var, bool debug);

  struct mycomparison
  {
    bool operator() (double* lhs, double* rhs) {return (*lhs) < (*rhs);}
  };
}

#endif //VARIANTASSIST_H