File: SequenceIndexDatabase.hpp

package info (click to toggle)
pbseqlib 5.3.4+dfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 7,020 kB
  • sloc: cpp: 77,246; python: 331; sh: 103; makefile: 42
file content (96 lines) | stat: -rw-r--r-- 2,175 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#ifndef _BLASR_SEQUENCE_INDEX_DATABASE_HPP_
#define _BLASR_SEQUENCE_INDEX_DATABASE_HPP_

#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <vector>

#include <pbdata/Types.h>
#include <pbdata/DNASequence.hpp>
#include <pbdata/StringUtils.hpp>

// Magic number associated with the sequence index database.  Its use is not
// visible in this header; presumably it tags/validates the serialized form
// handled by WriteDatabase()/ReadDatabase() -- confirm in the implementation.
#define SEQUENCE_INDEX_DATABASE_MAGIC 1233211233

//
// Bookkeeping for a collection of indexed sequences: start positions, names,
// name lengths and md5 strings, plus lookup, search and stream read/write
// entry points.  TSeq is the sequence type accepted by AddSequence().
//
// Only declarations appear here; the member definitions are not part of this
// class body (this header includes SequenceIndexDatabaseImpl.hpp after the
// class declarations).  The per-member notes below are derived from names
// and signatures only -- check the implementation for exact semantics.
//
// NOTE(review): every data member is public, so code outside the class can
// read and modify the index state directly.
//
template <typename TSeq>
class SequenceIndexDatabase
{
public:
    // Vector-backed start positions and names.  Presumably filled while
    // sequences are being added, before Finalize() -- confirm in the
    // implementation.
    std::vector<DNALength> growableSeqStartPos;
    std::vector<std::string> growableName;

    // Raw-array form of the index.  Each delete* flag presumably records
    // whether this object is responsible for freeing the matching array --
    // TODO confirm the ownership rules in the implementation.
    DNALength *seqStartPos;
    bool deleteSeqStartPos;
    char **names;
    bool deleteNames;
    int *nameLengths;
    bool deleteNameLengths;
    // Presumably the number of entries in seqStartPos -- verify whether it
    // counts sequences or boundaries (sequences + 1).
    int nSeqPos;
    bool deleteStructures;
    //
    // This is stored after reading in the sequence.
    // (Presumably one MD5 string per sequence -- confirm against the code
    // that fills it.)
    //
    std::vector<std::string> md5;

    // Also serves as the default constructor because "final" defaults to 0;
    // the meaning of "final" is defined by the implementation.  The
    // constructor is not declared explicit, so an int converts implicitly to
    // a SequenceIndexDatabase.
    SequenceIndexDatabase(int final = 0);
    // NOTE(review): a destructor is declared but no copy constructor or copy
    // assignment operator is, so copies are compiler-generated member-wise
    // copies of the raw pointers above.  If the destructor frees them, copying
    // an instance would be unsafe -- confirm before copying instances.
    ~SequenceIndexDatabase();

    // Length of the sequence at seqIndex.
    DNALength GetLengthOfSeq(int seqIndex);

    // Return index of a reference sequence with name "seqName".
    // NOTE(review): seqName is taken by value; the result when no sequence
    // matches is not visible from this declaration.
    int GetIndexOfSeqName(std::string seqName);

    // Writes the name of sequence seqIndex into the output parameter "name".
    void GetName(int seqIndex, std::string &name);

    // Fills the output parameter sqString; by its name, presumably with SAM
    // @SQ header text for the indexed sequences -- confirm the exact format.
    void MakeSAMSQString(std::string &sqString);

    // Maps a position chromPos within sequence "chrom" to a DNALength
    // position in the whole ("genome") coordinate space -- presumably by
    // offsetting with that sequence's start position; confirm.
    DNALength ChromosomePositionToGenome(int chrom, DNALength chromPos);

    // Searches for the index associated with position pos; the behaviour for
    // out-of-range positions is not visible here.
    int SearchForIndex(DNALength pos);

    // Note that the index is unsigned here, while the other accessors take a
    // signed int.
    std::string GetSpaceDelimitedName(unsigned int index);

    // Boundary searches for position pos.  Whether the int returned is an
    // index or a position cannot be told from the declarations -- verify.
    int SearchForStartBoundary(DNALength pos);

    int SearchForEndBoundary(DNALength pos);

    // "start" and "end" are output parameters (non-const references).  The
    // meaning of the returned DNALength is not visible here -- TODO confirm.
    DNALength SearchForStartAndEnd(DNALength pos, DNALength &start, DNALength &end);

    // Stream-based persistence of the database: write to "out" / read from
    // "in".  The on-disk layout is defined by the implementation.
    void WriteDatabase(std::ofstream &out);

    void ReadDatabase(std::ifstream &in);

    // Presumably reduces stored title lines to bare sequence names --
    // confirm what part of the title is kept.
    void SequenceTitleLinesToNames();

    // Adds "sequence" (taken by non-const reference) to the database and
    // returns a VectorIndex; what that index refers to is not visible here.
    VectorIndex AddSequence(TSeq &sequence);

    // Presumably switches from the growable vectors to the raw-array form
    // once all sequences have been added -- confirm in the implementation.
    void Finalize();

    // Releases the database's storage; presumably guided by the delete*
    // flags above -- confirm.
    void FreeDatabase();
};

//
// Callable helper ("functor") bound to a SequenceIndexDatabase<TSeq>.  It
// exposes index, start-position and length queries for a position, in
// addition to operator().  Only declarations appear here; the definitions
// are not part of this class body, so the notes below are derived from names
// and signatures only.
//
template <typename TSeq>
class SeqBoundaryFtr
{
public:
    // Database the queries refer to.  Held as a raw pointer; presumably not
    // owned by this object, so the database must outlive it -- confirm.
    SequenceIndexDatabase<TSeq> *seqDB;

    // Takes the database pointer; presumably stored in seqDB.  Not declared
    // explicit, so a SequenceIndexDatabase<TSeq>* converts implicitly.
    SeqBoundaryFtr(SequenceIndexDatabase<TSeq> *_seqDB);

    // Index associated with position pos.
    int GetIndex(DNALength pos);

    // Start position for the sequence at "index".
    // NOTE(review): returns int while positions elsewhere in this header are
    // DNALength -- check for truncation/sign issues if DNALength is wider or
    // unsigned.
    int GetStartPos(int index);

    // Function-call operator: takes a position and returns a DNALength.
    // Presumably a boundary for pos -- confirm which one in the
    // implementation.
    DNALength operator()(DNALength pos);

    // This is misuse of a functor, but easier interface coding for now.
    // Presumably the length of the sequence containing pos -- confirm.
    DNALength Length(DNALength pos);
};

#include "SequenceIndexDatabaseImpl.hpp"

#endif