File: SequenceIndexDatabase.hpp

package info (click to toggle)
pbseqlib 0~20161219-1
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 5,924 kB
  • ctags: 5,123
  • sloc: cpp: 82,727; makefile: 305; python: 239; sh: 8
file content (96 lines) | stat: -rw-r--r-- 2,134 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#ifndef _BLASR_SEQUENCE_INDEX_DATABASE_HPP_
#define _BLASR_SEQUENCE_INDEX_DATABASE_HPP_

#include <fstream>
#include <iostream>
#include <vector>
#include <assert.h>
#include <stdlib.h>
#include <sstream>
#include <cstring>
#include <algorithm>
#include "../Types.h"
#include "../DNASequence.hpp"
#include "../StringUtils.hpp"


// Magic number associated with the sequence index database.
// NOTE(review): presumably written/validated by WriteDatabase()/ReadDatabase()
// in SequenceIndexDatabaseImpl.hpp -- confirm there before changing the value.
#define SEQUENCE_INDEX_DATABASE_MAGIC 1233211233

//
// Index over a collection of sequences of type TSeq.  As the member names
// suggest, it tracks a start position and a name for every sequence, plus an
// optional list of md5 strings.  Only declarations live here; the member
// functions are not defined in this class body (presumably they come from
// SequenceIndexDatabaseImpl.hpp, which this header includes).
//
// NOTE(review): the class exposes raw pointers together with delete* flags
// and declares a destructor but no copy operations, so compiler-generated
// copies are shallow.  Confirm that no caller copies an instance that owns
// its arrays.
//
template<typename TSeq>
class SequenceIndexDatabase {
public:
    // Vector-backed storage for start positions and names.
    // NOTE(review): presumably filled by AddSequence() and converted to the
    // raw arrays below by Finalize() -- confirm in the implementation.
    std::vector<DNALength> growableSeqStartPos;
    std::vector<std::string> growableName;

    // Raw-array form of the index.  Each delete* flag sits next to the
    // pointer it refers to.
    // NOTE(review): the flags presumably record whether this object must free
    // the corresponding array -- confirm against FreeDatabase().
    DNALength *seqStartPos;
    bool deleteSeqStartPos;
    char **names;
    bool deleteNames;
    int  *nameLengths;
    bool deleteNameLengths;
    int  nSeqPos;
    bool deleteStructures;

    //
    // This is stored after reading in the sequence.
    //
    std::vector<std::string> md5;

    // `final` is only a contextual keyword in C++11 and later, so it remains
    // a legal parameter name here.
    SequenceIndexDatabase(int final=0);
    ~SequenceIndexDatabase();

    DNALength GetLengthOfSeq(int seqIndex);

    // Return index of a reference sequence with name "seqName".
    int GetIndexOfSeqName(std::string seqName);

    // Output parameter: the result is delivered through `name`.
    void GetName(int seqIndex, std::string &name);

    // Output parameter: the result is delivered through `sqString`.
    void MakeSAMSQString(std::string &sqString);

    DNALength ChromosomePositionToGenome(int chrom, DNALength chromPos);

    int SearchForIndex(DNALength pos);

    std::string GetSpaceDelimitedName(unsigned int index);

    int SearchForStartBoundary(DNALength pos);

    int SearchForEndBoundary(DNALength pos);

    // Output parameters: `start` and `end` are passed by reference so the
    // implementation can fill them in.
    DNALength SearchForStartAndEnd(DNALength pos, DNALength &start,
        DNALength &end);

    // The stream types are spelled with an explicit std:: qualifier, like
    // every other standard type in this class, so the header does not depend
    // on a using-directive leaking in from an earlier include.
    void WriteDatabase(std::ofstream &out);

    void ReadDatabase(std::ifstream &in);

    void SequenceTitleLinesToNames();

    VectorIndex AddSequence(TSeq &sequence);

    void Finalize();

    void FreeDatabase();
};


//
// Functor-style helper that keeps a pointer to a SequenceIndexDatabase<TSeq>
// and declares position-based queries on it.  Only declarations appear here;
// the definitions are presumably supplied by SequenceIndexDatabaseImpl.hpp.
//
template<class TSeq>
class SeqBoundaryFtr {
public:
    // Database this object refers to.  Held as a raw pointer; ownership is
    // not visible from this header.
    SequenceIndexDatabase<TSeq> *seqDB;

    SeqBoundaryFtr(SequenceIndexDatabase<TSeq> *_seqDB);

    // Call operator: takes a position and yields a DNALength.
    DNALength operator()(DNALength pos);

    // Not what a functor is really meant for, but it keeps the calling
    // interface simple for the time being.
    DNALength Length(DNALength pos);

    int GetIndex(DNALength pos);

    int GetStartPos(int index);
};

#include "SequenceIndexDatabaseImpl.hpp"

#endif