File: SearchDirectory.h

package info (click to toggle)
ray 2.3.1-9
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 6,008 kB
  • sloc: cpp: 49,973; sh: 339; makefile: 281; python: 168
file content (124 lines) | stat: -rw-r--r-- 2,826 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/*
 	Ray
    Copyright (C) 2012 Sébastien Boisvert

	http://DeNovoAssembler.SourceForge.Net/

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, version 3 of the License.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You have received a copy of the GNU General Public License
    along with this program (gpl-3.0.txt).  
	see <http://www.gnu.org/licenses/>

*/

#ifndef _SearchDirectory_h
#define _SearchDirectory_h

#include "VirtualKmerColor.h"

#include <code/KmerAcademyBuilder/Kmer.h>
#include <code/GeneOntology/KeyEncoder.h>

#include <set>
#include <string>
#include <fstream>
#include <vector>
using namespace std;

/**
 * Placeholder identifier value (unsigned long long literal).
 * NOTE(review): presumably a sentinel for "no real identifier";
 * its users are not visible in this header -- confirm in the .cpp.
 */
#define DUMMY_IDENTIFIER 999999999999ULL
/**
 * Size, in chars, of the fixed line buffers declared in SearchDirectory
 * (m_bufferedLine, m_currentSequenceHeader, m_currentSequenceBuffer).
 */
#define CONFIG_COLORED_LINE_MAX_LENGTH 10000

/**
 * This class represents a directory
 * with fasta files
 * to be used for coloring.
 *
 * No C++ constructor is declared; the object is set up through
 * constructor(path).
 *
 * Only declarations are visible in this header. Notes marked
 * "presumably" are inferred from names and must be confirmed
 * against the implementation file.
 */
class SearchDirectory{

	/* helper declared in code/GeneOntology/KeyEncoder.h; its use is not visible here */
	KeyEncoder m_encoder;

	/* presumably true while m_bufferedLine holds a line read ahead -- confirm */
	bool m_hasBufferedLine;

	/** this is to avoid using tellg() and seekg() **/
	char m_bufferedLine[CONFIG_COLORED_LINE_MAX_LENGTH];

	/* presumably the flag reported by kmerContainsN() -- confirm */
	bool m_hasN;

	/* presumably the directory path given to constructor() -- confirm */
	string m_path;

	/* presumably indexed by the same file number: file names and per-file sequence counts -- confirm */
	vector<string> m_files;
	vector<int> m_counts;

	/* presumably the file numbers recorded by setCreatedDirectory() -- confirm */
	set<int> m_createdDirectories;

	/** sequence lazy loader */
	bool m_hasFile;
	int m_currentSequencePosition;
	/* fixed-size buffers of CONFIG_COLORED_LINE_MAX_LENGTH chars each */
	char m_currentSequenceHeader[CONFIG_COLORED_LINE_MAX_LENGTH];
	char m_currentSequenceBuffer[CONFIG_COLORED_LINE_MAX_LENGTH];
	int m_currentSequenceNumberOfAvailableKmers;

	int m_currentFile;
	int m_currentSequence;

	bool m_noMoreSequence;
	/*
	 * C stdio stream handle.
	 * NOTE(review): FILE is declared by <cstdio>, which is not among the
	 * includes visible in this header; presumably it arrives through
	 * another include -- confirm, or include it directly.
	 */
	FILE* m_currentFileStream;

	void readDirectory();

	/** lazy load some sequences */
	void loadSomeSequence();


	/* line is a caller-provided buffer; length is presumably its capacity in chars -- confirm */
	void readLineFromFile(char*line,int length);

	/* NOTE(review): presumably tests for a fasta header line (leading '>') -- confirm */
	bool lineIsSequenceHeader(char*line);

	string filterName(string a);
public:
	/* initialization entry point; takes the directory path by value */
	void constructor(string path);

	/** get the number of sequences in the file */
	int getCount(int i);

	/*
	 * Both return a pointer to a string.
	 * NOTE(review): presumably these point at m_files[i] and m_path,
	 * so the caller must not free them -- confirm.
	 */
	string*getFileName(int i);
	string*getDirectoryName();
	void countEntriesInFile(int j);

	/** get the number of files in the directory */
	int getSize();

	void setCount(int file,int count);

	// sequence reader
	// NOTE(review): presumably createSequenceReader() selects a sequence and the
	// hasNextKmer / getNextKmer / iterateToNextKmer calls then walk it -- confirm.
	void createSequenceReader(int file,int sequence,int kmerLength);

	int getCurrentSequenceLengthInKmers();
	bool hasNextKmer(int kmerLength);
	void iterateToNextKmer();
	/* writes its result through the kmer pointer supplied by the caller */
	void getNextKmer(int kmerLength,Kmer*kmer);

	bool kmerContainsN();

	string getCurrentSequenceName();

	/* has*() / get*() pairs: presumably the has*() query must hold before calling the getter -- confirm */
	bool hasCurrentSequenceIdentifier();
	LargeIndex getCurrentSequenceIdentifier();

	bool hasIdentifier_EMBL_CDS();
	PhysicalKmerColor getIdentifier_EMBL_CDS();

	/* keyed by file number; presumably backed by m_createdDirectories -- confirm */
	bool hasDirectory(int file);
	void setCreatedDirectory(int file);
};

#endif