// File: BlastdbFileIndexReader.cpp
//
// Package info:
//   plast 2.3.2+dfsg-12
//   links: PTS, VCS
//   area: main
//   in suites: forky, sid
//   size: 16,344 kB
//   sloc: cpp: 28,979; ansic: 2,075; sh: 478; makefile: 80
// File content: 143 lines | stat: -rwxr-xr-x 5,452 bytes
/*****************************************************************************
 *                                                                           *
 *   PLAST : Parallel Local Alignment Search Tool                            *
 *   Version 2.3, released November 2015                                     *
 *   Copyright (c) 2009-2015 Inria-Cnrs-Ens                                  *
 *                                                                           *
 *   PLAST is free software; you can redistribute it and/or modify it under  *
 *   the Affero GPL ver 3 License, that is compatible with the GNU General   *
 *   Public License                                                          *
 *                                                                           *
 *   This program is distributed in the hope that it will be useful,         *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of          *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the            *
 *   Affero GPL ver 3 License for more details.                              *
 *****************************************************************************/

#include <database/impl/BlastdbFileIndexReader.hpp>
#include <database/impl/DatabaseUtility.hpp>
#include <misc/api/PlastStrings.hpp>
#include <os/impl/CommonOsImpl.hpp>
#include <os/impl/DefaultOsFactory.hpp>
#include <misc/api/macros.hpp>


#define DEBUG(a)  //printf a

using namespace std;
using namespace dp;
using namespace os::impl;
using namespace database::impl;

/********************************************************************************/
namespace database { namespace impl {
/********************************************************************************/

/*********************************************************************
** METHOD  : BlastdbFileIndexReader::BlastdbFileIndexReader
** PURPOSE : Prepares the access to an index file: memorizes its name,
**           computes the same name without its extension, creates the
**           file object through DefaultFactory::fileMem() and sets a
**           first guess of the sequences kind from the database type
**           returned by DatabaseLookupType::quickReaderType().
** INPUT   : filename : path of the index file; must not be NULL since
**           a std::string is built from it.
** OUTPUT  :
** RETURN  :
** REMARKS : No content of the file is decoded here, this is done by
**           read(). All numeric attributes start at 0 and _data at NULL.
*********************************************************************/
BlastdbFileIndexReader::BlastdbFileIndexReader (const char* filename)
    :  _filename(filename),_version(0),
       _dbKind (IDatabaseQuickReader::ENUM_UNKNOWN),
       _title(""),_timestamp(""),
       _dataSize(0), _nbSequences(0), _maxSeqSize(0),_startIndexOffsetTable(0),
       _sequenceFilesize(0),_startSequenceIndex(0), _data(NULL)
{
    /** Remove the extension, i.e. everything from the last '.' of the name.
     *  The position is kept as a std::string::size_type: when there is no '.',
     *  find_last_of returns npos and substr(0,npos) gives the whole name, without
     *  relying on a narrowing conversion to a 32 bits integer. */
    const std::string fullName (filename);
    const std::string::size_type lastDot = fullName.find_last_of ('.');
    _filenameWithoutExt = fullName.substr (0, lastDot);

    /** Get the type of database from the file name, then create the file object
     *  used later by read(). */
    const DatabaseLookupType::QuickReaderType_e databaseType = DatabaseLookupType::quickReaderType (filename);
    _fileindex = DefaultFactory::fileMem().newFile (filename);

    /** PIN/PAL types hold amino acids, NIN/NAL types hold nucleotides. */
    const bool isProtein    = (databaseType == DatabaseLookupType::ENUM_BLAST_PIN)
                           || (databaseType == DatabaseLookupType::ENUM_BLAST_PAL);
    const bool isNucleotide = (databaseType == DatabaseLookupType::ENUM_BLAST_NIN)
                           || (databaseType == DatabaseLookupType::ENUM_BLAST_NAL);

    if (isProtein)
    {
        _dbKind = IDatabaseQuickReader::ENUM_AMINO_ACID;
    }
    else if (isNucleotide)
    {
        _dbKind = IDatabaseQuickReader::ENUM_NUCLOTID;
    }
    else
    {
        _dbKind = IDatabaseQuickReader::ENUM_UNKNOWN;
    }
}

/*********************************************************************
** METHOD  : BlastdbFileIndexReader::~BlastdbFileIndexReader
** PURPOSE : Releases the file object created by the constructor.
** INPUT   :
** OUTPUT  :
** RETURN  :
** REMARKS : Only _fileindex is deleted here; _data, which read() gets
**           from _fileindex->getData(), is not released by this class.
*********************************************************************/
BlastdbFileIndexReader::~BlastdbFileIndexReader ()
{
    /** The index file object is owned by this reader. */
    delete _fileindex;
}

/*********************************************************************
** METHOD  : BlastdbFileIndexReader::read
** PURPOSE : Decodes the header of the index file: version, kind of
**           sequences, number of sequences, data size, maximum sequence
**           size, position of the offset table and size of the sequence
**           file.
** INPUT   :
** OUTPUT  : _data, _version, _dbKind, _nbSequences, _dataSize,
**           _maxSeqSize, _startIndexOffsetTable and _sequenceFilesize
**           are updated.
** RETURN  :
** REMARKS : The title and the timestamp are skipped, not stored.
**           NOTE(review): no position is checked against the size of the
**           file and the pointer returned by getData() is not tested;
**           a truncated or invalid file is not detected here.
*********************************************************************/
void BlastdbFileIndexReader::read ()
{
    /** Get the content of the index file and reset the counters. */
    _data = _fileindex->getData();

    _dataSize    = 0;
    _nbSequences = 0;

    /** The header begins with three 32 bits fields:
     *      Version       : Int32
     *      Database type : Int32
     *      Title length  : Int32
     *  For the two first ones, only the fourth byte of the field is looked at. */
    u_int32_t cursor = 3;
    _version = _data[cursor];

    cursor += 4;
    if (_data[cursor] == 0)
    {
        _dbKind = IDatabaseQuickReader::ENUM_NUCLOTID;
    }
    else
    {
        _dbKind = IDatabaseQuickReader::ENUM_AMINO_ACID;
    }
    cursor += 1;

    /** Title: read its length, then jump over the length field and the title. */
    u_int32_t fieldLength = CHAR_TO_INT32 (_data[cursor], _data[cursor+1], _data[cursor+2], _data[cursor+3]);
    cursor += 4 + fieldLength;

    /** Timestamp: same layout, a length followed by the characters to skip. */
    fieldLength = CHAR_TO_INT32 (_data[cursor], _data[cursor+1], _data[cursor+2], _data[cursor+3]);
    cursor += 4 + fieldLength;

    /** Then come three numeric fields:
     *      Sequences number  : Int32
     *      Data size         : Int64
     *      Sequence max size : Int32 */
    _nbSequences = CHAR_TO_INT32 (_data[cursor], _data[cursor+1], _data[cursor+2], _data[cursor+3]);
    cursor += 4;

    /** The bytes of the data size are given to the macro in the reverse order. */
    _dataSize = CHAR_TO_INT64 (_data[cursor+7], _data[cursor+6], _data[cursor+5], _data[cursor+4],
                               _data[cursor+3], _data[cursor+2], _data[cursor+1], _data[cursor]);
    cursor += 8;

    _maxSeqSize = CHAR_TO_INT32 (_data[cursor], _data[cursor+1], _data[cursor+2], _data[cursor+3]);
    cursor += 4;

    /** The offset table starts right after these fields; remember its position. */
    _startIndexOffsetTable = cursor;

    /** Go to the last 32 bits entry of the second block of (_nbSequences+1) entries:
     *  this value is kept as the size of the sequence file. */
    cursor += 2*((_nbSequences+1)*4)-4;
    _sequenceFilesize = CHAR_TO_INT32 (_data[cursor], _data[cursor+1], _data[cursor+2], _data[cursor+3]);
}
/********************************************************************************/
} } /* end of namespaces. */
/********************************************************************************/