File: ParallelMemHash.cpp

package info (click to toggle)
libmems 1.6.0%2B4725-9
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, bullseye
  • size: 2,120 kB
  • sloc: cpp: 21,579; ansic: 4,312; xml: 115; makefile: 103; sh: 26
file content (133 lines) | stat: -rw-r--r-- 3,671 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
/*******************************************************************************
 * $Id: ParallelMemHash.cpp,v 1.32 2004/03/01 02:40:08 darling Exp $
 * This file is copyright 2002-2007 Aaron Darling and authors listed in the AUTHORS file.
 * Please see the file called COPYING for licensing, copying, and modification
 * Please see the file called COPYING for licensing details.
 * **************
 ******************************************************************************/

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "libMems/ParallelMemHash.h"
#include <vector>

#ifdef _OPENMP


using namespace std;
using namespace genome;
namespace mems {

	ParallelMemHash::ParallelMemHash() : MemHash()

{
}

ParallelMemHash::ParallelMemHash(const ParallelMemHash& mh) : MemHash(mh)
{
	*this = mh;
}

ParallelMemHash& ParallelMemHash::operator=( const ParallelMemHash& mh ){
	thread_mem_table = mh.thread_mem_table;
	return *this;
}

ParallelMemHash* ParallelMemHash::Clone() const{
	return new ParallelMemHash(*this);
}

void ParallelMemHash::FindMatches( MatchList& ml ) 
{
	for( uint32 seqI = 0; seqI < ml.seq_table.size(); ++seqI ){
                if( !AddSequence( ml.sml_table[ seqI ], ml.seq_table[ seqI ] ) ){
                        ErrorMsg( "Error adding " + ml.seq_filename[seqI] + "\n");
                        return;
                }
        }

	size_t CHUNK_SIZE = 200000;
	// break up the SMLs into nice small chunks
	vector< vector< gnSeqI > > chunk_starts;
	vector< gnSeqI > chunk_lengths;

	// set the progress counter data
	mers_processed = 0;
	total_mers = 0;
	m_progress = -1;
	for( size_t i = 0; i < ml.sml_table.size(); i++ )
		total_mers += ml.sml_table[i]->Length();

	// break up on the longest SML
	int max_length_sml = -1;
	size_t maxlen = 0;
	for( size_t i = 0; i < ml.sml_table.size(); i++ )
	{
		if( ml.sml_table[i]->Length() > maxlen )
		{
			maxlen = ml.sml_table[i]->Length();
			max_length_sml = i;
		}
	}

	chunk_starts.push_back( vector< gnSeqI >( seq_count, 0 ) );

	while( chunk_starts.back()[max_length_sml] + CHUNK_SIZE < ml.sml_table[max_length_sml]->Length() )
	{
		vector< gnSeqI > tmp( seq_count, 0 );
		GetBreakpoint(max_length_sml, chunk_starts.back()[max_length_sml] + CHUNK_SIZE, tmp);
		chunk_starts.push_back(tmp);
	}

	
	// now that it's all chunky, search in parallel
#pragma omp parallel for schedule(dynamic)
	for( int i = 0; i < chunk_starts.size(); i++ )
	{
		if(thread_mem_table.get().size() != mem_table.size())
			thread_mem_table.get().resize( mem_table.size() );

		vector< gnSeqI > chunk_lens(seq_count);
		if( i + 1 < chunk_starts.size() )
		{
			for( size_t j = 0; j < seq_count; j++ )
				chunk_lens[j] = chunk_starts[i+1][j] - chunk_starts[i][j];
		}else
			chunk_lens = vector< gnSeqI >( seq_count, GNSEQI_END );
		SearchRange( chunk_starts[i], chunk_lens );
		MergeTable();
	}
	GetMatchList( ml );	
}

void ParallelMemHash::MergeTable()
{
#pragma omp critical
	{
		size_t buckets = thread_mem_table.get().size();
		for( size_t bI = 0; bI < buckets; bI++ )
		{
			vector< MatchHashEntry* >& bucket = thread_mem_table.get()[bI];
			for( size_t mI = 0; mI < bucket.size(); mI++ )
			{
				MemHash::AddHashEntry((*(bucket[mI])), mem_table);
//				bucket[mI]->Free();
			}
		}
		thread_mem_table.get() = mem_table;
	}
}



MatchHashEntry* ParallelMemHash::AddHashEntry(MatchHashEntry& mhe){
	// do the normal procedure, but use the thread-local mem table.
	return MemHash::AddHashEntry(mhe, thread_mem_table.get());
}


} // namespace mems

#endif // _OPENMP