File: s_mapper.h

package info (click to toggle)
kmc 2.3%2Bdfsg-5
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 1,416 kB
  • ctags: 3,050
  • sloc: cpp: 17,316; perl: 178; makefile: 91; sh: 16
file content (160 lines) | stat: -rw-r--r-- 3,337 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
/*
  This file is a part of KMC software distributed under GNU GPL 3 licence.
  The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
  
  Authors: Sebastian Deorowicz, Agnieszka Debudaj-Grabysz, Marek Kokot
  
  Version: 2.3.0
  Date   : 2015-08-21
*/

#ifndef _S_MAPPER_H
#define _S_MAPPER_H
#include "defs.h"
#include "mmer.h"
#include "params.h"

#ifdef DEVELOP_MODE
#include "develop.h"
#endif


class CSignatureMapper
{
	uint32 map_size;
	int32* signature_map;
	uint32 signature_len;
	uint32 special_signature;
	CMemoryPool* pmm_stats;
	uint32 n_bins;

	class Comp
	{
		uint32* signature_occurrences;
	public:
		Comp(uint32* _signature_occurrences) : signature_occurrences(_signature_occurrences){}
		bool operator()(int i, int j)
		{
			return signature_occurrences[i] > signature_occurrences[j];
		}
	};
	
public:	
	void Init(uint32* stats)
	{
		uint32 *sorted;
		pmm_stats->reserve(sorted);
		for (uint32 i = 0; i < map_size ; ++i)
			sorted[i] = i;
		sort(sorted, sorted + map_size, Comp(stats));

		list<pair<uint32, uint64>> _stats;
		for (uint32 i = 0; i < map_size ; ++i)
		{
			if (CMmer::is_allowed(sorted[i], signature_len))
				_stats.push_back(make_pair(sorted[i], stats[sorted[i]]));
		}

		list<pair<uint32, uint64>> group;
		uint32 bin_no = 0;
		//counting sum
		double sum = 0.0;
		for (auto &i : _stats)
		{
			i.second += 1000;
			sum += i.second;
		}

		double mean = sum / n_bins;
		double max_bin_size = 1.1 * mean;
		uint32 n = n_bins - 1; //one is needed for disabled signatures
		uint32 max_bins = n_bins - 1;
		while (_stats.size() > n)
		{
			pair<uint32, uint64>& max = _stats.front();

			if (max.second > mean)
			{
				signature_map[max.first] = bin_no++;				
				sum -= max.second;
				mean = sum / (max_bins - bin_no);
				max_bin_size = 1.1 * mean;

				_stats.pop_front();
				--n;
			}
			else
			{
				//heuristic
				group.clear();
				double tmp_sum = 0.0;
				uint32 in_current = 0;
				for (auto it = _stats.begin(); it != _stats.end();)
				{
					if (tmp_sum + it->second < max_bin_size)
					{
						tmp_sum += it->second;
						group.push_back(*it);
						it = _stats.erase(it);
						++in_current;
					}
					else
						++it;
				}

				for (auto i = group.begin(); i != group.end(); ++i)
				{
					signature_map[i->first] = bin_no;
				}
				--n;
				++bin_no;

				sum -= tmp_sum;
				mean = sum / (max_bins - bin_no);
				max_bin_size = 1.1 * mean;
			}
		}
		if (_stats.size() > 0)
		{
			for (auto i = _stats.begin(); i != _stats.end(); ++i)
			{
				signature_map[i->first] = bin_no++;
				//cout << "rest bin: " << i->second << "\n";
			}
		}
		signature_map[special_signature] = bin_no;
		pmm_stats->free(sorted);

#ifdef DEVELOP_MODE
		map_log(signature_len, map_size, signature_map);
#endif

	}
	CSignatureMapper(CMemoryPool* _pmm_stats, uint32 _signature_len, uint32 _n_bins)
	{
		n_bins = _n_bins;
		pmm_stats = _pmm_stats;
		signature_len = _signature_len;
		special_signature = 1 << 2 * signature_len;
		map_size = (1 << 2 * signature_len) + 1;
		signature_map = new int32[map_size];		
		fill_n(signature_map, map_size, -1);
	}
	inline int32 get_bin_id(uint32 signature)
	{
		return signature_map[signature];
	}

	inline int32 get_max_bin_no()
	{
		return signature_map[special_signature];
	}

	~CSignatureMapper()
	{
		delete [] signature_map;
	}

};

#endif