File: develop.cpp

package info (click to toggle)
kmc 3.1.1%2Bdfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 2,376 kB
  • sloc: cpp: 33,006; python: 372; perl: 178; makefile: 135; sh: 34
file content (144 lines) | stat: -rw-r--r-- 4,146 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#include "stdafx.h"

#include "develop.h"
#include "params.h"
#include <iostream>
#include <functional>
using namespace std;

void map_log(uint32 signature_len, uint32 map_size, int32* signature_map)
{
	#ifdef MAP_LOG_SRC
		FILE* mapLogFile = fopen(MAP_LOG_SRC, "w");
		char ACGT[10];
		ACGT[signature_len] = '\0';
		char symbols[] = { 'A', 'C', 'G', 'T' };
		if (!mapLogFile)
		{
			cerr << "Error: cannot save map log to file";
			exit(1);
		}
		fprintf(mapLogFile, "SIGNMATURE | ACGT | BIN NO\n");
		for (uint32 i = 0; i < map_size; ++i)
		{
	
			for (int j = signature_len - 1; j >= 0; --j)
				ACGT[signature_len - j - 1] = symbols[(i >> 2 * j) & 3];
	
			if (signature_map[i] >= 0)
				fprintf(mapLogFile, "%i\t\t%s\t%i\n", i, ACGT, signature_map[i]);
			else
				fprintf(mapLogFile, "%i\t\t%s\tDISABLED_SIGNATURE\n", i, ACGT);
		}
	
		fclose(mapLogFile);
	#endif
}



void save_bins_stats(CKMCQueues& Queues, CKMCParams& Params, uint32 kmer_size, uint64 n_reads, uint32 /*signature_len*/, uint32 map_size, int32* signature_map)
{
#ifdef KMERS_PER_BIN_LOG_FILE
	int32 bin_id;
	CMemDiskFile *file;
	string name;
	uint64 n_rec;
	uint64 n_plus_x_recs;
	uint64 n_super_kmers;
	uint64 size;

	Queues.bd->reset_reading();
	FILE* stats_file = fopen(KMERS_PER_BIN_LOG_FILE, "w");
	uint64 sum_size, sum_n_rec, sum_n_plus_x_recs, sum_n_super_kmers;
	sum_size = sum_n_rec = sum_n_plus_x_recs = sum_n_super_kmers = 0;
	if (!stats_file)
	{
		cerr << "Error: cannot open file to store kmers per bin: " << KMERS_PER_BIN_LOG_FILE << "\n";
		exit(1);
	}
	fprintf(stats_file, "%s;%s;%s;%s;%s;%s\n", "bin_id", "n_rec", "n_super_kmers", "size", "2nd stage MEM", "n_singatures");
	while ((bin_id = Queues.bd->get_next_sort_bin()) >= 0)
	{
		Queues.bd->read(bin_id, file, name, size, n_rec, n_plus_x_recs, n_super_kmers);

		// Reserve memory necessary to process the current bin at all next stages
		uint64 input_kmer_size;
		int64 kxmer_counter_size;
		uint32 kxmer_symbols;
		if (Params.max_x)
		{
			input_kmer_size = n_plus_x_recs * kmer_size;
			kxmer_counter_size = n_plus_x_recs * sizeof(uint32);
			kxmer_symbols = Params.kmer_len + Params.max_x + 1;
		}
		else
		{
			input_kmer_size = n_rec * kmer_size;
			kxmer_counter_size = 0;
			kxmer_symbols = Params.kmer_len;
		}
		uint64 max_out_recs = (n_rec + 1) / max(Params.cutoff_min, 1);

		std::function<int64(int64)> round_up_to_alignment = [](int64 x){ return (x + ALIGNMENT - 1) / ALIGNMENT * ALIGNMENT; };


		uint64 counter_size = min(BYTE_LOG(Params.cutoff_max), BYTE_LOG(Params.counter_max));		

		uint32 kmer_symbols = Params.kmer_len - Params.lut_prefix_len;
		uint64 kmer_bytes = kmer_symbols / 4;
		uint64 out_buffer_size = max_out_recs * (kmer_bytes + counter_size);

		uint32 rec_len = (kxmer_symbols + 3) / 4;

		uint64 lut_recs = 1 << (2 * Params.lut_prefix_len);
		uint64 lut_size = lut_recs * sizeof(uint64);


		size = round_up_to_alignment(size);
		input_kmer_size = round_up_to_alignment(input_kmer_size);
		out_buffer_size = round_up_to_alignment(out_buffer_size);
		kxmer_counter_size = round_up_to_alignment(kxmer_counter_size);
		lut_size = round_up_to_alignment(lut_size);


		int64 part1_size;
		int64 part2_size;

		if (rec_len % 2 == 0)
		{
			part1_size = input_kmer_size + kxmer_counter_size;
			part2_size = max(max(size, input_kmer_size), out_buffer_size + lut_size);
		}
		else
		{
			part1_size = max(input_kmer_size + kxmer_counter_size, size);
			part2_size = max(input_kmer_size, out_buffer_size + lut_size);
		}
		int64 req_size = part1_size + part2_size;

		uint64 n_signatures = 0;
		for (uint32 i = 0; i < map_size; ++i)
		{
			if (signature_map[i] == bin_id)
				++n_signatures;
		}

		fprintf(stats_file, "%i;%llu;%llu;%llu;%llu;%llu\n", bin_id, n_rec, n_super_kmers, size, (uint64)req_size, n_signatures);
		sum_size += size;
		sum_n_rec += n_rec;
		sum_n_plus_x_recs += n_plus_x_recs;
		sum_n_super_kmers += n_super_kmers;
	}

	fprintf(stats_file, "%s;%llu;%llu;%llu\n", "SUMMARY", sum_n_rec, sum_n_super_kmers, sum_size);
	fprintf(stats_file, "n_reads: %llu\n", n_reads);

	fclose(stats_file);

	Queues.bd->reset_reading();
	exit(1);
#endif
}

// ***** EOF