File: readstats.hpp

package info (click to toggle)
sortmerna 4.3.7-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 134,048 kB
  • sloc: cpp: 24,424; ansic: 15,923; python: 1,453; sh: 224; makefile: 31
file content (96 lines) | stat: -rw-r--r-- 3,960 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/*
 @copyright 2016-2021  Clarity Genomics BVBA
 @copyright 2012-2016  Bonsai Bioinformatics Research Group
 @copyright 2014-2016  Knight Lab, Department of Pediatrics, UCSD, La Jolla

 @parblock
 SortMeRNA - next-generation reads filter for metatranscriptomic or total RNA
 This is a free software: you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 SortMeRNA is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU Lesser General Public License for more details.

 You should have received a copy of the GNU Lesser General Public License
 along with SortMeRNA. If not, see <http://www.gnu.org/licenses/>.
 @endparblock

 @contributors Jenya Kopylova   jenya.kopylov@gmail.com
			   Laurent No      laurent.noe@lifl.fr
			   Pierre Pericard  pierre.pericard@lifl.fr
			   Daniel McDonald  wasade@gmail.com
			   Mikal Salson    mikael.salson@lifl.fr
			   Hlne Touzet    helene.touzet@lifl.fr
			   Rob Knight       robknight@ucsd.edu
*/

/*
 * file: readstats.hpp
 * created: Nov 06, 2017 Mon
 *
 * Collective Statistics for all Reads. Encapsulates old 'compute_read_stats' logic and results
 * Some statistics computed during Alignment, and some in Post-processing
 */

#pragma once

#include <cstdint>
#include <string>
#include <vector>
#include <map>
#include <mutex>
#include <atomic>

#include "common.hpp"
#include "options.hpp"

// forward
class KeyValueDatabase;

/*
 * 1. 'all_reads_count' - Should be known before processing and index loading. 
 * 2. 'total_mapped_sw_id_cov'
 *        Calculated during alignment and stored to KVDB
 *        Thread accessed - Synchronize
 * 3. 'reads_matched_per_db' - Synchronize.
 *			Calculated during alignment. Thread accessed.
 * 4. 'total_reads_denovo_clustering'
 *			TODO: currently accessed in single thread ('computeStats') but potentially could be multiple threads
 */
struct Readstats 
{
	std::string dbkey; // Hashed concatenation of underscore separated basenames of the read files. Used as the key into the Key-value DB. 
	std::string suffix; // 'fasta' | 'fastq' TODO: remove?

	uint64_t all_reads_count; // [1] total number of reads in file.
	uint64_t all_reads_len; // total number of nucleotides in all reads i.e. sum of length of All read sequences
	uint32_t min_read_len; // shortest Read length. (read only)
	uint32_t max_read_len; // longest Read length. (read only)
	uint64_t total_otu; // not to store in DB

	std::atomic<uint64_t> num_aligned; // reads passing E-value threshold.
	std::atomic<uint64_t> n_yid_ncov; // SW + ID - COV i.e. aligned passing ID, failing COV
	std::atomic<uint64_t> n_nid_ycov; // SW - ID + COV i.e. aligned failing ID, passing COV
	std::atomic<uint64_t> n_yid_ycov; // [2] SW + ID + COV i.e. aligned passing ID, passing COV
	std::atomic<uint64_t> num_denovo; // [4] SW - ID - COV i.e. 'de novo' reads, aligned failing ID, failing COV
	std::atomic<uint64_t> num_short; // count of reads shorter than a threshold of N nucleotides. Reset for each index.

	std::vector<uint64_t> reads_matched_per_db; // [3] reads matched per database.
    //              |_TODO: should be atomic std::atomic<uint64_t> 20201019

	bool is_stats_calc; // flags 'computeStats' was called.
	bool is_set_aligned_id_cov; // flag 'total_aligned_id_cov' was calculated (so no need to calculate no more)

	Readstats(uint64_t all_reads_count, uint64_t all_reads_len, uint32_t min_read_len, uint32_t max_read_len, KeyValueDatabase& kvdb, Runopts& opts);

	void calcSuffix(Runopts& opts);
	std::string toBstring();
	std::string toString();
	bool restoreFromDb(KeyValueDatabase & kvdb);
	void store_to_db(KeyValueDatabase & kvdb);
	void set_is_set_aligned_id_cov();
}; // ~struct Readstats