1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
|
//
// C++ Interface: StateSpace
//
// Description:
//
//
// Author: BUI Quang Minh (c) 2018
//
// Copyright: See COPYING file that comes with this distribution
//
//
#ifndef STATESPACE_H
#define STATESPACE_H
#include <iostream>
#include <string>
#include <vector>
#include <stdint.h>
#include "utils/tools.h"
#include "yaml-cpp/yaml.h"
namespace PML {
/**
    StateType: identifier of a single state, stored as a 32-bit unsigned int
*/
using StateType = uint32_t;

/**
    StateVector: a sequence of state IDs.
    The container is spelled std::vector so that this alias does not depend
    on a using-directive being in effect where this header is included.
*/
using StateVector = std::vector<StateType>;
/**
    Sequence (data) type used to initialise a state space.
    Enumerators carry no explicit values, so they are numbered
    consecutively from 0 (SEQ_DNA) to 7 (SEQ_UNKNOWN).
*/
enum SeqType {
    SEQ_DNA,
    SEQ_PROTEIN,
    SEQ_BINARY,
    SEQ_MORPH,
    SEQ_MULTISTATE,
    SEQ_CODON,
    SEQ_POMO,
    SEQ_UNKNOWN
};
// IMPORTANT: refactor STATE_UNKNOWN
//const char STATE_UNKNOWN = 126;
// TODO DS: This seems like a significant restriction.
/* PoMo: STATE_INVALID is not handled in PoMo. The value is set to 127 to
   remove a warning about comparison to char in alignment.cpp.
   This becomes important if the maximum N is increased above 21, because
   the state space is then larger than 127 and a different solution has to
   be found. */
// Previous value, kept for reference:
// const unsigned char STATE_INVALID = 255;
constexpr unsigned char STATE_INVALID = 127;
/*
    Associative-container aliases used by the state space code.
    With USE_HASH_MAP defined they are built on unordered_map, otherwise on
    map. The alias names are the same in both branches; note that
    StringDoubleHashMap is a plain map when USE_HASH_MAP is not defined.
*/
#ifdef USE_HASH_MAP
using StringIntMap        = unordered_map<string, int>;
using StringStateMap      = unordered_map<string, StateType>;
using StateStringMap      = unordered_map<StateType, string>;
using StringDoubleHashMap = unordered_map<string, double>;
using IntIntMap           = unordered_map<uint32_t, uint32_t>;
#else
using StringIntMap        = map<string, int>;
using StringStateMap      = map<string, StateType>;
using StateStringMap      = map<StateType, string>;
using StringDoubleHashMap = map<string, double>;
using IntIntMap           = map<uint32_t, uint32_t>;
#endif
/**
    General class defining a state space: a set of states, each of which has
    a numeric ID (StateType) and a raw string representation.
*/
class StateSpace {
public:
    /** constructor */
    StateSpace();

    /** destructor */
    ~StateSpace();

    /**
        convert a raw string to a single state ID
        @param str raw state string
        @return state ID
    */
    StateType toState(string str);

    /**
        convert the entire string into vector of states
        @param[in] str input string
        @param[out] str_states output vector of StateType
    */
    void toState(string &str, StateVector &str_states);

    /**
        convert a state back to raw string
        @param state state ID
        @return raw state string
    */
    string toString(StateType state);

    /**
        check if a state is unknown (missing or gap)
        @param state state ID
    */
    bool isUnknown(StateType state);

    /**
        get number of states
        @return the num_states member
    */
    int getNStates() const { return num_states; }

    /**
        get all number of states incl. missing/gap/ambiguous states
        @return number of entries in the string-to-state map, converted
            explicitly from the container's size type to int
    */
    int getNAllStates() const { return static_cast<int>(states.size()); }

    /**
        initialise from a state definition string
        @param datatype a YAML::Node structure
    */
    void parseStateSpace(YAML::Node datatype);

    /**
        initialise state space from a SeqType
        @param seqtype sequence type
    */
    void initStateSpace(SeqType seqtype);

    /**
        reset state space
    */
    void resetStateSpace();

    /** number of states (public; also readable through getNStates()) */
    int num_states;

protected:
    /** state space name */
    string space_name;

    /**
        number of all states
        NOTE(review): presumably includes missing/gap/ambiguous states;
        getNAllStates() reports states.size() rather than this member --
        confirm the two are kept in agreement.
    */
    int num_all_states;

    /** map from raw state string to state ID */
    StringStateMap states;

    /** map from state ID to raw state string */
    StateStringMap raw_states;

    /**
        map from ambiguous states to vector of state ID
        NOTE(review): always an unordered_map, independent of USE_HASH_MAP.
    */
    unordered_map<StateType, StateVector> equate;

    /** vector of the same size as states to translate to another state space */
    StrVector translate;

private:
    /** minimum length of state string */
    int min_state_len;

    /** maximum length of state string */
    int max_state_len;
};
} // namespace PML
#endif
|