1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
|
/*
This file is a part of KMC software distributed under GNU GPL 3 licence.
The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
Authors: Marek Kokot
Version: 3.2.4
Date : 2024-02-09
*/
#ifndef _TOKENIZER_H
#define _TOKENIZER_H
#include "defs.h"
#include <vector>
#include <regex>
#include <list>
#include <set>
#include <iostream>
// Kinds of token. A Token carries one of these values as its second element,
// and CTokenizer associates each of its regex patterns with one of them.
// NOTE(review): the per-group descriptions below are inferred from the
// enumerator names only - confirm against the tokenizer implementation.
// The order of enumerators is kept as-is so their underlying values do not change.
enum class TokenType
{
	// operand
	VARIABLE,

	// binary operators
	PLUS_OPER,
	STRICT_MINUS_OPER,
	COUNTER_MINUS_OPER,
	MUL_OPER,

	// grouping
	PARENTHESIS_OPEN,
	PARENTHESIS_CLOSE,

	// end marker
	TERMINATOR,

	// modifiers
	DIFF_MODIFIER,
	SUM_MODIFIER,
	MIN_MODIFIER,
	MAX_MODIFIER,
	LEFT_MODIFIER,
	RIGHT_MODIFIER
};
// A token: a string (first) paired with the TokenType classifying it (second).
// NOTE(review): the string is presumably the token's text as it appeared in
// the expression - confirm in the tokenizer implementation.
using Token = std::pair<std::string, TokenType>;
//************************************************************************************************************
// CTokenizer - Tokenizer for k-mer set operations
//************************************************************************************************************
class CTokenizer
{
public:
static const std::set<std::string>& GetKeywords();
CTokenizer();
void Tokenize(const std::string& _expression, std::list<Token>& tokens);
private:
std::vector<std::pair<std::regex, TokenType>> token_patterns;
void leftTrimString(std::string& str, int start_pos);
};
#endif
// ***** EOF
|