File: tokenizer.h

package info (click to toggle)
kmc 3.2.4+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,716 kB
  • sloc: cpp: 38,308; python: 664; makefile: 216; perl: 179; sh: 34
file content (41 lines) | stat: -rw-r--r-- 1,239 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
/*
  This file is a part of KMC software distributed under GNU GPL 3 licence.
  The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
  
  Authors: Marek Kokot
  
  Version: 3.2.4
  Date   : 2024-02-09
*/

#ifndef _TOKENIZER_H
#define _TOKENIZER_H

#include "defs.h"
#include <vector>
#include <regex>
#include <list>
#include <set>
#include <iostream>

// Token categories used together with CTokenizer.
// The enumerators keep their original declaration order, so each one's
// underlying integer value (0..13) is unchanged.
enum class TokenType
{
	// operand
	VARIABLE,

	// operators
	PLUS_OPER,
	STRICT_MINUS_OPER,
	COUNTER_MINUS_OPER,
	MUL_OPER,

	// parentheses
	PARENTHESIS_OPEN,
	PARENTHESIS_CLOSE,

	// terminator
	TERMINATOR,

	// modifiers
	DIFF_MODIFIER,
	SUM_MODIFIER,
	MIN_MODIFIER,
	MAX_MODIFIER,
	LEFT_MODIFIER,
	RIGHT_MODIFIER
};

// A token is a (string, TokenType) pair.
// NOTE(review): the string is presumably the matched text of the token - confirm in the implementation.
using Token = std::pair<std::string, TokenType>;

//************************************************************************************************************
// CTokenizer - Tokenizer for k-mers set operations
//************************************************************************************************************
class CTokenizer
{
public:
	// Returns a reference to a set of keyword strings. Static, so it can be called
	// without a CTokenizer instance.
	// NOTE(review): the contents and storage of the set are defined in the
	// implementation file - confirm there before relying on them.
	static const std::set<std::string>& GetKeywords();

	// Default constructor.
	// NOTE(review): presumably populates token_patterns - confirm in the implementation.
	CTokenizer();

	// Tokenizes _expression; the result is handed back through the non-const
	// `tokens` reference rather than a return value.
	// NOTE(review): whether `tokens` is cleared first or appended to, and how
	// unrecognized input is reported, is not visible from this header.
	void Tokenize(const std::string& _expression, std::list<Token>& tokens);

private:
	// (regular expression, token type) pairs.
	// NOTE(review): presumably each regex recognizes the token type it is paired
	// with, and vector order may matter for matching - verify against Tokenize.
	std::vector<std::pair<std::regex, TokenType>> token_patterns;

	// Private helper that may modify `str` in place (taken by non-const reference).
	// NOTE(review): the name suggests trimming from the left starting at start_pos;
	// exact semantics are defined in the implementation file - confirm there.
	void leftTrimString(std::string& str, int start_pos);	
};

#endif

// ***** EOF