File: tokenizer.cpp

package info (click to toggle)
kmc 2.3%2Bdfsg-7
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,412 kB
  • sloc: cpp: 17,316; perl: 178; makefile: 90; sh: 16
file content (77 lines) | stat: -rw-r--r-- 2,903 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/*
  This file is a part of KMC software distributed under GNU GPL 3 licence.
  The homepage of the KMC project is http://sun.aei.polsl.pl/kmc
  
  Authors: Marek Kokot
  
  Version: 2.3.0
  Date   : 2015-08-21
*/

#include "stdafx.h"
#include "tokenizer.h"

/*****************************************************************************************************************************/
/******************************************************** CONSTRUCTOR ********************************************************/
/*****************************************************************************************************************************/

/*
  Builds the table of token patterns used by Tokenize().

  Every pattern is anchored with '^' so it can only match at the very beginning
  of the text still to be tokenized, and has exactly one capture group holding
  the token text. Tokenize() tries the patterns in table order and takes the
  first one that matches.
*/
CTokenizer::CTokenizer()
{
	token_patterns.resize(7);
	token_patterns[0] = std::make_pair("^(\\()", TokenType::PARENTHESIS_OPEN);
	token_patterns[1] = std::make_pair("^(\\))", TokenType::PARENTHESIS_CLOSE);
	token_patterns[2] = std::make_pair("^(\\-)", TokenType::STRICT_MINUS_OPER);
	token_patterns[3] = std::make_pair("^(\\~)", TokenType::COUNTER_MINUS_OPER);
	token_patterns[4] = std::make_pair("^(\\+)", TokenType::PLUS_OPER);
	token_patterns[5] = std::make_pair("^(\\*)", TokenType::MUL_OPER);
	// '+' and not '*': a variable needs at least one word character. A pattern
	// that can match the empty string succeeds on every input, so an unknown
	// character would be "tokenized" as an empty variable without consuming
	// anything, and Tokenize() would loop forever instead of reporting an error.
	token_patterns[6] = std::make_pair("^(\\w+)", TokenType::VARIABLE);
}


/*****************************************************************************************************************************/
/********************************************************** PUBLIC ***********************************************************/
/*****************************************************************************************************************************/

/*
  Splits _expression into tokens and appends them to `tokens`.

  _expression - text to tokenize; it is copied, the caller's string is not modified.
  tokens      - output list; recognized tokens are appended in the order they
                appear (existing elements are left untouched).

  Whitespace before the first token and after every token is skipped. At each
  position the patterns in token_patterns are tried in order and the first one
  that yields a non-empty match wins. If no pattern matches, an error message
  is written to std::cout and the process is terminated with exit(1).
*/
void CTokenizer::Tokenize(const std::string& _expression, std::list<Token>& tokens)
{
	std::string expression = _expression;
	std::smatch match;
	leftTrimString(expression, 0);
	while (!expression.empty())
	{
		bool valid_token = false;
		for (const auto& pattern : token_patterns)
		{
			// A zero-length match consumes no input: accepting it would push an
			// empty token and leave `expression` unchanged, so the loop would
			// never terminate and the error branch below could never be reached.
			if (std::regex_search(expression, match, pattern.first) && match[1].length() > 0)
			{
#ifdef ENABLE_DEBUG
				std::cout << match[1];
#endif
				tokens.push_back(std::make_pair(match[1], pattern.second));
				// Read the length before trimming: `match` refers into
				// `expression`, which leftTrimString is about to modify.
				const int token_length = static_cast<int>(match[1].length());
				leftTrimString(expression, token_length);
				valid_token = true;
				break;
			}
		}
		if (!valid_token)
		{
			std::cout << "Error: wrong output format near : " << expression << "\n";
			exit(1);
		}
	}
}

/*****************************************************************************************************************************/
/********************************************************** PRIVATE **********************************************************/
/*****************************************************************************************************************************/

/*
  Removes the first start_pos characters of str together with any whitespace
  (space, tab, CR, LF, vertical tab, form feed) that directly follows them.

  str       - string modified in place.
  start_pos - index from which the search for the first character to keep begins.

  If there is no non-whitespace character at or after start_pos, str becomes empty.
*/
void CTokenizer::leftTrimString(std::string& str, int start_pos)
{
	static const char* const blanks = " \t\r\n\v\f";
	const std::string::size_type first_kept = str.find_first_not_of(blanks, start_pos);
	if (first_kept == std::string::npos)
	{
		// Nothing but whitespace (or nothing at all) is left.
		str.clear();
	}
	else
	{
		str.erase(0, first_kept);
	}
}



// ***** EOF