File: csvfiles.cpp

package info (click to toggle)
pgadmin3 1.20.0~beta2-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 73,704 kB
  • ctags: 18,591
  • sloc: cpp: 193,786; ansic: 18,736; sh: 5,154; pascal: 1,120; yacc: 927; makefile: 516; lex: 421; xml: 126; perl: 40
file content (142 lines) | stat: -rw-r--r-- 3,724 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
//////////////////////////////////////////////////////////////////////////
//
// pgAdmin III - PostgreSQL Tools
//
// Copyright (C) 2002 - 2014, The pgAdmin Development Team
// This software is released under the PostgreSQL Licence
//
// csvfiles.cpp - CSV file parsing
//
//////////////////////////////////////////////////////////////////////////

#include "pgAdmin3.h"
#include "utils/sysLogger.h"
#include "utils/csvfiles.h"

// PostgreSQL and GPDB now support CSV format logs.
// So, we need a way to parse the CSV files into lines, and lines into tokens (fields).

// Reports whether GetNextToken() still has a field to hand out.
//
// Returns false when the line is empty or the read position has reached its
// end.  Returns true when any character other than the ',' delimiter remains
// at or after the read position.  When nothing but delimiters remain, true is
// returned only at the very start of the line (m_pos == 0); from any later
// position a run of trailing delimiters is not reported as further tokens.
bool CSVTokenizer::HasMoreTokens() const
{
	if ( m_string.empty() || m_pos >= m_string.length() )
		return false;

	if ( m_string.find_first_not_of(wxT(','), m_pos) != wxString::npos )
		// there are non delimiter characters left, so we do have more tokens
		return true;

	// Only ',' delimiters remain from m_pos onwards, which means the
	// character at m_pos is itself a ',' and can never be a '\n'.  Such a
	// line still counts as having tokens only if nothing was consumed yet.
	return m_pos == 0;
}

// Extracts the next comma-separated field from the line and advances the
// read position past the field and its delimiter.
//
// Returns an empty string when HasMoreTokens() reports that nothing is left.
// Blanks in front of a field are skipped.  A field whose first character is a
// double quote is treated as quoted: ',' and '\n' inside the quotes do not
// end the field, the first and last characters of the field (the enclosing
// quotes) are dropped and every doubled quote ("") is collapsed to a single
// one.  If the quotes are never closed, the raw text including the opening
// quote is returned and a notice is logged.
wxString CSVTokenizer::GetNextToken()
{
	wxString token;

	if ( !HasMoreTokens() )
		return token;

	const size_t length = m_string.length();

	// skip leading blanks in front of the field.
	while (m_pos < length && m_string[m_pos] == wxT(' '))
		m_pos ++;

	// Are we a quoted field?  Must handle this special.
	// The blank skipping above can run up to the end of the line (e.g. for
	// "a,  "), so check the bound before looking at the character at m_pos.
	bool quoted_string = (m_pos < length && m_string[m_pos] == wxT('\"'));
	bool inquote = false;

	size_t pos = m_pos;

	// find the end of this token.
	for (; pos < length; pos++)
	{
		// Every quote toggles the state, so a doubled quote inside a quoted
		// field leaves the state unchanged.
		if (quoted_string && m_string[pos] == wxT('\"'))
			inquote = !inquote;

		if (!inquote)
		{
			// Check to see if we have found the end of this token.
			// Tokens normally end with a ',' delimiter.
			if (m_string[pos] == wxT(','))
				break;

			// Last token is delimited by '\n' or by end of string.
			if (m_string[pos] == wxT('\n') && pos == length - 1)
				break;
		}
	}

	if (quoted_string && !inquote)
	{
		// Remove leading and trailing quotes.
		// NOTE(review): assumes the closing quote directly precedes the
		// delimiter; any other trailing character would be cut instead.
		token.assign(m_string, m_pos + 1, pos - m_pos - 2);

		// Remove double doublequote chars, replace with single doublequote chars
		token.Replace(wxT("\"\""), wxT("\""), true);
	}
	else
		token.assign(m_string, m_pos, pos - m_pos);

	if (quoted_string && inquote)
	{
		wxLogNotice(wxT("unterminated double quoted string: %s\n"), token.c_str());
	}

	m_pos = pos + 1;    // Skip token and delimiter

	if (m_pos > length)  // No delimiter was consumed when the scan ran off the end of the string.
		m_pos = length;

	return token;
}

// Reports whether any character other than the '\n' line terminator remains
// at or after the current read position.
bool CSVLineTokenizer::HasMoreLines() const
{
	return m_string.find_first_not_of(wxT('\n'), m_pos) != wxString::npos;
}

// Returns the next CSV line from the buffer and advances the read position.
//
// A line ends at the first '\n' that is not enclosed in double quotes; the
// terminator is part of the returned text and 'partial' is set to false.
// If no such terminator is found, everything up to the end of the buffer is
// returned and 'partial' is set to true, as the rest of the line has not
// arrived yet.  When HasMoreLines() is false an empty string is returned
// with 'partial' set to true.
wxString CSVLineTokenizer::GetNextLine(bool &partial)
{
	// Until an unquoted line terminator is seen, the result is incomplete.
	partial = true;

	wxString line;
	if ( !HasMoreLines() )
		return line;

	const size_t total = m_string.length();
	const size_t start = m_pos;
	bool insideQuotes = false;

	for (size_t idx = start; idx < total; ++idx)
	{
		if (m_string[idx] == wxT('\"'))
		{
			// Quotes toggle the state; a '\n' between them belongs to the field.
			insideQuotes = !insideQuotes;
			continue;
		}

		if (insideQuotes)
			continue;

		if (m_string[idx] == wxT('\n'))
		{
			// Complete line: hand it out together with its trailing '\n'
			// and continue behind it on the next call.
			line.assign(m_string, start, idx - start + 1);
			m_pos = idx + 1;
			partial = false;
			return line;
		}
	}

	// Ran off the end without an unquoted '\n': return what is there and
	// mark the buffer as fully consumed.
	line.assign(m_string, start, wxString::npos);
	m_pos = total;

	return line;
}