File: qgsdelimitedtextfile.h

package info (click to toggle)
qgis 2.4.0-1
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 374,696 kB
  • ctags: 66,263
  • sloc: cpp: 396,139; ansic: 241,070; python: 130,609; xml: 14,884; perl: 1,290; sh: 1,287; sql: 500; yacc: 268; lex: 242; makefile: 168
file content (388 lines) | stat: -rw-r--r-- 13,684 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
/***************************************************************************
      qgsdelimitedtextfile.h  -  File for delimited text file
                             -------------------
    begin                : 2004-02-27
    copyright            : (C) 2013 by Chris Crook
    email                : ccrook at linz.govt.nz
 ***************************************************************************/

/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/

#ifndef QGSDELIMITEDTEXTFILE_H
#define QGSDELIMITEDTEXTFILE_H

#include <QStringList>
#include <QRegExp>
#include <QUrl>

class QgsFeature;
class QgsField;
class QFile;
class QFileSystemWatcher;
class QTextStream;


/**
\class QgsDelimitedTextFile
\brief Delimited text file parser extracts records from a QTextStream as a QStringList.
*
*
* The delimited text parser is used by the QgsDelimitedTextProvider to parse
* a QTextStream into records of QStringList.  It provides a number of variants
* for parsing each record.  The following options are supported:
* - Basic whitespace parsing.  Each line in the file is treated as a record.
*   Extracts all contiguous sequences of non-whitespace
*   characters.  Leading and trailing whitespace are ignored.
* - Regular expression parsing.  Each line in the file is treated as a record.
*   The string is split into fields based on a regular expression.
* - Character delimited, based on a delimiter character set, a quote character, and
*   an escape character.  The escape treats the next character as a part of a field.
*   Fields may start and end with quote characters, in which case any non-escaped
*   character within the field is treated literally, including end of line characters.
*   The escape character within a string causes the next character to be read literally
*   (this includes new line characters).  If the escape and quote characters are the
*   same, then only quote characters will be escaped (ie to include a quote in a
*   quoted field it is entered as two quotes.  All other characters in quoted fields
*   are treated literally, including newlines.
* - CSV format files - these are a special case of character delimited, in which the
*   delimiter is a comma, and the quote and escape characters are double quotes (")
*
* The delimiters can be encode in and decoded from a QUrl as query items.  The
* items used are:
* - delimiterType, one of plain (delimiter is any of a set of characters),
*   regexp, csv, whitespace
* - delimiter, interpreted according to the type.  For plain characters this is
*   a sequence of characters.  The string \t in the sequence is replaced by a tab.
*   For regexp type delimiters this specifies the reqular expression.
*   The field is ignored for csv and whitespace
* - quoteChar, optional, a single character used for quoting plain fields
* - escapeChar, optional, a single characer used for escaping (may be the same as quoteChar)
*/

// Note: this has been implemented as a single class rather than a set of classes based
// on an abstract base class in order to facilitate changing the type of the parser easily
// eg in the provider dialog

class QgsDelimitedTextFile : public QObject
{

    Q_OBJECT

  public:

    enum Status
    {
      RecordOk,
      InvalidDefinition,
      RecordEmpty,
      RecordInvalid,
      RecordEOF
    };

    enum DelimiterType
    {
      DelimTypeWhitespace,
      DelimTypeCSV,
      DelimTypeRegexp
    };

    QgsDelimitedTextFile( QString url = QString() );

    virtual ~QgsDelimitedTextFile();

    /** Set the filename
     * @param filename  the name of the file
     */
    void setFileName( QString filename );
    /** Return the filename
     * @return filename  the name of the file
     */
    QString fileName()
    {
      return mFileName;
    }

    /** Set the file encoding (defuault is UTF-8)
     *  @param encoding the encoding to use for the fileName()
     */
    void setEncoding( QString encoding );
    /** Return the file encoding
     *  @return encoding The file encoding
     */
    QString encoding() { return mEncoding; }

    /** Decode the parser settings from a url as a string
     *  @param url  The url from which the delimiter and delimiterType items are read
     */
    bool setFromUrl( QString url );
    /** Decode the parser settings from a url
     *  @param url  The url from which the delimiter and delimiterType items are read
     */
    bool setFromUrl( const QUrl &url );

    /** Encode the parser settings into a QUrl
     *  @return url  The url into which the delimiter and delimiterType items are set
     */
    QUrl url();

    /** Set the parser for parsing CSV files
     */
    void setTypeWhitespace();

    /** Set the parser for parsing based on a reqular expression delimiter
        @param regexp A string defining the regular expression
    */
    void setTypeRegexp( QString regexp );
    /** Set the parser to use a character type delimiter.
     *  @param delim  The field delimiter character set
     *  @param quote  The quote character, used to define quoted fields
     *  @param escape The escape character used to escape quote or delim
     *                characters.
     */
    void setTypeCSV( QString delim = QString( "," ), QString quote = QString( "\"" ), QString escape = QString( "\"" ) );

    /** Set the number of header lines to skip
     * @param skiplines The maximum lines to skip
     */
    void setSkipLines( int skiplines );
    /** Return the number of header lines to skip
     * @return skiplines The maximum lines to skip
     */
    int skipLines()
    {
      return mSkipLines;
    }

    /** Set reading field names from the first record
     * @param useheaders Field names will be read if true
     */
    void setUseHeader( bool useheader = true );
    /** Return the option for reading field names from the first record
     * @return useheaders Field names will be read if true
     */
    bool useHeader()
    {
      return mUseHeader;
    }

    /** Set the option for dicarding empty fields
     * @param useheaders Empty fields will be discarded if true
     */
    void setDiscardEmptyFields( bool discardEmptyFields = true );
    /** Return the option for discarding empty fields
     * @return useheaders Empty fields will be discarded if true
     */
    bool discardEmptyFields()
    {
      return mDiscardEmptyFields;
    }

    /** Set the option for trimming whitespace from fields
     * @param trimFields Fields will be trimmed if true
     */
    void setTrimFields( bool trimFields = true );
    /** Return the option for trimming empty fields
     * @return useheaders Empty fields will be trimmed if true
     */
    bool trimFields()
    {
      return mTrimFields;
    }

    /** Set the maximum number of fields that will be read from a record
     *  By default the maximum number is unlimited (0)
     *  @param maxFields  The maximum number of fields that will be read
     */
    void setMaxFields( int maxFields );
    /** Return the maximum number of fields that will be read
     *  @return maxFields The maximum number of fields that will be read
     */
    int maxFields() { return mMaxFields; }

    /** Set the field names
     *  Field names are set from QStringList.  Names may be modified
     *  to ensure that they are unique, not empty, and do not conflict
     *  with default field name (field_##)
     *  @param names  A list of proposed field names
     */
    void setFieldNames( const QStringList &names );

    /** Return the field names read from the header, or default names
     *  field_## if none defined.  Will open and read the head of the file
     *  if required, then reset.  Note that if header record record has
     *  not been read then the field names are empty until records have
     *  been read.  The list may be expanded as the file is read and records
     *  with more fields are loaded.
     *  @return names  A list of field names in the file
     */
    QStringList &fieldNames();

    /** Return the index of a names field
     *  @param name    The name of the field to find.  This will also accept an
     *                 integer string ("1" = first field).
     *  @return index  The zero based index of the field name, or -1 if the field
     *                 name does not exist or cannot be inferred
     */
    int fieldIndex( QString name );

    /** Reads the next record from the stream splits into string fields.
     *  @param fields  The string list to populate with the fields
     *  @return status The result of trying to parse a record.  RecordOk
     *                 if read successfully, RecordEOF if reached the end of the
     *                 file, RecordEmpty if no data on the next line, and
     *                 RecordInvalid if the record is ill-formatted.
     */
    Status nextRecord( QStringList &fields );

    /** Return the line number of the start of the last record read
     *  @return linenumber  The line number of the start of the record
     */
    int recordId()
    {
      return mRecordLineNumber;
    }

    /** Set the index of the next record to return.
     *  @param  nextRecordId The id to set the next record to
     *  @return valid  True if the next record can be located
     */
    bool setNextRecordId( long nextRecordId );

    /** Number record number of records visited. After scanning the file
     *  serves as a record count.
     *  @return maxRecordNumber The maximum record number
     */
    long recordCount() { return mMaxRecordNumber; }
    /** Reset the file to reread from the beginning
     */
    Status reset();

    /** Return a string defining the type of the delimiter as a string
     *  @return type The delimiter type as a string
     */
    QString type();

    /** Check that provider is valid (filename and definition valid)
     *
     * @return valid True if the provider is valid
     */
    bool isValid();

    /** Encode characters - used to convert delimiter/quote/escape characters to
     *  encoded form (eg replace tab with \t)
     *  @param string  The unencoded string
     *  @return encstring  The encoded string
     */
    static QString encodeChars( QString string );

    /** Encode characters - used to encoded character strings to
     *  decoded form (eg replace \t with tab)
     *  @param string  The unencoded string
     *  @return decstring  The decoded string
     */
    static QString decodeChars( QString string );

    /** Set to use or not use a QFileWatcher to notify of changes to the file
     * @param useWatcher True to use a watcher, false otherwise
     */

    void setUseWatcher( bool useWatcher );

  signals:
    /** Signal sent when the file is updated by another process
     */
    void fileUpdated();

  public slots:
    /** Slot used by watcher to notify of file updates
     */
    void updateFile();

  private:

    /** Open the file
     *
     * @return valid  True if the file is successfully opened
     */
    bool open();

    /** Close the text file
     */
    void close();

    /** Reset the status if the definition is changing (eg clear
     *  existing field names, etc...
     */
    void resetDefinition();

    /** Parse reqular expression delimited fields */
    Status parseRegexp( QString &buffer, QStringList &fields );
    /** Parse quote delimited fields, where quote and escape are different */
    Status parseQuoted( QString &buffer, QStringList &fields );

    /** Return the next line from the data file.  If skipBlank is true then
     * blank lines will be skipped - this is for compatibility with previous
     * delimited text parser implementation.
     */
    Status nextLine( QString &buffer, bool skipBlank = false );

    /** Set the next line to read from the file.
     */
    bool setNextLineNumber( long nextLineNumber );

    /** Utility routine to add a field to a record, accounting for trimming
     *  and discarding, and maximum field count
     */
    void appendField( QStringList &record, QString field, bool quoted = false );

    // Pointer to the currently selected parser
    Status( QgsDelimitedTextFile::*mParser )( QString &buffer, QStringList &fields );

    QString mFileName;
    QString mEncoding;
    QFile *mFile;
    QTextStream *mStream;
    bool mUseWatcher;
    QFileSystemWatcher *mWatcher;

    // Parameters common to parsers
    bool mDefinitionValid;
    DelimiterType mType;
    bool mUseHeader;
    bool mDiscardEmptyFields;
    bool mTrimFields;
    int mSkipLines;
    int mMaxFields;
    int mMaxNameLength;

    // Parameters used by parsers
    QRegExp mDelimRegexp;
    bool mAnchoredRegexp;
    QString mDelimChars;
    QString mQuoteChar;
    QString mEscapeChar;

    // Information extracted from file
    QStringList mFieldNames;
    long mLineNumber;
    long mRecordLineNumber;
    long mRecordNumber;
    QStringList mCurrentRecord;
    bool mHoldCurrentRecord;
    // Maximum number of record (ie maximum record number visited)
    long mMaxRecordNumber;
    int mMaxFieldCount;

    QString mDefaultFieldName;
    QRegExp mInvalidFieldRegexp;
    QRegExp mDefaultFieldRegexp;
};

#endif