File: Data.cpp

package info (click to toggle)
sqlitebrowser 3.13.0-1.1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 23,080 kB
  • sloc: cpp: 117,608; ansic: 2,143; yacc: 798; lex: 262; sh: 147; xml: 63; python: 27; makefile: 5
file content (234 lines) | stat: -rw-r--r-- 8,159 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#include "Data.h"

#include <QBuffer>
#include <QDateTime>
#include <QImageReader>
#include <QLocale>
#include <QRegExp>
#include <QTextCodec>
#include <QFile>

#include <algorithm>

// Note that these aren't all possible BOMs. But they are probably the most common ones.
// The size is needed at least for the ones with character zero in them.
static const QByteArray bom3("\xEF\xBB\xBF", 3);
static const QByteArray bom2a("\xFE\xFF", 2);
static const QByteArray bom2b("\xFF\xFE", 2);
static const QByteArray bom4a("\x00\x00\xFE\xFF", 4);
static const QByteArray bom4b("\xFF\xFE\x00\x00", 4);

bool isTextOnly(QByteArray data, const QString& encoding, bool quickTest)
{
    // If the data starts with a Unicode BOM, we can use detection provided by QTextCodec.
    if(startsWithBom(data)) {
        QTextCodec *codec = encoding.isEmpty()? QTextCodec::codecForName("UTF-8") : QTextCodec::codecForName(encoding.toUtf8());
        QTextCodec *detectedCodec = QTextCodec::codecForUtfText(data, nullptr);
        return detectedCodec == codec;
    }

    // Truncate to the first few bytes for quick testing
    int testSize = quickTest? std::min(512, data.size()) : data.size();

    // If the quick test has been requested and we have to truncate the string, we have to use
    // an approach where truncated multibyte characters are not interpreted as invalid characters.
    if(quickTest && data.size() > testSize) {

        // We can assume that the default encoding (UTF-8) and all the ISO-8859
        // cannot contain character zero.
        // This has to be checked explicitly because toUnicode() is using zero as
        // a terminator for these encodings.
        if((encoding.isEmpty() || encoding.startsWith("ISO-8859")) && data.contains('\0'))
            return false;

        QTextCodec::ConverterState state;
        QTextCodec *codec = encoding.isEmpty()? QTextCodec::codecForName("UTF-8") : QTextCodec::codecForName(encoding.toUtf8());
        codec->toUnicode(data.constData(), testSize, &state);
        return state.invalidChars == 0;
    } else {
        // Convert to Unicode if necessary
        data = decodeString(data, encoding);
        QString text (data);
        bool isPrintable = true;
        for(QChar character : text) {
            if(character.isNonCharacter() || (character.category() == QChar::Other_Control && !character.isSpace())) {
                isPrintable = false;
                break;
            }
        }
        // Perform check
        return isPrintable && text.toUtf8() == data;
    }
}

bool isTextOnlyFile(const QString& fileName)
{
    QFile file(fileName);
    if (file.open(QFile::ReadOnly)) {
        QByteArray data = file.read(512);
        return !data.isEmpty() && isTextOnly(data);
    } else {
        return false;
    }
}

bool containsRightToLeft(const QString& text) {

    for(QChar ch : text) {
        switch(ch.direction()) {
        case QChar::DirR:
        case QChar::DirAL:
        case QChar::DirRLE:
        case QChar::DirRLO:
        case QChar::DirRLI:
            return true;
        default:
            break;
        }
    }
    return false;
}

bool startsWithBom(const QByteArray& data)
{
    if(data.startsWith(bom3) ||
            data.startsWith(bom2a) || data.startsWith(bom2b) ||
            data.startsWith(bom4a) || data.startsWith(bom4b))
    return true;
    else
        return false;
}

QByteArray removeBom(QByteArray& data)
{
    if(data.startsWith(bom3))
    {
        QByteArray bom = data.left(3);
        data.remove(0, 3);
        return bom;
    } else if(data.startsWith(bom2a) || data.startsWith(bom2b)) {
        QByteArray bom = data.left(2);
        data.remove(0, 2);
        return bom;
    } else if(data.startsWith(bom4a) || data.startsWith(bom4b)) {
        QByteArray bom = data.left(4);
        data.remove(0, 4);
        return bom;
    } else {
        return QByteArray();
    }
}

QString isImageData(const QByteArray& data)
{
    // Check if it's an image. First do a quick test by calling canRead() which only checks the first couple of bytes or so. Only if
    // that returned true, do a more sophisticated test of the data. This way we get both, good performance and proper data checking.
    QBuffer imageBuffer(const_cast<QByteArray*>(&data));
    QImageReader readerBuffer(&imageBuffer);
    QString imageFormat = readerBuffer.format();
    if(readerBuffer.canRead() && !readerBuffer.read().isNull())
        return imageFormat;
    else
        return QString();
}

QStringList toStringList(const QList<QByteArray>& list) {
    QStringList strings;
    for (const QByteArray &item : list) {
        strings.append(QString::fromUtf8(item));
    }
    return strings;
}

QByteArray encodeString(const QByteArray& str, const QString& encoding)
{
    if(encoding.isEmpty())
        return str;
    else
        return QTextCodec::codecForName(encoding.toUtf8())->fromUnicode(str);
}

QByteArray decodeString(const QByteArray& str, const QString& encoding)
{
    if(encoding.isEmpty())
        return str;
    else
        return QTextCodec::codecForName(encoding.toUtf8())->toUnicode(str).toUtf8();
}

QString humanReadableSize(unsigned long byteCount)
{
    static const std::vector<QString> units = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB"};

    double size = static_cast<double>(byteCount);
    for(const QString& unit : units)
    {
        if(size < 1024.0)
            return QString::number(size, 'f', 2) + " " + unit;

        size /= 1024.0;
    }

    return QString::number(size, 'f', 2) + " YiB";
}

QString isoDateTimeStringToLocalDateTimeString(const QString& date_string)
{
    return QLocale::system().toString(QDateTime::fromString(date_string, Qt::ISODate).toLocalTime(), QLocale::ShortFormat);
}

void removeCommentsFromQuery(QString& query)
{
    // Store the current size so we can easily check later if the string has been changed
    int oldSize = query.size();

    // This implements a simple state machine to strip the query from comments
    QChar quote;
    for(int i=0;i<query.size();i++)
    {
        // Are we in quote state?
        if(quote.isNull())
        {
            // We are currently not in quote state

            // So are we starting a quote?
            if((query.at(i) == '\'' || query.at(i) == '\"' || query.at(i) == '[') && (i == 0 || query.at(i-1) != '\\'))
            {
                // Quoted text is beginning. Switch to the quote state

                quote = query.at(i);
            } else if(query.at(i) == '-' && i+1 < query.size() && query.at(i+1) == '-') {
                // This is an end of line comment. Remove anything till the end of the line or the end of the string if this is the last line

                int pos_next_line_break = query.indexOf('\n', i);
                if(pos_next_line_break == -1)
                    query = query.left(i);
                else
                    query.remove(i, pos_next_line_break - i);       // The \n is left in intentionally
            } else if(query.at(i) == '/' && i+1 < query.size() && query.at(i+1) == '*') {
                // This is a block comment. Remove anything till the end of the block or the end of the string if the block is not closed
                int pos_end_comment = query.indexOf("*/", i);
                if(pos_end_comment == -1)
                    query = query.left(i);
                else
                    query.remove(i, pos_end_comment - i + 2);       // Add 2 to include the */
            }
        } else {
            // We are currently in quote state

            // If this is the closing quote character, switch back to normal state
            if((query.at(i) == quote) && (i == 0 || query.at(i-1) != '\\'))
                quote = 0;
        }
    }

    query = query.trimmed();

    if (oldSize != query.size()) {
        // Remove multiple line breaks that might have been created by deleting comments till the end of the line but not including the line break
        query.replace(QRegExp("\\n+"), "\n");

        // Also remove any remaining whitespace at the end of each line
        query.replace(QRegExp("[ \t]+\n"), "\n");
    }
}