1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
|
/**
* parsetrigrams.cpp
*
* Parse a set of trigram files into a QMap and serialize it to stdout.
* Note: consumers may read this data back into a QHash. A QMap is used
* here so that the output is deterministic from run to run.
*
* SPDX-FileCopyrightText: 2006 Jacob Rideout <kde@jacobrideout.net>
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*/
#include <QDataStream>
#include <QDir>
#include <QFile>
#include <QMap>
#include <QRegularExpression>
#include <QString>
#include <QTextStream>
int main(int argc, char **argv)
{
if (argc < 2) {
return 1;
}
QFile sout;
sout.open(stdout, QIODevice::WriteOnly);
QDataStream out(&sout);
QString path = QLatin1String(argv[1]);
QDir td(path);
/*
* We use QMap (instead of QHash) here to get deterministic output
* from run to run.
*/
QMap<QString, QMap<QString, int>> models;
const QRegularExpression rx(QStringLiteral("(?:.{3})\\s+(.*)"));
const QStringList files = td.entryList(QDir::Files);
for (const QString &fname : files) {
QFile fin(td.filePath(fname));
fin.open(QFile::ReadOnly | QFile::Text);
QTextStream stream(&fin);
// Not needed with Qt6, UTF-8 is the default
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
stream.setCodec("UTF-8");
#endif
while (!stream.atEnd()) {
QString line = stream.readLine();
const QRegularExpressionMatch match = rx.match(line);
if (match.hasMatch()) {
#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
models[fname][line.left(3)] = match.capturedView(1).toInt();
#else
models[fname][line.left(3)] = match.capturedRef(1).toInt();
#endif
}
}
}
out << models;
}
|