File: parsetrigrams.cpp

package info (click to toggle)
sonnet 5.116.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 22,976 kB
  • sloc: cpp: 7,535; sh: 15; makefile: 9
file content (65 lines) | stat: -rw-r--r-- 1,745 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/**
 * parsetrigrams.cpp
 *
 * Parse a set of trigram files into a QMap, and serialize to stdout.
 * Note: we allow this data to be read into QHash. We use QMap here
 * to get deterministic output from run to run.
 *
 * SPDX-FileCopyrightText: 2006 Jacob Rideout <kde@jacobrideout.net>
 *
 * SPDX-License-Identifier: LGPL-2.1-or-later
 */

#include <QDataStream>
#include <QDir>
#include <QFile>
#include <QMap>
#include <QRegularExpression>
#include <QString>
#include <QTextStream>

int main(int argc, char **argv)
{
    if (argc < 2) {
        return 1;
    }

    QFile sout;
    sout.open(stdout, QIODevice::WriteOnly);
    QDataStream out(&sout);

    QString path = QLatin1String(argv[1]);
    QDir td(path);

    /*
     * We use QMap (instead of QHash) here to get deterministic output
     * from run to run.
     */
    QMap<QString, QMap<QString, int>> models;

    const QRegularExpression rx(QStringLiteral("(?:.{3})\\s+(.*)"));
    const QStringList files = td.entryList(QDir::Files);
    for (const QString &fname : files) {
        QFile fin(td.filePath(fname));
        fin.open(QFile::ReadOnly | QFile::Text);
        QTextStream stream(&fin);

        // Not needed with Qt6, UTF-8 is the default
#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
        stream.setCodec("UTF-8");
#endif
        while (!stream.atEnd()) {
            QString line = stream.readLine();
            const QRegularExpressionMatch match = rx.match(line);
            if (match.hasMatch()) {
#if QT_VERSION >= QT_VERSION_CHECK(6, 0, 0)
                models[fname][line.left(3)] = match.capturedView(1).toInt();
#else
                models[fname][line.left(3)] = match.capturedRef(1).toInt();
#endif
            }
        }
    }

    out << models;
}