File: parsetrigrams.cpp

package info (click to toggle)
kf6-sonnet 6.20.0-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 23,096 kB
  • sloc: cpp: 7,586; sh: 14; makefile: 7
file content (57 lines) | stat: -rw-r--r-- 1,475 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
/**
 * parsetrigrams.cpp
 *
 * Parse a set of trigram files into a QMap, and serialize to stdout.
 * Note: we allow this data to be read into QHash. We use QMap here
 * to get deterministic output from run to run.
 *
 * SPDX-FileCopyrightText: 2006 Jacob Rideout <kde@jacobrideout.net>
 *
 * SPDX-License-Identifier: LGPL-2.1-or-later
 */

#include <QDataStream>
#include <QDir>
#include <QFile>
#include <QMap>
#include <QRegularExpression>
#include <QString>
#include <QTextStream>

int main(int argc, char **argv)
{
    if (argc < 2) {
        return 1;
    }

    QFile sout;
    sout.open(stdout, QIODevice::WriteOnly);
    QDataStream out(&sout);

    QString path = QLatin1String(argv[1]);
    QDir td(path);

    /*
     * We use QMap (instead of QHash) here to get deterministic output
     * from run to run.
     */
    QMap<QString, QMap<QString, int>> models;

    const QRegularExpression rx(QStringLiteral("(?:.{3})\\s+(.*)"));
    const QStringList files = td.entryList(QDir::Files);
    for (const QString &fname : files) {
        QFile fin(td.filePath(fname));
        fin.open(QFile::ReadOnly | QFile::Text);
        QTextStream stream(&fin);

        while (!stream.atEnd()) {
            QString line = stream.readLine();
            const QRegularExpressionMatch match = rx.match(line);
            if (match.hasMatch()) {
                models[fname][line.left(3)] = match.capturedView(1).toInt();
            }
        }
    }

    out << models;
}