1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
|
/*
This file is part of Kiten, a KDE Japanese Reference Tool
SPDX-FileCopyrightText: 2001 Jason Katz-Brown <jason@katzbrown.com>
SPDX-FileCopyrightText: 2006 Joseph Kerian <jkerian@gmail.com>
SPDX-FileCopyrightText: 2006 Eric Kjeldergaard <kjelderg@gmail.com>
SPDX-FileCopyrightText: 2011 Daniel E. Moctezuma <democtezuma@gmail.com>
SPDX-License-Identifier: LGPL-2.0-or-later
*/
#include "dictfileedict.h"
#include <KConfigSkeleton>
#include <QFile>
#include <QList>
#include <QRegularExpression>
#include <QString>
#include <QStringDecoder>
#include <QTextStream>
#include "deinflection.h"
#include "dictfilefieldselector.h"
#include "dictquery.h"
#include "entryedict.h"
#include "entrylist.h"
#include "kitenmacros.h"
using namespace Qt::StringLiterals;
// Class-wide (static) state, all starting out as null pointers.
// deinflectionLabel: assigned in doSearch() from Deinflection::getDeinflectionLabel()
// when the deinflection fallback is used, and reset to nullptr otherwise.
QString *DictFileEdict::deinflectionLabel = nullptr;
// displayFields: the list installed by loadSettings() / loadSettings(KConfigSkeleton *).
QStringList *DictFileEdict::displayFields = nullptr;
// wordType: assigned in doSearch() from Deinflection::getWordType() together with
// deinflectionLabel, and reset to nullptr otherwise.
QString *DictFileEdict::wordType = nullptr;
/**
 * Builds an EDICT-type dictionary handler.
 *
 * EDICT is handed to the DictFile base constructor and stored in
 * m_dictionaryType. No deinflection engine exists yet (loadDictionary()
 * creates it), and "common" is registered as a searchable attribute.
 */
DictFileEdict::DictFileEdict()
    : DictFile(EDICT)
    , m_deinflection(nullptr)
    , m_hasDeinflection(false)
{
    // The attribute maps onto itself: key and value are the same text.
    const QString commonAttribute = QStringLiteral("common");
    m_searchableAttributes.insert(commonAttribute, commonAttribute);

    m_dictionaryType = EDICT;
}
/**
 * Releases the deinflection engine created by loadDictionary(), if any,
 * and leaves the member pointing at nothing.
 */
DictFileEdict::~DictFileEdict()
{
    Deinflection *const engine = m_deinflection;
    m_deinflection = nullptr;
    delete engine; // deleting a null pointer is a no-op
}
/**
 * Returns the EDICT-specific display options as a map from the long,
 * human-readable name to the short field name. Currently that is the
 * single pair "Part of speech(type)" -> "type".
 */
QMap<QString, QString> DictFileEdict::displayOptions() const
{
    QMap<QString, QString> options;
    options.insert(QStringLiteral("Part of speech(type)"), QStringLiteral("type"));
    return options;
}
/**
 * Searches this dictionary for @p query and returns the matching entries.
 *
 * Steps:
 *  1. Pick one lookup key from the query: the first character of the word,
 *     else the pronunciation, else the first space-separated token of the
 *     meaning (lower-cased), else the first property key concatenated with
 *     its value.
 *  2. Fetch every index line matching that key via m_edictFile.findMatches()
 *     and keep only entries that satisfy both matchesQuery() and
 *     matchesWordType().
 *  3. If nothing is kept and the query's word type is Any, Verb or Adjective,
 *     use the result of Deinflection::search() instead. The static
 *     deinflectionLabel / wordType pointers and m_hasDeinflection are updated
 *     in that branch and reset in the other.
 *  4. Reorder: common entries before uncommon ones, then regroup as exact
 *     match / starts with / ends with / everything else relative to the
 *     query word. Both passes keep the relative order inside each group.
 *
 * @param query the query to answer
 * @return a list allocated with new (possibly empty), or whatever
 *         Deinflection::search() returned when the fallback was used.
 *         NOTE(review): presumably the caller takes ownership - confirm.
 */
EntryList *DictFileEdict::doSearch(const DictQuery &query)
{
    // No query or dict, no results.
    if (query.isEmpty() || !m_edictFile.valid()) {
        return new EntryList();
    }

    qDebug() << "Search from : " << getName();

    // --- 1. choose the lookup key ------------------------------------------
    QString firstChoice = query.getWord();
    if (firstChoice.length() != 0) {
        // Only search for one kanji or the
        // binary lookup mechanism breaks
        firstChoice = firstChoice.at(0);
    } else {
        firstChoice = query.getPronunciation();
        if (firstChoice.length() == 0) {
            firstChoice = query.getMeaning().split(' '_L1).first().toLower();
        }
        if (firstChoice.length() == 0) {
            // The nastiest situation... we have to assemble a search string
            // from the first property
            QList<QString> keys = query.listPropertyKeys();
            if (keys.empty()) {
                // Shouldn't happen... but maybe in the future
                return new EntryList();
            }
            const QString propertyKey = keys[0];
            firstChoice = propertyKey + query.getProperty(propertyKey);
            // TODO: doSearch: some accommodation for searching for ranges and such of properties
        }
    }

    // --- 2. collect and filter candidates ----------------------------------
    QList<QString> preliminaryResults = m_edictFile.findMatches(firstChoice);
    if (preliminaryResults.empty()) {
        // If there were no matches... return an empty list
        return new EntryList();
    }

    auto results = new EntryList();
    for (const QString &line : preliminaryResults) {
        Entry *candidate = makeEntry(line);
        auto edictCandidate = static_cast<EntryEdict *>(candidate);
        if (!(candidate->matchesQuery(query) && edictCandidate->matchesWordType(query))) {
            delete candidate;
            continue;
        }
        results->append(candidate);
    }

    // --- 3. deinflection fallback ------------------------------------------
    // An empty result here probably means the user typed an inflected verb
    // or adjective, so the deinflection engine gets a try.
    const auto requestedType = query.getMatchWordType();
    const bool deinflectable =
        requestedType == DictQuery::Any || requestedType == DictQuery::Verb || requestedType == DictQuery::Adjective;

    if (results->count() == 0 && deinflectable) {
        delete results;
        results = m_deinflection->search(query, preliminaryResults);

        QString *label = m_deinflection->getDeinflectionLabel();
        if (!label->isEmpty() && !m_hasDeinflection) {
            deinflectionLabel = label;
            m_hasDeinflection = true;
            wordType = m_deinflection->getWordType();
        }
    } else {
        deinflectionLabel = nullptr;
        wordType = nullptr;
        m_hasDeinflection = false;
    }

    if (!results) {
        return results;
    }

    // --- 4a. common entries first ------------------------------------------
    EntryList commonEntries;
    EntryList uncommonEntries;
    for (EntryList::EntryIterator walker(*results); walker.hasNext();) {
        auto edictEntry = static_cast<EntryEdict *>(walker.next());
        EntryList &bucket = edictEntry->isCommon() ? commonEntries : uncommonEntries;
        bucket.append(edictEntry);
    }

    // The lists only hold pointers; the entries themselves move on to the
    // freshly created list.
    delete results;
    results = new EntryList();
    results->appendList(&commonEntries);
    results->appendList(&uncommonEntries);

    // --- 4b. group by how the entry's word relates to the query word -------
    const QString wantedWord = query.getWord();
    EntryList exactMatches;
    EntryList prefixMatches;
    EntryList suffixMatches;
    EntryList otherMatches;
    for (EntryList::EntryIterator walker(*results); walker.hasNext();) {
        Entry *entry = walker.next();
        const QString entryWord = entry->getWord();

        EntryList *bucket = &otherMatches;
        if (entryWord == wantedWord) {
            bucket = &exactMatches;
        } else if (entryWord.startsWith(wantedWord)) {
            bucket = &prefixMatches;
        } else if (entryWord.endsWith(wantedWord)) {
            bucket = &suffixMatches;
        }
        bucket->append(entry);
    }

    delete results;
    results = new EntryList();
    results->appendList(&exactMatches);
    results->appendList(&prefixMatches);
    results->appendList(&suffixMatches);
    results->appendList(&otherMatches);

    return results;
}
/**
 * Appends the long names of this dictionary's display options (the keys of
 * displayOptions()) to the list passed in and returns the extended list.
 *
 * @param x list to extend; taken by value, so the caller's copy is untouched
 * @return @p x followed by the option names
 */
QStringList DictFileEdict::listDictDisplayOptions(QStringList x) const
{
    const QList<QString> optionNames = displayOptions().keys();
    x.append(optionNames);
    return x;
}
/**
 * Loads the dictionary file and sets up the deinflection engine.
 *
 * @param fileName path of the dictionary file, handed to m_edictFile.loadFile()
 * @param dictName name recorded for this dictionary
 * @return true when the file was loaded by this call; false when a
 *         dictionary is already loaded or loadFile() failed
 */
bool DictFileEdict::loadDictionary(const QString &fileName, const QString &dictName)
{
    // Short-circuit keeps loadFile() from running when already loaded.
    if (m_edictFile.valid() || !m_edictFile.loadFile(fileName)) {
        return false;
    }

    m_dictionaryName = dictName;
    m_dictionaryFile = fileName;

    m_deinflection = new Deinflection(m_dictionaryName);
    m_deinflection->load();

    return true;
}
/**
 * Returns displayOptions() extended with the four fields whose long and
 * short names are identical: "Word/Kanji", "Reading", "Meaning" and the
 * "--Newline--" marker.
 */
QMap<QString, QString> DictFileEdict::loadDisplayOptions() const
{
    QMap<QString, QString> options = displayOptions();
    options.insert(QStringLiteral("Word/Kanji"), QStringLiteral("Word/Kanji"));
    options.insert(QStringLiteral("Reading"), QStringLiteral("Reading"));
    options.insert(QStringLiteral("Meaning"), QStringLiteral("Meaning"));
    options.insert(QStringLiteral("--Newline--"), QStringLiteral("--Newline--"));
    return options;
}
/**
 * Rebuilds a field list from a configuration item.
 *
 * The item's property is read as a string list of long names. When that
 * list is non-empty, @p list is deleted and a new list is returned holding
 * the short name of every long name found in @p long2short; unknown names
 * are skipped. Otherwise @p list is returned untouched.
 *
 * @param item       configuration item to read; may be nullptr
 * @param list       current list; deleted when it gets replaced
 * @param long2short translation from long names to short names
 * @return the list to use from now on (either @p list or a new allocation)
 */
QStringList *DictFileEdict::loadListType(KConfigSkeletonItem *item, QStringList *list, const QMap<QString, QString> &long2short)
{
    if (item == nullptr) {
        return list;
    }

    const QStringList configuredNames = item->property().toStringList();
    if (configuredNames.isEmpty()) {
        return list;
    }

    delete list;
    auto translated = new QStringList();
    for (const QString &longName : configuredNames) {
        const auto found = long2short.constFind(longName);
        if (found != long2short.constEnd()) {
            translated->append(found.value());
        }
    }
    return translated;
}
void DictFileEdict::loadSettings()
{
this->displayFields = new QStringList(loadDisplayOptions().values());
}
/**
 * Loads the display-field list from the configuration.
 *
 * The item is looked up under the name "<type>__displayFields". Its long
 * names are translated through displayOptions() plus the four fields that
 * map onto themselves, and the result is stored in displayFields via
 * loadListType().
 *
 * @param config configuration to read; dereferenced without a null check
 */
void DictFileEdict::loadSettings(KConfigSkeleton *config)
{
    QMap<QString, QString> long2short = displayOptions();

    // These fields use the same text as long and short name.
    const QString selfNamed[] = {
        QStringLiteral("Word/Kanji"),
        QStringLiteral("Reading"),
        QStringLiteral("Meaning"),
        QStringLiteral("--Newline--"),
    };
    for (const QString &fieldName : selfNamed) {
        long2short.insert(fieldName, fieldName);
    }

    const QString itemName = getType() + "__displayFields"_L1;
    KConfigSkeletonItem *item = config->findItem(itemName);
    displayFields = loadListType(item, displayFields, long2short);
}
/**
 * Creates an EntryEdict from one raw dictionary line, tagged with this
 * dictionary's name.
 *
 * @param entry the raw text of the entry
 * @return a new EntryEdict allocated with new
 */
inline Entry *DictFileEdict::makeEntry(const QString &entry)
{
    Entry *created = new EntryEdict(getName(), entry);
    return created;
}
/**
 * Creates the field-selector preferences dialog for this dictionary type
 * and fills its list of available fields from listDictDisplayOptions().
 *
 * @param config configuration passed on to the dialog
 * @param parent parent widget passed on to the dialog
 * @return the new dialog, allocated with new
 */
DictionaryPreferenceDialog *DictFileEdict::preferencesWidget(KConfigSkeleton *config, QWidget *parent)
{
    auto selector = new DictFileFieldSelector(config, getType(), parent);

    const QStringList availableFields = listDictDisplayOptions(QStringList());
    selector->addAvailable(availableFields);

    return selector;
}
/**
* Scan a potential file for the correct format, remembering to skip comment
* characters. This is not a foolproof scan, but it should be checked before adding
* a new dictionary.
* Valid EDICT format is considered:
* \<kanji or kana\>+ [\<kana\>] /latin characters & symbols/separated with slashes/
* Comment lines start with... something... not remembering now.
*/
bool DictFileEdict::validDictionaryFile(const QString &filename)
{
QFile file(filename);
bool returnFlag = true;
if (!file.exists() || !file.open(QIODevice::ReadOnly)) {
return false;
}
// Now we can actually check the file
QStringDecoder decoder("EUC-JP");
const QString decoded = decoder(file.readAll());
QTextStream fileStream(decoded.toUtf8());
QString commentMarker(QStringLiteral("????")); // Note: Don't touch this! vim seems to have
// An odd text codec error here too :(
QRegularExpression formattedLine(QStringLiteral("^\\S+\\s+(\\[\\S+\\]\\s+)?/.*/$"));
while (!fileStream.atEnd()) {
QString line = fileStream.readLine();
if (line.left(4) == commentMarker) {
continue;
}
if (line.contains(formattedLine)) // If it matches our regex
{
continue;
}
returnFlag = false;
break;
}
file.close();
return returnFlag;
}
/**
* Reject queries that specify anything we don't understand
*/
// TODO: Actually write this method (validQuery)
bool DictFileEdict::validQuery(const DictQuery &query)
{
Q_UNUSED(query)
return true;
}
|