File: searchdatatox.cpp

package info (click to toggle)
recoll 1.43.13-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 16,956 kB
  • sloc: cpp: 104,864; python: 9,923; xml: 7,324; ansic: 6,447; sh: 1,252; perl: 166; makefile: 73
file content (1187 lines) | stat: -rw-r--r-- 43,639 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
/* Copyright (C) 2006-2021 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */

// Handle translation from rcl's SearchData structures to Xapian Queries

#include "autoconfig.h"

#include <stdio.h>

#include <string>
#include <vector>
#include <algorithm>
#include <sstream>
#include <iostream>

#include "xapian.h"

#include "cstr.h"
#include "rcldb.h"
#include "rcldb_p.h"
#include "searchdata.h"
#include "log.h"
#include "smallut.h"
#include "textsplit.h"
#include "unacpp.h"
#include "utf8iter.h"
#include "stoplist.h"
#include "rclconfig.h"
#include "termproc.h"
#include "synfamily.h"
#include "stemdb.h"
#include "expansiondbs.h"
#include "base64.h"
#include "daterange.h"
#include "rclvalues.h"
#include "pathut.h"

using namespace std;

namespace Rcl {

// Boost given to what the user actually typed: used as the wqf of the original term when it
// is OR'ed with its expansions (processSimpleSpan()), and as the OP_SCALE_WEIGHT factor for
// phrase queries (processPhraseOrNear()).
static const int original_term_wqf_booster = 10;

// Replace the contents of tps with its expansion.
//
// Entries which are MIME category names are expanded through the
// configuration. Any other entry is lowercased and matched as a
// (possibly wildcarded) MIME type against the "mtype" field of the
// index; if nothing matches, the entry is kept as it was entered.
// The resulting list is sorted and deduplicated.
//
// Returns false, leaving tps untouched, if the db has no configuration.
bool SearchData::expandFileTypes(Db &db, vector<string>& tps)
{
    const RclConfig *config = db.getConf();
    if (nullptr == config) {
        LOGFATAL("Db::expandFileTypes: null configuration!!\n");
        return false;
    }

    vector<string> expanded;
    for (const auto& tpname : tps) {
        if (config->isMimeCategory(tpname)) {
            vector<string> cattypes;
            config->getMimeCatTypes(tpname, cattypes);
            expanded.insert(expanded.end(), cattypes.begin(), cattypes.end());
            continue;
        }

        // Not a category: expand possible wildcard in mime type, e.g. text/*
        // We set casesens|diacsens to get an equivalent of ixTermMatch()
        TermMatchResult matches;
        string lowered = stringtolower(tpname);
        db.termMatch(Db::ET_WILD|Db::ET_CASESENS|Db::ET_DIACSENS, string(), lowered,
                     matches, -1, "mtype");
        if (matches.entries.empty()) {
            // No match returned: keep the user value unchanged
            expanded.push_back(tpname);
            continue;
        }
        for (const auto& match : matches.entries) {
            expanded.push_back(strip_prefix(match.term));
        }
    }

    // Sort, then drop the duplicates
    sort(expanded.begin(), expanded.end());
    auto newend = unique(expanded.begin(), expanded.end());
    expanded.erase(newend, expanded.end());

    tps = expanded;
    return true;
}

// Error message fragments used by clausesToQuery() when the Xapian query grows past the
// maximum clause count. The second fragment is only appended when the index is not stripped
// of case/diacritics (o_index_stripchars is false).
static const char *maxXapClauseMsg = 
    "Maximum Xapian query size exceeded. Increase maxXapianClauses in the configuration. ";
static const char *maxXapClauseCaseDiacMsg = 
    "Or try to use case (C) or diacritics (D) sensitivity qualifiers, or less wildcards ?";


// Walk the clauses list, translate each clause and combine the results into a single
// Xapian query.
//
// @param db the index the clauses are translated against.
// @param tp SCLT_AND: the clauses are combined with OP_AND, except for excluded clauses
//   (OP_AND_NOT) and clauses carrying SDCM_FILTER (OP_FILTER). Any other value: the clauses
//   are combined with OP_OR.
// @param query the clauses. Clauses translating to an empty query are skipped.
// @param reason error text is appended here on failure.
// @param d actually a Xapian::Query*: receives the result. This is set to MatchAll if no
//   clause produced anything.
// @return false if a clause could not be translated or if the query grew past the maximum
//   clause count, else true.
bool SearchData::clausesToQuery(
    Rcl::Db &db, SClType tp, vector<SearchDataClause*>& query, string& reason, void *d)
{
    Xapian::Query xq;
    for (auto& clausep : query) {
#if 0        
        string txt;
        auto clp = dynamic_cast<SearchDataClauseSimple*>(clausep);
        if (clp)
            txt = clp->gettext();
        LOGINF("Clause: tp: " << clausep->getTp() << " txt: [" << txt << "] mods: " <<
                std::hex << clausep->getModifiers() << std::dec << "\n");
#endif
        Xapian::Query nq;
        if (!clausep->toNativeQuery(db, &nq)) {
            LOGERR("SearchData::clausesToQuery: toNativeQuery failed: "
                   << clausep->getReason() << "\n");
            reason += clausep->getReason() + " ";
            return false;
        }       
        if (nq.empty()) {
            LOGDEB0("SearchData::clausesToQuery: skipping empty clause\n");
            continue;
        }
        // If this structure is an AND list, must use AND_NOT for excl clauses.
        // Else this is an OR list, and there can't be excl clauses (checked by
        // addClause())
        Xapian::Query::op op;
        if (tp == SCLT_AND) {
            if (clausep->getexclude()) {
                op =  Xapian::Query::OP_AND_NOT;
            } else {
                if (clausep->getModifiers() & SearchDataClause::SDCM_FILTER) {
                    op =  Xapian::Query::OP_FILTER;
                } else {
                    op =  Xapian::Query::OP_AND;
                }
            }
        } else {
            op = Xapian::Query::OP_OR;
        }
        if (xq.empty()) {
            // First non-empty clause. An exclusion needs something to exclude from: use
            // the whole index.
            if (op == Xapian::Query::OP_AND_NOT)
                xq = Xapian::Query(op, Xapian::Query::MatchAll, nq);
            else 
                xq = nq;
        } else {
            xq = Xapian::Query(op, xq, nq);
        }
        if (int(xq.get_length()) >= getMaxCl()) {
            LOGERR("" << maxXapClauseMsg << "\n");
            // Report through the out parameter, like the other failure above, instead of
            // writing directly to m_reason.
            reason += maxXapClauseMsg;
            if (!o_index_stripchars)
                reason += maxXapClauseCaseDiacMsg;
            return false;
        }
    }

    LOGDEB0("SearchData::clausesToQuery: got " << xq.get_length()<<" clauses\n");

    if (xq.empty())
        xq = Xapian::Query::MatchAll;

    *static_cast<Xapian::Query *>(d) = xq;
    return true;
}

// Restrict xq to the documents inside the date interval.
//
// An interval bound with a zero year is considered unset and is replaced (inside dates
// itself) by January 1st of the index minimum year, resp. December 31st of the index maximum
// year. If xq is empty on input, the date range query becomes the query, else it is applied
// as an OP_FILTER. isbr selects the birth time range instead of the date one (only when
// built with EXT4_BIRTH_TIME).
static void processdaterange(Rcl::Db& db, Xapian::Query& xq, DateInterval& dates, bool isbr = false)
{
    const bool lowunset = (0 == dates.y1);
    const bool highunset = (0 == dates.y2);
    if (lowunset || highunset) {
        // Defaults used if the index can't tell us
        int minyear = 1970, maxyear = 2100;
        if (!db.maxYearSpan(&minyear, &maxyear)) {
            LOGERR("Can't retrieve index min/max dates\n");
            // Not fatal: continue with the defaults.
        }
        if (lowunset) {
            dates.y1 = minyear;
            dates.m1 = 1;
            dates.d1 = 1;
        }
        if (highunset) {
            dates.y2 = maxyear;
            dates.m2 = 12;
            dates.d2 = 31;
        }
    }
    LOGDEB("Db::toNativeQuery: " << (isbr?"birtime":"date") << " interval: " << dates.y1 <<
           "-" << dates.m1 << "-" << dates.d1 << "/" <<
           dates.y2 << "-" << dates.m2 << "-" << dates.d2 << "\n");

    Xapian::Query rangeq;
#ifdef EXT4_BIRTH_TIME
    if (isbr)
        rangeq = brdate_range_filter(dates.y1, dates.m1, dates.d1, dates.y2, dates.m2, dates.d2);
    else
        rangeq = date_range_filter(dates.y1, dates.m1, dates.d1, dates.y2, dates.m2, dates.d2);
#else
    rangeq = date_range_filter(dates.y1, dates.m1, dates.d1, dates.y2, dates.m2, dates.d2);
#endif
    if (rangeq.empty()) {
        LOGINFO("Db::toNativeQuery: date filter is empty\n");
    }

    if (!xq.empty()) {
        xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, rangeq);
        return;
    }
    // No probabilistic query: the date range is promoted to be THE query
    // instead of filtering an empty one.
    LOGINFO("Db::toNativeQuery: proba query is empty\n");
    xq = rangeq;
}

/**
 * Translate the whole search (clauses and global filters) into a Xapian query.
 *
 * The clause list is translated first, then the following are applied in order, each one
 * only if set: date range, birth time range (EXT4_BIRTH_TIME builds only), size range,
 * autophrase (OP_AND_MAYBE), file types (OP_FILTER), excluded file types (OP_AND_NOT).
 *
 * @param db the index to translate against. Its configuration supplies maxTermExpand,
 *   maxXapianClauses, autocasesens and autodiacsens.
 * @param d actually a Xapian::Query*: receives the result. Only written on success.
 * @return false if the db has no configuration or if the clauses translation failed
 *   (m_reason then holds an explanation), else true.
 */
bool SearchData::toNativeQuery(Rcl::Db &db, void *d)
{
    LOGDEB("SearchData::toNativeQuery: stemlang [" << m_stemlang << "]\n");
    m_reason.erase();

    // The configuration pointer can be null (expandFileTypes() checks for it too): fail
    // cleanly instead of dereferencing it.
    auto cfg = db.getConf();
    if (!cfg) {
        LOGERR("SearchData::toNativeQuery: null configuration!!\n");
        m_reason = "No configuration available for the index";
        return false;
    }
    cfg->getConfParam("maxTermExpand", &m_maxexp);
    cfg->getConfParam("maxXapianClauses", &m_maxcl);
    m_autocasesens = true;
    cfg->getConfParam("autocasesens", &m_autocasesens);
    m_autodiacsens = false;
    cfg->getConfParam("autodiacsens", &m_autodiacsens);

    simplify();
    // Walk the clause list translating each in turn and building the 
    // Xapian query tree
    Xapian::Query xq;
    if (!clausesToQuery(db, m_tp, m_query, m_reason, &xq)) {
        LOGERR("SearchData::toNativeQuery: clausesToQuery failed. reason: " << m_reason << "\n");
        return false;
    }

    if (m_haveDates) {
        processdaterange(db, xq, m_dates);
    }

#ifdef EXT4_BIRTH_TIME
    //handle birtime
    if (m_haveBrDates) {
        processdaterange(db, xq, m_brdates, true);
    }
#endif

    // Size filtering. -1 means that the corresponding bound is not set. The values are
    // compared as strings, left-padded with zeroes to 12 characters.
    if (m_minSize != -1 || m_maxSize != -1) {
        Xapian::Query sq;
        if (m_minSize == -1) {
            string value = std::to_string(m_maxSize);
            leftzeropad(value, 12);
            sq = Xapian::Query(Xapian::Query::OP_VALUE_LE, VALUE_SIZE, value);
        } else if (m_maxSize == -1) {
            string value = std::to_string(m_minSize);
            leftzeropad(value, 12);
            sq = Xapian::Query(Xapian::Query::OP_VALUE_GE, VALUE_SIZE, value);
        } else {
            string minvalue = std::to_string(m_minSize);
            leftzeropad(minvalue, 12);
            string maxvalue = std::to_string(m_maxSize);
            leftzeropad(maxvalue, 12);
            sq = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, VALUE_SIZE, minvalue, maxvalue);
        }
        
        // If no probabilistic query is provided then promote the
        // filter to be THE query instead of filtering an empty query.
        if (xq.empty()) {
            LOGINFO("Db::toNativeQuery: proba query is empty\n");
            xq = sq;
        } else {
            xq = Xapian::Query(Xapian::Query::OP_FILTER, xq, sq);
        }
    }

    // Add the autophrase if any. A failure to translate it is ignored.
    if (m_autophrase) {
        Xapian::Query apq;
        if (m_autophrase->toNativeQuery(db, &apq)) {
            xq = xq.empty() ? apq : Xapian::Query(Xapian::Query::OP_AND_MAYBE, xq, apq);
        }
    }

    // Add the file type filtering clause if any
    if (!m_filetypes.empty()) {
        expandFileTypes(db, m_filetypes);
        
        Xapian::Query tq;
        for (const auto& ft : m_filetypes) {
            string term = wrap_prefix(mimetype_prefix) + ft;
            LOGDEB0("Adding file type term: [" << term << "]\n");
            tq = tq.empty() ? Xapian::Query(term) :
                Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
        }
        xq = xq.empty() ? tq : Xapian::Query(Xapian::Query::OP_FILTER, xq, tq);
    }

    // Add the neg file type filtering clause if any
    if (!m_nfiletypes.empty()) {
        expandFileTypes(db, m_nfiletypes);
        
        Xapian::Query tq;
        for (const auto& ft : m_nfiletypes) {
            string term = wrap_prefix(mimetype_prefix) + ft;
            LOGDEB0("Adding negative file type term: [" << term << "]\n");
            tq = tq.empty() ? Xapian::Query(term) : 
                Xapian::Query(Xapian::Query::OP_OR, tq, Xapian::Query(term));
        }
        // This is an exclusion: if there is nothing else, exclude from the whole index
        // (like clausesToQuery() does) rather than turning the excluded types into a
        // positive query.
        if (xq.empty()) {
            xq = Xapian::Query(Xapian::Query::OP_AND_NOT, Xapian::Query::MatchAll, tq);
        } else {
            xq = Xapian::Query(Xapian::Query::OP_AND_NOT, xq, tq);
        }
    }

    *static_cast<Xapian::Query *>(d) = xq;
    return true;
}

// Splitter for breaking a user string into simple terms and
// phrases. This is for parts of the user entry which would appear as
// a single word because there is no white space inside, but are
// actually multiple terms to rcldb (ie term1,term2). Still, most of
// the time, the result of our splitting will be a single term.
class TextSplitQ : public TextSplitP {
public:
    TextSplitQ(int flags, TermProc *prc)
        : TextSplitP(prc, flags), m_nostemexp(false) {
    }

    bool takeword(const std::string &term, size_t pos, size_t bs, size_t be) override {
        // Check if the first letter is a majuscule in which
        // case we do not want to do stem expansion. Need to do this
        // before unac of course...
        m_nostemexp = unaciscapital(term);

        return TextSplitP::takeword(term, pos, bs, be);
    }
    virtual bool discarded(const std::string &term, size_t, size_t, size_t, DiscardReason reason)
        override {
        m_problemterm = term;
        return true;
    }

    bool nostemexp() const {
        return m_nostemexp;
    }
    std::string getproblemterm() {
        return m_problemterm;
    }
private:
    bool m_nostemexp;
    std::string m_problemterm;
};

// Term processor collecting the terms produced by splitting a piece of user entry.
//
// Terms are stored by position, keeping the longest one if several are emitted at the same
// position, along with a flag telling if stem expansion is disabled for the term. flush()
// then transfers the data, in position order, to the vectors returned by terms() and
// nostemexps().
class TermProcQ : public TermProc {
public:
    TermProcQ() : TermProc(nullptr), m_alltermcount(0), m_lastpos(0), m_ts(nullptr) {}

    // We need a ref to the splitter (only it knows about orig term
    // capitalization for controlling stemming. The ref can't be set
    // in the constructor because the splitter is not built yet when
    // we are born (chicken and egg).
    void setTSQ(const TextSplitQ *ts) {
        m_ts = ts;
    }
    
    bool takeword(const std::string &term, size_t _pos, size_t, size_t be) override {
        m_alltermcount++;
        int ipos = static_cast<int>(_pos);
        if (m_lastpos < ipos)
            m_lastpos = ipos;
        // Stem expansion is disabled if the end offset is 0, else the splitter decides.
        // The splitter reference is only set after construction: if it is still missing,
        // fall back to no expansion rather than dereferencing a null pointer.
        bool noexpand = (be && m_ts) ? m_ts->nostemexp() : true;
        LOGDEB1("TermProcQ::takeword: pushing [" << term << "] pos " <<
                ipos << " noexp " << noexpand << "\n");
        // Only keep the longest term for a given position
        if (m_terms[ipos].size() < term.size()) {
            m_terms[ipos] = term;
            m_nste[ipos] = noexpand;
        }
        return true;
    }

    // Copy the collected data to the output vectors. The maps are ordered by position, so
    // the vectors are too.
    bool flush() override {
        for (const auto& entry : m_terms) {
            m_vterms.push_back(entry.second);
            m_vnostemexps.push_back(m_nste[entry.first]);
        }
        return true;
    }

    int alltermcount() const {
        return m_alltermcount;
    }
    // Highest term position seen
    int lastpos() const {
        return m_lastpos;
    }
    // Terms in position order. Only valid after flush().
    const vector<string>& terms() {
        return m_vterms;
    }
    // "No stem expansion" flags, parallel to terms(). Only valid after flush().
    const vector<bool>& nostemexps() {
        return m_vnostemexps;
    }
private:
    // Count of terms including stopwords: this is for adjusting
    // phrase/near slack
    int m_alltermcount; 
    int m_lastpos;
    const TextSplitQ *m_ts;
    vector<string> m_vterms;
    vector<bool>   m_vnostemexps;
    // Position to term and position to "no stem expansion" flag
    map<int, string> m_terms;
    map<int, bool> m_nste;
};

// Printable names for the clause modifier flags. Used with flagsToString() for displaying
// the modifiers in the expandTerm() debug message.
static const vector<CharFlags> expandModStrings{
    {SearchDataClause::SDCM_NOSTEMMING, "nostemming"},
    {SearchDataClause::SDCM_ANCHORSTART, "anchorstart"},
    {SearchDataClause::SDCM_ANCHOREND, "anchorend"},
    {SearchDataClause::SDCM_CASESENS, "casesens"},
    {SearchDataClause::SDCM_DIACSENS, "diacsens"},
    {SearchDataClause::SDCM_NOTERMS, "noterms"},
    {SearchDataClause::SDCM_NOSYNS, "nosyns"},
    {SearchDataClause::SDCM_PATHELT, "pathelt"},
    {SearchDataClause::SDCM_FILTER, "filter"},
    {SearchDataClause::SDCM_EXPANDPHRASE, "expandphrase"},
    {SearchDataClause::SDCM_NOWILDEXP, "nowildexp"},
};

/** Expand term into term list, using appropriate mode: stem, wildcards, 
 *  diacritics... 
 *
 * As a side effect, the highlight data (m_hldata) is updated with the user term, the
 * expansion-to-user-term links and the terms which came from spelling approximation.
 *
 * @param db the index to expand against.
 * @param ermsg output: set to an explanation when the function returns false.
 * @param mods stem expansion, case and diacritics sensitivity control.
 * @param term input single word
 * @param oexp output expansion list. Cleared on entry. Left empty if term is empty.
 * @param sterm output: set to the input term when no wildcard expansion is performed (no
 *  wildcards in the term, or wildcard expansion disabled), else left empty.
 * @param prefix field prefix in index. We could recompute it, but the caller
 *  has it already. Used in the simple case where there is nothing to expand, 
 *  and we just return the prefixed term (else Db::termMatch deals with it).
 * @param multiwords it may happen that synonym processing results in multi-word
 *   expansions which should be processed as phrases.
 * @return false if the expansion reached the maximum size and the limit is a hard one,
 *   else true.
 */
bool SearchDataClauseSimple::expandTerm(Rcl::Db &db, 
                                        string& ermsg, int mods, 
                                        const string& term, 
                                        vector<string>& oexp, string &sterm,
                                        const string& prefix,
                                        vector<string>* multiwords
    )
{
    LOGDEB0("expandTerm: mods: [" << flagsToString(expandModStrings, mods) <<
            "] fld [" << m_field << "] trm [" << term << "] lang [" <<
            getStemLang() << "]\n");
    sterm.clear();
    oexp.clear();
    if (term.empty())
        return true;

    if (mods & SDCM_PATHELT) {
        // Path elements are special. Only wildcards, and they are
        // case-sensitive.
        mods |= SDCM_NOSTEMMING|SDCM_CASESENS|SDCM_DIACSENS|SDCM_NOSYNS;
    }

    // Expansion size limit. If a soft limit is set (not -1), it is used and reaching it is
    // not an error. Else the regular limit is used, and reaching it fails the expansion
    // (see further down).
    bool maxexpissoft = false;
    int maxexpand = getSoftMaxExp();
    if (maxexpand != -1) {
        maxexpissoft = true;
    } else {
        maxexpand = getMaxExp();
    }

    bool haswild = term.find_first_of(cstr_minwilds) != string::npos;

    // If there are no wildcards, add term to the list of user-entered terms
    bool dowildexp = !getNoWildExp() && haswild;
    if (!dowildexp) {
        m_hldata.uterms.insert(term);
        sterm = term;
    }
    // No stem expansion if there are wildcards (even if nowildexp) or if prevented by caller
    bool nostemexp = (mods & SDCM_NOSTEMMING) != 0;
    if (haswild || getStemLang().empty()) {
        LOGDEB2("expandTerm: found wildcards or stemlang empty: no exp\n");
        nostemexp = true;
    }

    bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
    bool case_sensitive = (mods & SDCM_CASESENS) != 0;
    bool synonyms = (mods & SDCM_NOSYNS) == 0;
    bool pathelt = (mods & SDCM_PATHELT) != 0;
    
    // noexpansion can be modified further down by possible case/diac expansion.
    // Note that it is computed from the synonyms value requested by the caller: the
    // automatic synonyms disabling further down does not affect it.
    bool noexpansion = nostemexp && !dowildexp && !synonyms; 

    if (o_index_stripchars) {
        // The case/diacritics sensitivity flags are always off with this kind of index.
        diac_sensitive = case_sensitive = false;
    } else {
        // If we are working with a raw index, apply the rules for case and 
        // diacritics sensitivity.

        // If any character has a diacritic, we become
        // diacritic-sensitive. Note that the way that the test is
        // performed (conversion+comparison) will automatically ignore
        // accented characters which are actually a separate letter
        if (getAutoDiac() && unachasaccents(term)) {
            LOGDEB0("expandTerm: term has accents -> diac-sensitive\n");
            diac_sensitive = true;
        }

        // If any character apart the first is uppercase, we become
        // case-sensitive.  The first character is reserved for
        // turning off stemming. You need to use a query language
        // modifier to search for Floor in a case-sensitive way.
        Utf8Iter it(term);
        it++;
        if (getAutoCase() && unachasuppercase(term.substr(it.getBpos()))) {
            LOGDEB0("expandTerm: term has uppercase -> case-sensitive\n");
            case_sensitive = true;
        }

        // If we are sensitive to case or diacritics turn stemming and synonyms off
        if (diac_sensitive || case_sensitive) {
            LOGDEB0("expandTerm: diac or case sens set -> stemexpand and synonyms off\n");
            nostemexp = true;
            synonyms = false;
        }

        // Unless we are sensitive to both case and diacritics, the term has to go through
        // termMatch(), even if no other expansion was requested.
        if (!case_sensitive || !diac_sensitive)
            noexpansion = false;
    }


    // Nothing to expand: just return the prefixed term. This shortcut is not taken for
    // excluded clauses, which always go through termMatch().
    if (!m_exclude && noexpansion) {
        oexp.push_back(prefix + term);
        m_hldata.terms[term] = term;
        LOGDEB("ExpandTerm: noexpansion: final: "<<stringsToString(oexp)<< "\n");
        return true;
    } 

    // Translate our settings into termMatch() flags
    int termmatchsens = 0;
    if (case_sensitive)
        termmatchsens |= Db::ET_CASESENS;
    if (diac_sensitive)
        termmatchsens |= Db::ET_DIACSENS;
    if (synonyms)
        termmatchsens |= Db::ET_SYNEXP;
    if (pathelt) 
        termmatchsens |= Db::ET_PATHELT;
    // Wildcard expansion takes precedence over stem expansion
    Db::MatchType mtyp = dowildexp ? Db::ET_WILD : nostemexp ? Db::ET_NONE : Db::ET_STEM;
    TermMatchResult res;
    if (!db.termMatch(mtyp | termmatchsens, getStemLang(),
                      term, res, maxexpand, m_field, multiwords)) {
        // Let it go through: a termMatch() failure is deliberately ignored, we go on with
        // whatever is in res.
    }

    // Term match entries to vector of terms. Reaching the limit is only an error if the
    // limit is a hard one.
    if (int(res.entries.size()) >= maxexpand && !maxexpissoft) {
        ermsg = "Maximum term expansion size exceeded."
            " Maybe use case/diacritics sensitivity or increase maxTermExpand.";
        return false;
    }
    for (const auto& entry : res.entries) {
        oexp.push_back(entry.term);
    }
    // If the term does not exist at all in the db, the return from
    // termMatch() is going to be empty, which is not what we want (we
    // would then compute an empty Xapian query)
    if (oexp.empty())
        oexp.push_back(prefix + term);

    // Remember the uterm-to-expansion links
    if (!m_exclude) {
        for (const auto& entry : oexp) {
            m_hldata.terms[strip_prefix(entry)] = term;
        }
    }
    // Remember the terms generated through spelling approximation
    m_hldata.spellexpands.insert(m_hldata.spellexpands.end(),
                                 res.fromspelling.begin(), res.fromspelling.end());
    LOGDEB("ExpandTerm: final: " << stringsToString(oexp) << "\n");
    return true;
}

// Prepend prefix to every element of v, in place.
static void prefix_vector(vector<string>& v, const string& prefix)
{
    for (auto it = v.begin(); it != v.end(); ++it) {
        // Build the new value aside, then exchange it with the element.
        string prefixed(prefix);
        prefixed += *it;
        it->swap(prefixed);
    }
}

/**
 * Translate a single term from the user entry into a Xapian query and append the query to
 * the output list.
 *
 * The term is expanded by expandTerm(), and the query is an OR of the expansions, possibly
 * OR'ed with a boosted version of the original term, and with phrase queries for the
 * multi-word synonyms. The highlight data and the clause count (m_curcl) are updated.
 *
 * @param db the index to expand against.
 * @param ermsg set by expandTerm() if the expansion fails. Nothing is appended to the
 *   output in this case.
 * @param span the user term.
 * @param mods SDCM_xxx modifiers.
 * @param pq actually a vector<Xapian::Query>*: the output list.
 */
void SearchDataClauseSimple::processSimpleSpan(
    Rcl::Db &db, string& ermsg, const string& span, int mods, void *pq)
{
    vector<Xapian::Query>& pqueries(*static_cast<vector<Xapian::Query>*>(pq));
    // Actually print the modifiers in hex, as announced by the "0x"
    LOGDEB0("StringToXapianQ::processSimpleSpan: [" << span << "] mods 0x"
           << std::hex << (unsigned int)mods << std::dec << "\n");

    string prefix;
    const FieldTraits *ftp;
    if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
        if (ftp->noterms)
            addModifier(SDCM_NOTERMS); // Don't add terms to highlight data
        prefix = wrap_prefix(ftp->pfx);
    }

    vector<string> exp;  
    string sterm; // dumb version of user term
    vector<string> multiwords;
    static const string wildcardchars{"*?[]"};
    // Special case: in case nowildexp is set and we get a single char (because it's in
    // indexedpunctuation probably), check if it's a wildcard and don't do expandTerm at
    // all. Simpler than dealing with the case inside expandTerm.
    if (getNoWildExp() && span.size() == 1 && wildcardchars.find(span[0]) != string::npos) {
        // Like what expandTerm() returns, the expansion must hold the field prefix: the
        // code below strips prefix.size() characters from each entry for highlighting, and
        // the query must look for the term in the right field.
        exp.push_back(prefix + span);
        sterm = span;
    } else {
        if (!expandTerm(db, ermsg, mods, span, exp, sterm, prefix, &multiwords)) {
            LOGINF("processSimpleSpan: expandterm failed\n");
            return;
        }
    }
    
    // Set up the highlight data. No prefix should go in there
    if (!m_exclude) {
        for (const auto& term : exp) {
            HighlightData::TermGroup tg;
            tg.term = term.substr(prefix.size());
            tg.grpsugidx =  m_hldata.ugroups.size() - 1;
            m_hldata.index_term_groups.push_back(tg);
        }
    }
    
    // Push either term or OR of stem-expanded set
    Xapian::Query xq(Xapian::Query::OP_OR, exp.begin(), exp.end());
    m_curcl += exp.size();

    // If sterm (simplified original user term) is not empty, and the term was actually
    // expanded (more than one entry in the list), give the original term a relevance
    // boost, so that it counts more than its expansions. We don't do it if there are
    // wildcards anywhere in the search: this would skew the results.
    bool doBoostUserTerm = 
        (m_parentSearch && !m_parentSearch->haveWildCards()) || 
        (nullptr == m_parentSearch && !m_haveWildCards);
    if (exp.size() > 1 && doBoostUserTerm && !sterm.empty()) {
        xq = Xapian::Query(Xapian::Query::OP_OR, xq,
                           Xapian::Query(prefix+sterm, original_term_wqf_booster));
    }

    // Push phrases for the multi-word expansions
    for (const auto& mw : multiwords) {
        vector<string> phr;
        // We just do a basic split to keep things a bit simpler here
        // (no textsplit). This means though that no punctuation is
        // allowed in multi-word synonyms.
        stringToTokens(mw, phr);
        if (!prefix.empty())
            prefix_vector(phr, prefix);
        xq = Xapian::Query(Xapian::Query::OP_OR, xq, 
                           Xapian::Query(Xapian::Query::OP_PHRASE, 
                                         phr.begin(), phr.end()));
        m_curcl++;
    }

    pqueries.push_back(xq);
}

// Translate a multi-term element of the user entry into a PHRASE or
// NEAR Xapian query, appended to the output list. Each element of the
// query can itself be an OR query if the corresponding term got
// expanded by stemming or wildcards (by default, there is no stem
// expansion inside a PHRASE).
//
// splitData holds the terms, pq is actually a vector<Xapian::Query>*,
// slack is the distance added to the term count for computing the
// query window. Nothing is appended to the output if a term expansion
// fails or if the maximum clause count is reached.
void SearchDataClauseSimple::processPhraseOrNear(
    Rcl::Db &db, string& ermsg, TermProcQ *splitData, int mods, void *pq, bool useNear, int slack)
{
    vector<Xapian::Query> &pqueries(*(vector<Xapian::Query>*)pq);
    Xapian::Query::op op = Xapian::Query::OP_PHRASE;
    if (useNear)
        op = Xapian::Query::OP_NEAR;
    vector<Xapian::Query> orqueries;
    vector<vector<string> > groups;

    bool useidxsynonyms = db.getSynGroups().getpath() == db.getConf()->getIdxSynGroupsFile();

    // Compute the index prefix for the field, if any
    string prefix;
    const FieldTraits *ftp;
    if (!m_field.empty() && db.fieldToTraits(m_field, &ftp, true)) {
        prefix = wrap_prefix(ftp->pfx);
    }

    if (mods & Rcl::SearchDataClause::SDCM_ANCHORSTART) {
        orqueries.push_back(Xapian::Query(prefix + start_of_field_term));
    }

    // Stem expansion is off inside phrases, except if enabled globally or by the modifier
    const bool phrasenostem = op == Xapian::Query::OP_PHRASE && !o_expand_phrases &&
        !(mods & Rcl::SearchDataClause::SDCM_EXPANDPHRASE);

    // Max term count in a multiword. See below comment about slack adjustment
    int maxmwordlen = 0;
    // Go through the list and perform stem/wildcard expansion for each element
    const vector<string>& uterms = splitData->terms();
    const vector<bool>& nostems = splitData->nostemexps();
    for (size_t idx = 0; idx < uterms.size(); idx++) {
        const string& uterm = uterms[idx];
        LOGDEB0("ProcessPhrase: processing [" << uterm << "]\n");
        // Also no stem expansion if it was disabled for this specific term.
        int lmods = mods;
        if (nostems[idx] || phrasenostem)
            lmods |= SearchDataClause::SDCM_NOSTEMMING;
        string sterm;
        vector<string> exp;
        vector<string> multiwords;
        if (!expandTerm(db, ermsg, lmods, uterm, exp, sterm, prefix, &multiwords))
            return;

        // Note: because of how expandTerm works, the multiwords can only come from the synonyms
        // expansion, which means that, if idxsynonyms is set, they have each been indexed as a
        // single term. So, if idxsynonyms is set, and is the current active synonyms file, we just
        // add them to the expansion. We have to increase the slack in this case, which will also
        // apply to single-word expansions and cause false matches, but this is better than missing
        // multiword matches (this is because, even if the multiword term was issued at the position
        // of its first word during indexing, the position of the first term after the multiword is
        // still increased by the number of words in the multiword).
        if (useidxsynonyms && !multiwords.empty()) {
            for (const auto& mword: multiwords) {
                // Word count is space count + 1
                int wordcnt = std::count(mword.begin(), mword.end(), ' ') + 1;
                maxmwordlen = std::max(maxmwordlen, wordcnt);
            }
            exp.insert(exp.end(), multiwords.begin(), multiwords.end());
        }

        LOGDEB0("ProcessPhraseOrNear: exp size " << exp.size() << ", exp: " <<
                stringsToString(exp) << "\n");
        // groups is used for highlighting, we don't want prefixes in there.
        groups.push_back(vector<string>());
        vector<string>& noprefs = groups.back();
        for (const auto& prefterm : exp) {
            noprefs.push_back(prefterm.substr(prefix.size()));
        }
        orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, exp.begin(), exp.end()));
        m_curcl += exp.size();
        if (m_curcl >= getMaxCl())
            return;
    }

    if (mods & Rcl::SearchDataClause::SDCM_ANCHOREND) {
        orqueries.push_back(Xapian::Query(prefix + end_of_field_term));
    }

    // Generate an appropriate PHRASE/NEAR query with adjusted slack
    // For phrases, give a relevance boost like we do for original terms
    LOGDEB2("PHRASE/NEAR:  alltermcount " << splitData->alltermcount() <<
            " lastpos " << splitData->lastpos() << "\n");
    if (maxmwordlen > 1) {
        slack += maxmwordlen - 1;
    }
    const int window = static_cast<int>(orqueries.size()) + slack;
    Xapian::Query xq(op, orqueries.begin(), orqueries.end(), window);
    if (op == Xapian::Query::OP_PHRASE)
        xq = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, xq, original_term_wqf_booster);
    pqueries.push_back(xq);

    // Insert the search groups and slacks in the highlight data, with
    // a reference to the user entry that generated them:
    if (m_exclude)
        return;
    HighlightData::TermGroup tg;
    tg.orgroups = groups;
    tg.slack = slack;
    tg.grpsugidx =  m_hldata.ugroups.size() - 1;
    if (op == Xapian::Query::OP_PHRASE) {
        tg.kind = HighlightData::TermGroup::TGK_PHRASE;
    } else {
        tg.kind = HighlightData::TermGroup::TGK_NEAR;
    }
    m_hldata.index_term_groups.push_back(tg);
}

// Extract anchoring markers from a user element: after trimstring(), a leading '^' and/or a
// trailing '$' are removed from the string (modified in place) and reported as modifier flags.
static int stringToMods(string& s)
{
    trimstring(s);

    int anchorflags = 0;
    // Start-of-field anchor
    if (!s.empty() && s.front() == '^') {
        anchorflags |= Rcl::SearchDataClause::SDCM_ANCHORSTART;
        s.erase(s.begin());
    }
    // End-of-field anchor. Tested after the '^' removal, so a lone "^" yields an empty string.
    if (!s.empty() && s.back() == '$') {
        anchorflags |= Rcl::SearchDataClause::SDCM_ANCHOREND;
        s.pop_back();
    }
    return anchorflags;
}

/** 
 * Turn user entry string (NOT raw query language, but possibly the contents of a phrase/near
 * clause out of the parser) into a list of Xapian queries.
 * We just separate words and phrases, and do wildcard and stem expansion.
 *
 * This is used to process data entered into an OR/AND/NEAR/PHRASE field of
 * the GUI (in the case of NEAR/PHRASE, clausedist adds dquotes to the user
 * entry).
 *
 * This appears awful, and it would seem that the split into
 * terms/phrases should be performed in the upper layer so that we
 * only receive pure term or near/phrase pure elements here, but in
 * fact there are things that would appear like terms to naive code,
 * and which may actually be turned into phrases (ie: tom-jerry),
 * in a manner which intimately depends on the index implementation,
 * so that it makes sense to process this here.
 *
 * The final list contains one query for each term or phrase
 *   - Elements corresponding to a stem-expanded part are an OP_OR
 *     composition of the stem-expanded terms (or a single term query).
 *   - Elements corresponding to phrase/near are an OP_PHRASE/NEAR
 *     composition of the phrase terms (no stem expansion in this case)
 *
 * @param db the index object: supplies the stop list and is passed on to the span/phrase
 *   processing methods.
 * @param iq the user entry string.
 * @param[out] ermsg cleared on entry, set to an error description on failure.
 * @param[out] pbterm set to the text splitter's problem term when it reports one.
 * @param[out] pq really a vector<Xapian::Query>*, the generated queries are appended to it.
 * @param slack0 base slack, adjusted per element before being used for phrase/near elements.
 * @param useNear passed on to processPhraseOrNear().
 * @return true if no error was recorded in ermsg, else false (this includes reaching the
 *   maximum clause count).
 */
bool SearchDataClauseSimple::processUserString(
    Rcl::Db &db, const string &iq, string &ermsg, string &pbterm, void *pq, int slack0, bool useNear)
{
    vector<Xapian::Query> &pqueries(*static_cast<vector<Xapian::Query>*>(pq));
    int mods = m_modifiers;

    LOGDEB("StringToXapianQ:pUS:: qstr [" << iq << "] fld [" << m_field <<
           "] mods 0x"<<mods<< " slack " << slack0 << " near " << useNear <<"\n");
    ermsg.erase();
    m_curcl = 0;
    const StopList stops = db.getStopList();

    // Simple whitespace-split input into user-level words and double-quoted phrases: word1 word2
    // "this is a phrase".
    //
    // The text splitter may further still decide that the resulting "words" are really phrases,
    // this depends on separators: [paul@dom.net] would still be a word (span), but [about-me] will
    // probably be handled as a phrase.
    vector<string> phrases;
    TextSplit::stringToStrings(iq, phrases);

    // Process each element: textsplit into terms, handle stem/wildcard expansion and transform into
    // an appropriate Xapian::Query
    try {
        for (auto& wordorphrase : phrases) {
            LOGDEB0("strToXapianQ: phrase/word: [" << wordorphrase << "]\n");
            int slack = slack0;
            // Anchoring modifiers. An anchored element counts for one more "term" so that an
            // anchored single word goes through the phrase processing.
            int amods = stringToMods(wordorphrase);
            int terminc = amods != 0 ? 1 : 0;
            // NOTE(review): mods is not reset for each element, so anchor flags found on one
            // element stay set for the following ones - confirm that this is intended.
            mods |= amods;
            // If there are multiple spans in this element, including
            // at least one composite, we have to increase the slack
            // else a phrase query including a span would fail. 
            // Ex: "term0@term1 term2" is onlyspans-split as:
            //   0 term0@term1             0   12
            //   2 term2                  13   18
            // The position of term2 is 2, not 1, so a phrase search
            // would fail.
            // We used to do  word split, searching for 
            // "term0 term1 term2" instead, which may have worse 
            // performance, but will succeed.
            // We now adjust the phrase/near slack by comparing the term count
            // and the last position

            // The term processing pipeline:
            //   split -> [unac/case ->] stops -> store terms
            TermProcQ tpq;
            TermProc *nxt = &tpq;
            TermProcStop tpstop(nxt, stops); nxt = &tpstop;
            //TermProcCommongrams tpcommon(nxt, stops); nxt = &tpcommon;
            //tpcommon.onlygrams(true);
            TermProcPrep tpprep(nxt);
            if (o_index_stripchars)
                nxt = &tpprep;

            int txtsplitflags = TextSplit::TXTS_ONLYSPANS;
            if (!getNoWildExp()) {
                txtsplitflags |= TextSplit::TXTS_KEEPWILD;
            }
            TextSplitQ splitter(txtsplitflags, nxt);
            tpq.setTSQ(&splitter);
            splitter.text_to_words(wordorphrase);
            if (!splitter.getproblemterm().empty()) {
                pbterm = splitter.getproblemterm();
            }
            slack += tpq.lastpos() - int(tpq.terms().size()) + 1;

            LOGDEB0("strToXapianQ: termcount: " << tpq.terms().size() << "\n");

            // Nothing to search for in this element: skip it. This has to be tested on the term
            // list itself and not on the (terms + terminc) count: for an element reduced to an
            // anchor character ("^" or "$"), the count is 1 and the single-term code below would
            // call front() on empty vectors.
            if (tpq.terms().empty()) {
                continue;
            }

            switch (tpq.terms().size() + terminc) {
            case 1: {
                // Single unanchored term
                int lmods = mods;
                if (tpq.nostemexps().front())
                    lmods |= SearchDataClause::SDCM_NOSTEMMING;
                if (!m_exclude) {
                    m_hldata.ugroups.push_back(tpq.terms());
                }
                processSimpleSpan(db, ermsg, tpq.terms().front(), lmods, &pqueries);
            }
                break;
            default:
                // Multiple terms, or a single anchored one
                if (!m_exclude) {
                    m_hldata.ugroups.push_back(tpq.terms());
                }
                processPhraseOrNear(db, ermsg, &tpq, mods, &pqueries, useNear, slack);
            }
            if (m_curcl >= getMaxCl()) {
                ermsg = maxXapClauseMsg;
                if (!o_index_stripchars)
                    ermsg += maxXapClauseCaseDiacMsg;
                break;
            }
        }
    } catch (const Xapian::Error &e) {
        ermsg = e.get_msg();
    } catch (const string &s) {
        ermsg = s;
    } catch (const char *s) {
        ermsg = s;
    } catch (...) {
        ermsg = "Caught unknown exception";
    }
    if (!ermsg.empty()) {
        LOGERR("stringToXapianQueries: " << ermsg << "\n");
        return false;
    }
    return true;
}

// Translate a simple OR or AND search clause into a Xapian query stored through p (which really
// is a Xapian::Query*). A clause carrying a relational operator is executed as a range clause
// built from this one. Returns false, with m_reason set, on failure.
bool SearchDataClauseSimple::toNativeQuery(Rcl::Db &db, void *p)
{
    LOGDEB("SearchDataClauseSimple::toNativeQuery: fld [" << m_field <<
           "] val [" << m_text << "] stemlang [" << getStemLang() << "]\n");

    m_hldata.clear();

    // Run this clause as a range query with the given bounds and take over its failure reason.
    auto runAsRange = [this, &db, p](const string& lowbound, const string& highbound) {
        SearchDataClauseRange rangecl(*this, lowbound, highbound);
        const bool rangeok = rangecl.toNativeQuery(db, p);
        m_reason = rangecl.getReason();
        return rangeok;
    };

    // Transform (in)equalities into a range query: an empty bound leaves that side open.
    switch (getrel()) {
    case REL_EQUALS:
        return runAsRange(gettext(), gettext());
    case REL_LT: case REL_LTE:
        return runAsRange("", gettext());
    case REL_GT: case REL_GTE:
        return runAsRange(gettext(), "");
    default:
        break;
    }

    Xapian::Query *qp = static_cast<Xapian::Query *>(p);
    *qp = Xapian::Query();

    // Operator used to combine the per-word/phrase subqueries
    Xapian::Query::op op;
    if (m_tp == SCLT_AND) {
        op = Xapian::Query::OP_AND;
    } else if (m_tp == SCLT_OR) {
        op = Xapian::Query::OP_OR;
    } else {
        LOGERR("SearchDataClauseSimple: bad m_tp " << m_tp << "\n");
        m_reason = "Internal error";
        return false;
    }

    vector<Xapian::Query> pqueries;
    std::string pbterm;
    if (m_text.empty()) {
        // No user text: use a single query on the empty term instead of going through the
        // string processing.
        pqueries.push_back(Xapian::Query(std::string()));
    } else {
        if (!processUserString(db, m_text, m_reason, pbterm, &pqueries)) {
            return false;
        }
    }

    if (pqueries.empty()) {
        LOGDEB("SearchDataClauseSimple: " << m_text << " resolved to null query\n");
        if (pbterm.empty()) {
            m_reason = string("Resolved to null query. Term too long ? : [") + m_text + "]";
        } else {
            m_reason = string("Resolved to null query. Problem term : [") + pbterm + "]";
        }
        return false;
    }

    *qp = Xapian::Query(op, pqueries.begin(), pqueries.end());
    // Only wrap in a weight scaling query if the clause weight is not the neutral 1.0
    if (m_weight != 1.0) {
        *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
    }
    return true;
}

// Translate a range clause. This only works if a Xapian value slot
// was attributed to the field.
//
// The query is stored through p (really a Xapian::Query*). An empty m_text produces a
// "less or equal to m_t2" query, an empty m_t2 a "greater or equal to m_text" one, else the
// query is for the [m_text..m_t2] range. Returns false, with m_reason set and an empty query,
// on failure.
bool SearchDataClauseRange::toNativeQuery(Rcl::Db &db, void *p)
{
    LOGDEB("SearchDataClauseRange::toNativeQuery: " << m_field <<
           " :[" << m_text << ".." << m_t2 << "]\n");

    m_hldata.clear();

    Xapian::Query *qp = static_cast<Xapian::Query *>(p);
    *qp = Xapian::Query();

    if (m_field.empty() || (m_text.empty() && m_t2.empty())) {
        m_reason = "Range clause needs a field and a value";
        return false;
    }

    // Get the value number for the field from the configuration
    const FieldTraits *ftp;
    if (!db.fieldToTraits(m_field, &ftp, true)) {
        m_reason = string("field ") + m_field + " not found in configuration";
        return false;
    }
    // Slot 0 is treated as "no value slot configured"
    if (ftp->valueslot == 0) {
        m_reason = string("No value slot specified in configuration for field ") + m_field;
        return false;
    }
    LOGDEB("SearchDataClauseRange: value slot " << ftp->valueslot << "\n");
    // Build Xapian VALUE query.
    string errstr;
    try {
        if (m_text.empty()) {
            *qp = Xapian::Query(Xapian::Query::OP_VALUE_LE, ftp->valueslot,
                                convert_field_value(*ftp, m_t2));
        } else if (m_t2.empty()) {
            *qp = Xapian::Query(Xapian::Query::OP_VALUE_GE, ftp->valueslot,
                                convert_field_value(*ftp, m_text));
        } else {
            *qp = Xapian::Query(Xapian::Query::OP_VALUE_RANGE, ftp->valueslot,
                                convert_field_value(*ftp, m_text),
                                convert_field_value(*ftp, m_t2));
        }
    }
    XCATCHERROR(errstr);
    if (!errstr.empty()) {
        // Report the actual error text, both in the log and to the caller.
        LOGERR("SearchDataClauseRange: range query creation failed for slot " <<
               ftp->valueslot << ": " << errstr << "\n");
        m_reason = string("Range query creation failed: ") + errstr;
        *qp = Xapian::Query();
        return false;
    }
    return true;
}

// Translate a FILENAME search clause. This always comes
// from a "filename" search from the gui or recollq. A query language
// "filename:"-prefixed field will not go through here, but through
// the generic field-processing code.
//
// The entry is not split: the whole value, blanks included, is expanded against the indexed
// unsplit file names, and the query is an OR of the resulting terms.
bool SearchDataClauseFilename::toNativeQuery(Rcl::Db &db, void *p)
{
    m_hldata.clear();

    Xapian::Query *qp = static_cast<Xapian::Query *>(p);
    *qp = Xapian::Query();

    // Use the soft expansion limit, except if it is -1, in which case the regular one applies.
    const int softlimit = getSoftMaxExp();
    const int explimit = (softlimit == -1) ? getMaxExp() : softlimit;

    vector<string> expanded;
    db.filenameWildExp(m_text, expanded, explimit);
    *qp = Xapian::Query(Xapian::Query::OP_OR, expanded.begin(), expanded.end());

    // Only wrap in a weight scaling query if the clause weight is not the neutral 1.0
    if (m_weight != 1.0) {
        *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
    }
    return true;
}

// Translate a dir: path filtering clause. See comments in .h
//
// The path is split on '/' and each element is expanded with the path element prefix. The
// result, stored through p (really a Xapian::Query*), is a phrase of the per-element queries,
// preceded by the bare prefix term when the path is absolute. Returns false, with m_reason set,
// on failure.
bool SearchDataClausePath::toNativeQuery(Rcl::Db &db, void *p)
{
    LOGDEB("SearchDataClausePath::toNativeQuery: [" << m_text << "]\n");

    m_hldata.clear();
    
    Xapian::Query *qp = static_cast<Xapian::Query *>(p);
    *qp = Xapian::Query();

    string ltext;
#ifdef _WIN32
    // Windows file names are case-insensitive, so we lowercase (same as when indexing)
    unacmaybefold(m_text, ltext, UNACOP_FOLD);
#else
    ltext = m_text;
#endif

    if (ltext.empty()) {
        LOGERR("SearchDataClausePath: empty path??\n");
        m_reason = "Empty path ?";
        return false;
    }

    vector<Xapian::Query> orqueries;

    // An absolute path gets the bare prefix term as first phrase element.
    // NOTE(review): a path which needed tilde expansion does not get this first element, even if
    // the expansion made it absolute - confirm that this is intended.
    if (path_isabsolute(ltext))
        orqueries.push_back(Xapian::Query(wrap_prefix(pathelt_prefix)));
    else
        ltext = path_tildexpand(ltext);

    vector<string> vpath;
    stringToTokens(ltext, vpath, "/");

    for (const auto& pathelt : vpath) {
        string sterm;
        vector<string> exp;
        if (!expandTerm(
                db, m_reason, SDCM_PATHELT, pathelt, exp, sterm, wrap_prefix(pathelt_prefix))) {
            return false;
        }
        LOGDEB0("SDataPath::toNative: exp size " << exp.size() << ". Exp: " <<
                stringsToString(exp) << "\n");
        if (exp.size() == 1)
            orqueries.push_back(Xapian::Query(exp[0]));
        else 
            orqueries.push_back(Xapian::Query(Xapian::Query::OP_OR, exp.begin(), exp.end()));
        m_curcl += exp.size();
        if (m_curcl >= getMaxCl()) {
            // Do not fail silently: the caller reports m_reason to the user.
            LOGERR("SearchDataClausePath: maximum clause count reached while expanding [" <<
                   ltext << "]\n");
            m_reason = "Maximum Xapian query size exceeded while expanding path elements";
            return false;
        }
    }

    *qp = Xapian::Query(Xapian::Query::OP_PHRASE, orqueries.begin(), orqueries.end());

    // Only wrap in a weight scaling query if the clause weight is not the neutral 1.0
    if (m_weight != 1.0) {
        *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
    }
    return true;
}

// Translate NEAR or PHRASE clause. The query is stored through p (really a Xapian::Query*).
// Note that this may modify m_text (embedded double quotes are neutralized) and m_modifiers.
bool SearchDataClauseDist::toNativeQuery(Rcl::Db &db, void *p)
{
    LOGDEB("SearchDataClauseDist::toNativeQuery\n");

    m_hldata.clear();

    Xapian::Query *qp = static_cast<Xapian::Query *>(p);
    *qp = Xapian::Query();

    // The user entry is turned into a single double-quoted phrase, which processUserString()
    // then lowercases, simplifies, etc., yielding a single (complex) Xapian::Query. Quotes inside
    // the entry have to be neutralized first.
    if (string::npos != m_text.find('\"')) {
        m_text = neutchars(m_text, "\"");
    }
    const string quoted = cstr_dquote + m_text + cstr_dquote;

    const bool useNear = (SCLT_NEAR == m_tp);
    const bool expandphrase = o_expand_phrases || (m_modifiers & SDCM_EXPANDPHRASE);
    if (!useNear && !expandphrase) {
        // Phrase query without phrase expansion: turn stemming off explicitly. This is needed for
        // a single quoted word, which processUserString does not see as a phrase by itself.
        m_modifiers |= SDCM_NOSTEMMING;
    }

    vector<Xapian::Query> pqueries;
    string pbterm;
    const bool processed =
        processUserString(db, quoted, m_reason, pbterm, &pqueries, m_slack, useNear);
    if (!processed) {
        return false;
    }

    if (pqueries.empty()) {
        // Not an error for the caller: the reason is recorded and the query is left empty.
        LOGDEB("SearchDataClauseDist: [" << quoted << "]resolved to null query\n");
        if (pbterm.empty()) {
            m_reason = string("Resolved to null query. Term too long ? : [") + m_text + "]";
        } else {
            m_reason = string("Resolved to null query. Problem term : [") + pbterm + "]";
        }
        return true;
    }

    *qp = pqueries.front();
    // Only wrap in a weight scaling query if the clause weight is not the neutral 1.0
    if (m_weight != 1.0) {
        *qp = Xapian::Query(Xapian::Query::OP_SCALE_WEIGHT, *qp, m_weight);
    }
    return true;
}

} // Namespace Rcl