File: filtseq.cpp

package info (click to toggle)
recoll 1.43.7-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 16,512 kB
  • sloc: cpp: 104,170; python: 9,500; xml: 7,248; ansic: 6,447; sh: 1,212; perl: 130; makefile: 72
file content (119 lines) | stat: -rw-r--r-- 4,147 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
/* Copyright (C) 2005-2021 J.F.Dockes
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the
 *   Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
#include "autoconfig.h"

#include "log.h"
#include "filtseq.h"
#include "rclconfig.h"


// Decide whether a document is accepted by a filter specification.
//
// The criteria are ORed: the first one which accepts the document ends the
// evaluation. fs.crits[n] is the kind of the n-th criterion and fs.values[n]
// its associated value.
//
// Returns true if at least one criterion accepts the document, false if
// none does (including when there are no criteria at all).
static bool filter(const DocSeqFiltSpec& fs, const Rcl::Doc *x)
{
    LOGDEB2("  Filter: ncrits " << fs.crits.size() << "\n");
    // Try each criterion in turn, stopping at the first success.
    const auto ncrits = fs.crits.size();
    for (decltype(fs.crits.size()) n = 0; n < ncrits; ++n) {
        const auto crit = fs.crits[n];

        if (crit == DocSeqFiltSpec::DSFS_PASSALL) {
            // Unconditional accept.
            return true;
        }

        if (crit == DocSeqFiltSpec::DSFS_MIMETYPE) {
            LOGDEB2(" filter: MIMETYPE: me [" << fs.values[n] << "] doc [" << x->mimetype << "]\n");
            // Exact comparison against the document MIME type.
            if (x->mimetype == fs.values[n]) {
                return true;
            }
        } else if (crit == DocSeqFiltSpec::DSFS_QLANG) {
            // Query language criteria are only reported here, they never
            // accept a document.
            LOGDEB(" filter: QLANG [" << fs.values[n] << "]!!\n");
        }
    }
    // Every criterion was tried and none matched.
    return false;
}

// Build a filtering layer on top of the sequence iseq.
//
// conf is kept in m_config (setFiltSpec() uses it to expand "rclcat:"
// categories into MIME types) and filtspec is installed right away as the
// active filter.
DocSeqFiltered::DocSeqFiltered(RclConfig *conf,
                               std::shared_ptr<DocSequence> iseq,
                               DocSeqFiltSpec &filtspec)
    : DocSeqModifier(iseq),
      m_config(conf)
{
    this->setFiltSpec(filtspec);
}

bool DocSeqFiltered::setFiltSpec(const DocSeqFiltSpec &filtspec)
{
    LOGDEB0("DocSeqFiltered::setFiltSpec\n");
    for (unsigned int i = 0; i < filtspec.crits.size(); i++) {
        switch (filtspec.crits[i]) {
        case DocSeqFiltSpec::DSFS_MIMETYPE:
            m_spec.orCrit(filtspec.crits[i], filtspec.values[i]);
            break;
        case DocSeqFiltSpec::DSFS_QLANG: {
            // There are very few lang constructs that we can interpret. The
            // default config uses rclcat:value only. That will be all for now...
            std::string val = filtspec.values[i];
            if (val.find("rclcat:") == 0) {
                std::string catg = val.substr(7);
                std::vector<std::string> tps;
                m_config->getMimeCatTypes(catg, tps);
                for (const auto& mime : tps) {
                    LOGDEB2("Adding mime: [" << mime << "]\n");
                    m_spec.orCrit(DocSeqFiltSpec::DSFS_MIMETYPE, mime);
                }
            }
        }
            break;
        default:
            break;
        }
    }
    // If m_spec ends up empty, pass everything, better than filtering all.
    if (m_spec.crits.empty()) {
        m_spec.orCrit(DocSeqFiltSpec::DSFS_PASSALL, "");
    }
    m_dbindices.clear();
    return true;
}

// Fetch the document at position idx of the filtered sequence.
//
// m_dbindices[k] holds the index, in the source sequence m_seq, of the k-th
// document accepted by the filter. The list is extended lazily: when idx is
// beyond what is already known, source documents are fetched and filtered,
// starting just after the last accepted one, until idx is reached.
//
// idx: zero-based position in the filtered sequence.
// doc: receives the document on success.
// The third parameter is not used by this implementation.
//
// Returns true if doc was set. Returns false if idx is negative, or if the
// source sequence's getDoc() fails before the requested position is reached.
bool DocSeqFiltered::getDoc(int idx, Rcl::Doc &doc, std::string *)
{
    LOGDEB2("DocSeqFiltered::getDoc() fetching " << idx << "\n");

    // A negative position can never be valid. Without this check it would
    // be taken for an already known position and used to index m_dbindices
    // out of bounds.
    if (idx < 0)
        return false;

    if (idx < static_cast<int>(m_dbindices.size())) {
        // The corresponding backend index is already known
        if (!m_seq->getDoc(m_dbindices[idx], doc))
            return false;
        return true;
    }

    // Have to fetch docs and filter until we get enough or fail.
    // The vector is left to grow through push_back(): reserving exactly
    // idx+1 on each call typically forces one reallocation per call when
    // positions are requested in increasing order.

    // First backend seq doc we fetch is the one after last stored
    int backend_idx = m_dbindices.empty() ? 0 : m_dbindices.back() + 1;

    // Loop until we get enough docs
    Rcl::Doc tdoc;
    while (idx >= static_cast<int>(m_dbindices.size())) {
        if (!m_seq->getDoc(backend_idx, tdoc))
            return false;
        if (filter(m_spec, &tdoc)) {
            m_dbindices.push_back(backend_idx);
        }
        backend_idx++;
    }
    // The loop exits right after storing the index for position idx, so
    // tdoc is the requested document.
    doc = tdoc;
    return true;
}