File: recollq.py

package info (click to toggle)
recoll 1.43.4-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 16,468 kB
  • sloc: cpp: 103,827; python: 9,498; xml: 7,218; ansic: 6,447; sh: 1,212; perl: 130; makefile: 72
file content (146 lines) | stat: -rwxr-xr-x 3,683 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""A Python simplified equivalent of the command line query tool recollq
The input string is always interpreted as a query language string.
This could actually be useful for something after some customization
"""

import sys
import locale
from getopt import getopt

from recoll import recoll, rclextract

# Names of document metadata fields.
# NOTE(review): this tuple is not referenced anywhere else in the visible
# script; presumably it is kept as a reference list for customization —
# confirm before relying on it or removing it.
allmeta = (
    "title",
    "keywords",
    "abstract",
    "url",
    "mimetype",
    "mtime",
    "ipath",
    "fbytes",
    "dbytes",
    "relevancyrating",
)


def Usage():
    """Print the command-line synopsis to standard output and exit.

    The synopsis lists every option accepted by the script's option
    parsing (``-c``, ``-i`` and ``-T``).

    Raises:
        SystemExit: always, with exit status 1.
    """
    print(
        "Usage: recollq.py [-c conf] [-i extra_index] [-T synonyms_file] "
        "<recoll query>"
    )
    sys.exit(1)


class ptrmeths:
    """Supplies the markup placed around matched terms in an abstract.

    An instance is handed to ``query.makedocabstract(doc, methods=...)`` by
    ``doquery``; presumably the recoll module calls ``startMatch`` and
    ``endMatch`` around each highlighted match.
    """

    # Templates for the opening and closing tags emitted around a match.
    _OPEN_TAG = '<span class="pyrclstart" idx="%d" ugroup="%s">'
    _CLOSE_TAG = "</span>"

    def __init__(self, groups):
        """Remember *groups*, an indexable collection of term groups.

        Each entry must itself be indexable, with element 1 being an
        iterable of strings (``doquery`` passes ``query.getgroups()``).
        """
        self.groups = groups

    def startMatch(self, idx):
        """Return the opening ``<span>`` tag for match group number *idx*.

        The tag carries the group index and the space-joined strings found
        in element 1 of that group's entry.
        """
        terms = self.groups[idx][1]
        return self._OPEN_TAG % (idx, " ".join(terms))

    def endMatch(self):
        """Return the closing tag that pairs with ``startMatch``."""
        return self._CLOSE_TAG


def extract(doc):
    """Return what ``Extractor.textextract`` yields for *doc*.

    An ``rclextract.Extractor`` is built for the document and asked to
    extract the entry designated by the document's ``ipath``.
    """
    return rclextract.Extractor(doc).textextract(doc.ipath)


def extractofile(doc, outfilename=""):
    """Write *doc* to a file through ``Extractor.idoctofile``.

    Args:
        doc: Result document; its ``ipath`` and ``mimetype`` are forwarded.
        outfilename: Forwarded as the ``ofilename`` keyword argument
            (empty string by default).

    Returns:
        Whatever ``idoctofile`` returns (the original code treats it as the
        output file name).
    """
    extractor = rclextract.Extractor(doc)
    return extractor.idoctofile(doc.ipath, doc.mimetype, ofilename=outfilename)


def doquery(db, q, maxresults=20):
    """Run the query string *q* against *db* and print the first results.

    Prints the Xapian query reported by the query object, the result
    counts, and then, for each of at most *maxresults* results: its row
    number, its ``title``, ``mtime`` and ``author`` attributes, and an
    abstract produced by ``query.makedocabstract`` with a ``ptrmeths``
    instance supplying the match markup.

    Args:
        db: Connected index object; only its ``query()`` method is used.
        q: Query string handed to ``query.execute()``.
        maxresults: Upper bound on the number of results printed. Defaults
            to 20, the limit that was previously hard-coded.
    """
    # Get query object
    query = db.query()
    # query.sortby("dmtime", ascending=True)

    # Parse/run input query string
    nres = query.execute(q, stemming=0, stemlang="english")
    print(f"Xapian query: [{query.getxquery()}]")
    highlighter = ptrmeths(query.getgroups())

    # Print results:
    print("Result count: %d %d" % (nres, query.rowcount))
    # Alternative to the fetchone() loop below:
    # results = query.fetchmany(nres)
    # for doc in results:

    for _ in range(min(nres, maxresults)):
        doc = query.fetchone()
        # NOTE(review): presumably some versions of the module expose `next`
        # as an integer attribute while others provide `rownumber` instead —
        # confirm against the installed recoll module.
        rownum = query.next if isinstance(query.next, int) else query.rownumber
        print("%d:" % (rownum,))

        # for k,v in doc.items().items():
        #    print(f"KEY: {k} VALUE: {v}")
        # continue

        # outfile = extractofile(doc) ; print(f"outfile: {outfile} url: {doc.url}")

        for name in ("title", "mtime", "author"):
            value = getattr(doc, name)
            # value = doc.get(name)
            if value is None:
                print(f"{name}: (None)")
            else:
                print(f"{name} : {value}")
        # doc.setbinurl(bytearray("toto"))
        # burl = doc.getbinurl(); print("Bin URL : [%s]"%(doc.getbinurl(),))
        # Named `abstract` rather than `abs` to avoid shadowing the builtin.
        abstract = query.makedocabstract(doc, methods=highlighter)
        print(f"{abstract}\n")


#        fulldoc = extract(doc)
#        print("FULLDOC MIMETYPE %s TEXT: %s" % (fulldoc.mimetype,fulldoc.text))


########################################### MAIN

# Script body: parse the command line, connect to the index and run the query.
if len(sys.argv) < 2:
    Usage()

# The former locale.getdefaultlocale() call was dropped: its results were
# never used and the function is deprecated since Python 3.11.
confdir = ""
extra_dbs = []
# Snippet params
maxchars = 120
contextwords = 4
syngroupsfile = ""
# Process options:
#   [-c confdir] [-i extra_db [-i extra_db] ...] [-T synonyms_file]
try:
    options, args = getopt(sys.argv[1:], "c:i:T:")
except Exception as ex:
    # getopt signals unknown options / missing arguments by raising.
    print(ex)
    sys.exit(1)
for opt, val in options:
    if opt == "-c":
        confdir = val
    elif opt == "-i":
        extra_dbs.append(val)
    elif opt == "-T":
        syngroupsfile = val
    else:
        # Defensive: every option in the getopt spec is handled above.
        print("Bad opt: %s" % (opt,))
        Usage()

# The query should be in the remaining arg(s)
if not args:
    print("No query found in command line")
    Usage()
# Join the words with single spaces (no trailing separator).
q = " ".join(args)

print(f"QUERY: [{q}]")
db = recoll.connect(confdir=confdir, extra_dbs=extra_dbs)
db.setAbstractParams(maxchars=maxchars, contextwords=contextwords)
if syngroupsfile:
    db.setSynonymsFile(syngroupsfile)
doquery(db, q)