1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
|
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, as published by Sam Hocevar. See
# http://sam.zoy.org/wtfpl/COPYING for more details.
import os, re
import xapian
import warnings
# Setup configuration
# This tells python-apt that we've seen the warning about the API not being
# stable yet, and we don't want to see every time we run the program
warnings.filterwarnings("ignore","apt API not stable yet")
import apt
warnings.resetwarnings()
# Setup configuration
XAPIANDBPATH = os.environ.get("AXI_DB_PATH", "/var/lib/apt-xapian-index")
XAPIANDB = XAPIANDBPATH + "/index"
XAPIANDBVALUES = XAPIANDBPATH + "/values"
# This is our little database of simple Debtags filters we provide: the name
# entered by the user in "--type" maps to a piece of Xapian query
filterdb = dict(
# We can do simple AND queries...
game = xapian.Query(xapian.Query.OP_AND, ('XTuse::gameplaying', 'XTrole::program')),
# Or we can do complicate binary expressions...
gui = xapian.Query(xapian.Query.OP_AND, xapian.Query('XTrole::program'),
xapian.Query(xapian.Query.OP_OR, 'XTinterface::x11', 'XTinterface::3d')),
cmdline = xapian.Query(xapian.Query.OP_AND, 'XTrole::program', 'XTinterface::commandline'),
editor = xapian.Query(xapian.Query.OP_AND, 'XTrole::program', 'XTuse::editing')
# Feel free to invent more
)
def termsForSimpleQuery(keywords):
"""
Given a list of user-supplied keywords, build the list of terms that will
go in a simple Xapian query.
If a term is lowercase and contains '::', then it's considered to be a
Debtags tag.
"""
stemmer = xapian.Stem("english")
terms = []
for word in keywords:
if word.islower() and word.find("::") != -1:
# FIXME: A better way could be to look up arguments in
# /var/lib/debtags/vocabulary
#
# According to /var/lib/apt-xapian-index/README, Debtags tags are
# indexed with the 'XT' prefix.
terms.append("XT"+word)
else:
# If it is not a Debtags tag, then we consider it a normal keyword.
word = word.lower()
terms.append(word)
# If the word has a stemmed version, add it to the query.
# /var/lib/apt-xapian-index/README tells us that stemmed terms have a
# 'Z' prefix.
stem = stemmer(word)
if stem != word:
terms.append("Z"+stem)
return terms
def addSimpleFilterToQuery(query, filtername):
"""
If filtername is not None, lookup the simple filter database for the name
and add its filter to the query. Returns the enhanced query.
"""
# See if the user wants to use one of the result filters
if filtername:
if filtername in filterdb:
# If a filter was requested, AND it with the query
return xapian.Query(xapian.Query.OP_AND, filterdb[filtername], query)
else:
raise RuntimeError("Invalid filter type. Try one of " + ", ".join(sorted(filterdb.keys())))
else:
return query
def show_mset(mset):
"""
Show a Xapian result mset as a list of packages and their short descriptions
"""
# Display the top 20 results, sorted by how well they match
cache = apt.Cache()
print "%i results found." % mset.get_matches_estimated()
print "Results 1-%i:" % mset.size()
for m in mset:
# /var/lib/apt-xapian-index/README tells us that the Xapian document data
# is the package name.
name = m.document.get_data()
# Get the package record out of the Apt cache, so we can retrieve the short
# description
pkg = cache[name]
# Print the match, together with the short description
if pkg.candidate:
print "%i%% %s - %s" % (m.percent, name, pkg.candidate.summary)
def readValueDB(pathname):
"""
Read the "/etc/services"-style database of value indices
"""
try:
rmcomments = re.compile("\s*(#.*)?$")
splitter = re.compile("\s+")
values = {}
for idx, line in enumerate(open(pathname)):
# Remove comments and trailing spaces
line = rmcomments.sub("", line)
# Skip empty lines
if len(line) == 0: continue
# Split the line
fields = splitter.split(line)
if len(fields) < 2:
print >>sys.stderr, "Ignoring line %s:%d: only 1 value found when I need at least the value name and number" % (pathname, idx+1)
continue
# Parse the number
try:
number = int(fields[1])
except ValueError:
print >>sys.stderr, "Ignoring line %s:%d: the second column (\"%s\") must be a number" % (pathname, idx+1, fields[1])
continue
values[fields[0]] = number
for alias in fields[2:]:
values[alias] = number
except OSError, e:
# If we can't read the database, fallback to defaults
print >>sys.stderr, "Cannot read %s: %s. Using a minimal default configuration" % (pathname, e)
values = dict(
installedsize = 1,
packagesize = 2
)
return values
|