1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301
|
#!/usr/bin/python3
import errno
import json
import logging
import os
import sqlite3
import subprocess
import sys
import time
import apt_pkg
apt_pkg.init()
# TODO:
# - add apt.conf.d snippet for download handling
# - add apt::update::post-invoke-success handler
# Base priority assigned to a package according to the archive component
# it comes from; looked up when a new row is inserted into "packages".
component_priorities = {
    "main": 120,
    "universe": 100,
    "contrib": 80,
    "restricted": 60,
    "non-free-firmware": 50,
    "non-free": 40,
    "multiverse": 20,
}
# pkgnames in here are blacklisted
# Schema of the generated database: "packages" holds one row per package,
# "commands" maps a command name to the package (pkgID) that ships it.
# The foreign key must name the "packages" table; it used to point at a
# non-existent "pkgs" table, which makes every INSERT into "commands" fail
# as soon as foreign-key enforcement is switched on.
create_db_sql = """
CREATE TABLE IF NOT EXISTS "commands"
    (
    [cmdID] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    [pkgID] INTEGER NOT NULL,
    [command] TEXT,
    FOREIGN KEY ([pkgID]) REFERENCES "packages" ([pkgID])
    );
CREATE TABLE IF NOT EXISTS "packages"
    (
    [pkgID] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
    [name] TEXT,
    [version] TEXT,
    [component] TEXT,
    [priority] INTEGER
    );
CREATE INDEX IF NOT EXISTS idx_commands_command ON commands (command);
CREATE INDEX IF NOT EXISTS idx_packages_name ON packages (name);
"""
# FIXME:
# - add support for foreign arch in DB (add foreign commands if there
#   is no native command)
# - add support for -backports: pkgname must be appended with /backports
#   and only be shown if there is no other command available
# - do not re-create the DB every time, only if sources.list changed
# - add "last-mtime" into the DB, then we can skip all packages files
#   where the mtime is older and we only need to update the DB
class measure:
    """Context manager that adds the wall-clock time of its body to a dict.

    On exit, the seconds elapsed since entry are added to ``stats[what]``;
    the key is created with a value of 0 first if it is not present yet.
    """

    def __init__(self, what, stats):
        self.what = what
        self.stats = stats

    def __enter__(self):
        # Remember the start timestamp; nothing is bound by "as".
        self.now = time.time()

    def __exit__(self, *args):
        # Make sure the bucket exists, then accumulate the elapsed time.
        self.stats.setdefault(self.what, 0)
        self.stats[self.what] += time.time() - self.now
def rm_f(path):
    """Remove *path*, silently ignoring the case where it does not exist.

    Any other OSError raised by os.remove() is propagated to the caller.
    """
    try:
        os.remove(path)
    except OSError as err:
        if err.errno == errno.ENOENT:
            # Already gone: that is exactly what the caller wanted.
            return
        raise
class DbCreator:
    """Build the command lookup SQLite database from a list of index files.

    Every path in ``files`` whose name contains "Contents" is streamed
    through ``apt-helper cat-file`` and parsed as a Contents index; every
    other path is opened directly and parsed as a Commands file with
    ``apt_pkg.TagFile``.
    """

    def __init__(self, files):
        self.files = files
        self.primary_arch = apt_pkg.get_architectures()[0]
        # "total_time" holds the start timestamp until _fill_commands()
        # replaces it with the elapsed number of seconds.
        self.stats = {"total": 0, "total_time": time.time()}

    def create(self, dbname):
        """Create or refresh the database at *dbname*.

        Nothing is done when the metadata recorded next to the database
        (``dbname + ".metadata"``) still matches the input files.  Otherwise
        the data is written to ``dbname + ".tmp"``, renamed over *dbname*,
        and fresh metadata is stored.

        Exits the process with status 0 when the temporary database is
        locked by another process; any other sqlite3.OperationalError is
        re-raised.
        """
        metadata_file = dbname + ".metadata"
        if not self._db_update_needed(metadata_file):
            logging.info(
                "%s does not require an update (inputs unchanged)", dbname)
            return
        tmpdb = dbname + ".tmp"
        con = sqlite3.connect(tmpdb)
        try:
            # "with con" only commits (or rolls back on error); it does not
            # close the connection, hence the surrounding try/finally so the
            # handle is released before the file is renamed into place.
            with con:
                try:
                    con.executescript(create_db_sql)
                    self._fill_commands(con)
                except sqlite3.OperationalError as e:
                    # There might be a parallel cnf-update-db process,
                    # updating the tmpdb. Just stop execution in this case,
                    # as the other process should produce the correct result.
                    if str(e) == "database is locked":
                        logging.warning(
                            "%s is locked by another process. Ignoring.",
                            tmpdb)
                        sys.exit(0)
                    raise
        finally:
            con.close()
        # remove now stale metadata
        rm_f(metadata_file)
        # put database in place
        os.rename(tmpdb, dbname)
        # add new metadata
        with open(metadata_file, "w") as fp:
            json.dump(self._calc_input_metadata(), fp)

    def _db_update_needed(self, metadata_file):
        """Return True unless *metadata_file* matches the current inputs.

        A missing, unreadable or unparsable metadata file (or any other
        error while comparing) counts as "update needed".
        """
        if not os.path.exists(metadata_file):
            return True
        try:
            with open(metadata_file) as fp:
                meta = json.load(fp)
            return meta != self._calc_input_metadata()
        except Exception as e:
            # Deliberately broad: any failure simply triggers a rebuild.
            logging.warning("cannot read %s: %s", metadata_file, e)
            return True

    def _calc_input_metadata(self):
        """Return a dict mapping each input path to selected os.stat() fields."""
        meta = {}
        for p in self.files:
            st = os.stat(p)
            meta[p] = {
                'st_ino': st.st_ino,
                'st_dev': st.st_dev,
                'st_uid': st.st_uid,
                'st_gid': st.st_gid,
                'st_size': st.st_size,
                'st_mtime': st.st_mtime,
            }
        return meta

    def _fill_commands(self, con):
        """Parse every input file into *con* and record the total run time."""
        for f in self.files:
            if "Contents" in f:
                proc = subprocess.Popen(
                    ["/usr/lib/apt/apt-helper", "cat-file", f],
                    stdout=subprocess.PIPE)
                # Leaving the Popen context closes the pipe and waits for
                # the helper to terminate.
                with proc:
                    self._parse_single_contents_file(con, f, proc.stdout)
            else:
                with open(f) as fp:
                    self._parse_single_commands_file(con, fp)
        self.stats["total_time"] = time.time() - self.stats["total_time"]
        logging.info("processed %i packages in %.2fs",
                     self.stats["total"], self.stats["total_time"])

    def _in_db(self, con, command, pkgname):
        """Return (pkgID, name, version) if *pkgname* already provides
        *command* in the database, otherwise None."""
        already_in_db = con.execute(
            """
            SELECT packages.pkgID, name, version
            FROM commands
            INNER JOIN packages on packages.pkgID = commands.pkgID
            WHERE commands.command=? AND packages.name=?;
            """, (command, pkgname)).fetchone()
        return already_in_db

    def _delete_pkgid(self, con, pkgid):
        """Delete the package row *pkgid* and all commands attached to it."""
        con.execute("DELETE FROM packages WHERE pkgID=?", (pkgid,))
        con.execute("DELETE FROM commands WHERE pkgID=?", (pkgid,))

    def _get_pkgid(self, con, pkgname):
        """Return the pkgID of *pkgname*, or None if it is not in the DB."""
        have_pkg = con.execute(
            "SELECT pkgID from packages WHERE name=?", (pkgname,)).fetchone()
        if have_pkg:
            return have_pkg[0]
        return None

    def _insert_package(self, con, pkgname, version, component, priority):
        """Insert a package row and return its newly assigned pkgID."""
        cur = con.execute("""
            INSERT INTO packages (name, version, component, priority)
            VALUES (?, ?, ?, ?);
            """, (pkgname, version, component, priority))
        return cur.lastrowid

    def _insert_command(self, con, command, pkg_id):
        """Insert a row linking *command* to the package *pkg_id*."""
        con.execute("""
            INSERT INTO commands (command, pkgID) VALUES (?, ?);
            """, (command, pkg_id))

    def _parse_single_commands_file(self, con, fp):
        """Parse one Commands file (apt tag-file format) from *fp* into *con*.

        The first stanza is the header (suite, component, arch).  Files
        whose suite ends in "-backports", or whose arch is neither "all"
        nor the primary architecture, are skipped entirely.
        """
        tagf = apt_pkg.TagFile(fp)
        # file empty
        if not tagf.step():
            return
        # read header
        suite = tagf.section["suite"]
        # FIXME: support backports
        if suite.endswith("-backports"):
            return
        component = tagf.section["component"]
        arch = tagf.section["arch"]
        # FIXME: add code for secondary arch handling!
        if arch != "all" and arch != self.primary_arch:
            return
        # step over the pkgs
        while tagf.step():
            self.stats["total"] += 1
            pkgname = tagf.section["name"]
            # allow to override the visible pkgname to accommodate for
            # cases like "python2.7" which is part of python2.7-minimal
            # but users should just install python2.7
            if tagf.section.get("visible-pkgname"):
                pkgname = tagf.section["visible-pkgname"]
            version = tagf.section.get("version", "")
            ignored = tagf.section.get("ignore-commands", "")
            ignore_commands = set(ignored.split(",")) if ignored else set()
            for command in tagf.section["commands"].split(","):
                if command in ignore_commands:
                    continue
                # see if we have the command already
                with measure("sql_already_db", self.stats):
                    already_in_db = self._in_db(con, command, pkgname)
                if already_in_db:
                    # we found a version that is higher than what we have
                    # in the DB -> remove current, insert higher
                    if apt_pkg.version_compare(version, already_in_db[2]) > 0:
                        logging.debug(
                            "replacing exiting %s in DB (higher version)",
                            command)
                        with measure("sql_delete_already_in_db", self.stats):
                            self._delete_pkgid(con, already_in_db[0])
                    else:
                        logging.debug(
                            "skipping %s from %s (lower/same version)",
                            command, suite)
                        continue
                logging.debug("adding %s from %s/%s (%s)",
                              command, pkgname, version, suite)
                # insert new data
                with measure("sql_have_pkg", self.stats):
                    pkg_id = self._get_pkgid(con, pkgname)
                if not pkg_id:
                    priority = component_priorities[component]
                    priority += int(tagf.section.get("priority-bonus", "0"))
                    with measure("sql_insert_pkg", self.stats):
                        pkg_id = self._insert_package(
                            con, pkgname, version, component, priority)
                with measure("sql_insert_cmd", self.stats):
                    self._insert_command(con, command, pkg_id)

    def _parse_single_contents_file(self, con, f, fp):
        """Parse one Contents index, read as bytes lines from *fp*, into *con*.

        Only paths starting with usr/sbin, usr/bin, bin or sbin are used;
        the basename of the path becomes the command.  *f* (the file name)
        is currently unused.  No version is known for these entries, so a
        command/package pair that is already in the DB is always skipped.
        """
        # read header
        suite = None  # FIXME
        for line in fp:
            line = line.decode("utf-8")
            if not line.startswith(("usr/sbin", "usr/bin", "bin", "sbin")):
                continue
            try:
                # The package list is the last whitespace-separated column
                # and the path itself may contain whitespace, so split from
                # the right.
                command, pkgnames = line.rsplit(None, 1)
            except ValueError:
                continue
            command = os.path.basename(command)
            for qualified in pkgnames.split(','):
                qualified = qualified.strip()
                try:
                    section, pkgname = qualified.rsplit('/', 1)
                except ValueError:
                    pkgname = qualified
                    section = "unknown"
                if len(section.split('/')) == 2:
                    component, section = section.split('/')
                else:
                    component = 'main'
                # FIXME - Don't really know how.
                version = None
                # see if we have the command already
                with measure("sql_already_db", self.stats):
                    already_in_db = self._in_db(con, command, pkgname)
                if already_in_db:
                    # Without a version there is nothing to compare, so an
                    # existing entry always wins.
                    logging.debug(
                        "skipping %s from %s (lower/same version)",
                        command, suite)
                    continue
                logging.debug("adding %s from %s/%s (%s)",
                              command, pkgname, version, suite)
                # insert new data
                with measure("sql_have_pkg", self.stats):
                    pkg_id = self._get_pkgid(con, pkgname)
                if not pkg_id:
                    priority = component_priorities[component]
                    with measure("sql_insert_pkg", self.stats):
                        pkg_id = self._insert_package(
                            con, pkgname, version, component, priority)
                with measure("sql_insert_cmd", self.stats):
                    self._insert_command(con, command, pkg_id)
def main(argv):
    """Command-line entry point: build the DB argv[1] from the files argv[2:].

    Returns the process exit status: 1 after printing the usage text when
    fewer than two arguments follow the program name, otherwise 0 once the
    database has been created and the collected stats have been logged.
    """
    logging.basicConfig(level=logging.DEBUG)
    if len(argv) < 3:
        prog = argv[0]
        print("usage: %s <output-db-path> <files...>" % prog)
        print(" e.g.: %s commands.db ./dists/*/*/*/Commands-*" % prog)
        # This line used to pass the program name as a second print()
        # argument instead of formatting it, leaving a literal "%s".
        print(" e.g.: %s /var/lib/command-not-found/commands.db /var/lib/apt/lists/*Commands-*" % prog)
        return 1
    col = DbCreator(argv[2:])
    col.create(argv[1])
    for stat, amount in col.stats.items():
        logging.debug("%s: %s", stat, amount)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|