1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
|
#!/usr/bin/python3
# SPDX-License-Identifier: MPL-2.0
# SPDX-FileCopyrightText: 2020-2025 Collabora Ltd.
# SPDX-FileCopyrightText: 2020-2025 Walter Lozano <walter.lozano@collabora.com>
# SPDX-FileCopyrightText: 2021 Emanuele Aina <emanuele.aina@collabora.com>
# SPDX-FileCopyrightText: 2022 Vignesh Raman <vignesh.raman@collabora.com>
# SPDX-FileCopyrightText: 2022 Andre Moreira Magalhaes <andre.magalhaes@collabora.com>
# SPDX-FileCopyrightText: 2022-2024 Ryan Gonzalez <ryan.gonzalez@collabora.com>
# SPDX-FileCopyrightText: 2022-2025 Dylan Aïssi <dylan.aissi@collabora.com>
# SPDX-FileCopyrightText: 2022 Detlev Casanova <detlev.casanova@collabora.com>
# SPDX-FileCopyrightText: 2024 Andrej Shadura <andrew.shadura@collabora.co.uk>
import argparse
import gzip
import json
import os
import sys
from os.path import isdir, isfile, join
# Default input locations; used as the argparse defaults for --dir and
# --dpkg-status respectively.
DEFAULT_METADATA_DIR = "/usr/share/doc"
DEFAULT_DPKG_STATUS = "/var/lib/dpkg/status"
# Values accepted by --verbose. Per its help text: 0 = image, 1 = package,
# 2 = binary, 3 = source. The visible code only compares against VERBOSE_IMAGE.
VERBOSE_IMAGE = 0
VERBOSE_PACKAGE = 1
VERBOSE_INSTALLED_FILE = 2
VERBOSE_SOURCE = 3
# Not referenced in the visible code; presumably a cap on copyright text
# length -- TODO confirm against other users of this constant.
COPYRIGHT_LENGTH = 500
# Not referenced in the visible code; presumably a marker substituted for
# copyright text containing non-ASCII characters -- TODO confirm.
COPYRIGHT_CONTAINS_NONASCII_CHARS = "CopyrightContainsNonAsciiCharacters"
# Placeholder stored as the license/copyright value when a package provides
# no such information.
NO_INFO_FOUND = "NoInfoFound"
def open_potentially_gzipped(path, *args, **kw):
    """Open *path*, transparently handling gzip-compressed files.

    The decision is made purely on the file name: anything whose string form
    ends in ``.gz`` is opened with ``gzip.open``, everything else with the
    builtin ``open``. All extra positional and keyword arguments are passed
    through unchanged to whichever opener is selected.

    Note that the two openers have different default modes (``gzip.open``
    defaults to binary, ``open`` to text), so callers that care should pass
    an explicit mode.
    """
    opener = gzip.open if str(path).endswith(".gz") else open
    return opener(path, *args, **kw)
class BomGenerator:
    """Build a license/copyright bill of materials (BOM) for an image.

    Per-package JSON metadata files are collected from the sub-directories of
    ``metadata_dir`` and cross-checked against the package names listed in a
    dpkg status file, so that packages lacking metadata can be reported.
    """

    def __init__(self, metadata_dir, dpkg_status, verbose):
        """Store the generator configuration.

        Args:
            metadata_dir: directory whose sub-directories hold the
                ``*_metadata_*`` JSON files.
            dpkg_status: path of the dpkg status file to read package
                names from.
            verbose: integer verbosity; any value above ``VERBOSE_IMAGE``
                adds a per-package list to the generated BOM.
        """
        self.metadata_dir = metadata_dir
        self.dpkg_status = dpkg_status
        self.verbose = verbose

    def get_installed_packages(self):
        """Return the set of package names found in the dpkg status file.

        Every ``Package:`` field is collected; the ``Status:`` field is not
        inspected.
        NOTE(review): stanzas for removed-but-not-purged packages are
        therefore counted as installed too -- confirm this is intended.

        Raises:
            OSError: if the status file cannot be opened.
        """
        installed_packages = set()
        # Only the ASCII "Package:" fields matter here, so undecodable bytes
        # in other fields (e.g. maintainer names) are replaced instead of
        # aborting the whole scan under a non-UTF-8 locale.
        with open(
            self.dpkg_status, encoding="utf-8", errors="replace"
        ) as dpkg_status:
            # Iterate lazily rather than loading the whole file in memory.
            for line in dpkg_status:
                if not line.startswith("Package:"):
                    continue
                # Take everything after the first colon so that the amount
                # of whitespace following it does not matter.
                name = line.partition(":")[2].strip()
                if name:
                    installed_packages.add(name)
        return installed_packages

    def scan_metadata(self):
        """Scan the metadata directory and assemble the BOM.

        Each regular file whose name contains ``_metadata_`` and that lives
        in a direct sub-directory of ``metadata_dir`` is parsed as JSON
        (optionally gzip-compressed). The package name is the part of the
        file name before the first underscore. The ``license`` and
        ``copyright`` keys are read from the JSON document, defaulting to
        ``[NO_INFO_FOUND]`` when absent; both are presumably lists of
        strings -- verify against the metadata producer.

        Packages listed in the dpkg status file but without any metadata
        file trigger a warning on stderr and contribute ``NO_INFO_FOUND`` to
        the image-level sets.

        Returns:
            dict: ``{"license": [...], "copyright": [...]}`` with sorted
            values, plus a ``"packages"`` list of
            ``{"name", "license", "copyright"}`` dicts when ``verbose`` is
            above ``VERBOSE_IMAGE``.
        """
        image_licenses = set()
        image_copyright = set()
        packages = []
        processed_packages = set()
        installed_packages = self.get_installed_packages()
        per_package = self.verbose > VERBOSE_IMAGE

        for d in os.listdir(self.metadata_dir):
            dirpath = join(self.metadata_dir, d)
            if not isdir(dirpath):
                continue
            for f in os.listdir(dirpath):
                metadata_path = join(dirpath, f)
                if "_metadata_" not in f or not isfile(metadata_path):
                    continue
                package_name = f.split("_")[0]
                with open_potentially_gzipped(metadata_path) as fm:
                    metadata = json.load(fm)
                package = {
                    "name": package_name,
                    "license": metadata.get("license", [NO_INFO_FOUND]),
                    "copyright": metadata.get("copyright", [NO_INFO_FOUND]),
                }
                processed_packages.add(package_name)
                if per_package:
                    packages.append(package)
                image_licenses.update(package["license"])
                image_copyright.update(package["copyright"])

        missing_packages = sorted(installed_packages - processed_packages)
        if missing_packages:
            print(
                "WARNING: there are packages without license information",
                file=sys.stderr,
            )
            if per_package:
                # Record the packages lacking metadata in the per-package
                # list too; previously these entries were built and then
                # silently discarded.
                packages.extend(
                    {
                        "name": p,
                        "license": [NO_INFO_FOUND],
                        "copyright": [NO_INFO_FOUND],
                    }
                    for p in missing_packages
                )
            image_licenses.add(NO_INFO_FOUND)
            image_copyright.add(NO_INFO_FOUND)

        # Sort so the generated BOM is reproducible across runs; set
        # iteration order is not stable. key=str keeps this from raising
        # should the metadata contain non-string entries.
        bom = {
            "license": sorted(image_licenses, key=str),
            "copyright": sorted(image_copyright, key=str),
        }
        if per_package:
            bom["packages"] = packages
        return bom
def main(argv):
    """Command-line entry point: generate the BOM and emit it as JSON.

    Args:
        argv: list of command-line arguments, without the program name.
            Passing ``None`` makes argparse fall back to ``sys.argv[1:]``.

    The BOM is written to the file given with ``--out`` when present,
    otherwise it is printed on standard output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--dir",
        default=DEFAULT_METADATA_DIR,
        help="directory to search for information",
    )
    parser.add_argument("-o", "--out", help="output file")
    parser.add_argument(
        "-s", "--dpkg-status", default=DEFAULT_DPKG_STATUS, help="dpkg status file"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        type=int,
        default=VERBOSE_IMAGE,
        help="verbose use in output 0: image, 1: package, 2: binary, 3: source",
    )
    # Parse the arguments handed in by the caller; parsing with no argument
    # would silently ignore ``argv`` and read sys.argv instead.
    args = parser.parse_args(argv)

    bom_generator = BomGenerator(args.dir, args.dpkg_status, args.verbose)
    bom = bom_generator.scan_metadata()

    if args.out:
        # Write-only access is sufficient; the file is never read back.
        with open(args.out, "w") as output:
            json.dump(bom, output)
    else:
        print(json.dumps(bom))
# Run the command-line interface only when executed as a script, handing over
# the arguments without the program name.
if __name__ == "__main__":
    main(argv=sys.argv[1:])
|