File: generate_licensing_bom

package info (click to toggle)
sbom-toolkit 0.0.20260112
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 128 kB
  • sloc: perl: 599; python: 451; makefile: 13
file content (152 lines) | stat: -rwxr-xr-x 5,024 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/usr/bin/python3

# SPDX-License-Identifier: MPL-2.0
# SPDX-FileCopyrightText: 2020-2025 Collabora Ltd.
# SPDX-FileCopyrightText: 2020-2025 Walter Lozano <walter.lozano@collabora.com>
# SPDX-FileCopyrightText: 2021 Emanuele Aina <emanuele.aina@collabora.com>
# SPDX-FileCopyrightText: 2022 Vignesh Raman <vignesh.raman@collabora.com>
# SPDX-FileCopyrightText: 2022 Andre Moreira Magalhaes <andre.magalhaes@collabora.com>
# SPDX-FileCopyrightText: 2022-2024 Ryan Gonzalez <ryan.gonzalez@collabora.com>
# SPDX-FileCopyrightText: 2022-2025 Dylan Aïssi <dylan.aissi@collabora.com>
# SPDX-FileCopyrightText: 2022 Detlev Casanova <detlev.casanova@collabora.com>
# SPDX-FileCopyrightText: 2024 Andrej Shadura <andrew.shadura@collabora.co.uk>

import argparse
import gzip
import json
import os
import sys
from os.path import isdir, isfile, join

# Default values for the --dir and --dpkg-status command-line options.
DEFAULT_METADATA_DIR = "/usr/share/doc"
DEFAULT_DPKG_STATUS = "/var/lib/dpkg/status"

# Detail levels accepted by --verbose (0: image, 1: package, 2: binary,
# 3: source).  Within this file only VERBOSE_IMAGE is compared against: any
# higher level adds the per-package list to the generated BOM.
VERBOSE_IMAGE = 0
VERBOSE_PACKAGE = 1
VERBOSE_INSTALLED_FILE = 2
VERBOSE_SOURCE = 3

# NOTE(review): the two COPYRIGHT_* constants are not referenced anywhere in
# this file; presumably they mirror values used by a sibling tool -- confirm
# before removing them.
COPYRIGHT_LENGTH = 500
COPYRIGHT_CONTAINS_NONASCII_CHARS = "CopyrightContainsNonAsciiCharacters"

# Placeholder recorded as license/copyright when no information is available.
NO_INFO_FOUND = "NoInfoFound"


def open_potentially_gzipped(path, *args, **kw):
    """Open *path*, going through gzip when its name ends in ``.gz``.

    All further positional and keyword arguments are forwarded untouched to
    ``gzip.open`` or to the builtin ``open``.  When no mode is passed the two
    differ: ``gzip.open`` defaults to binary and ``open`` to text.
    """
    opener = gzip.open if str(path).endswith(".gz") else open
    return opener(path, *args, **kw)


class BomGenerator:
    """Build a licensing bill of materials (BOM) from per-package metadata.

    Attributes (all taken verbatim from the constructor arguments):
        metadata_dir: directory whose sub-directories are searched for
            ``*_metadata_*`` JSON files (optionally gzip-compressed).
        dpkg_status: path of the dpkg status file listing the packages.
        verbose: integer detail level; values above ``VERBOSE_IMAGE`` add a
            per-package list to the BOM.
    """

    def __init__(self, metadata_dir, dpkg_status, verbose):
        self.metadata_dir = metadata_dir
        self.dpkg_status = dpkg_status
        self.verbose = verbose

    def get_installed_packages(self):
        """Return the set of package names found in the dpkg status file.

        Every ``Package:`` field is collected; the ``Status:`` field is not
        inspected, so any stanza present in the file counts.
        """
        installed_packages = set()
        # Only the ASCII "Package:" lines are consumed, so decode as UTF-8
        # independently of the locale and never abort on a stray byte in an
        # unrelated field such as a description.
        with open(
            self.dpkg_status, encoding="utf-8", errors="replace"
        ) as dpkg_status:
            # Iterate lazily instead of loading the whole file with readlines().
            for line in dpkg_status:
                if line.startswith("Package:"):
                    # Split on the field separator so the value is found
                    # whatever whitespace follows the colon.
                    installed_packages.add(line.split(":", 1)[1].strip())

        return installed_packages

    def scan_metadata(self):
        """Scan the metadata directory and return the BOM as a dict.

        The result always has the keys ``"license"`` and ``"copyright"``
        (lists aggregating the values of every package).  When
        ``self.verbose`` is above ``VERBOSE_IMAGE`` it also has
        ``"packages"``: one ``{"name", "license", "copyright"}`` dict per
        metadata file, followed by one placeholder entry for each package
        listed in the dpkg status file that has no metadata file.

        A warning is printed on stderr when such packages exist, and
        ``NO_INFO_FOUND`` is then added to both aggregated lists.
        """
        image_licenses = set()
        image_copyright = set()
        packages = []
        processed_packages = set()
        installed_packages = self.get_installed_packages()
        detailed = self.verbose > VERBOSE_IMAGE

        # Sorted listings make the order of the "packages" list reproducible.
        for d in sorted(os.listdir(self.metadata_dir)):
            dirpath = join(self.metadata_dir, d)
            if not isdir(dirpath):
                continue
            for f in sorted(os.listdir(dirpath)):
                path = join(dirpath, f)
                if "_metadata_" not in f or not isfile(path):
                    continue

                # The package name is the file name up to the first "_".
                package_name = f.split("_")[0]

                with open_potentially_gzipped(path) as fm:
                    metadata = json.load(fm)
                package = {
                    "name": package_name,
                    "license": metadata.get("license", [NO_INFO_FOUND]),
                    "copyright": metadata.get("copyright", [NO_INFO_FOUND]),
                }

                processed_packages.add(package_name)
                if detailed:
                    packages.append(package)

                image_licenses.update(package["license"])
                image_copyright.update(package["copyright"])

        missing_packages = sorted(installed_packages - processed_packages)
        if missing_packages:
            print(
                "WARNING: there are packages without license information",
                file=sys.stderr,
            )
            if detailed:
                # These placeholder entries used to be built and then
                # discarded; they must be part of the reported package list.
                packages.extend(
                    {
                        "name": p,
                        "license": [NO_INFO_FOUND],
                        "copyright": [NO_INFO_FOUND],
                    }
                    for p in missing_packages
                )

            image_licenses.add(NO_INFO_FOUND)
            image_copyright.add(NO_INFO_FOUND)

        bom = {"license": list(image_licenses), "copyright": list(image_copyright)}
        if detailed:
            bom["packages"] = packages

        return bom


def main(argv):
    """Parse the command line, generate the BOM and emit it as JSON.

    Args:
        argv: list of command-line argument strings, without the program
            name.  Passing ``None`` makes argparse fall back to
            ``sys.argv[1:]``.

    The JSON document is written to the file named by ``--out`` when that
    option is given, and printed on stdout otherwise.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d",
        "--dir",
        default=DEFAULT_METADATA_DIR,
        help="directory to search for information",
    )
    parser.add_argument("-o", "--out", help="output file")
    parser.add_argument(
        "-s", "--dpkg-status", default=DEFAULT_DPKG_STATUS, help="dpkg status file"
    )
    parser.add_argument(
        "-v",
        "--verbose",
        type=int,
        default=VERBOSE_IMAGE,
        help="verbose use in output 0: image, 1: package, 2: binary, 3: source",
    )

    # Honour the arguments handed to this function; calling parse_args()
    # without them silently ignored ``argv`` and re-read sys.argv instead.
    args = parser.parse_args(argv)

    bom_generator = BomGenerator(args.dir, args.dpkg_status, args.verbose)

    bom = bom_generator.scan_metadata()

    if args.out:
        # The file is only written, never read back, so plain "w" suffices.
        with open(args.out, "w") as output:
            json.dump(bom, output)
    else:
        print(json.dumps(bom))


# Run the command-line interface only when executed as a script, handing over
# the arguments without the program name.
if __name__ == "__main__":
    main(sys.argv[1:])