1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
|
"""Script for copyright/license reports
Dependencies:
- python3
- ripgrep
Assumptions:
- copyright.txt file is in CWD
- copyright.txt is in https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ format
- all relevant files are within CWD and not ignored by git (ripgrep default behaviour)
For xournalpp execute using:
python3 scripts/compare_license.py
Workflow:
1. Run the script
2. If the script exits with status 1, adapt copyright.txt or this script (see comments I, II, III in the code below)
3. Rerun the script; it should now exit with status 0
Note: This script cannot automatically detect whether you added a file that should be licensed differently
but does not indicate this in any way. Please refer to comment II in the code below and add it to the whitelist.
"""
from typing import Set
import re
import os
import subprocess
def get_files_from_copyright_format(file: str) -> Set[str]:
    """Get all Files listed in a copyright file

    An entry is taken from every line that either starts with ``Files: `` or
    is a continuation line: seven spaces followed by at most one token made
    of the characters ``a-zA-Z0-9/_-.*`` (optionally followed by spaces).

    Args:
    - file: path to a file formatted according to
      https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/

    Returns:
        The set of listed paths/globs, with surrounding whitespace removed.
    """
    # Raw string so that `\-` reaches the regex engine without relying on
    # Python keeping an invalid string escape untouched.
    entry_pattern = re.compile(r"^Files: |^ {7}[a-zA-Z0-9/_\-.*]* *$")
    files = set()
    # Read the file the caller asked for (not a hard-coded name).
    with open(file, 'r') as f:
        for line in f:
            if entry_pattern.match(line):
                # "Files: " and the continuation indent are both 7 characters
                # wide, so the same slice removes either prefix.
                files.add(line[7:].strip())
    return files
def get_all_files():
    """Return the set of paths printed by ``rg --files``.

    The command is run through the shell in the current working directory;
    each output line is stripped of surrounding whitespace.
    """
    listing = os.popen('rg --files').readlines()
    return {path.strip() for path in listing}
def get_files_containing_copyright_or_license():
    """Find all files containing either
    - copyright
    - license
    (case insensitive)
    Excluding .po files as they create only false positives

    Only files which do not match ``@author Xournal\\+\\+`` are kept, i.e. the
    result is the intersection of the two ripgrep listings below.

    Returns:
        Set of paths as printed by ripgrep, stripped of surrounding whitespace.
    """
    # Raw strings: the backslashes are part of the regexes handed to ripgrep
    # and must not be interpreted as (invalid) Python string escapes.
    # Note that `rg -v "\.po"` drops every path containing ".po" anywhere.
    listing = os.popen(r'rg -i -e "copyright" -e "license" -l | rg -v "\.po"').readlines()
    lc_files = {path.strip() for path in listing}

    listing = os.popen(r'rg --files-without-match "@author Xournal\+\+"').readlines()
    xpp_files = {path.strip() for path in listing}

    return lc_files & xpp_files
def get_changed_files_since(git_hash:str):
    """Return the set of paths printed by ``git diff <git_hash> HEAD --name-only``.

    Args:
        git_hash: revision that is compared against HEAD; it is interpolated
            into the shell command as-is.
    """
    diff_output = os.popen(f'git diff {git_hash} HEAD --name-only').readlines()
    return {name.strip() for name in diff_output}
def get_source_files_missing_license_of_header(scanned_files: Set[str], all_files: Set[str]) -> Set[str]:
    """Return all `.cpp` files which do not have a license but their corresponding `.h` file has.

    Args:
        scanned_files (Set[str]): Files which have a license header
        all_files (Set[str]): all Files in the project (used for existence check)

    Returns:
        Set[str]: paths of the `.cpp` files that exist in `all_files`, are not
        in `scanned_files`, and whose `.h` counterpart is in `scanned_files`.
    """
    header_ext = '.h'
    source_ext = '.cpp'

    # Remove the extension by slicing. `str.strip('.h')` would instead strip
    # any of the characters '.' and 'h' from BOTH ends of the path
    # (e.g. "Path.h" -> "Pat"), which corrupts the stem.
    licensed_header_stems = {
        f[:-len(header_ext)] for f in scanned_files if f.endswith(header_ext)
    }
    licensed_source_stems = {
        f[:-len(source_ext)] for f in scanned_files if f.endswith(source_ext)
    }

    # Headers with a license whose source file carries none ...
    stems_without_source_license = licensed_header_stems - licensed_source_stems
    # ... reported only when that source file actually exists in the project.
    return {
        stem + source_ext
        for stem in stems_without_source_license
        if stem + source_ext in all_files
    }
# I: Add an entry if a file is detected automatically as a file with special
# license/copyright, but which is actually licensed/copyrighted under the same
# license/copyright as xournalpp.
# Please add a short comment explaining why it's whitelisted
def get_whitelist_not_listed():
    """Return the paths that contain the searched-for substrings but do not
    need an entry in copyright.txt."""
    return {
        "ABOUT-NLS",  # false positive
        "copyright.txt",  # copyright/license summary file
        "scripts/compare_license.py",  # this very script
        "CMakeLists.txt",  # false positive
        "LICENSE",  # main license file
        "rpm/fedora/xournalpp.spec",  # false positive
        "windows-setup/xournalpp.nsi",  # false positive
        "ui/about.glade",  # false positive
        "src/exe/win32/xpp.rc.in",  # false positive
        "mac-setup/Info.plist",  # false positive
        "src/core/gui/dialog/AboutDialog.cpp",  # false positive
    }
# II: Add an entry to the whitelist if you added a file which has special
# licensing/copyright but does not contain any of the substrings used to
# automatically identify such files
# The rationale should be explained in the copyright.txt file itself.
# Do not use comments in this file to explain the rationale.
def get_whitelist_not_found():
    """Return the entries that are listed in copyright.txt although the
    searched-for substrings do not occur in them."""
    return {
        "*",
        # Debian packaging files
        "debian/changelog",
        "debian/compat",
        "debian/control",
        "debian/docs",
        "debian/package_description",
        "debian/rules",
        "debian/source/format",
        # Pixmaps
        "ui/pixmaps/application-x-xojpp.svg",
        "ui/pixmaps/application-x-xopp.svg",
        "ui/pixmaps/application-x-xopt.svg",
        "ui/pixmaps/com.github.xournalpp.xournalpp.png",
        "ui/pixmaps/com.github.xournalpp.xournalpp.svg",
        "ui/pixmaps/gnome-mime-application-x-xopp.svg",
        "ui/pixmaps/gnome-mime-application-x-xopt.svg",
        "ui/pixmaps/xopt.svg",
        # Icon theme globs
        "ui/iconsColor-dark/*",
        "ui/iconsColor-light/*",
        "ui/iconsLucide-dark/*",
        "ui/iconsLucide-light/*",
        # Individual icons
        "ui/iconsColor-dark/hicolor/scalable/actions/xopp-compass.svg",
        "ui/iconsColor-dark/hicolor/scalable/actions/xopp-setsquare.svg",
        "ui/iconsColor-light/hicolor/scalable/actions/xopp-Tselect-pdf-text-area.svg",
        "ui/iconsColor-light/hicolor/scalable/actions/xopp-Tselect-pdf-text-hd.svg",
        "ui/iconsLucide-dark/hicolor/scalable/actions/xopp-compass.svg",
        "ui/iconsLucide-dark/hicolor/scalable/actions/xopp-draw-spline.svg",
        "ui/iconsLucide-dark/hicolor/scalable/actions/xopp-floating-toolbox.svg",
        "ui/iconsLucide-dark/hicolor/scalable/actions/xopp-setsquare.svg",
        "ui/iconsLucide-light/hicolor/scalable/actions/xopp-compass.svg",
        "ui/iconsLucide-light/hicolor/scalable/actions/xopp-draw-spline.svg",
        "ui/iconsLucide-light/hicolor/scalable/actions/xopp-floating-toolbox.svg",
        "ui/iconsLucide-light/hicolor/scalable/actions/xopp-setsquare.svg",
    }
# III: Update git commit hash to current commit once you checked
# that the changes do not affect the licensing information in copyright.txt
last_checked_git_commit_hash = "c00f7b74009716c488bd666fa8ba7587ea0fed2f"

# --- Collect the inputs of the report ---------------------------------------
changed_files = get_changed_files_since(last_checked_git_commit_hash)
summary_files = get_files_from_copyright_format("copyright.txt")
scanned_files = get_files_containing_copyright_or_license()

# Build each whitelist once; both are used in two places below.
whitelist_not_found = get_whitelist_not_found()
whitelist_not_listed = get_whitelist_not_listed()

# --- Compare copyright.txt against the scan result --------------------------
found = summary_files & scanned_files
not_found = summary_files - scanned_files - whitelist_not_found
not_listed = scanned_files - summary_files - whitelist_not_listed

# Copyright could change with the same commit. Hence, it needs to be excluded.
all_whitelisted = (whitelist_not_found | whitelist_not_listed) - {"copyright.txt"}

# Files inside copyright.txt or mentioned in whitelist should be checked for
# diffs affecting the license/copyright
out_of_date = (all_whitelisted | summary_files) & changed_files

missing_source_license = get_source_files_missing_license_of_header(scanned_files, get_all_files())

# --- Report ------------------------------------------------------------------
print("Found License/Copyright both in copyright.txt and repo: ",len(found))

if not_listed:
    print()
    print("No License/Copyright listed in copyright.txt (but found in repo):")
    for f in sorted(not_listed):
        print(" ", f)
else:
    print("- All automatically detected files listed or whitelisted")

if not_found:
    print()
    print("No License/Copyright found in repo (but listed in copyright.txt):")
    for f in sorted(not_found):
        print(" ", f)
else:
    print("- All listed files automatically detected or whitelisted")

if out_of_date:
    print()
    print("Following items are whitelisted or listed in copyright.txt but changed since last check:")
    for f in sorted(out_of_date):
        print(" ", f)
else:
    print("- No listed file got changed since the last check.")

if missing_source_license:
    print()
    print("Following `.cpp` files do NOT contain a license even though their accompanying `.h` file does.")
    for f in sorted(missing_source_license):
        print(" ", f)

# --- Exit status: 1 on the first failing check, 0 otherwise -----------------
# `raise SystemExit` is used instead of the `exit()` helper, which is only
# injected by the `site` module and is meant for interactive sessions.
if not_found or not_listed:
    print("⚠️ Update required")
    raise SystemExit(1)

if out_of_date:
    # This message used to be a bare string expression and was never shown.
    print("⚠️ Recheck required")
    raise SystemExit(1)

if missing_source_license:
    # Same here: the message needs an explicit print().
    print("⚠️ Adding license header required")
    raise SystemExit(1)

print("🎉 Success")
raise SystemExit(0)
|