File: copyright-check

package info (click to toggle)
janus 1.1.2-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 15,804 kB
  • sloc: ansic: 87,789; javascript: 16,059; makefile: 696; sh: 282; python: 257; lisp: 9
file content (127 lines) | stat: -rwxr-xr-x 6,129 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
#!/bin/sh
# Copyright 2020-2023  Jonas Smedegaard <dr@jones.dk>
# Copyright 2020-2021  Purism, SPC
# Description: helper script to update copyright_hints
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Depends:
#  licensecheck,
#  libimage-exiftool-perl,
#  libipc-system-simple-perl,
#  libpath-tiny-perl,
#  libregexp-assemble-perl,
#  perl,

set -eu

# resolve file regex from contained license or shebang, or path regex
_file_regex() {
	perl -MGetopt::Long=:config,gnu_getopt -MPath::Tiny -MRegexp::Assemble -MList::Util=any \
		-e 'GetOptions ( \%opt, "shortname=s@", "grantglob=s@", "regex=s@", "shebang=s@", "nonverb=s@" );'\
		-e '@section = split /\n\n+/, path("debian/copyright")->slurp_utf8;'\
		-e 'for $name ( @{ $opt{shortname} } ) {'\
			-e 'push @license, map { /^License:\h*\Q$name\E\n\h+(\S[^\n]*(?:\n\h+\S[^\n]*)*)/ } @section };'\
		-e 'for $glob ( @{ $opt{grantglob} } ) {'\
			-e 'push @files, grep { /^Files:\n?\h(?:\S[^\n]*\n?\h)*\Q$glob\E\s/ } @section };'\
		-e '@grant = map { /^License(?:-Grant:|:\h*\S[^\n]*)\h*\n\h+(\S[^\n]*(?:\n\h+\S[^\n]*)*)/mg } @files;'\
		-e '@firstline_re = map { qr/^\Q$_\E/ } @{ $opt{shebang} };'\
		-e '$nonverb_re = Regexp::Assemble->new->add( "\\W+", map { "\\W+(?:$_)\\W+" } @{ $opt{nonverb} } )->as_string;'\
		-e '@content_re = map { s/\W+/[**]/g; s/\Q[**]\E\d\Q[**]\E/[**]\\S{0,2}[**]/g; s/\Q[**]\E/$nonverb_re/g; qr/$_/ } @license, @grant;'\
		-e '$inspect = sub {'\
			-e 'return if $_[0]->is_dir;'\
			-e '$file = $_[0];'\
			-e 'if (@firstline_re) {'\
				-e '($head) = $file->lines( { count => 1 } );'\
				-e 'push @match, quotemeta and return '\
					-e 'if any { $head =~ $_ } @firstline_re };'\
			-e 'push @match, quotemeta'\
				-e 'if any { $file->slurp_raw =~ $_ } @content_re };'\
		-e 'for (@ARGV) {'\
			-e 'path($_)->is_dir'\
			-e '? path($_)->visit( \&$inspect, { recurse => 1 } )'\
			-e ': &$inspect( path($_) ) };'\
		-e '$files_re = Regexp::Assemble->new->add( @match, @{ $opt{regex} } );'\
		-e 'print $files_re->as_string =~ s/\(\?:/\(/gr;'\
		-- "$@"
}

SKIPFILES='skip|meta'

# cleanup stray hint files from a previous run
find ./* -type f -regextype posix-egrep -regex "^\./.*:($SKIPFILES)$" -delete

1>&2 echo 'skip binary files without parsable metadata ...'
RE_skip='.*\.(alaw|mulaw|mjr)|fuzzers/corpora/.*'
find ./* -type f -regextype posix-egrep -regex "^\./($RE_skip)$" -exec sh -c 'echo "License: UNKNOWN" > "$1:skip"' shell {} ';'

1>&2 echo 'extract metadata from binary files ...'
EXT_meta='png mp4 webp'
RE_meta=$(echo "$EXT_meta" | perl -aE '$"="|";say ".*\\.(@F)"') #"
exiftool '-textOut!' %d%f.%e:meta -short -short -recurse -extractEmbedded $(echo "$EXT_meta" | perl -aE 'say map {" -ext $_"} @F') -- ./*

RE_SKIP="$RE_skip|$RE_meta"

# licensing patterns misdetected by licensecheck
RE_janus=$(_file_regex --grantglob '*' -- *)

# TODO: automate more of this manual cleanup:
#  * strip garbage copyright holders
#  * optionally merge equally licensed Files sections
#  * do "sort -k2 -k1,1 -u" on copyright holders
#  * merge copyright years for each copyright holder
# TODO: strip files matching glob in current (only, no later) section
_licensecheck() {
	SKIPFILES=$SKIPFILES perl -MGetopt::Long=:config,gnu_getopt -MIPC::System::Simple=capture -MList::Util=uniq \
		-e 'GetOptions ( \%opt, "check=s", "ignore=s", "shortname=s", "subset=s", "merge-licenses" );'\
		-e '@subset = split( " ", $opt{subset} );' \
		-e '$subset_globs = join "\n ", @subset;' \
		-E 'if ( $subset_globs =~ /^[*]$/ ) { say STDERR "check default section(s) ..." }' \
		-E 'elsif ( @subset and $opt{shortname} ) { say STDERR "check $opt{shortname} section(s) @subset ..." }' \
		-E 'elsif (@subset) { say STDERR "check section(s) @subset ..." }' \
		-E 'elsif ($opt{shortname} ) { say STDERR "check $opt{shortname} section(s) @subset ..." }' \
		-E 'else { say STDERR "check remaining upstream section(s) ..." };' \
		-e '@cmd = ( qw(licensecheck --copyright --deb-machine --recursive --lines 0), "--check", $opt{check}, "--ignore", $opt{ignore}, $opt{"merge-licenses"} ? "--merge-licenses" : (), "--" );'\
		-E 'say STDERR "@cmd *" if $ENV{DEBUG};'\
		-e '$_ = $dump = capture( @cmd, glob "*" );'\
		-e 'if ( !$ENV{NOGLOBMERGE} and grep /[*]/, @subset ) { s/^.*?\n\nFiles: \K.*?(?=\n\w)/$subset_globs/s }' \
		-e 'elsif (@subset) { s/^.*?\n\nFiles: \K/$subset_globs\n /s };' \
		-e 'if ( $subset[0] ne "*" ) { s/^.*?\n\n//s };' \
		-e 's/^Files:\K /\n /mg;' \
		-e 's/^Copyright:\K /\n  /mg;' \
		-e 's/(?:(?<=^  )|(?<=\d{4})),\K (?=\d{4})//mg;' \
		-e 's/:(?:$ENV{SKIPFILES})$//mg;' \
		-e 'if ($opt{shortname}) { s/^License: \K(.*)/ join " and\/or ", uniq sort grep( !m{\AUNKNOWN\Z}, split(" and\/or ",$1), $opt{shortname} ) /mge };' \
		-e 'print $_;'\
		-- "$@" \
		>> debian/copyright_hints
}

rm -f debian/copyright_hints

# initially, check all to know roughly what to group and in which order
#rm -f debian/copyright_hints
#_licensecheck '' --check '.*' --ignore "^($RE_SKIP|debian/.*)$"
#exit 0

# check default licensed files first
_licensecheck --shortname 'GPL-3 with OpenSSL exception' --subset '*' --check "^($RE_janus)$" --ignore "^($RE_SKIP|debian/.*)$"

# check generally
#  * omit non-copyright-protected Debian files
_licensecheck --subset '' --check '.*' --ignore "^($RE_janus|$RE_SKIP|debian/.*)$"
_licensecheck --subset 'debian/*' --check '^debian/' --ignore "^($RE_janus|$RE_SKIP|debian/(changelog|copyright(_hints)?|source/lintian-overrides))$"

# cleanup hint files
find ./* -type f -regextype posix-egrep -regex "^\./.*:($SKIPFILES)$" -delete