File: copyright-check

package info (click to toggle)
rust-swc-core 35.0.0~ds-6
  • links: PTS, VCS
  • area: main
  • in suites: forky
  • size: 62,972 kB
  • sloc: javascript: 873; xml: 538; sh: 358; makefile: 35; python: 5
file content (114 lines) | stat: -rwxr-xr-x 5,720 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/bin/sh
# Copyright 2020-2023  Jonas Smedegaard <dr@jones.dk>
# Copyright 2020-2021  Purism, SPC
# Description: helper script to update copyright_hints
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Depends:
#  licensecheck,
#  libipc-system-simple-perl,
#  libpath-tiny-perl,
#  libregexp-assemble-perl,
#  perl,

# -e: abort the script as soon as a command fails
# -u: treat expansion of an unset shell variable as an error
set -eu

# resolve file regex from contained license or shebang, or path regex
_file_regex() {
	# Usage: _file_regex [OPTION]... -- PATH...
	#
	# Prints (to stdout, without trailing newline) a single regular
	# expression matching the paths selected by the options below.
	#
	# Options (each may be given multiple times):
	#   --shortname NAME  use the indented text of each debian/copyright
	#                     paragraph that begins with "License: NAME"
	#   --grantglob GLOB  select the debian/copyright paragraphs that begin
	#                     with a Files field listing GLOB, and use the
	#                     indented text following their License-Grant or
	#                     License fields
	#   --shebang STRING  select files whose first line starts with STRING
	#   --nonverb REGEX   extra pattern tolerated (surrounded by non-word
	#                     characters) between the words of a license text
	#   --regex REGEX     path regex added verbatim to the result
	#
	# How it works:
	#  * debian/copyright is read and split into paragraphs at blank lines
	#  * each collected license or grant text becomes a content regex:
	#    every run of non-word characters is replaced by an alternation of
	#    "\W+" and the --nonverb patterns, and a single digit standing
	#    alone between such runs is loosened to "\S{0,2}"
	#  * each PATH is inspected; directories are walked recursively
	#  * when --shebang patterns exist, only the first line of a file is
	#    read first; a match records the file and skips the content check
	#  * otherwise the file is recorded when its raw content matches any
	#    content regex
	#  * recorded paths are escaped with quotemeta, which acts on $_
	#    (presumably $_ is the current path both in the @ARGV loop and
	#    inside the Path::Tiny visit callback -- TODO confirm)
	#  * recorded paths and --regex values are merged by Regexp::Assemble,
	#    and every "(?:" in the result is rewritten to "(" before printing
	perl -MGetopt::Long=:config,gnu_getopt -MPath::Tiny -MRegexp::Assemble -MList::Util=any \
		-e 'GetOptions ( \%opt, "shortname=s@", "grantglob=s@", "regex=s@", "shebang=s@", "nonverb=s@" );'\
		-e '@section = split /\n\n+/, path("debian/copyright")->slurp_utf8;'\
		-e 'for $name ( @{ $opt{shortname} } ) {'\
			-e 'push @license, map { /^License:\h*\Q$name\E\n\h+(\S[^\n]*(?:\n\h+\S[^\n]*)*)/ } @section };'\
		-e 'for $glob ( @{ $opt{grantglob} } ) {'\
			-e 'push @files, grep { /^Files:\n?\h(?:\S[^\n]*\n?\h)*\Q$glob\E\s/ } @section };'\
		-e '@grant = map { /^License(?:-Grant:|:\h*\S[^\n]*)\h*\n\h+(\S[^\n]*(?:\n\h+\S[^\n]*)*)/mg } @files;'\
		-e '@firstline_re = map { qr/^\Q$_\E/ } @{ $opt{shebang} };'\
		-e '$nonverb_re = Regexp::Assemble->new->add( "\\W+", map { "\\W+(?:$_)\\W+" } @{ $opt{nonverb} } )->as_string;'\
		-e '@content_re = map { s/\W+/[**]/g; s/\Q[**]\E\d\Q[**]\E/[**]\\S{0,2}[**]/g; s/\Q[**]\E/$nonverb_re/g; qr/$_/ } @license, @grant;'\
		-e '$inspect = sub {'\
			-e 'return if $_[0]->is_dir;'\
			-e '$file = $_[0];'\
			-e 'if (@firstline_re) {'\
				-e '($head) = $file->lines( { count => 1 } );'\
				-e 'push @match, quotemeta and return '\
					-e 'if any { $head =~ $_ } @firstline_re };'\
			-e 'push @match, quotemeta'\
				-e 'if any { $file->slurp_raw =~ $_ } @content_re };'\
		-e 'for (@ARGV) {'\
			-e 'path($_)->is_dir'\
			-e '? path($_)->visit( \&$inspect, { recurse => 1 } )'\
			-e ': &$inspect( path($_) ) };'\
		-e '$files_re = Regexp::Assemble->new->add( @match, @{ $opt{regex} } );'\
		-e 'print $files_re->as_string =~ s/\(\?:/\(/gr;'\
		-- "$@"
}

# omit files not copyright protected nor stating copyright or licensing
# (regex fragment matched against paths by file name suffix; the calls
# further down wrap it as "^(...)$" for licensecheck --ignore)
RE_omit='.*\.(json|debug|rust-debug|stderr|stdout|swc-stderr)'

# combined pattern of everything to skip in all checks; currently only RE_omit
RE_SKIP="$RE_omit"

# common licensing patterns
# each value is a regex of paths, among all non-hidden entries of the
# current directory (searched recursively), whose content matches the
# license or license grant text of the debian/copyright paragraph
# listing the given Files glob
RE_main=$(_file_regex --grantglob '*' -- *)
RE_deno=$(_file_regex --grantglob 'crates/swc/tests/deno-unit/*' -- *)

# TODO: automate more of this manual cleanup:
#  * strip garbage copyright holders
#  * optionally merge equally licensed Files sections
#  * do "sort -k2 -k1,1 -u" on copyright holders
#  * merge copyright years for each copyright holder
# TODO: strip files matching glob in current (only, no later) section
_licensecheck() {
	# Usage: _licensecheck [OPTION]...
	#
	# Runs licensecheck over all non-hidden entries of the current
	# directory, tidies its output and appends the result to
	# debian/copyright_hints.
	#
	# Options:
	#   --check REGEX      passed to licensecheck --check
	#   --ignore REGEX     passed to licensecheck --ignore
	#   --shortname NAME   license shortname merged into every License
	#                      line (any UNKNOWN entry is dropped, the rest is
	#                      sorted, de-duplicated and joined by " and/or ")
	#   --subset "GLOB.."  whitespace-separated Files globs for the first
	#                      Files paragraph; when any of them contains "*"
	#                      they replace the detected file list, otherwise
	#                      they are put in front of it
	#   --merge-licenses   passed through to licensecheck
	#
	# Environment:
	#   DEBUG                  print the licensecheck command to stderr
	#   NOSTRIPLARGESTUNKNOWN  keep the leading "Copyright: NONE /
	#                          License: UNKNOWN / FIXME" stanza
	#   NOGLOBMERGE            never replace the detected file list by the
	#                          --subset globs
	#
	# Further post-processing:
	#  * unless the first subset glob is "*", everything up to the first
	#    blank line (the header paragraph) is dropped; this also applies
	#    when no --subset is given at all
	#  * values of Files and Copyright fields are moved to continuation
	#    lines
	#  * the space after a comma between two 4-digit years is removed
	#
	# A progress message is printed to stderr before licensecheck is run.
	# NOTE(review): the NOSTRIPLARGESTUNKNOWN substitution deletes from the
	# first file list up to the first unknown stanza found, so it appears
	# to rely on that stanza being the first one emitted -- TODO confirm.
	perl -MGetopt::Long=:config,gnu_getopt -MIPC::System::Simple=capture -MList::Util=uniq \
		-e 'GetOptions ( \%opt, "check=s", "ignore=s", "shortname=s", "subset=s", "merge-licenses" );'\
		-e '@subset = split( " ", $opt{subset} );' \
		-e '$subset_globs = join "\n ", @subset;' \
		-E 'if ( $subset_globs =~ /^[*]$/ ) { say STDERR "check default section(s) ..." }' \
		-E 'elsif ( @subset and $opt{shortname} ) { say STDERR "check $opt{shortname} section(s) @subset ..." }' \
		-E 'elsif (@subset) { say STDERR "check section(s) @subset ..." }' \
		-E 'elsif ($opt{shortname} ) { say STDERR "check $opt{shortname} section(s) ..." }' \
		-E 'else { say STDERR "check remaining upstream section(s) ..." };' \
		-e '@cmd = ( qw(licensecheck --copyright --deb-machine --recursive --lines 0), "--check", $opt{check}, "--ignore", $opt{ignore}, $opt{"merge-licenses"} ? "--merge-licenses" : (), "--" );'\
		-E 'say STDERR "@cmd *" if $ENV{DEBUG};'\
		-e '$_ = $dump = capture( @cmd, glob "*" );'\
		-e 'if ( !$ENV{NOSTRIPLARGESTUNKNOWN} ) { s/^.*?\n\nFiles: \K.*?(?=\n\w)\nCopyright[:] NONE\nLicense[:] UNKNOWN\n FIXME\n\nFiles: //s };' \
		-e 'if ( !$ENV{NOGLOBMERGE} and grep /[*]/, @subset ) { s/^.*?\n\nFiles: \K.*?(?=\n\w)/$subset_globs/s }' \
		-e 'elsif (@subset) { s/^.*?\n\nFiles: \K/$subset_globs\n /s };' \
		-e 'if ( $subset[0] ne "*" ) { s/^.*?\n\n//s };' \
		-e 's/^Files:\K /\n /mg;' \
		-e 's/^Copyright:\K /\n  /mg;' \
		-e 's/(?:(?<=^  )|(?<=\d{4})),\K (?=\d{4})//mg;' \
		-e 'if ($opt{shortname}) { s/^License: \K(.*)/ join " and\/or ", uniq sort grep( !m{\AUNKNOWN\Z}, split(" and\/or ",$1), $opt{shortname} ) /mge };' \
		-e 'print $_;'\
		-- "$@" \
		>> debian/copyright_hints
}

# start from scratch: every _licensecheck call below appends to this file
rm -f debian/copyright_hints

# initially, check all to know roughly what to group and in which order
# (disabled; enable the three commands below for such a first survey)
#rm -f debian/copyright_hints
#_licensecheck --check '.*' --ignore "^($RE_SKIP|debian/.*)$"
#exit 0

# check common licensing patterns
#  * order matters: each later call ignores the paths matched by the
#    patterns of the earlier calls
#  * the debian/ directory is left out here and handled by the last call
_licensecheck --subset '*' --shortname 'Apache-2.0 or Expat' --check "^($RE_main)$" --ignore "^($RE_SKIP|debian/.*)$"
_licensecheck --subset 'crates/swc/tests/deno-unit/*' --shortname 'Expat' --check "^($RE_deno)$" --ignore "^($RE_main|$RE_SKIP|debian/.*)$"

# check generally
#  * omit non-copyright-protected Debian files
#  * first call: all remaining upstream files not matched by the patterns above
#  * second call: files below debian/, except changelog, copyright,
#    copyright_hints and source/lintian-overrides
_licensecheck --check '.*' --ignore "^($RE_main|$RE_deno|$RE_SKIP|debian/.*)$"
_licensecheck --subset 'debian/*' --check '^debian/' --ignore "^($RE_main|$RE_deno|$RE_SKIP|debian/(changelog|copyright(_hints)?|source/lintian-overrides))$"