File: copyright-check

package info (click to toggle)
ghostscript 10.06.0~dfsg-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 96,296 kB
  • sloc: ansic: 944,438; python: 7,917; cpp: 6,534; cs: 6,457; sh: 6,168; java: 4,028; perl: 2,373; tcl: 1,639; makefile: 538; awk: 66; yacc: 18
file content (149 lines) | stat: -rwxr-xr-x 7,587 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/bin/sh
# Copyright 2020-2023  Jonas Smedegaard <dr@jones.dk>
# Copyright 2020-2021  Purism, SPC
# Description: helper script to update copyright_hints
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Depends:
#  licensecheck,
#  libimage-exiftool-perl,
#  libipc-system-simple-perl,
#  libpath-tiny-perl,
#  libregexp-assemble-perl,
#  perl,

set -eu

# resolve file regex from contained license or shebang, or path regex
#
# Usage: _file_regex [OPTION]... -- PATH...
#
# Options (each may be given multiple times):
#   --shortname NAME  use as content pattern the indented text of each
#                     debian/copyright section beginning "License: NAME"
#   --grantglob GLOB  use as content patterns the indented texts
#                     of all License-Grant and License fields
#                     in each debian/copyright section
#                     beginning with a Files field that lists GLOB
#   --shebang STRING  match files whose first line begins with STRING
#                     (STRING is taken literally)
#   --nonverb REGEX   extra token (e.g. "dnl") tolerated,
#                     surrounded by non-word characters,
#                     between the words of a content pattern
#   --regex REGEX     path regex added verbatim to the result
#
# Each PATH is inspected; a directory PATH is visited recursively.
# Requires debian/copyright to be readable from the working directory.
#
# Processing:
#  * debian/copyright is split into sections at blank lines
#  * each collected license text is turned into a regex:
#    every run of non-word characters becomes a separator
#    (non-word characters, optionally wrapping a --nonverb token),
#    and a lone digit between two separators
#    is relaxed to at most two non-space characters
#    (presumably to tolerate differing list numbering -- TODO confirm)
#  * a file is matched if its first line matches a --shebang pattern
#    or, failing that, if its raw content matches a content regex;
#    the quotemeta'd $_ is recorded for each match
#    (presumably the path: Path::Tiny's visit aliases $_ to it)
#
# Output: the matched paths and the --regex values
# assembled into a single regex, printed to stdout without a newline.
# Non-capturing groups "(?:" are rewritten to plain groups "("
# (presumably because the result is used as POSIX extended regex,
# e.g. by the find calls in this script, which lacks "(?:").
_file_regex() {
	perl -MGetopt::Long=:config,gnu_getopt -MPath::Tiny -MRegexp::Assemble -MList::Util=any \
		-e 'GetOptions ( \%opt, "shortname=s@", "grantglob=s@", "regex=s@", "shebang=s@", "nonverb=s@" );'\
		-e '@section = split /\n\n+/, path("debian/copyright")->slurp_utf8;'\
		-e 'for $name ( @{ $opt{shortname} } ) {'\
			-e 'push @license, map { /^License:\h*\Q$name\E\n\h+(\S[^\n]*(?:\n\h+\S[^\n]*)*)/ } @section };'\
		-e 'for $glob ( @{ $opt{grantglob} } ) {'\
			-e 'push @files, grep { /^Files:\n?\h(?:\S[^\n]*\n?\h)*\Q$glob\E\s/ } @section };'\
		-e '@grant = map { /^License(?:-Grant:|:\h*\S[^\n]*)\h*\n\h+(\S[^\n]*(?:\n\h+\S[^\n]*)*)/mg } @files;'\
		-e '@firstline_re = map { qr/^\Q$_\E/ } @{ $opt{shebang} };'\
		-e '$nonverb_re = Regexp::Assemble->new->add( "\\W+", map { "\\W+(?:$_)\\W+" } @{ $opt{nonverb} } )->as_string;'\
		-e '@content_re = map { s/\W+/[**]/g; s/\Q[**]\E\d\Q[**]\E/[**]\\S{0,2}[**]/g; s/\Q[**]\E/$nonverb_re/g; qr/$_/ } @license, @grant;'\
		-e '$inspect = sub {'\
			-e 'return if $_[0]->is_dir;'\
			-e '$file = $_[0];'\
			-e 'if (@firstline_re) {'\
				-e '($head) = $file->lines( { count => 1 } );'\
				-e 'push @match, quotemeta and return '\
					-e 'if any { $head =~ $_ } @firstline_re };'\
			-e 'push @match, quotemeta'\
				-e 'if any { $file->slurp_raw =~ $_ } @content_re };'\
		-e 'for (@ARGV) {'\
			-e 'path($_)->is_dir'\
			-e '? path($_)->visit( \&$inspect, { recurse => 1 } )'\
			-e ': &$inspect( path($_) ) };'\
		-e '$files_re = Regexp::Assemble->new->add( @match, @{ $opt{regex} } );'\
		-e 'print $files_re->as_string =~ s/\(\?:/\(/gr;'\
		-- "$@"
}

# suffixes of the temporary hint files created below
# (FILE:skip, FILE:meta and FILE:comment, written next to FILE),
# as a regex alternation
SKIPFILES='skip|meta|comment'

# cleanup stray hint files from a previous run
find ./* -type f -regextype posix-egrep -regex "^\./.*:($SKIPFILES)$" -delete

# omit files not copyright protected nor stating copyright or licensing
#  * lib/ghostpdf.cat is a digital signature file
#    (presumably for lib/ghostpdf.inf -- TODO confirm;
#    see upstream commit be72694)
RE_omit='.*\.(ico)|lib/ghostpdf\.cat|doc/.*\.htm'

# for each such file, write a FILE:skip hint file
# containing only the line "License: UNKNOWN"
1>&2 echo 'skip binary files without parsable metadata ...'
RE_skip='.*\.(xls|pcl|xps)'
find ./* -type f -regextype posix-egrep -regex "^\./($RE_skip)$" -exec sh -c 'echo "License: UNKNOWN" > "$1:skip"' shell {} ';'

# dump exiftool output for each such file into a FILE:meta hint file:
#  * files below the working directory with an extension in EXT_meta
#  * all files below Resource/Font, regardless of extension
# RE_meta expands to ".*\.(icc|pdf|png|ttf)|Resource/Font/.*";
# the trailing #" comment presumably rebalances quotes
# for syntax highlighting in editors
1>&2 echo 'extract metadata from binary files ...'
EXT_meta='icc pdf png ttf'
RE_meta=$(echo "$EXT_meta" | perl -aE '$"="|";say ".*\\.(@F)|Resource/Font/.*"') #"
exiftool '-textOut!' %d%f.%e:meta -short -short -recurse -extractEmbedded $(echo "$EXT_meta" | perl -aE 'say map {" -ext $_"} @F') -- ./*
exiftool '-textOut!' %d%f.%e:meta -short -short -recurse -extractEmbedded -ext '*' -- Resource/Font

# Postscript files are those below Resource whose first line
# begins with "%!PS-Adobe-", plus any path ending in .eps or .ps.
# For each, write a FILE:comment hint file keeping only
#  * blank lines
#  * the text following the leading "%" of comment lines
#  * "dup /Key (Value) put" lines,
#    rewritten as "Key: Value" where the substitution pattern applies
1>&2 echo 'extract comments and metadata from Postscript files ...'
RE_comment_ps=$(_file_regex --shebang '%!PS-Adobe-' --regex '.*\.eps' --regex '.*\.ps' -- Resource)
find ./* -type f -regextype posix-egrep -regex "^\./($RE_comment_ps)$" \
 -exec sh -c 'perl -nE "$1" "$2" > "$2:comment"' shell '/^\s*(|%+\s*\K.*|dup\s+\/\S+\s+\(.*\)\s+put)\s*$/ && print $& =~ s/^dup \/(\S+)\s+\(\s*(.*)\s*\)\s*put/\1: \2/r' {} ';'

# original files that licensecheck shall ignore in every run
# (presumably so that the hint files are examined in their place --
# _licensecheck strips the hint suffix from its report)
RE_SKIP="$RE_omit|$RE_skip|$RE_meta|$RE_comment_ps"

# directory checked as a cluster of its own
# (see the Resource/CMap/* call to _licensecheck further down)
RE_cmap='Resource/CMap/.*'

# licensing patterns misdetected by licensecheck:
# files containing the license texts which debian/copyright
# states for the section listing arch/*
# (tolerating "dnl" tokens between the words) ...
RE_ghostscript=$(_file_regex --grantglob 'arch/*' --nonverb 'dnl' -- *)

# ... and for the section listing base/bobbin.c
RE_artifex=$(_file_regex --grantglob 'base/bobbin.c' -- *)

# TODO: automate more of this manual cleanup:
#  * strip garbage copyright holders
#  * optionally merge equally licensed Files sections
#  * do "sort -k2 -k1,1 -u" on copyright holders
#  * merge copyright years for each copyright holder
# TODO: strip files matching glob in current (only, no later) section
# run licensecheck on the working directory
# and append its post-processed report to debian/copyright_hints
#
# Usage: _licensecheck --check REGEX --ignore REGEX [OPTION]...
#
# Options:
#   --check REGEX      passed on to licensecheck --check
#   --ignore REGEX     passed on to licensecheck --ignore
#   --shortname NAME   license shortname added to every License field
#                      (replacing UNKNOWN where that was reported)
#   --subset GLOBS     space-separated globs for the first Files field
#   --merge-licenses   passed on to licensecheck
#
# Environment:
#   SKIPFILES     regex alternation of hint file suffixes
#                 stripped from the end of reported lines
#   DEBUG         if set, print the licensecheck command to stderr
#   NOGLOBMERGE   if set, never replace detected files with the globs
#
# Post-processing of the report, in order:
#  * if any --subset glob contains "*" (and NOGLOBMERGE is unset),
#    the value of the first Files field after the leading paragraph
#    is replaced with the globs;
#    otherwise the --subset entries are prepended to that value
#  * unless the first --subset entry is "*",
#    everything up to and including the first blank line is dropped
#    (presumably the report header, wanted only once -- TODO confirm)
#  * values of Files and Copyright fields are moved to continuation lines
#  * the space after a comma is dropped before a four-digit year
#    when the comma follows a year or starts a continuation line
#  * a trailing ":SUFFIX" matching SKIPFILES is stripped from each line
#  * with --shortname, each License field becomes the sorted unique
#    " and/or "-joined list of its names (except UNKNOWN) plus NAME
#
# Progress messages go to stderr.
_licensecheck() {
	SKIPFILES=$SKIPFILES perl -MGetopt::Long=:config,gnu_getopt -MIPC::System::Simple=capture -MList::Util=uniq \
		-e 'GetOptions ( \%opt, "check=s", "ignore=s", "shortname=s", "subset=s", "merge-licenses" );'\
		-e '@subset = split( " ", $opt{subset} );' \
		-e '$subset_globs = join "\n ", @subset;' \
		-E 'if ( $subset_globs =~ /^[*]$/ ) { say STDERR "check default section(s) ..." }' \
		-E 'elsif ( @subset and $opt{shortname} ) { say STDERR "check $opt{shortname} section(s) @subset ..." }' \
		-E 'elsif (@subset) { say STDERR "check section(s) @subset ..." }' \
		-E 'elsif ($opt{shortname} ) { say STDERR "check $opt{shortname} section(s) ..." }' \
		-E 'else { say STDERR "check remaining upstream section(s) ..." };' \
		-e '@cmd = ( qw(licensecheck --copyright --deb-machine --recursive --lines 0), "--check", $opt{check}, "--ignore", $opt{ignore}, $opt{"merge-licenses"} ? "--merge-licenses" : (), "--" );'\
		-E 'say STDERR "@cmd *" if $ENV{DEBUG};'\
		-e '$_ = capture( @cmd, glob "*" );'\
		-e 'if ( !$ENV{NOGLOBMERGE} and grep /[*]/, @subset ) { s/^.*?\n\nFiles: \K.*?(?=\n\w)/$subset_globs/s }' \
		-e 'elsif (@subset) { s/^.*?\n\nFiles: \K/$subset_globs\n /s };' \
		-e 'if ( $subset[0] ne "*" ) { s/^.*?\n\n//s };' \
		-e 's/^Files:\K /\n /mg;' \
		-e 's/^Copyright:\K /\n  /mg;' \
		-e 's/(?:(?<=^  )|(?<=\d{4})),\K (?=\d{4})//mg;' \
		-e 's/:(?:$ENV{SKIPFILES})$//mg;' \
		-e 'if ($opt{shortname}) { s/^License: \K(.*)/ join " and\/or ", uniq sort grep( !m{\AUNKNOWN\Z}, split(" and\/or ",$1), $opt{shortname} ) /mge };' \
		-e 'print $_;'\
		-- "$@" \
		>> debian/copyright_hints
}

rm -f debian/copyright_hints

# initially, check all to know roughly what to group and in which order
#rm -f debian/copyright_hints
#_licensecheck --check '.*' --ignore "^($RE_SKIP|debian/.*)$"
#exit 0

# check default licensed files first, with merged licenses where reliable
_licensecheck --shortname 'AGPL-3+' --subset 'arch/* base/* devices/* doc/* examples/* gpdl/* iccprofiles/* ios/* lib/* man/* pcl/* psi/* Resource/* toolbin/* xps/*' --check "^($RE_ghostscript)$" --ignore "^($RE_SKIP|debian/.*)$" --merge-licenses

# check files with similar boilerplate as default but different license grant
_licensecheck --shortname 'AGPL-3+' --subset '?artifex' --check "^($RE_artifex)$" --ignore "^($RE_ghostscript|$RE_SKIP|debian/.*)$" --merge-licenses

# check known clusters
_licensecheck --subset 'Resource/CMap/*' --check "^($RE_cmap)$" --ignore "^($RE_ghostscript|$RE_artifex|$RE_SKIP|debian/.*)$"

# check generally
#  * omit non-copyright-protected Debian files
_licensecheck --check '.*' --ignore "^($RE_ghostscript|$RE_artifex|$RE_cmap|$RE_SKIP|debian/.*)$"
_licensecheck --subset 'debian/*' --check '^debian/' --ignore "^($RE_SKIP|debian/(changelog|copyright(_hints)?|source/lintian-overrides))$"

# cleanup hint files
find ./* -type f -regextype posix-egrep -regex "^\./.*:($SKIPFILES)$" -delete