File: Copyright.pm

package info (click to toggle)
libconfig-model-dpkg-perl 3.017
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 4,048 kB
  • sloc: perl: 8,530; python: 242; makefile: 77; javascript: 16; sh: 1
file content (430 lines) | stat: -rw-r--r-- 13,284 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
package Config::Model::Dpkg::Copyright ;

use strict;
use warnings;

use 5.020;
use IO::Pipe;

use feature qw/postderef signatures/;
no warnings qw/experimental::postderef experimental::signatures/;

use base qw/Config::Model::Node/;
use Path::Tiny;
use Data::Dumper;

use Config::Model::DumpAsData;
use Dpkg::Copyright::Scanner qw/scan_files/;
use Dpkg::Copyright::Grant::ByDir;
use Dpkg::Copyright::Grant::Plain;

use Scalar::Util qw/weaken/;
use Storable qw/dclone/;

my $join_path = "\n "; # used to group Files

sub get_joined_path ($self, $paths) {
    return join ($join_path, sort @$paths);
}

sub split_path ($self,$path) {
    return ( sort ( ref $path ? @$path : split ( /[\s\n]+/ , $path ) ) );
}

sub _say ($self,@msg) {
    say @msg unless $self->{quiet};
    return;
}

sub _split_previous_data ($self) {
    my $files_obj = $self->grab("Files");

    my $old_split_files = Dpkg::Copyright::Grant::ByDir->new(current_dir =>  path('.'));

    my $debian_paths = {};
    foreach my $paths_str ($files_obj->fetch_all_indexes) {
        my $node = $files_obj->fetch_with_id($paths_str) ;
        my $dumper = Config::Model::DumpAsData->new;
        my $data = $dumper->dump_as_data( node => $node );

        my $grant = Dpkg::Copyright::Grant::Plain->new(
            copyright => $data->{Copyright},
            license => $data->{License}{short_name},
            license_text => $data->{License}{full_license} // '', # may be empty
            comment => $data->{Comment} // '', # may be empty
        );

        foreach my $path ($self->split_path($paths_str)) {
            if ($path =~ /\*$/) {
                # a bit of a hack since main_grant should be derived
                # from the content of the directory
                $old_split_files->get_dir($path)->set_main_grant($grant);
            }
            else {
                $old_split_files->add_grant($path, $grant);
            }
        }
    }

    return $old_split_files;
}

sub _set_license_data_for_a_path ($self, $new_data, $paths, $lic_usage_count ) {
    my $l = $new_data->{License}{short_name};

    # if full_license is not provided in datum, check global license(s)
    my $ok = 0;
    my @sub_licenses = split m![,\s]+ (?:and/or|or|and) [,\s]+!x,$l;
    my $lic_count = 0;
    my @empty_licenses = grep {
        my $text = $self->grab_value(steps => qq!License:"$_" text!, check =>'no') ;
        $ok++ if $text;
        $lic_count += $lic_usage_count->{$_} // 0 ;
        not $text; # to get list of empty licenses
    } @sub_licenses;

    my $full_license = $new_data->{License}{full_license};
    if ($ok eq scalar @sub_licenses) {
        $self->_say("Removing dummy license text from license $l for path ",$paths);
        delete $new_data->{License}{full_license};
    }
    elsif (not $full_license or $full_license =~ /Please fill license/) {
        my $filler = "Please fill license $l from header of $paths";
        if ($lic_count > 1 ) {
            for my $lic (@empty_licenses) {
                $self->_say("Adding dummy global license text for license $lic for path ",$paths);
                $self->load(qq!License:"$lic" text="$filler"!)
            };
        }
        else {
            $self->_say("Adding dummy license text for license $l for path ",$paths);
            $new_data->{License}{full_license} = $filler;
        }
    }
    return;
}

sub load_new_data_in_config_tree ($self, $data) {
    # count license usage to decide whether to add a global license
    # or a single entry. Skip unknown or public-domain licenses
    my %lic_usage_count;
    map { $lic_usage_count{$_}++ if $_ and not /unknown|public/i}
        map {split /\s+or\s+/, $_->{License}{short_name} // ''; }
        $data->@* ;

    # load new data in config tree
    foreach my $datum ($data->@*) {
        my $paths = delete $datum->{Files};
        $self->_set_license_data_for_a_path ( $datum, $paths, \%lic_usage_count );

        eval {
            $self->grab("Files")
                ->fetch_with_id($paths)
                ->load_data( data => $datum, check =>'yes' );
            1;
        } or do {
            die "Error: Data extracted from source file is corrupted:\n$@"
                ."This usually mean that cme or licensecheck (or both) "
                ."have a bug. You may work-around this issue by adding an override entry in "
                ."fill.copyright.blanks file. See "
                ."https://github.com/dod38fr/config-model/wiki/Updating-debian-copyright-file-with-cme "
                ."for instructions. Last but not least, please file a bug against libconfig-model-dpkg-perl.\n";
        };
    }
    return;
}

sub prune_empty_licenses ($self) {
    my $lic_obj = $self->fetch_element('License');
    foreach my $l ($lic_obj->fetch_all_indexes) {
        $lic_obj->delete($l)
            unless $lic_obj->fetch_with_id($l)->fetch_element_value('text');
    }
    return;
}

# $args{in} can contains the output of licensecheck (for tests)
sub update ($self, %args) {

    $self->{quiet} = $args{quiet} // 0;

    my $old_grants_by_dir = $self->_split_previous_data;

    my $new_grants_by_dir = scan_files( %args, get_grants => 1 );

    $new_grants_by_dir->merge_old_dir($old_grants_by_dir);

    my $current_dir = $args{from_dir} || path('.');

    # delete existing data in config tree. A more subtle solution to track which entry is
    # deleted or altered (when individual files are removed, renamed) is too complex.
    $self->grab("Files")->clear;

    my @data = $new_grants_by_dir->debian_full_data;
    $self->load_new_data_in_config_tree(\@data);

    # delete license without text
    $self->prune_empty_licenses;

    $self->_apply_fix_scan_copyright_file($current_dir) ;

    # normalize again after all the modifications
    $self->load("Files:.sort");

    $self->fetch_element("License")-> prune_unused_licenses;

    $self->instance->clear_changes; # too many changes to show users
    $self->notify_change(note => "updated copyright from source file"); # force a save

    my @msgs = (
        "Please follow the instructions given in ".__PACKAGE__." man page,",
        "section \"Tweak results\" if some license and copyright entries are wrong.",
        "Other information, like license text, can be added directly in debian/copyright file ",
        "and will be merged correctly next time this command is run.",
        "See also https://github.com/dod38fr/config-model/wiki/Updating-debian-copyright-file-with-cme"
    );

    return @msgs;
}

sub _apply_fix_scan_copyright_file ($self, $current_dir) {
    # read a debian/fix.scanned.copyright file to patch scanned data
    my $debian = $current_dir->child('debian'); # may be missing in test environment
    if ($debian->is_dir) {
        my @fixes = $current_dir->child('debian')->children(qr/fix\.scanned\.copyright$/x);
        $self->_say( "Note: loading @fixes fixes from copyright fix files") if @fixes;
        foreach my $fix ( @fixes) {
            my @l = grep { /[^\s]/ } grep { ! m!^(?:#|//)!  } $fix->lines_utf8;
            eval {
                $self->load( steps => join(' ',@l) , caller_is_root => 1 );
                1;
            } or do {
                my $e = $@;
                my $msg = $e->full_message;
                Config::Model::Exception::User->throw(
                    object => $self,
                    message => "Error while applying fix.scanned.copyright file:\n\t".$msg
                );
            }
        }
    }
    return;
}

1;

__END__

=encoding utf8

=head1 NAME

Config::Model::Dpkg::Copyright - Fill the File sections of debian/copyright file

=head1 SYNOPSIS

 # this modules is used by cme when invoked with this command
 $ cme update dpkg-copyright

=head1 DESCRIPTION

This commands helps with the tedious task of maintening
C<debian/copyright> file. When you package a new release of a
software, you can run C<cme update dpkg-copyright> to update the
content of the copyright file.

This command scans current package directory to extract copyright and
license information and store them in the Files sections of
debian/copyright file.

In debian package directory:

* run 'cme update dpkg-copyright' or 'cme update dpkg'
* check the result with your favorite VCS diff tool. (you do use
  a VCS for your package files, do you ?)

Note: this command is experimental.

=head1 Debian copyright data

The C<Files: debian/*> section from C<debian/copyright> is often the
only place containing copyright information for the files created by
Debian maintainer. So all C<Files> entries beginning with C<debian/>
are preserved during update. However, entries not matching an existing
file or directory are removed.


=head1 Tweak results

Results can be tweaked either by:

=over

=item *

Changing the list of files to scan or ignore. (By default, licensecheck will decide
which file to scan or not.)

=item *

Specifying information for individual files

=item *

Tweaking the copyright entries created by grouping and coaslescing
information.

=back

The first 2 ways are described in
L<Dpkg::Copyright::Scanner/"Selecting or ignoring files to scan">
and L<Dpkg::Copyright::Scanner/"Filling the blanks">.

The last way is described below:

=head2 Tweak copyright entries

Since the extraction of copyright information from source file is
based on comments, the result is sometimes lackluster. Your may
specify instruction to alter or set specific copyright entries in
C<debian/fix.scanned.copyright> file
(or C<< debian/<source-package>.fix.scanned.copyright >>).

L<cme> stores the copyright information in a tree. Entries in
C<fix.scanned.copyright> provide instructions for traversing the cme tree
and modifying entries. You can have a view of C<debian/copyright> file
translated in this syntax by running C<cme dump --format cml
dpkg-copyright>.  Each line of this file will be handled by
L<Config::Model::Loader> to modify copyright information; the full
syntax is documented in L<Config::Model::Loader/"load string syntax"> section.

=head2 Example

If the extracted copyright contains:

 Files: *
 Copyright: 2014-2015, Adam Kennedy <adamk@cpan.org> "foobar
 License: Artistic or GPL-1+

You may add this line in C<debian/fix.scanned.copyright> file:

 ! Files:"*" Copyright=~s/\s*".*//

This way, the copyright information will be updated from the file
content but the extra C<"foobar> will always be removed during
updates.

Comments are accepted in Perl and C++ style from the beginning of the line.
Lines breaks are ignored.

Here's another more complex example:

 // added a global license, MIT license text is filled by Config::Model
 ! copyright License:MIT

 # don't forget '!' to go back to tree root
 ! copyright Files:"pan/general/map-vector.h" Copyright="2001,Andrei Alexandrescu"
   License short_name=MIT
 # delete license text since short_name points to global  MIT license
   full_license~

 # use a loop there vvvvvv to clean up that vvvvvvvvvvvvvvvvvvvvvvv in all copyrights
 ! copyright   Files:~/.*/     Copyright=~s/all\s*rights\s*reserved//i

 # defeat spammer by replacing all '@' in emails of 3rdparty files
 # the operation :~/^3party/ loops over all Files entries that match ^3rdparty
 # and modify the copyright entry with a Perl substitution
 ! Files:~/^3rdparty/ Copyright=~s/@/(at)/

Sometimes, you might want to find an entry that spans multiple lines.
You can do this by double quoting the whole value:

 ! Files:"uulib/crc32.h
 uulib/uustring.h" Copyright="2019 John Doe"

=head1 Under the hood

This section explains how cme merges the information from the existing
C<debian/copyright> file (the "old" information) with the information
extracted by I<licensecheck> (the "new" information):

=over

=item *

The old and new information are compared in the form of file lists:

=over

=item *

New file entries are kept as is in the new list.

=item *

When a file entry is found in both old and new lists, the new © and
license short names are checked. If they are unknown, the information
from the old list is copied in the new list.

=item *

Old files entries not found in the new list are deleted.

=back

=item *

File entries are coalesced in the new list to reduce redundancies (this mechanism is explained in this L<blog|https://ddumont.wordpress.com/2015/04/05/improving-creation-of-debian-copyright-file>)

=item *

License entries are created, either attached to Files specification or as global licenses. License text is added for known license (actually known by L<Software::License>)

=item *

Directories (path ending with C</*>) from old list then checked:

=over

=item *

Directory is found in the new list: the old information is clobbered by new information.

=item *

Directory not found in new list but exists: the old information is copied in the new list.

=item *

Directory is not found: the old information is discarded

=back

=item *

Files entries are sorted and the new C<debian/copyright> is generated.

=back

=head1 update

Updates data using the output
L<Dpkg::Copyright::Scanner/scan_files">.

Parameters in C<%args>:

=over

=item quiet

set to 1 to suppress progress messages. Should be used only in tests.

=back

Otherwise, C<%args> is passed to C<scan_files>

=head1 AUTHOR

Dominique Dumont <dod@debian.org>

=cut