File: get_authors

package info (click to toggle)
octave2.1-forge 2006.03.17%2Bdfsg1-7
  • links: PTS, VCS
  • area: main
  • in suites: lenny
  • size: 16,672 kB
  • ctags: 6,047
  • sloc: cpp: 49,610; ansic: 14,035; perl: 2,789; sh: 2,087; makefile: 1,560; lex: 1,219; tcl: 799; fortran: 422; objc: 202
file content (241 lines) | stat: -rwxr-xr-x 9,575 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
#!/usr/bin/env perl
#
# Traverse the directory tree and extract author information from .m
# files.  Put author information in the file 'AUTHORS'.
# Understands variations of these entries:
#
#   ## Copyright (C)  year(s)  name1   <email>
#   ## Copyright (C)  year(s)  name2   <email>
#   ## Author:  name3  <email>
#   ## Author:  name4  <email>
#
# Albert Danial Dec 15 2001
#               Nov 30 2002 - Attribute to 'Anonymous' any file which has 
#                             licensing terms (ie, granted to public domain)
#                             but no author.  Show code and comment line
#                             counts.
#               Apr 25 2005 - Added -s option, better logic to identify
#                             public domain files.
# Dirk Eddelbuettel 07 Jul 2004  also look at .cc files and // comments
#                             
# Usage:  admin/get_authors  [-s <# lines>]                             
#            Scans .m and .cc files and extracts author and copyright
#            information.  Creates the AUTHORS file.
#                       Option:  -s  <n>    Set the size of a "short" program
#                                           to <n> non-commented lines.  
#                                           Copyrights aren't needed for short 
#                                           programs.  Default <n> is 5.
#                             
use warnings;
use strict;
use vars qw($opt_s );
use Cwd;
use Getopt::Std;
use File::Find;

getopts('s:');
$opt_s = 5 unless $opt_s;  # set the default value
my $location = getcwd;
my $PATH     = "";
if ($location =~ m{^(.*)/admin$}) {
    chdir "..";
    $PATH = "$1/";
}

my @files = ();       # to contain relative path names of each .m file
find(\&wanted, ".");  # start here & descend recursively; populate @files

my %file_data = ();
classify_files(\@files,       # in
               \%file_data);  # out file_data{file}{'name'} = [ names ]
                              #                    {'year'} = [ years ]
                              #                    {'mail'} = [ email addrs ]
                              #                    {'cpyr'} = [ 'C' or ' '  ]
                              #                    {'n_code'}    = # lines code
                              #                    {'n_comment'} = # lines of
                              #                                    comments

# traverse file_data and extract
#   - files without copyrights
#   - files without authors
#   - files grouped by author
#   - author to email map
#   - number of lines of code, number of lines of comments
my @unattributed_files  = ();
my @uncopyrighted_files = ();
my @short_files         = ();
my @public_domain       = ();
my %email               = ();   # email{ author name } = email address
my %files               = ();   # files{ author name } = [ list of files ]
foreach my $f (sort @files) {
    # each file can have multiple authors, loop over each author
    if (!defined @{$file_data{$f}{name}}) {
	if ($file_data{$f}{n_code} > $opt_s) { # not a short program
           push @unattributed_files, $f;
        } else {
           push @short_files       , $f;
        }
        next;
    }
    my $copyrighted = 0;
    for (my $i = 0; $i < scalar @{$file_data{$f}{name}}; $i++) {
        if (defined $file_data{$f}{mail}[$i]) {
            $email{ $file_data{$f}{name}[$i] } = $file_data{$f}{mail}[$i];
        }
        if (defined $file_data{$f}{cpyr}[$i]) {
            $copyrighted = 1;
            push @public_domain, $f if $file_data{$f}{cpyr}[$i] eq "P";
        }
        $files{ $file_data{$f}{name}[$i] }{$f} = 1;
    }
    next if $copyrighted;
    if ($file_data{$f}{n_code} > $opt_s) { # then it is not a short program
        push @uncopyrighted_files, $f;
    } else {
        push @short_files        , $f;
    }
}

printf "%3d uncopyrighted files:%s lines of code/lines of comments\n", 
       scalar @uncopyrighted_files, ' ' x 22;
foreach my $f (sort @uncopyrighted_files) {
    printf "      %-50s %3d/%3d\n", 
           $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}

printf "\n%3d unattributed files:%s lines of code/lines of comments\n", 
       scalar @unattributed_files, ' ' x 23;
foreach my $f (sort @unattributed_files) {
    printf "      %-50s %3d/%3d\n",
           $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}

printf "\n%3d public domain files:%s lines of code/lines of comments\n", 
       scalar @public_domain, ' ' x 22;
foreach my $f (sort @public_domain) {
    printf "      %-50s %3d/%3d\n", 
           $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}

printf "\n%3d uncopyrighted short (<= %2d lines) files:%s lines of code/lines of comments\n", 
       scalar @short_files, $opt_s, ' ' x 2;
foreach my $f (sort @short_files) {
    printf "      %-50s %3d/%3d\n", 
           $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}

my $Auth_file = "AUTHORS";
open(OUT, ">$Auth_file") or die "Cannot write $Auth_file:  $!\n";
printf "%3d authors:\n", scalar keys %files;
foreach my $n (sort keys %files) {
    printf     "%-28s", $n;
    printf OUT "%-28s", $n;
    print     $email{$n} if defined $email{$n};
    print OUT $email{$n} if defined $email{$n};
    print     "\n";
    print OUT "\n";
    my $i = 0;
    foreach my $f (sort keys %{$files{$n}}) {
        printf "%3d. %-50s  %3d/%3d\n",
               ++$i, $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
    }
}
close OUT;
warn "Wrote ${PATH}${Auth_file}\n";

# # # # # # # 

#sub by_last_name { # {{{1 for sorting on names
#    (my $A = $a) =~ s/.*?(\w+)$/$1/;
#    (my $B = $b) =~ s/.*?(\w+)$/$1/;
#    return lc($A) cmp lc($B);
#} # 1}}}

# # # # # # # 

sub classify_files { # {{{1
    my ($ra_files,  # in, list of files
        $rhha_data, # out,  data{file}{ name | year | cpyr } = [entries]
       ) = @_;
    warn "Found ", scalar grep (/\.m$/,  @{$ra_files}), " .m files; ",
                   scalar grep (/\.cc$/, @{$ra_files}), " .cc files",
                   "\n";
    foreach my $f (@{$ra_files}) {
        open(IN, $f) or die "Cannot read $f: $!\n";
        my $found_copyright = 0;
        my $found_author    = 0;
        $rhha_data->{$f}{n_code}    = 0;
        $rhha_data->{$f}{n_comment} = 0;
        while (<IN>) {
            # find the copyright line and extract author info from it
            if (/^[#%\/]+/) {           # a comment
                ++$rhha_data->{$f}{n_comment};
            } elsif (!/^\s*$/) {        # not a blank line
                ++$rhha_data->{$f}{n_code};
            }
            s/all\s+rights\s+reserved\.?//i;
            s/\bby\s+//i;

            if (/^\s*[#%\/\*]*          #  one or more leading comment markers
                  \s*copyright          #  Copyright
                  \s*(\(c\))?           #  (c)    - optional           $1
                  \s*(\d[,\- 0-9]+)     #  Year (or years)             $2
                  \s+(\w.*?)            #  name                        $3
                  \s*(<.*>)?            #  email  - optional           $4
                  \s*$/ix) {
                $found_copyright = 1;
                $found_author    = 1;
                my $year  = $2;
                my $name  = $3;
                   $name  = "John W. Eaton" if $name eq "jwe";
                   $name  =~ s/\.\s*$//; # strip trailing period
                my $email = "" || $4;
                $name =~ s/^\s+//;  # strip leading  whitespace
                $name =~ s/\s+$//;  # strip trailing whitespace
                push @{$rhha_data->{$f}{name}}, $name;
                push @{$rhha_data->{$f}{year}}, $year;
                push @{$rhha_data->{$f}{mail}}, $email;
                push @{$rhha_data->{$f}{cpyr}}, 'C';
                # don't exit w/last because there could be multiple copyrights
            } elsif (
                /^\s*[#%\/\*]*            # one or more leading comment markers
                    \s*author\s*:?        #  Author    
                    \s+(\w.*?)            #  name                        $1
                    \s*(<.*>)?            #  email  - optional           $2
                    \s*$/ix) {
                my $name  = $1;
                   $name  =~ s/\.\s*$//; # strip trailing period
                   $name  = "John W. Eaton" if $name eq "jwe";
                my $email = "" || $2;
                push @{$rhha_data->{$f}{name}}, $name;
                push @{$rhha_data->{$f}{year}}, "";
                push @{$rhha_data->{$f}{mail}}, $email;
                $found_author = 1;
            } elsif (
                /^\s*[#%\/\*]*            # one or more leading comment markers
                  .*?                     #  some leading text
                  \b(grant(ed)?|place(d)?|give(n)?|is)(\s+this)?
                  (\s+(file|program|script|code|algorithm))?(\s+(in|to))?
                  (\s+the)?
                  \s+public\s+domain
                /ix) {
                push @{$rhha_data->{$f}{cpyr}}, 'P';
                $found_copyright = 1;
            }
        }
        if ($found_copyright and !$found_author) {
            push @{$rhha_data->{$f}{name}}, "Anonymous";
            push @{$rhha_data->{$f}{year}}, "";
            push @{$rhha_data->{$f}{mail}}, "";
            push @{$rhha_data->{$f}{cpyr}}, 'C';
        }
        close IN;
    }
} # 1}}}

# # # # # # # 

sub wanted { # {{{1 populates global array @files
    return unless -f and /\.(m|cc)$/;  # only want .m files (for now)
    push @files, "$File::Find::name";
} # 1}}}