1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
|
#!/usr/bin/env perl
#
# Traverse the directory tree and extract author information from .m
# files. Put author information in the file 'AUTHORS'.
# Understands variations of these entries:
#
# ## Copyright (C) year(s) name1 <email>
# ## Copyright (C) year(s) name2 <email>
# ## Author: name3 <email>
# ## Author: name4 <email>
#
# Albert Danial Dec 15 2001
# Nov 30 2002 - Attribute to 'Anonymous' any file which has
# licensing terms (ie, granted to public domain)
# but no author. Show code and comment line
# counts.
# Apr 25 2005 - Added -s option, better logic to identify
# public domain files.
# Dirk Eddelbuettel 07 Jul 2004 also look at .cc files and // comments
#
# Usage: admin/get_authors [-s <# lines>]
# Scans .m and .cc files and extracts author and copyright
# information. Creates the AUTHORS file.
# Option: -s <n> Set the size of a "short" program
# to <n> non-commented lines.
# Copyrights aren't needed for short
# programs. Default <n> is 5.
#
use warnings;
use strict;
use vars qw($opt_s );
use Cwd;
use Getopt::Std;
use File::Find;
getopts('s:');
$opt_s = 5 unless $opt_s; # set the default value
my $location = getcwd;
my $PATH = "";
if ($location =~ m{^(.*)/admin$}) {
chdir "..";
$PATH = "$1/";
}
my @files = (); # to contain relative path names of each .m file
find(\&wanted, "."); # start here & descend recursively; populate @files
my %file_data = ();
classify_files(\@files, # in
\%file_data); # out file_data{file}{'name'} = [ names ]
# {'year'} = [ years ]
# {'mail'} = [ email addrs ]
# {'cpyr'} = [ 'C' or ' ' ]
# {'n_code'} = # lines code
# {'n_comment'} = # lines of
# comments
# traverse file_data and extract
# - files without copyrights
# - files without authors
# - files grouped by author
# - author to email map
# - number of lines of code, number of lines of comments
my @unattributed_files = ();
my @uncopyrighted_files = ();
my @short_files = ();
my @public_domain = ();
my %email = (); # email{ author name } = email address
my %files = (); # files{ author name } = [ list of files ]
foreach my $f (sort @files) {
# each file can have multiple authors, loop over each author
if (!defined @{$file_data{$f}{name}}) {
if ($file_data{$f}{n_code} > $opt_s) { # not a short program
push @unattributed_files, $f;
} else {
push @short_files , $f;
}
next;
}
my $copyrighted = 0;
for (my $i = 0; $i < scalar @{$file_data{$f}{name}}; $i++) {
if (defined $file_data{$f}{mail}[$i]) {
$email{ $file_data{$f}{name}[$i] } = $file_data{$f}{mail}[$i];
}
if (defined $file_data{$f}{cpyr}[$i]) {
$copyrighted = 1;
push @public_domain, $f if $file_data{$f}{cpyr}[$i] eq "P";
}
$files{ $file_data{$f}{name}[$i] }{$f} = 1;
}
next if $copyrighted;
if ($file_data{$f}{n_code} > $opt_s) { # then it is not a short program
push @uncopyrighted_files, $f;
} else {
push @short_files , $f;
}
}
printf "%3d uncopyrighted files:%s lines of code/lines of comments\n",
scalar @uncopyrighted_files, ' ' x 22;
foreach my $f (sort @uncopyrighted_files) {
printf " %-50s %3d/%3d\n",
$f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}
printf "\n%3d unattributed files:%s lines of code/lines of comments\n",
scalar @unattributed_files, ' ' x 23;
foreach my $f (sort @unattributed_files) {
printf " %-50s %3d/%3d\n",
$f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}
printf "\n%3d public domain files:%s lines of code/lines of comments\n",
scalar @public_domain, ' ' x 22;
foreach my $f (sort @public_domain) {
printf " %-50s %3d/%3d\n",
$f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}
printf "\n%3d uncopyrighted short (<= %2d lines) files:%s lines of code/lines of comments\n",
scalar @short_files, $opt_s, ' ' x 2;
foreach my $f (sort @short_files) {
printf " %-50s %3d/%3d\n",
$f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}
my $Auth_file = "AUTHORS";
open(OUT, ">$Auth_file") or die "Cannot write $Auth_file: $!\n";
printf "%3d authors:\n", scalar keys %files;
foreach my $n (sort keys %files) {
printf "%-28s", $n;
printf OUT "%-28s", $n;
print $email{$n} if defined $email{$n};
print OUT $email{$n} if defined $email{$n};
print "\n";
print OUT "\n";
my $i = 0;
foreach my $f (sort keys %{$files{$n}}) {
printf "%3d. %-50s %3d/%3d\n",
++$i, $f, $file_data{$f}{n_code}, $file_data{$f}{n_comment};
}
}
close OUT;
warn "Wrote ${PATH}${Auth_file}\n";
# # # # # # #
#sub by_last_name { # {{{1 for sorting on names
# (my $A = $a) =~ s/.*?(\w+)$/$1/;
# (my $B = $b) =~ s/.*?(\w+)$/$1/;
# return lc($A) cmp lc($B);
#} # 1}}}
# # # # # # #
sub classify_files { # {{{1
my ($ra_files, # in, list of files
$rhha_data, # out, data{file}{ name | year | cpyr } = [entries]
) = @_;
warn "Found ", scalar grep (/\.m$/, @{$ra_files}), " .m files; ",
scalar grep (/\.cc$/, @{$ra_files}), " .cc files",
"\n";
foreach my $f (@{$ra_files}) {
open(IN, $f) or die "Cannot read $f: $!\n";
my $found_copyright = 0;
my $found_author = 0;
$rhha_data->{$f}{n_code} = 0;
$rhha_data->{$f}{n_comment} = 0;
while (<IN>) {
# find the copyright line and extract author info from it
if (/^[#%\/]+/) { # a comment
++$rhha_data->{$f}{n_comment};
} elsif (!/^\s*$/) { # not a blank line
++$rhha_data->{$f}{n_code};
}
s/all\s+rights\s+reserved\.?//i;
s/\bby\s+//i;
if (/^\s*[#%\/\*]* # one or more leading comment markers
\s*copyright # Copyright
\s*(\(c\))? # (c) - optional $1
\s*(\d[,\- 0-9]+) # Year (or years) $2
\s+(\w.*?) # name $3
\s*(<.*>)? # email - optional $4
\s*$/ix) {
$found_copyright = 1;
$found_author = 1;
my $year = $2;
my $name = $3;
$name = "John W. Eaton" if $name eq "jwe";
$name =~ s/\.\s*$//; # strip trailing period
my $email = "" || $4;
$name =~ s/^\s+//; # strip leading whitespace
$name =~ s/\s+$//; # strip trailing whitespace
push @{$rhha_data->{$f}{name}}, $name;
push @{$rhha_data->{$f}{year}}, $year;
push @{$rhha_data->{$f}{mail}}, $email;
push @{$rhha_data->{$f}{cpyr}}, 'C';
# don't exit w/last because there could be multiple copyrights
} elsif (
/^\s*[#%\/\*]* # one or more leading comment markers
\s*author\s*:? # Author
\s+(\w.*?) # name $1
\s*(<.*>)? # email - optional $2
\s*$/ix) {
my $name = $1;
$name =~ s/\.\s*$//; # strip trailing period
$name = "John W. Eaton" if $name eq "jwe";
my $email = "" || $2;
push @{$rhha_data->{$f}{name}}, $name;
push @{$rhha_data->{$f}{year}}, "";
push @{$rhha_data->{$f}{mail}}, $email;
$found_author = 1;
} elsif (
/^\s*[#%\/\*]* # one or more leading comment markers
.*? # some leading text
\b(grant(ed)?|place(d)?|give(n)?|is)(\s+this)?
(\s+(file|program|script|code|algorithm))?(\s+(in|to))?
(\s+the)?
\s+public\s+domain
/ix) {
push @{$rhha_data->{$f}{cpyr}}, 'P';
$found_copyright = 1;
}
}
if ($found_copyright and !$found_author) {
push @{$rhha_data->{$f}{name}}, "Anonymous";
push @{$rhha_data->{$f}{year}}, "";
push @{$rhha_data->{$f}{mail}}, "";
push @{$rhha_data->{$f}{cpyr}}, 'C';
}
close IN;
}
} # 1}}}
# # # # # # #
sub wanted { # {{{1 populates global array @files
return unless -f and /\.(m|cc)$/; # only want .m files (for now)
push @files, "$File::Find::name";
} # 1}}}
|