#! /usr/bin/perl -w

# Copyright Bill Allombert <ballombe@debian.org> 2001.
# Modifications copyright 2002, Julian Gilbey <jdg@debian.org>

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use strict;
use 5.6.0;  # our() commands
use Cwd;
use Getopt::Long;

use lib '/usr/share/devscripts';
use Devscripts::Set;
use Devscripts::Packages;
use Devscripts::Symlinks;
use Devscripts::PackageDeps;

# Function prototypes
sub process_features ($$);
sub getusedfiles (@);
sub filterfiles (@);

# Global options
our %opts;

# libvfork is taken from dpkg-genbuilddeps by Ben Collins <bcollins@debian.org>
our $vforklib = "/usr/lib/devscripts/libvfork.so.0";

# A list of files that do not belong to a Debian package but are known
# to never create a dependency
our @known_files = ($vforklib, "/etc/ld.so.cache", "/etc/dpkg/shlibs.default");

# This will be given information about features later on
our %feature;

(my $progname=$0) =~ s,.*/,,;

sub usage ()
{
    my @ed=("disabled","enabled");
    print <<"EOF";
Usage:
  $progname [options] <command>
Run <command> and then output packages used to do this.
Options:
  Which packages to report:
    -a, --all              Report all packages used to run <command>
    -b, --build-depends    Do not report build-essential or essential packages
                           used or any of their (direct or indirect)
                           dependencies
    -d, --ignore-dev-deps  Do not show packages used which are direct
                           dependencies of -dev packages used
    -m, --min-deps         Output a minimal set of packages needed, taking
                           into account direct dependencies
    -m implies -d and both imply -b; -a cancels any earlier -b, -d, -m options

  -C, --C-locale           Run command with C locale
  --no-C-locale            Don\'t change locale
  -l, --list-files         Report list of files used in each package
  --no-list-files          Do not report list of files used in each package
  -o, --output=FILE        Output diagnostic to FILE instead of stdout
  -O, --strace-output=FILE Write strace output to FILE when tracing <command>
                           instead of a temporary file
  -I, --strace-input=FILE  Get strace output from FILE instead of tracing
                           <command>; strace must be run with -f -q for this
                           to work
  -f, --features=LIST      Enable or disabled features given in
                           comma-separated LIST as follows:
    +feature or feature      enable feature
    -feature                 disable feature
    Known features:          (default setting)
      warn-local             ($ed[$feature{'warn-local'}]) warn if files in /usr/local are used
      discard-check-version  ($ed[$feature{'discard-check-version'}]) discard execve with only
                             --version argument; this works around some
                             configure scripts that check for binaries they
                             don\'t use
      trace-local            ($ed[$feature{'trace-local'}]) also try to identify file
                             accesses in /usr/local
      catch-alternatives     ($ed[$feature{'discard-alternatives'}]) catch access to alternatives

  -h, --help                 Display this help and exit
  -v, --version              Output version information and exit
EOF
}


sub version ()
{
    print <<'EOF';
This is $progname, from the Debian devscripts package, version ###VERSION###
Copyright Bill Allombert <ballombe@debian.org> 2001.
Modifications copyright 2002, Julian Gilbey <jdg@debian.org>
This program comes with ABSOLUTELY NO WARRANTY.
You are free to redistribute this code under the terms of the
GNU General Public License, version 2 or later.
EOF
}


# Main program

# Features:
# This are heuristics used to speed up the process.
# Since thay may be considered as "kludges" or worse "bugs"
# by some, they can be deactivated
# 0 disabled by default, 1 enabled by default.
%feature=(
	  "warn-local"=>1, "discard-check-version"=>1,
	  "trace-local"=>0, "catch-alternatives"=>1
	  );

# First process configuration file options, then check for command-line
# options.  This is pretty much boilerplate.

my @config_files = ('/etc/devscripts.conf', '~/.devscripts');
my %config_vars = (
		   'DPKG_DEPCHECK_OPTIONS' => '',
		   );

my $shell_cmd;
# Set defaults
foreach my $var (keys %config_vars) {
    $shell_cmd .= qq[$var="$config_vars{$var}";\n];
}
$shell_cmd .= 'for file in ' . join(" ",@config_files) . "; do\n";
$shell_cmd .= '[ -f $file ] && . $file; done;' . "\n";
# Read back values
foreach my $var (keys %config_vars) { $shell_cmd .= "echo \$$var;\n" }
my $shell_out = `/bin/sh -c '$shell_cmd'`;
@config_vars{keys %config_vars} = split /\n/, $shell_out, -1;

if ($config_vars{'DPKG_DEPCHECK_OPTIONS'} ne '') {
    unshift @ARGV, split(' ', $config_vars{'DPKG_DEPCHECK_OPTIONS'});
}

# Default option:
$opts{"pkgs"} = 'all';

Getopt::Long::Configure('bundling','require_order');
my $opts_ret =
    GetOptions("h|help" => sub { usage(); exit; },
	       "v|version" => sub { version(); exit; },
	       "a|all" => sub { $opts{"pkgs"}='all'; },
	       "b|build-depends" => sub { $opts{"pkgs"}='build'; },
	       "d|ignore-dev-deps" => sub { $opts{"pkgs"}='dev'; },
	       "m|min-deps" => sub { $opts{"pkgs"}='min'; },
	       "C|C-locale" => \$opts{"C"},
	       "no-C-locale|noC-locale" => sub { $opts{"C"}=0; },
	       "l|list-files" => \$opts{"l"},
	       "no-list-files|nolist-files" => sub { $opts{"l"}=0; },
	       "o|output=s" => \$opts{"o"},
	       "O|strace-output=s" => \$opts{"strace-output"},
	       "I|strace-input=s" => \$opts{"strace-input"},
	       "f|features=s" => \&process_features,
	       );

if (! $opts_ret) {
    die "$progname: I didn't recognise some command-line option there;\nplease fix and try again.  (Use --help for more info.)\n";
}

if ($opts{"pkgs"} ne 'all')
{
    # We don't initialise the packages database before doing this check,
    # as that takes quite some time
    unless (system('dpkg -L build-essential >/dev/null 2>&1') >> 8 == 0) {
	die "You must have the build-essential package installed or use the --all option\n";
    }
}


@ARGV > 0 or $opts{"strace-input"} or
    die "You need to specify a command!  Run $progname --help for more info\n";

# Run the command and trace it to see what's going on
my @usedfiles = getusedfiles(@ARGV);

if ($opts{"o"}) {
    $opts{"o"} =~ s%^(\s)%./$1%;
    open STDOUT,"> $opts{'o'}" or
	warn "Cannot open $opts{'o'} for writing: $!\nTrying to use stdout instead\n";
} else {
    # Visual space
    print "\n\n";
    print '-' x 70, "\n";
}

# Get each file once only, and drop any we are not interested in.
# Also, expand all symlinks so we get the real file accessed.
@usedfiles = filterfiles(@usedfiles);

# Forget about the few files we are expecting to see but can ignore
@usedfiles = SetMinus(\@usedfiles, \@known_files);

# For a message at the end
my $number_files_used = scalar @usedfiles;

# Initialise the packages database unless --all is given
my $packagedeps;

# Exclude essential and build-essential packages?
if ($opts{"pkgs"} ne 'all')
{
    $packagedeps = new Devscripts::PackageDeps ('/var/lib/dpkg/status');
    my @essential = PackagesMatch('^Essential: yes$');
    my @essential_packages =
	$packagedeps->full_dependencies('build-essential', @essential);
    my @essential_files = PackagesToFiles(@essential_packages);
    @usedfiles = SetMinus(\@usedfiles,\@essential_files);
}

# Now let's find out which packages are used...
my @packages = FilesToPackages(@usedfiles);

# ... and remove their files from the filelist
if ($opts{"l"}) {
    # Have to do it slowly :-(
    print "Files used in each of the needed packages:\n";
    foreach my $pkg (sort @packages) {
	my @pkgfiles = PackagesToFiles($pkg);
	print "Files used in package $pkg:\n  ",
	    join("\n  ", SetInter(\@usedfiles, \@pkgfiles)), "\n";
	@usedfiles = SetMinus(\@usedfiles, \@pkgfiles);
    }
    print "\n";
} else {
    my @pkgfiles = PackagesToFiles(@packages);
    @usedfiles = SetMinus(\@usedfiles, \@pkgfiles);
}

if ($opts{"pkgs"} eq 'dev') {
    # We also remove any direct dependencies of '-dev' packages
    my %pkgs;
    @pkgs{@packages} = (1) x @packages;

    foreach my $pkg (@packages) {
	next unless $pkg =~ /-dev$/;
	my @deps = $packagedeps->dependencies($pkg);
	foreach my $dep (@deps) {
	    $dep = $$dep[0] if ref $dep;
	    delete $pkgs{$dep} if exists $pkgs{$dep};
	}
    }

    @packages = keys %pkgs;
}
elsif ($opts{"pkgs"} eq 'min') {
    # Do a mindep job on the package list
    @packages = $packagedeps->min_dependencies(@packages);
}

print "Summary: $number_files_used files considered.\n" if $opts{"l"};
if (@usedfiles) {
    warn "The following files did not appear to belong to any package:\n";
    warn join("\n", @usedfiles) . "\n";
}

print "Packages ", ($opts{"pkgs"} eq 'all') ? "used" : "needed", ":\n";
print join("\n", @packages), "\n";

exit 0;


### Subroutines

# This sub is handed two arguments: f or feature, and the setting

sub process_features ($$)
{
    foreach (split(',', $_[1])) {
	my $state=1;
	m/^-/ and $state=0;
	s/^[-+]//;
	if (exists $feature{$_}) {
	    $feature{$_}=$state;
	} else {
	    die("Unknown feature $_\n");
	}
    }
}


# Get used files.  This runs the requested command (given as parameters
# to this sub) under strace and then parses the output, returning a list
# of all absolute filenames successfully opened or execve'd.

sub getusedfiles (@)
{
    my $file;
    if ($opts{"strace-input"}) {
	$file=$opts{"strace-input"};
    }
    else {
	my $old_preload = $ENV{'LD_PRELOAD'} || undef;
	my $old_locale = $ENV{'LC_ALL'} || undef;
	my $trace_preload = defined $old_preload ?
	    "$old_preload $vforklib" : $vforklib;
	$file = $opts{"strace-output"} || `tempfile -p depcheck`;
	chomp $file;
	$file =~ s%^(\s)%./$1%;
	my @strace_cmd=('strace', '-e', 'trace=open,execve',  '-f', '-F',
			'-q', '-o', $file, @_);
	$ENV{'LD_PRELOAD'} = $trace_preload;
	$ENV{'LC_ALL'}="C" if $opts{"C"};
	system(@strace_cmd);
	$? >> 8 == 0 or
	    die "Running strace failed (command line:\n@strace_cmd\n";
	if (defined $old_preload) { $ENV{'LD_PRELOAD'} = $old_preload; }
	else { delete $ENV{'LD_PRELOAD'}; }
	if (defined $old_locale) { $ENV{'LC_ALL'} = $old_locale; }
	else { delete $ENV{'LC_ALL'}; }
    }
    
    my %openfiles=();
    open FILE, $file or die "Cannot open $file for reading: $!\n";
    while (<FILE>) {
	# We only consider absolute filenames
	m/^\d+\s+(\w+)\(\"(\/.*?)\",.*\) = (-?\d+)/ or next;
	my ($syscall, $filename, $status) = ($1, $2, $3);
	if ($syscall eq 'open') { next unless $status >= 0; }
	elsif ($syscall eq 'execve') { next unless $status == 0; }
	else { next; }  # unrecognised syscall
	next if $feature{"discard-check-version"} and
	    m/execve\(\"$filename\", \[\"[^\"]+\", "--version"\], /;
	# So it's a real file
	$openfiles{$filename}=1;
    }

    unlink $file unless $opts{"strace-input"} or $opts{"strace-output"};

    return keys %openfiles;
}


# Select those files which we are interested in, as determined by the
# user-specified options

sub filterfiles (@)
{
    my %files=();
    my %local_files=();
    my %alternatives=();
    my $pwd=cwd();

    foreach my $file (@_) {
	next unless -f $file;

	my @links=();
	my $prevlink='';
	foreach (ListSymlinks($file, $pwd)) {
	    if (m%^/(usr|var)/local(/|\z)%) {
		$feature{"warn-local"} and $local_files{$_} = 1;
		unless ($feature{"trace-local"}) {
		    $prevlink = $_;
		    next;
		}
	    }
	    elsif ($feature{"catch-alternatives"} and m%^/etc/alternatives/%) {
		$alternatives{"$prevlink --> " . readlink($_)} = 1
		    if $prevlink;
	    }
	    $prevlink=$_;
	    # If it's not in one of these dirs, we skip it
	    next unless m%^/(bin|etc|lib|sbin|usr|var)%;
	    push @links, $_;
	}

	@files{@links} = (1) x @links;
    }

    if (keys %local_files) {
	print "warning: files in /usr/local or /var/local used:\n",
	    join("\n", sort keys %local_files), "\n";
    }
    if (keys %alternatives) {
	print "warning: alternatives used:\n",
	    join("\n", sort keys %alternatives), "\n";
    }

    return keys %files;
}
