#! /usr/bin/perl -w

# uscan: This program looks for watchfiles and checks upstream ftp sites
# for later versions of the software.
#
# Originally written by Christoph Lameter <clameter@debian.org> (I believe)
# Modified by Julian Gilbey <jdg@debian.org>
# HTTP support added by Piotr Roszatycki <dexter@debian.org>
# Copyright 1999, Julian Gilbey
# Rewritten in Perl, Copyright 2002, Julian Gilbey
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

use 5.6.0;  # uses 'our' variables
use strict;
use Cwd;
use File::Basename;
use lib '/usr/share/devscripts';
use Devscripts::Versort;
BEGIN {
    eval { require LWP::UserAgent; };
    if ($@ =~ /^Can\'t locate LWP\/UserAgent\.pm/) {
	die "You must have the libwww-perl package installed to use this script\n";
    }
}

sub process_watchline ($$$$$);
sub process_watchfile ($$$);

(my $progname = $0) =~ s|.*/||;

my $usage = <<"EOF";
Usage: $progname [options] dir ...
  Process watchfiles in all .../debian/ subdirs of those listed
  to check for upstream releases.
Options:
    --report, --no-download
                   Only report on newer or absent versions, do not download
    --download     Report on newer and absent versions, and download (default)
    --pasv         Use PASV mode for FTP connections
    --no-pasv      Do not use PASV mode for FTP connections (default)
    --symlink      Make an orig.tar.gz symlink to downloaded file (default)
    --no-symlink   Don\'t make this symlink
    --verbose      Give verbose output
    --no-verbose   Don\'t give verbose output (default)
    --help         Show this message
    --version      Show version information
EOF

my $version = <<"EOF";
This is $progname, from the Debian devscripts package, version ###VERSION###
This code is copyright 1999 by Julian Gilbey, all rights reserved.
Original code by Christoph Lameter.
This program comes with ABSOLUTELY NO WARRANTY.
You are free to redistribute this code under the terms of the
GNU General Public License, version 2 or later.
EOF

# What is the default setting of $ENV{'FTP_PASSIVE'}?
our $passive;

# Now start by reading configuration files and then command line
# The next stuff is boilerplate

my @config_files = ('/etc/devscripts.conf', '~/.devscripts');
my %config_vars = (
		   'USCAN_DOWNLOAD' => 'yes',
		   'USCAN_PASV' => 'default',
		   'USCAN_SYMLINK' => 'yes',
		   'USCAN_VERBOSE' => 'no',
		   );

my $shell_cmd;
# Set defaults
foreach my $var (keys %config_vars) {
    $shell_cmd .= "$var='$config_vars{$var}';\n";
}
$shell_cmd .= 'for file in ' . join(" ",@config_files) . "; do\n";
$shell_cmd .= '[ -f $file ] && . $file; done;' . "\n";
# Read back values
foreach my $var (keys %config_vars) { $shell_cmd .= "echo \$$var;\n" }
my $shell_out = `/bin/sh -c '$shell_cmd'`;
@config_vars{keys %config_vars} = split /\n/, $shell_out, -1;

my $download = $config_vars{'USCAN_DOWNLOAD'} eq 'no' ? 0 : 1;
$passive = $config_vars{'USCAN_PASV'} eq 'yes' ? 1 :
    $config_vars{'USCAN_PASV'} eq 'no' ? 0 : 'default';
my $symlink = $config_vars{'USCAN_SYMLINK'} eq 'no' ? 0 : 1;
my $verbose = $config_vars{'USCAN_VERBOSE'} eq 'yes' ? 1 : 0;
my $debug = 0;

# Now read the command line arguments
while (@ARGV) {
    my $ARG = shift @ARGV;
    if ($ARG eq '--help')          { print $usage; exit 0; }
    if ($ARG eq '--version')       { print $version; exit 0; }
    if ($ARG eq '--download')      { $download=1; next; }
    if ($ARG =~ /^--no-?download$/) { $download=0; next; }
    if ($ARG eq '--report')        { $download=0; next; }
    if ($ARG eq '--pasv')          { $passive=1; next; }
    if ($ARG =~ /^--no-?pasv$/)    { $passive=0; next; }
    # Be liberal in what you accept
    if ($ARG eq '--passive')       { $passive=1; next; }
    if ($ARG =~ /^--no-?passive$/) { $passive=0; next; }
    if ($ARG eq '--symlink')       { $symlink=1; next; }
    if ($ARG =~ /^--no-?symlink$/) { $symlink=0; next; }
    if ($ARG eq '--verbose')       { $verbose=1; next; }
    if ($ARG =~ /^--no-?verbose$/) { $verbose=1; next; }
    if ($ARG eq '--debug')         { $debug=1; next; }
    # Not an option, so
    unshift @ARGV, $ARG;
    last;
}

# We'd better be verbose if we're debugging
$verbose |= $debug;

# Net::FTP understands this
if ($passive ne 'default') {
    $ENV{'FTP_PASSIVE'} = $passive;
}
elsif (exists $ENV{'FTP_PASSIVE'}) {
    $passive = $ENV{'FTP_PASSIVE'};
}
else { $passive = undef; }
# Now we can say
#   if (defined $passive) { $ENV{'FTP_PASSIVE'}=$passive; }
#   else { delete $ENV{'FTP_PASSIVE'}; }
# to restore $ENV{'FTP_PASSIVE'} to what it was at this point

my $user_agent = LWP::UserAgent->new(env_proxy => 1);

push @ARGV, '.' if ! @ARGV;

print "-- Scanning for watchfiles in @ARGV\n" if $verbose;

# Run find to find the directories.  We will handle filenames with spaces
# correctly, which makes this code a little messier than it would be
# otherwise.
my @dirs;
my $pid = open FIND, '-|';
if (! defined $pid) {
    die "Couldn't fork: $!\n";
}
if ($pid) {
    while (<FIND>) {
	chomp;
	push @dirs, $_;
    }
    close FIND;
} else {
    exec 'find', @ARGV, qw(-type d -name debian -print);
    die "Couldn't exec find: $!\n";
}

my $origdir = cwd;
for my $dir (@dirs) {
    unless (chdir $origdir) {
	warn "Warning: Couldn't chdir back to $origdir, skipping: $!\n";
	next;
    }
    $dir =~ s%/debian$%%;
    unless (chdir $dir) {
	warn "Warning: Couldn't chdir $dir, skipping: $!\n";
	next;
    }

    # Check for debian/watch file
    if (-r 'debian/watch' and -r 'debian/changelog') {
	print "-- Found watchfile in $dir/debian\n" if $verbose;
	# Figure out package info we need
	my $changelog = `dpkg-parsechangelog`;
	unless ($? == 0) {
	    warn "Warning: Problems running dpkg-parsechangelog in $dir, skipping\n";
	    next;
	}

	my ($package, $version);
	$changelog =~ /^Source: (.*?)$/m and $package=$1;
	$changelog =~ /^Version: (.*?)$/m and $version=$1;
	# Get upstream version number
	if ($version) {
	    $version =~ s/-[^-]+$//;
	    $version =~ s/^\d+://;
	}

	if ($package and $version) {
	    process_watchfile($dir, $package, $version);
	} else {
	    warn "Warning: Problems determining package name and version from\n  $dir/debian/changelog, skipping\n";
	    next;
	}
    }
    elsif (-r 'debian/watch') {
	warn "Warning: Found watchfile in $dir,\n  but couldn't find/read changelog; skipping\n";
	next;
    }
    elsif (-f 'debian/watch') {
	warn "Warning: Found watchfile in $dir,\n  but it is not readable; skipping\n";
	next;
    }
}

print "-- Scan finished\n" if $verbose;

exit 0;


# This is the heart of the code: Process a single watch item
# 
# watch_version=1: Lines have up to 5 parameters which are:
# 
# $1 = Remote site
# $2 = Directory on site
# $3 = Pattern to match, with (...) around version number part
# $4 = Last version we have (or 'debian' for the current Debian version)
# $5 = Actions to take on successful retrieval
# 
# watch_version=2: 
# 
# For ftp sites:
#   ftp://site.name/dir/path/pattern-(.*)\.tar\.gz [version [action]]
# 
# For http sites:
#   http://site.name/dir/path/pattern-(.*)\.tar\.gz [version [action]]
# or
#   http://site.name/dir/path/base pattern-(.*)\.tar\.gz [version [action]]
# 
# Lines can be prefixed with opts=<opts>.
# 
# Then the patterns matched will be checked to find the one with the
# greatest version number (as determined by the (...) group), using the
# Debian version number comparison algorithm described below.

sub process_watchline ($$$$$)
{
    my ($line, $watch_version, $pkg_dir, $pkg, $pkg_version) = @_;

    my ($base, $site, $dir, $pattern, $lastversion, $action);
    my %options = ();

    my ($request, $response);
    my ($newfile, $newversion);
    my $style='new';
    my $urlbase;

    if ($watch_version == 1) {
	($site, $dir, $pattern, $lastversion, $action) = split ' ', $line;

	if (! defined $lastversion or $site =~ /\(.*\)/ or $dir =~ /\(.*\)/) {
	    warn "Warning: there appears to be a version 2 format line in\n  the version 1 watchfile $dir/debian/watch;\n  Have you forgotten a 'version=2' line at the start, perhaps?\n  Skipping the line: $line\n";
	    return;
	}
	if ($site !~ m%\w+://%) {
	    $site = "ftp://$site";
	    if ($pattern !~ /\(.*\)/) {
		# watch_version=1 and old style watchfile;
		# pattern uses ? and * shell wildcards; everything from the first
		# to last of these metachars is the pattern to match on.
		$pattern =~ s/(\?|\*)/($1/;
		$pattern =~ s/(\?|\*)([^\?\*]*)$/$1)$2/;
		$pattern =~ s/\./\\./g;
		$pattern =~ s/\?/./g;
		$pattern =~ s/\*/.*/g;
		$style='old';
		warn "Warning: Using very old style of filename pattern in $pkg_dir/debian/watch\n  (this might lead to incorrect results): $3\n";
	    }
	}

	# Merge site and dir
	$base = "$site/$dir/";
	$base =~ s%(?<!:)//%/%g;
	$base =~ m%^(\w+://[^/]+)%;
	$site = $1;
    } else {
	if ($line =~ s/^opt(?:ion)?s=(\S+)\s+//) {
	    my $opts=$1;
	    my @opts = split /,/, $opts;
	    foreach my $opt (@opts) {
		if ($opt eq 'pasv' or $opt eq 'passive') {
		    $options{'pasv'}=1;
		}
		elsif ($opt eq 'active' or $opt eq 'nopasv'
		       or $opt eq 'nopassive') {
		    $options{'pasv'}=0;
		}
		else {
		    warn "Warning: unrecognised option $opt\n";
		}
	    }
	}
	($base, $pattern, $lastversion, $action) = split ' ', $line;
	if ($base =~ /\(.*\)/) {
	    # only three fields
	    $action = $lastversion;
	    $lastversion = $pattern;
	    # We're going to make the pattern
	    # (?:(?:http://site.name)?/dir/path/)?base_pattern
	    # It's fine even for ftp sites
	    $pattern = $base;
	    $pattern =~ s%^(\w+://[^/]+)%(?:$1)?%;
	    $pattern =~ s%^(.*/)%(?:$1)?%;
	    $base =~ s%/[^/]+$%/%;
	}

	if ($base =~ m%^(\w+://[^/]+)%) {
	    $site = $1;
	} else {
	    warn "Warning: Can't determine protocol and site in\n  $pkg_dir/debian/watch, skipping:\n  $line\n";
	    return;
	}
    }

    # Check all's OK
    if ($pattern !~ /\(.*\)/) {
	warn "Warning: Filename pattern missing version delimeters ()\n  in $pkg_dir/debian/watch, skipping:\n  $line\n";
	return;
    }

    # What is the most recent file, based on the filenames?
    # We first have to find the candidates, then we sort them using
    # Devscripts::Versort::versort
    if ($site =~ m%^http://%) {
	print STDERR "Debug: requesting URL $base\n" if $debug;
	$request = HTTP::Request->new('GET', $base);
	$response = $user_agent->request($request);
	if (! $response->is_success) {
	    warn "Warning: In watchfile $pkg_dir/debian/watch, reading webpage\n  $base failed: " . $response->status_line . "\n";
	    return;
	}

	my $content = $response->content;
	print STDERR "Debug: received content:\n$content\[End of received content]\n"
	    if $debug;
	# We need this horrid stuff to handle href=foo type
	# links.  OK, bad HTML, but we have to handle it nonetheless.
	# It's bug #89749.
	$content =~ s/href\s*=\s*(?=[^\"\'])([^\s>]+)/href="$1"/ig;
	# Strip comments
	$content =~ s/<!-- .*?-->//sg;
	# Is there a base URL given?
	if ($content =~ /<\s*base\s+[^>]*href\s*=\s*([\"\'])(.*?)\1/) {
	    # Ensure it ends with /
	    $urlbase = "$2/";
	    $urlbase =~ s%//$%/%;
	} else {
	    # May have to strip a base filename
	    ($urlbase = $base) =~ s%/[^/]*$%/%;
	}

	print STDERR "Debug: matching pattern $pattern\n" if $debug;
	my @hrefs;
	while ($content =~ m/<\s*a\s+[^>]*href\s*=\s*([\"\'])(.*?)\1/gi) {
	    my $href = $2;
	    if ($href =~ m/^$pattern$/) {
		push @hrefs, [$1, $href];  # [ version, href ]
	    }
	}
	if (@hrefs) {
	    if ($verbose) {
		print "-- Found the following matching hrefs:\n";
		foreach my $href (@hrefs) { print "     $$href[1]\n"; }
	    }
	    @hrefs = Devscripts::Versort::versort(@hrefs);
	    ($newversion, $newfile) = @{$hrefs[0]};
	} else {
	    warn "Warning: In $pkg_dir/debian/watch,\n  no matching hrefs for watch line\n  $line\n";
	    return;
	}
    }
    else {
	# Better be an FTP site
	if ($site !~ m%^ftp://%) {
	    warn "Warning: Unknown protocol in $pkg_dir/debian/watch, skipping:\n  $site\n";
	    return;
	}

	if (exists $options{'pasv'}) {
	    $ENV{'FTP_PASSIVE'}=$options{'pasv'};
	}
	print STDERR "Debug: requesting URL $base\n" if $debug;
	$request = HTTP::Request->new('GET', $base);
	$response = $user_agent->request($request);
	if (exists $options{'pasv'}) {
	    if (defined $passive) { $ENV{'FTP_PASSIVE'}=$passive; }
	    else { delete $ENV{'FTP_PASSIVE'}; }
	}
	if (! $response->is_success) {
	    warn "Warning: In watchfile $pkg_dir/debian/watch, reading FTP directory\n  $base failed: " . $response->status_line . "\n";
	    return;
	}

	my $content = $response->content;
	print STDERR "Debug: received content:\n$content\[End of received content]\n"
	    if $debug;

	# FTP directory listings either look like:
	# info info ... info filename [ -> linkname]
	# or they're HTMLised (if they've been through an HTTP proxy)
	# so we may have to look for <a href="filename"> type patterns
	print STDERR "Debug: matching pattern $pattern\n" if $debug;
	my (@files);
	$content =~ s/\n/ \n/g; # make every filename have an extra
	                        # space after it in a normal FTP listing
	while ($content =~
	           m/(?:<\s*a\s+[^>]*href\s*=\s*\"| )($pattern)(\"| )/gi) {
	    push @files, [$2, $1];  # [ version, file ]
	}
	if (@files) {
	    if ($verbose) {
		print "-- Found the following matching files:\n";
		foreach my $file (@files) { print "     $$file[1]\n"; }
	    }
	    @files = Devscripts::Versort::versort(@files);
	    ($newversion, $newfile) = @{$files[0]};
	} else {
	    warn "Warning: In $pkg_dir/debian/watch no matching files for watch line\n  $line\n";
	    return;
	}
    }

    # The original version of the code didn't use (...) in the watch
    # file to delimit the version number; thus if there is no (...)
    # in the pattern, we will use the old heuristics, otherwise we
    # use the new.

    if ($style eq 'old') {
        # Old-style heuristics
	if ($newversion =~ /^\D*(\d+\.(?:\d+\.)*\d+)\D*$/) {
	    $newversion = $1;
	} else {
	    warn <<"EOF";
Warning: In $pkg_dir/debian/watch, couldn\'t determine a
  pure numeric version number from the file name for watch line
  $line
  and file name $newfile
  Please use a new style watchfile instead!
EOF
	    return;
	}
    }
			
    my $newfile_base=basename($newfile);
    if (! $lastversion or $lastversion eq 'debian') {
	$lastversion=$pkg_version;
    }

    print "Newest version on remote site is $newversion, local version is $lastversion\n"
	if $verbose;
    if ($newversion eq $lastversion) {
	print " => Package is up to date\n" if $verbose;
	return;
    }

    # We use dpkg's rules to determine whether our current version
    # is newer or older than the remote version.
    if (system("dpkg --compare-versions '$lastversion' gt '$newversion'") == 0) {
        if ($verbose) {
	    print " => remote site does not even have current version\n";
	} else {
	    print "$pkg: remote site does not even have current version\n";
	}
        return;
    }

    if (-f "../$newfile_base") {
        print " => $newfile_base already in package directory\n"
	    if $verbose;
        return;
    }
    if (-f "../${pkg}_${newversion}.orig.tar.gz") {
        warn "Warning: In directory $pkg_dir, found file\n  ${pkg}_${newversion}.orig.tar.gz but not $newfile_base,\n  which is the newest file available on remote site.  Skipping.\n";
        return;
    }

    if ($verbose) {
	print " => Newer version available\n";
    } else {
	print "$pkg: Newer version ($newversion) available on remote site\n  (local version is $lastversion)\n";
    }

    return unless $download;

    print "-- Downloading updated package $newfile_base\n" if $verbose;
    # Download newer package
    if ($site =~ m%^http://%) {
	# absolute URL?
	if ($newfile =~ m%^\w+://%) {
	    print STDERR "Debug: requesting URL $newfile\n" if $debug;
	    $request = HTTP::Request->new('GET', $newfile);
	    $response = $user_agent->request($request, "../$newfile_base");
	    if (! $response->is_success) {
		warn "Warning: In directory $pkg_dir, downloading\n  $newfile failed: " . $response->status_line . "\n";
		return;
	    }
	}
	# absolute filename?
	elsif ($newfile =~ m%^/%) {
	    print STDERR "Debug: requesting URL $site$newfile\n" if $debug;
	    $request = HTTP::Request->new('GET', "$site$newfile");
	    $response = $user_agent->request($request, "../$newfile_base");
	    if (! $response->is_success) {
		warn "Warning: In directory $pkg_dir, downloading\n  $site$newfile failed: " . $response->status_line . "\n";
		return;
	    }
	}
	# relative filename, we hope
	else {
	    print STDERR "Debug: requesting URL $urlbase$newfile\n" if $debug;
	    $request = HTTP::Request->new('GET', "$urlbase$newfile");
	    $response = $user_agent->request($request, "../$newfile_base");
	    if (! $response->is_success) {
		warn "Warning: In directory $pkg_dir, downloading\n  $urlbase$newfile failed: " . $response->status_line . "\n";
		return;
	    }
	}
    }
    else {
	# FTP site
	if (exists $options{'pasv'}) {
	    $ENV{'FTP_PASSIVE'}=$options{'pasv'};
	}
	print STDERR "Debug: requesting URL $base$newfile\n" if $debug;
	$request = HTTP::Request->new('GET', "$base$newfile");
	$response = $user_agent->request($request, "../$newfile_base");
	if (exists $options{'pasv'}) {
	    if (defined $passive) { $ENV{'FTP_PASSIVE'}=$passive; }
	    else { delete $ENV{'FTP_PASSIVE'}; }
	}
	if (! $response->is_success) {
	    warn "Warning: In directory $pkg_dir, downloading\n  $base$newfile failed: " . $response->status_line . "\n";
	    return;
	}
    }

    if ($symlink and $newfile_base =~ /\.(tar\.gz|tgz)$/) {
	symlink $newfile_base, "../${pkg}_${newversion}.orig.tar.gz";
    }

    if ($verbose) {
	print "-- Successfully downloaded updated package $newfile_base\n";
	if ($symlink and $newfile_base =~ /\.(tar\.gz|tgz)$/) {
	    print "    and symlinked ${pkg}_${newversion}.orig.tar.gz to it\n";
	}
    } else {
	print "$pkg: Successfully downloaded updated package $newfile_base\n";
	if ($symlink and $newfile_base =~ /\.(tar\.gz|tgz)$/) {
	    print "    and symlinked ${pkg}_${newversion}.orig.tar.gz to it\n";
	}
    }

    # Do whatever the user wishes to do
    if ($action) {
	my $usefile = ($symlink and $newfile_base =~ /\.(tar\.gz|tgz)$/) ?
	    "../${pkg}_${newversion}.orig.tar.gz" : "../$newfile_base";

	if ($watch_version > 1) {
	    print "-- Executing user specified script\n     $action --upstream-version $newversion $newfile_base" if $verbose;
	    system("$action --upstream-version $newversion $usefile");
	} else {
	    print "-- Executing user specified script $action $newfile_base $newversion" if $verbose;
	    system("$action $usefile $newversion");
	}
    }
}

# parameters are dir, package, upstream version
sub process_watchfile ($$$)
{
    my ($dir, $package, $version) = @_;
    my $watch_version=0;

    unless (open WATCH, 'debian/watch') {
	warn "Warning: could not open $dir/debian/watch: $!\n";
	return;
    }
    while (<WATCH>) {
	next if /^\s*\#/;
	next if /^\s*$/;
	s/^\s*//;

    CHOMP:
	chomp;
	if (s/(?<!\\)\\$//) {
	    if (eof(WATCH)) {
		warn "Warning: $dir/debian/watch ended with \\; skipping last line\n";
		last;
	    }
	    $_ .= <WATCH>;
	    goto CHOMP;
	}

	if (! $watch_version) {
	    if (/^version\s*=\s*(\d+)(\s|$)/) {
		$watch_version=$1;
		next;
	    } else {
		print "Note: $dir/debian/watch is a version 1 watchfile;\n  consider upgrading to version 2 (see uscan(1) for details).\n" if $verbose;
		$watch_version=1;
	    }
	}

	# Handle shell \\ -> \
	s/\\\\/\\/g if $watch_version==1;
	print "-- In $dir/debian/watch, processing watchfile line:\n   $_\n" if $verbose;
	process_watchline($_, $watch_version, $dir, $package, $version);
    }

    close WATCH or warn "Warning: problems reading $dir/debian/watch\n";
}
