# This module is part of debbugs, and is released
# under the terms of the GPL version 2, or any later
# version at your option.
# See the file README and COPYING for more information.
#
# [Other people have contributed to this file; their copyrights should
# go here too.]

package Debbugs::Versions;

use warnings;

use strict;

=head1 NAME

Debbugs::Versions - debbugs version information processing

=head1 DESCRIPTION

The Debbugs::Versions module provides generic support functions for the
implementation of version tracking in debbugs.

Complex organizations, such as Debian, require the tracking of bugs in
multiple versions of packages. The versioning scheme is frequently branched:
for example, a security update announced by an upstream developer will be
packaged as-is for the unstable distribution while a minimal backport is
made to the stable distribution. In order to report properly on the bugs
open in each distribution, debbugs must be aware of the structure of the
version tree for each package.

Gathering the version data is beyond the scope of this module: in the case
of Debian it is carried out by mechanical analysis of package changelogs.
Debbugs::Versions takes version data for a package generated by this or any
other means, merges it into a tree structure, and allows the user to perform
queries based on supplied data about the versions in which bugs have been
found and the versions in which they have been fixed.

=head1 DATA FORMAT

The data format looks like this (backslashes are not actually there, and
indicate continuation lines):

  1.5.4 1.5.0 1.5-iwj.0.4 1.5-iwj.0.3 1.5-iwj.0.2 1.5-iwj.0.1 1.4.0 1.3.14 \
        1.3.13 1.3.12 1.3.11 1.3.10 ...
  1.4.1.6 1.4.1.5 1.4.1.4 1.4.1.3 1.4.1.2 1.4.1.1 1.4.1 1.4.0.31 1.4.0.30 \
        1.4.0.29 1.4.0.28 1.4.0.27 1.4.0.26.0.1 1.4.0.26 1.4.0.25 1.4.0.24 \
        1.4.0.23.2 1.4.0.23.1 1.4.0.23 1.4.0.22 1.4.0.21 1.4.0.20 1.4.0.19 \
        1.4.0.18 1.4.0.17 1.4.0.16 1.4.0.15 1.4.0.14 1.4.0.13 1.4.0.12 \
        1.4.0.11 1.4.0.10 1.4.0.9 1.4.0.8 1.4.0.7 1.4.0.6 1.4.0.5 1.4.0.4 \
        1.4.0.3 1.4.0.2 1.4.0.1 1.4.0 \
  1.4.0.35 1.4.0.34 1.4.0.33 1.4.0.32 1.4.0.31

=head1 METHODS

=over 8

=item new

Constructs a Debbugs::Versions object. The argument is a reference to a
version comparison function, which must be usable by Perl's built-in C<sort>
function.

=cut

sub new
{
    my $this = shift;
    my $class = ref($this) || $this;
    my $vercmp = shift;
    my $self = { parent => {}, vercmp => $vercmp };
    return bless $self, $class;
}

=item isancestor

Takes two arguments, C<ancestor> and C<descendant>. Returns true if and only
if C<ancestor> is a version on which C<descendant> is based according to the
version data supplied to this object. (As a degenerate case, this relation
is reflexive: a version is considered to be an ancestor of itself.)

This method is expected mainly to be used internally by the C<merge> method.

=cut

sub isancestor
{
    my $self = shift;
    my $ancestor = shift;
    my $descendant = shift;

    my $parent = $self->{parent};
    for (my $node = $descendant; defined $node; $node = $parent->{$node}) {
	return 1 if $node eq $ancestor;
    }

    return 0;
}

=item leaves

Find the leaves of the version tree, i.e. those versions with no
descendants.

This method is mainly for internal use.

=cut

sub leaves
{
    my $self = shift;

    my $parent = $self->{parent};
    my @vers = keys %$parent;
    my %leaf;
    @leaf{@vers} = (1) x @vers;
    for my $v (@vers) {
	delete $leaf{$parent->{$v}} if defined $parent->{$v};
    }
    return keys %leaf;
}

=item merge

Merges one branch of version data into this object. This branch takes the
form of a list of versions, each of which is to be considered as based on
the next in the list.

=cut

sub merge
{
    my $self = shift;
    return unless @_;
    my $last = $_[0];
    for my $i (1 .. $#_) {
	# Detect loops.
	next if $self->isancestor($last, $_[$i]);

	# If it's already an ancestor version, don't add it again. This
	# keeps the tree correct when we get several partial branches, such
	# as '1.4.0 1.3.14 1.3.13 1.3.12' followed by '1.4.0 1.3.12 1.3.10'.
	unless ($self->isancestor($_[$i], $last)) {
	    $self->{parent}{$last} = $_[$i];
	}

	$last = $_[$i];
    }
    # Insert undef for the last version so that we can tell a known version
    # by seeing if it exists in $self->{parent}.
    $self->{parent}{$_[$#_]} = undef unless exists $self->{parent}{$_[$#_]};
}

=item load

Loads version data from the filehandle passed as the argument. Each line of
input is expected to represent one branch, with versions separated by
whitespace.

=cut

sub load
{
    my $self = shift;
    my $fh = shift;
    local $_;
    while (<$fh>) {
	$self->merge(split);
    }
}

=item save

Outputs the version tree represented by this object to the filehandle passed
as the argument. The format is the same as that expected by the C<load>
method.

=cut

sub save
{
    my $self = shift;
    my $fh = shift;
    local $_;
    my $parent = $self->{parent};

    # TODO: breaks with tcp-wrappers/1.0-1 tcpd/2.0-1 case
    my @leaves = reverse sort {
	my ($x, $y) = ($a, $b);
	$x =~ s{.*/}{};
	$y =~ s{.*/}{};
	$self->{vercmp}->($x, $y);
    } $self->leaves();

    my %seen;
    for my $lf (@leaves) {
	print $fh $lf;
	$seen{$lf} = 1;
	for (my $node = $parent->{$lf}; defined $node;
	     $node = $parent->{$node}) {
	    print $fh " $node";
	    last if exists $seen{$node};
	    $seen{$node} = 1;
	}
	print $fh "\n";
    }
}

=item buggy

Takes three arguments, C<version>, C<found>, and C<fixed>. Returns true if
and only if C<version> is based on or equal to a version in the list
referenced by C<found>, and not based on or equal to one referenced by
C<fixed>.

C<buggy> attempts to cope with found and fixed versions not in the version
tree by simply checking whether any fixed versions are recorded in the event
that nothing is known about any of the found versions.

=cut

sub buggy
{
    my $self = shift;
    my $version = shift;
    my $found = shift;
    my $fixed = shift;

    my %found = map { $_ => 1 } @$found;
    my %fixed = map { $_ => 1 } @$fixed;
    my $parent = $self->{parent};
    for (my $node = $version; defined $node; $node = $parent->{$node}) {
	# The found and fixed tests are this way round because the most
	# likely scenario is that somebody thought they'd fixed a bug and
	# then it was reopened because it turned out not to have been fixed
	# after all. However, tools that build found and fixed lists should
	# generally know the order of events and make sure that the two
	# lists have no common entries.
	return 'found' if $found{$node};
	return 'fixed' if $fixed{$node};
    }

    unless (@$found) {
	# We don't know when it was found. Was it fixed in a descendant of
	# this version? If so, this one should be considered buggy.
	for my $f (@$fixed) {
	    for (my $node = $f; defined $node; $node = $parent->{$node}) {
		return 'found' if $node eq $version;
	    }
	}
    }

    # Nothing in the requested version's ancestor chain can be confirmed as
    # a version in which the bug was found or fixed. If it was only found or
    # fixed on some other branch, then this one isn't buggy.
    for my $f (@$found, @$fixed) {
	return 'absent' if exists $parent->{$f};
    }

    # Otherwise, we degenerate to checking whether any fixed versions at all
    # are recorded.
    return 'fixed' if @$fixed;
    return 'found';
}

=item allstates

Takes two arguments, C<found> and C<fixed>, which are interpreted as in
L</buggy>. Efficiently returns the state of the bug at every known version,
in the form of a hash from versions to states (as returned by L</buggy>). If
you pass a third argument, C<interested>, this method will stop after
determining the state of the bug at all the versions listed therein.

Whether this is faster than calling L</buggy> for each version you're
interested in is not altogether clear, and depends rather strongly on the
number of known and interested versions.

=cut

sub allstates
{
    my $self = shift;
    my $found = shift;
    my $fixed = shift;
    my $interested = shift;

    my %found = map { $_ => 1 } @$found;
    my %fixed = map { $_ => 1 } @$fixed;
    my %interested;
    if (defined $interested) {
	%interested = map { $_ => 1 } @$interested;
    }
    my $parent = $self->{parent};
    my @leaves = $self->leaves();

    # Are any of the found or fixed versions known? We'll need this later.
    my $known = 0;
    for my $f (@$found, @$fixed) {
	if (exists $parent->{$f}) {
	    $known = 1;
	    last;
	}
    }

    # Start at each leaf in turn, working our way up and remembering the
    # list of versions in the branch.
    my %state;
    LEAF: for my $lf (@leaves) {
	my @branch;
	my $fixeddesc = 0;

	for (my $node = $lf; defined $node; $node = $parent->{$node}) {
	    # If we're about to start a new branch, check whether we know
	    # the state of every version in which we're interested. If so,
	    # we can stop now.
	    if (defined $interested and not @branch) {
		my @remove;
		for my $interest (keys %interested) {
		    if (exists $state{$interest}) {
			push @remove, $interest;
		    }
		}
		delete @interested{@remove};
		last LEAF unless keys %interested;
	    }

	    # We encounter a version whose state we already know. Record the
	    # branch with the same state as that version, and go on to the
	    # next leaf.
	    if (exists $state{$node}) {
		$state{$_} = $state{$node} foreach @branch;
		last;
	    }

	    push @branch, $node;

	    # We encounter a version in the found list. Record the branch as
	    # 'found', and start a new branch.
	    if ($found{$node}) {
		$state{$_} = 'found' foreach @branch;
		@branch = ();
	    }

	    # We encounter a version in the fixed list. Record the branch as
	    # 'fixed', and start a new branch, remembering that we have a
	    # fixed descendant.
	    elsif ($fixed{$node}) {
		$state{$_} = 'fixed' foreach @branch;
		@branch = ();
		$fixeddesc = 1;
	    }

	    # We encounter a root.
	    elsif (not defined $parent->{$node}) {
		# If the found list is empty and we have a fixed descendant,
		# record the branch as 'found' (since they probably just
		# forgot to report a version when opening the bug).
		if (not @$found and $fixeddesc) {
		    $state{$_} = 'found' foreach @branch;
		}

		# If any of the found or fixed versions are known, record
		# the branch as 'absent' (since all the activity must have
		# happened on some other branch).
		elsif ($known) {
		    $state{$_} = 'absent' foreach @branch;
		}

		# If there are any fixed versions at all (but they're
		# unknown), then who knows, but we guess at recording the
		# branch as 'fixed'.
		elsif (@$fixed) {
		    $state{$_} = 'fixed' foreach @branch;
		}

		# Otherwise, fall back to recording the branch as 'found'.
		else {
		    $state{$_} = 'found' foreach @branch;
		}

		# In any case, we're done.
		last;
	    }
	}
    }

    return %state;
}

=back

=cut

1;
