#! /usr/bin/perl

# Copyright (C) 1998 Julian Gilbey <jdg@debian.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# For a copy of the GNU General Public License write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

# The input to this script comes from uscan, and consists of a series
# of lines of the form "$version_number\t$filename".  The output should
# be the same list sorted into version number order, using the same
# ordering as the Debian packaging system.  This is described in the
# Debian Packaging manual (v. 2.4.1.0), chapter 5:
#
#   The strings are compared from left to right.
#
#   First the initial part of each string consisting entirely of non-digit
#   characters is determined. These two parts (one of which may be empty)
#   are compared lexically. If a difference is found it is returned. The
#   lexical comparison is a comparison of ASCII values modified so that
#   all the letters sort earlier than all the non-letters.
#
#   Then the initial part of the remainder of each string which consists
#   entirely of digit characters is determined. The numerical values of
#   these two parts are compared, and any difference found is returned as
#   the result of the comparison. For these purposes an empty string
#   (which can only occur at the end of one or both version strings being
#   compared) counts as zero.
#
#   These two steps are repeated (chopping initial non-digit strings and
#   initial digit strings off from the start) until a difference is found
#   or both strings are exhausted.
#
# We note that we can simply compare the whole filenames, as we
# presume (reasonably) that the leading and trailing parts will be
# identical for each file.
#
# The program works like this.  In order that letters (defined by the
# isalpha() function) sort before non-letters, we initially modify all
# the strings as follows: every letter x is replaced by ax, every
# non-alphanumeric x is replaced by bx, and digits are left
# untouched.  In this way, the letters will all sort before
# non-letters in alphabetical sorting.  At the end, we simply undo the
# changes.  We use "no locale" since the dpkg program clears the locale
# before comparing things (or at least it should do), so we should do
# the same in order to match it.  We must also use [A-Za-z] rather than
# \w to distinguish between letters and digits.
#
# We read all of the strings into an array.  We then split each string
# by blocks of digits, and sort this array of arrays.  In this way, we
# need only perform the splitting etc. once.  We also note that some
# of our strings might end in a digit, in which case the split array
# would not have the null non-digit string at the end.  We make our
# lives easier by insisting that they do.  Thus we will always have an
# array of the form ("\D*","\d+","\D+","\d+",...,"\d+","\D*"), that
# is, it has odd length, always ending with a non-digit string.

no locale;

@namever_pairs=();

while (<>) {
	chomp;
	($ver,$name) = split /\t/;
	$ver =~ s/([A-Za-z])/a$1/g;
	$ver =~ s/([^A-Za-z0-9])/b$1/g;

	@split_ver = split /(\d+)/, $ver;
	if ( @split_ver % 2 == 0 ) { push @split_ver, ''; }
	push @namever_pairs, [ [ @split_ver ], $name ];
}

@sorted_namever_pairs = sort vercmp @namever_pairs;

foreach $namever_pairref (@sorted_namever_pairs) {
	$ver = join '', @{$$namever_pairref[0]};
	$ver =~ s/[ab](.)/$1/g;   # Undo former mangling
	print $ver, "\t", $$namever_pairref[1], "\n";
}

# The following subroutine compares two split strings, passed within
# references to anonymous arrays, $a and $b.  We remember that we must
# not alter the things $a and $b refer to.  We also remember that the
# arrays @{$$a[0]} and @{$$b[0]} will always have an odd length as
# explained above.

sub vercmp {
	$i=0;
	$vera=$$a[0];
	$verb=$$b[0];
	$lengtha = @$vera;
	$lengthb = @$verb;

	for (;;) {
		$nondiga = $vera->[$i];
		$nondigb = $verb->[$i];

		if ($nondiga lt $nondigb) { return -1; }
		if ($nondiga gt $nondigb) { return +1; }

		$i++;

		if ($lengtha == $i) {   # Nothing left in array @$vera
			if ($lengthb == $i) { return 0; }  # @$vera = @$verb
			else { return -1; }          # @$vera is an initial part of @$verb
		}
		elsif ($lengthb == $i) { return +1; }  # vice versa

		# Now for the next term, which is a numeric part

		if ( $vera->[$i] < $verb->[$i] ) { return -1; }
		if ( $vera->[$i] > $verb->[$i] ) { return +1; }

		$i++;
	}
}