#!/usr/bin/env perl
# Time-stamp: <2002-04-03 18:52:59 barre>
#
# Clean dashboard submission sites.
#
# barre : Sebastien Barre <sebastien.barre@kitware.com>
#
# 0.3 (barre) :
#   - add --html option so that HTML files will be cleaned too (not only XML)
#
# 0.2 (barre) :
#   - get rid of partition parameter
#
# 0.1 (barre) :
#   - first release

use Getopt::Long;
use Fcntl;
use File::Basename;
use File::Find;
use strict;
use FileHandle;
use Time::Local;

my ($VERSION, $PROGNAME, $AUTHOR) = (0.3, $0, "Sebastien Barre");
$PROGNAME =~ s/^.*[\\\/]//;

# -------------------------------------------------------------------------
# Defaults

my %default = 
  (
   "verbose" => 0,
   "html" => 0
  );

# -------------------------------------------------------------------------
# Parse options

my %args;
Getopt::Long::Configure("bundling");
GetOptions (\%args, "help", "verbose|v", "archive|a=s", "size|s=i", "nodelete|n", "html");

print "$PROGNAME $VERSION, by $AUTHOR\n";

foreach my $option (
                    "verbose",
                    "archive",
                    "size",
                    "html"
                   ) {
    $args{$option} = $default{$option} 
      if ! exists $args{$option} && exists $default{$option};
}

if (exists $args{'help'} || 
    !exists $args{'size'} ||
    !@ARGV) {
    print <<"EOT";
Usage : $PROGNAME [--help] [--verbose|-v] [--archive|-a] [--size|-s] [--nodelete|-n] [--html] build_directory1 build_directory2... 
  --help            : this message
  --verbose|-v      : verbose (display filenames while processing)
  --nodelete|-n     : do not delete files (just pretend)
  --html            : by default only XML files are cleaned. Clean HTML files also
  --size|-s         : size that has to be made available on the partition (Kb)
  --archive|-a      : basename of the archive to store the deleted files in
                      (*use a different partition!)

Example:
  $PROGNAME -p /dev/hda13 -s 1000000 -a ~www/dashboard/archive ~/build/VTK ~/build/ARL
EOT
    exit;
}

my $os_is_win = ($^O =~ m/(MSWin32|Cygwin)/i);
my $open_file_as_text = $os_is_win ? O_TEXT : 0;

STDOUT->autoflush;
STDERR->autoflush;

my ($sec, $min, $hours, $day, $month, $year) = (localtime)[0..5];
my $date = sprintf("%04d%02d%02d", 
                   $year + 1900, $month + 1, $day);
my $datetime = sprintf("%04d-%02d-%02d-%02d-%02d-%02d", 
                       $year + 1900, $month + 1, $day, $hours, $min, $sec);

# -------------------------------------------------------------------------
# Useful funcs

#
# Give a file, get its partition or undef if not found.
#
# Exec `df -k file` and parse its output provided that it looks like:
# Filesystem           1k-blocks      Used Available Use% Mounted on
# /dev/sda7             11535344   1279644   9669732  12% /
# [...]
#
sub get_partition_for_file {
    my $file = shift;
    my $output = `df -k $file`;
    if ($output =~ m/^([\w\/\\:]+?)\s+\d+\s+\d+\s+\d+\s/gms) {
        return $1;
    }
    return undef;
}

#
# Give a file, get its partition available size or undef if not found.
#
# Exec `df -k file` and parse its output provided that it looks like:
# Filesystem           1k-blocks      Used Available Use% Mounted on
# /dev/sda7             11535344   1279644   9669732  12% /
# [...]
#
sub get_partition_available_size_for_file {
    my $file = shift;
    my $output = `df -k $file`;
    if ($output =~ m/^[\w\/\\:]+?\s+\d+\s+\d+\s+(\d+)\s/gms) {
        return $1;
    }
    return undef;
}

#
# Given a full submission dir, return a shorter name (for display use)
#
# Example:
# VolView/Testing/HTML/TestingResults/Sites/Foo/WinNT-cl/20020402-0700-Nightly
# => [VolView]       Foo : WinNT-cl : 20020402-0700-Nightly
#
sub get_short_submission_name {
    my @dirs = split('/', shift);
    return sprintf("%-10s", "[$dirs[$#dirs - 7]]") . " $dirs[$#dirs - 2] : $dirs[$#dirs - 1] : " . unpack("A15", $dirs[$#dirs]);
}

# -------------------------------------------------------------------------
# Check if all build dirs belong to the same partition

my %build_dirs;
my %build_dirs_partitions;

foreach my $build_dir (@ARGV) {
    my $partition = get_partition_for_file($build_dir);
    if (defined($partition)) {
        $build_dirs{$build_dir} = 1;
        $build_dirs_partitions{$partition} = 1 
    } else {
        print STDERR "Error: unable to find partition for $build_dir!\n";
    }
}

my @build_dirs_partitions_found = keys %build_dirs_partitions;
my @build_dirs_found = keys %build_dirs;

if (! @build_dirs_partitions_found) {
    print STDERR
      "Error: the given directories do not belong to any partitions!\n";
    exit(1);
}

if (scalar @build_dirs_partitions_found > 1) {
    print STDERR
      "Error: the given directories belong to different partitions (",
      join(", ", @build_dirs_partitions_found), ") !\n";
    exit(1);
}

my $main_partition = $build_dirs_partitions_found[0];

if (exists $args{'archive'}) {
    my $archive_partition = get_partition_for_file(dirname($args{'archive'}));
    if (! defined($archive_partition)) {
        print STDERR
          "Error: unable to find partition for archive $args{'archive'}!\n";
        exit(1);
    }
    if ($archive_partition eq $main_partition) {
        print STDERR
          "Error: the archive can not be stored on partition $archive_partition!\n";
        exit(1);
    }
}   

# -------------------------------------------------------------------------
# Check available size

my $available_size = 
  get_partition_available_size_for_file($build_dirs_found[0]);

if (!defined($available_size)) {
    print STDERR
      "Error: unable to get available size of partition $main_partition!\n";
    exit(1);
}

print scalar localtime(), "\n";
print "Available size in $main_partition is $available_size Kb";
if ($available_size >= $args{'size'}) {
    print ", which is >= $args{'size'} Kb\n";
    print "=> Nothing to do.\n";
    exit(0);
}
print "\n";

# -------------------------------------------------------------------------
# For each build dir, collect submissions (except for today)

print "Collecting submissions...\n";

my %submissions;

my $start_time = time();
    
foreach my $build_dir (@build_dirs_found) {

    my $sites_dir = "$build_dir/Testing/HTML/TestingResults/Sites";
    next if ! -e $sites_dir;

    print " - $build_dir:\n   in $sites_dir\n";

    find sub { 
        if (-d $_ &&
            $_ =~ m/^20\d\d[01]\d[0-3]\d-[0-2]\d[0-6]\d-\w+$/ &&
            unpack("A8", $_) ne $date) {
            print "   ", get_short_submission_name($File::Find::name), "\n"
              if $args{'verbose'};
            $submissions{$File::Find::name} = 1;
        }
    }, $sites_dir;
}

my @sorted_submissions = 
  sort {basename($a) cmp basename($b)} keys %submissions;

print "=> ", scalar @sorted_submissions, 
  " submissions (excluding today $date) collected in ", time() - $start_time, " s.\n";

# -------------------------------------------------------------------------
# Clean submissions

print "Cleaning submissions...\n";

my $start_time = time();
my $nb_of_files_deleted = 0;
my $round = 0;
my $remove_html = exists $args{'html'};

while (@sorted_submissions) {

    my $submission = shift @sorted_submissions;

    my @files_to_delete;
    find sub {
        push @files_to_delete, $File::Find::name 
	  if (-f $_ && 
	      ($_ =~ m/\.xml$/ || ($remove_html && $_ =~ m/\.html$/)));
    }, $submission;

    next if ! @files_to_delete;

    print "   ", get_short_submission_name($submission), "\n"
      if $args{'verbose'};

    my $nb_of_files_deleted_in_this_round = 0;
    foreach my $file_to_delete (@files_to_delete) {
        next if exists $args{'archive'} && 
          system("tar -rf $args{'archive'}-$datetime.tar $file_to_delete");
        if (exists $args{'nodelete'} || unlink($file_to_delete)) {
            $nb_of_files_deleted_in_this_round++;
        } else {
            print STDERR "Unable to delete $file_to_delete!\n";
        }
    }

    next if ! $nb_of_files_deleted_in_this_round;
    $nb_of_files_deleted += $nb_of_files_deleted_in_this_round;

    my $available_size =
      get_partition_available_size_for_file($submission);

    print " - Available size in $main_partition is $available_size Kb ($nb_of_files_deleted files deleted)\n"
      if ++$round % 10 == 0;

    last if $available_size >= $args{'size'};
}

my $available_size = 
  get_partition_available_size_for_file($build_dirs_found[0]);
print " - Available size in $main_partition is $available_size Kb\n";

print "=> $nb_of_files_deleted files deleted in ", 
  time() - $start_time, " s.\n";

if (exists $args{'archive'}) {
    my $arcname = "$args{'archive'}-$datetime.tar";
    if (-e $arcname) {
        print "=> Archive is $arcname\n"; 
        system("gzip $arcname");
    }
}
    
