#!/usr/bin/perl -w
#
# rename_file.pl
# A simple script for renaming sets of files
# Usage :
#
# rename_file.pl [-dir <dir>] [-add <txt>] [-end <txt>] [-txt <txt1> [-sub <txt2>]] [-format] [-help]
#
#$* = 1;
#$/ = "";
#### last modified by Ralf Schmid 11/10/2005
#### -end added, glob instead of readdir to fix memory issues 


use Getopt::Long;
use strict;

# Option values filled in by GetOptions below, plus scratch variables shared
# by the renaming passes further down the script.
my ($file, $dir, $format, $add, $end, $txt, $help, $sub, $new_file);
my ($col, $row, $platecoords, $char, $plate, $tracenum);

# Parse the command line.  GetOptions returns false (after printing its own
# diagnostics) when it meets an unknown or malformed option; stop in that
# case instead of renaming files based on a half-understood command line.
GetOptions( "add=s"  => \$add,      # txt to add to beginning
            "end=s"  => \$end,      # txt to add to end
            "txt=s"  => \$txt,      # txt to remove
            "format" => \$format,   # to format the trace filenames
            "dir=s"  => \$dir,      # directory where traces are stored
            "sub=s"  => \$sub,      # txt to substitute for above
            "help"   => \$help,     # to get help
          ) or die "Invalid options; run with -help for usage.\n";

# With no action requested (-help, -add, -end, -format or -txt), print the
# short option summary and stop.  -dir and -sub do nothing on their own, so
# they are not counted as actions here.
if (!($help || $add || $end || $format || $txt)) {
    # Single-quoted heredoc: the text is emitted exactly as written.
    print <<'END_USAGE';
Usage : rename_files.pl <list of arguments>
 -dir <txt> - set directory of traces <dir> to <txt>
 -add <txt> - <txt> gets added to the beginning of each
                    tracefile in directory <dir>
 -end <txt> - <txt> gets added to the end of each tracefile
 -txt <txt1> - <txt1> gets removed from each file
 -sub <txt2> - (only with -txt set) txt1 is replaced by txt2
 -format         - Traces are reformatted to correct 96 well
                   nomenclature. Single digits are replaced by double
                   digits and row ID set to uppercase
                   In addition, if your files do not contain
                   plate coordinates, but are numbered sequentially
                   e.g. trace1, trace2, trace3 etc. this option will
                   convert the numbers into 96 well format
                   (it assumes 1-12 refer to row A columns 1-12 etc.)
 -help           - Get more detailed help
END_USAGE
    exit();
}
 
# -help: print the naming convention, the full option list and an example,
# then stop without touching any files.
if ($help) {
    # Single-quoted heredoc: backslashes (\w, \d) are printed literally, so
    # the pattern below appears exactly as written.
    print <<'END_HELP';
To help with renaming the files into EGTDC style format the script
rename_files.pl is provided.

The preferred format for trace names is :

\w\w_\w{2,5}_\d\d\w\d\d

where      \w represents any letter or number
           \d represents any digit
           {2,5} = minimum/maximum number of characters
The first two letters represent the species from which the sequences were
derived, the middle letters represent the cDNA library used to derive the
sequences and the last digits / letter represent the plate number and well
coordinates (usually in 96 well format).

E.g. for sequences derived from a cDNA library made from material
derived from the adult nervous system of the earthworm Lumbricus rubellus
library you may use :

Lr_adN_01A01, Lr_adN_01A02 etc.

---------------------------------------------------------------------
rename_files.pl <list of arguments>
 -dir <txt> - set directory of traces <dir> to <txt>
 -add <txt> - <txt> gets added to the beginning of each
                    tracefile in directory <dir>
 -end <txt> - <txt> gets added to the end of each tracefile
 -txt <txt1> - <txt1> gets removed from each file
 -sub <txt2> - (only with -txt set) txt1 is replaced by txt2
 -format         - Traces are reformatted to correct 96 well
                   nomenclature. Single digits are replaced by double
                   digits and row ID set to uppercase
                   In addition, if your files do not contain
                   plate coordinates, but are numbered sequentially
                   e.g. trace1, trace2, trace3 etc. this option will
                   convert the numbers into 96 well format
                   (it assumes 1-12 refer to row A columns 1-12 etc.)
 -help           - Get this help information

e.g. rename_files.pl -dir traces -add Lr_adE -txt .scf
will take each file in the directory <traces>, add 'Lr_adE' to the front
of the file and remove '.scf' from the filename
END_HELP
    exit();
}


# Working list of file names; refilled by each renaming pass below.
my @dir;

# Remember the starting directory.  Backticks keep the trailing newline that
# pwd prints, so strip it; otherwise the path cannot be used with chdir at
# the end of the script.
my $workdir = `pwd`;
chomp $workdir if defined $workdir;

# Defaults: work in the current directory, and replace -txt matches with
# nothing.  Test with defined/length rather than truthiness so that a
# directory or replacement text of "0" is not mistaken for "not given".
if (!defined $dir || !length $dir) { $dir = "./"; }
if (!defined $sub)                 { $sub = ""; }

# Refuse to continue if the target directory cannot be entered; carrying on
# would rename the files in whatever directory the script was started from.
chdir($dir) or die "Cannot change to directory '$dir': $!\n";

# -add: prepend $add to the name of every file in the current directory.
if ($add) {
    # Take the listing once, before renaming, so freshly renamed files are
    # not picked up again.
    @dir = glob("*");
    foreach my $file (@dir) {

        print "$file\n";

        next if $file =~ /^\./;    # leave dot files alone

        $new_file = $add . $file;

        # List form of system() bypasses the shell, so names containing
        # spaces or shell metacharacters are passed to mv unchanged; "--"
        # stops a leading "-" in a name being read as an option.
        system('mv', '--', $file, $new_file) == 0
            or warn "Failed to rename '$file' to '$new_file'\n";
    }
}

# -end: append $end to the name of every file in the current directory.
if ($end) {
    # Take the listing once, before renaming, so freshly renamed files are
    # not picked up again.
    @dir = glob("*");
    foreach my $file (@dir) {
        next if $file =~ /^\./;    # leave dot files alone

        $new_file = $file . $end;

        # List form of system() bypasses the shell, so names containing
        # spaces or shell metacharacters are passed to mv unchanged; "--"
        # stops a leading "-" in a name being read as an option.
        system('mv', '--', $file, $new_file) == 0
            or warn "Failed to rename '$file' to '$new_file'\n";
    }
}


# -txt: replace every match of $txt in each file name with $sub.
# Note that $txt is applied as a regular expression, so characters such as
# "." are pattern metacharacters rather than literal text.
if ($txt) {
    # Compile the pattern once, up front, so that a malformed expression is
    # reported before any file has been renamed.
    my $pattern = eval { qr/$txt/ };
    die "Invalid -txt pattern '$txt': $@" unless defined $pattern;

    @dir = glob("*");
    foreach my $file (@dir) {
        next if $file =~ /^\./;    # leave dot files alone

        ($new_file = $file) =~ s/$pattern/$sub/g;

        # Nothing matched: there is nothing to rename.
        next if $new_file eq $file;

        # The whole name was removed: mv cannot rename to an empty name.
        if (!length $new_file) {
            warn "Skipping '$file': new name would be empty\n";
            next;
        }

        # List form of system() bypasses the shell, so names containing
        # spaces or shell metacharacters are passed to mv unchanged; "--"
        # stops a leading "-" in a name being read as an option.
        system('mv', '--', $file, $new_file) == 0
            or warn "Failed to rename '$file' to '$new_file'\n";
    }
}


# -format: rewrite the trailing plate/well part of each file name into
# two-digit plate, uppercase row letter, two-digit column (e.g. _01A01).
if ($format) {
    @dir = glob("*");
    foreach my $file (@dir) {
        next if $file =~ /^\./;    # leave dot files alone

        my ($formatted, $coords) = _format_trace_name($file);

        # Report the well assigned to sequentially numbered files.
        print "$file $coords\n" if defined $coords;

        $new_file = $formatted;

        # Already in the desired form: nothing to rename.
        next if $new_file eq $file;

        # List form of system() bypasses the shell, so names containing
        # spaces or shell metacharacters are passed to mv unchanged; "--"
        # stops a leading "-" in a name being read as an option.
        system('mv', '--', $file, $new_file) == 0
            or warn "Failed to rename '$file' to '$new_file'\n";
    }
}

# Convert a 1-based sequential trace number into unpadded plate/well
# coordinates: 1 -> "1A1", 12 -> "1A12", 13 -> "1B1", 97 -> "2A1".
# Each plate holds 96 wells laid out as rows A-H of 12 columns.
sub _well_coords_for {
    my ($number) = @_;

    my $index     = $number - 1;                                # zero-based
    my $plate_no  = 1 + int($index / 96);
    my $row_id    = chr(ord('A') + int(($index % 96) / 12));
    my $column_no = ($index % 12) + 1;

    return $plate_no . $row_id . $column_no;
}

# Return (formatted name, coordinates) for one file name.  The second value
# is the unpadded plate/well string when the name ended in "_<number>" and
# was converted from sequential numbering; otherwise it is undef.
sub _format_trace_name {
    my ($name) = @_;
    my $coords;

    # No proper coordinates - assume 1-96 = 01A01-01H12 etc.  Only names with
    # an underscore directly before the number are converted.  A number of 0
    # has no well (numbering starts at 1), so it is left untouched.
    if ($name =~ /_(\d+)$/ && $1 > 0) {
        $coords = _well_coords_for($1);
        $name =~ s/_\d+$/_$coords/;
    }

    # Plate number is a single digit: pad it to two.
    $name =~ s/_(\d)([A-Za-z])(\d+)$/_0$1$2$3/;

    # Letter before the trailing digits is lowercase: make it uppercase.
    $name =~ s/([a-z])(\d+)$/\u$1$2/;

    # Number after the letter is a single digit: pad it to two.  The zero is
    # inserted directly, so characters elsewhere in the name are unaffected.
    $name =~ s/([A-Z])([0-9])$/${1}0$2/;

    return ($name, $coords);
}

# Go back to the directory the script was started in.  $workdir comes from
# `pwd` and may still carry its trailing newline, which would make the chdir
# fail, so work on a chomped copy and report a failure instead of ignoring it.
my $start_dir = defined $workdir ? $workdir : "";
chomp $start_dir;
if (length $start_dir) {
    chdir($start_dir)
        or warn "Cannot return to directory '$start_dir': $!\n";
}
