File: trace2dbest.pl

package info (click to toggle)
trace2dbest 3.0.1-2
links: PTS, VCS
area: main
in suites: bookworm, bullseye, sid, trixie
size: 11,848 kB
sloc: perl: 2,777; makefile: 4
file content (3266 lines) | stat: -rwxr-xr-x 118,563 bytes
parent folder | download | duplicates (2)
#!/usr/bin/perl 
# 
# Script to convert raw trace files into dbEST submission
# files, using phred and cross_match
# See trace2dbest documentation for more details

# Version 3 mainly by rearrangements by Ralf Schmid 
# last update  10/11/05 Ralf Schmid
# modified trimming

# Author - Alasdair Anthony, University of Edinburgh

# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#

use warnings;	
use strict;
use Term::ANSIColor;
use Term::ReadLine;	#makes user input a bit more friendly
use File::Path;	#for rmtree
use File::Copy;
use Cwd; 	#see sub storependingEST
use Bio::SearchIO;
use Bio::SeqIO;

#### from config file
my $vector_file;
my $ecoli_file;
my $Lib_file;
my $Pub_file;
my $Cont_file;
my $EST_file;
my $phredoptions;


#### general global variables
my @PATH=split(":","$ENV{'PATH'}");  ### Get users Path
my $homedir = $ENV{HOME};
my $version = "3.0.1";
my $phredenv = `printenv | grep PHRED_PARAMETER_FILE`;
chomp $phredenv;
if ($phredenv) {$phredenv =~ s/PHRED_PARAMETER_FILE=//;} 

my $ESTfileflag=0;
my $Pubfileflag=0;
my $Libfileflag=0;
my $Contfileflag = 0;
my $dirflag=0;
my $pblastall =1; my $pblastcl3=1;	#allows bits of trace2dbest to be missed out if certain prog not in PATH
my $no_vect =1;	#flag to indicate if user can see their vector in vector.seq
my $trace_dir_flag =1;	#flag to allow returns to start of trace directory section
my $blast = 0;
my $blast_save =0;
my $flag =1;	#general use flag, generally used for while loops enclosing print questions
my @dirlist=("fasta","phd_dir","qual","scf","subfiles","fastafiles", "blast_reports", "partigene", "process", "process/traceinfo", "raw_traces");
my @progs = ("phred", "cross_match");
my @optprogs =  ("blastcl3", "blastall");

#declare @
my (@stats, @ESTfile_data, @Libfile_data, @Pubfile_data, @Contfile_data, @blastdb);
#declare $
my ($answer,$less_flag, $dir, $file_type, $type_id, $input, $seq_primer, $pcrf_primer, $pcrr_primer, $p_end, $comment, $public_date,
 $adapter, $scheme, $tracedir, $library, $high_qual_cutoff, $vminmatch, $vminscore,$eminmatch, $eminscore, $poly_num,
 $sl_flag, $sl_seq, $blastdata, $blast_database, $ecoli_cross, $cont_name, $lib_name, $pub_cit, $lib_descr, $final_file_num,
 $dbESTsubmission, $species, $save_location, $no_lib, $no_cont, $no_pub, $chim_flag, $chim_flag_e, $bits_cutoff, $read_gnu);
#declare %



for(my $i=0;$i<6;$i++) {	#set stats to 0
    $stats[$i]=0;
}


#### less/more for display
if (-f "/usr/bin/less"){
	$less_flag = 1;
} else {
	$less_flag = 0;
}






#### readline stuff
my $term = new Term::ReadLine 'sample';
$term ->ornaments(0);	#stops prompt getting underlined
my $attribs = $term->Attribs;	
$attribs->{completion_entry_function} =
 $attribs->{filename_completion_function};

if ($term ->ReadLine() =~ /Gnu$/) {	#returns the actual package that executes the commands - we need gnu
	$read_gnu = 1;
}
else {
	print "The readline package Term::ReadLine:Gnu was not found.\nInstalling this package will make interaction with trace2dbest more friendly.\nContinuing...\n\n";
	$read_gnu = 0;
}


my $log = "logfile";
if (-e "logfile") {
	unlink "logfile";
}
open(LOG, ">$log");



#############################################################################
############################ start main menu ################################
#############################################################################
&print_title();
&options();

#########################################################################################################################
sub options () {   #### options menu
   my $answer=0;
   while($answer!=7) {
      $answer=&title_page();
      if($answer==1)  { &check_config();} 
      if($answer==2)  { &tests(); } # run tests
      if($answer==3)  { &processing_options(); } #processing
      if($answer==4)  { &blast(); } #preliminary annotation
      if($answer==5)  { &paperwork_options(); } #edit files
      if($answer==6)  { &submission(); } #as it says
   }
   system("clear");
   exit();   #exit program
}
############################################################################################################################

   
###################################################################################
sub title_page() {  #### title comments and option selection
    print "\n\t  Enter the number corresponding to the part of the trace2dbEST\n";
    print "\t  process you want to perform:\n\n";
    print "\t\t1. Setup configuration.\n";
    print "\t\t2. Checks and tests.\n";
    print "\t\t3. Process traces.\n";
    print "\t\t4. Blasts for preliminary annotation.\n";
    print "\t\t5. Create or view submission information.\n";
    print "\t\t6. Prepare dbEST submission files.\n";
    print "\t\t7. Exit.\n\t";

    my $flag=0; 
    while($flag==0) {
	$answer=<>;
	if($answer=~/^[1|2|3|4|5|6|7]$/) { $flag=1; next; }
        else {print " You have entered $answer This is not an option. Please try again\n";}
    }
    return $answer;
}
####################################################################################

#########################################################################################################################
sub processing_options () {   #### options menu
   my $answer=0;
   while($answer!=3) {
      $answer=&processing_page();
      if($answer==1)  { &process("1"); } # quick&dirty
      if($answer==2)  { &process("2"); } # full monty
      if($answer==3)  { &options(); } #back
   }
   system("clear");
   exit();   #exit program
}
############################################################################################################################



###################################################################################
sub processing_page() {  #### title comments and option selection
    print "\n\t  Enter the number corresponding to the processing option you want\n";
    print "\t  to perform:\n\n";
    print "\t\t1. Standard.\n";
    print "\t\t2. Advanced.\n";
    print "\t\t3. Back to main menu.\n\t";

    my $flag=0; 
    while($flag==0) {
	$answer=<>;
	if($answer=~/^[1|2|3]$/) { $flag=1; next; }
        else {print " You have entered $answer This is not an option. Please try again\n";}
    }
    return $answer;
}
####################################################################################

#########################################################################################################################
sub paperwork_options () {   #### options menu
   my $answer=0;
   print colored("\n\t##### SUBMISSION INFORMATION #####\n","white bold", "on_black");
   while($answer!=5) {
      $answer=&paperwork_page();
      if($answer==1)  { &paperwork("Library"); } 
      if($answer==2)  { &paperwork("Contact"); }
      if($answer==3)  { &paperwork("Publication"); } 
      if($answer==4)  { &paperwork("EST"); } 
      if($answer==5)  { &options(); } #back
   }
   system("clear");
   exit();   
}
############################################################################################################################

###################################################################################
sub paperwork_page() {  #### title comments and option selection
    print "\n\t  This menu allows you to view and add information";
    print "\n\t  required for submitting sequences to dbEST.";
    print "\n\t  Enter the number corresponding to the file you want\n";
    print "\t  to check:\n\n";
    print "\t\t1. Library files.\n";
    print "\t\t2. Contact files.\n";
    print "\t\t3. Publication files.\n";
    print "\t\t4. EST files.\n";
    print "\t\t5. Back to main menu.\n\t";

    my $flag=0; 
    while($flag==0) {
	$answer=<>;
	if($answer=~/^[1|2|3|4|5]$/) { $flag=1; next; }
        else {print " You have entered $answer This is not an option. Please try again\n";}
    }
    return $answer;
}
####################################################################################

############################################################################################################################
sub print_title() {    ##### title scheme
#displays title
    print "\n"; 
    print colored("\t################################################################\n","white bold", "on_black");
    print colored("\t###                                                          ###\n","white bold", "on_black");
    print colored("\t###                  trace2dbest -  Vs $version                 ###\n","white bold", "on_black");
    print colored("\t###          a trace file processing and dbEST               ###\n","white bold", "on_black");
    print colored("\t###               sequence submission tool                   ###\n","white bold", "on_black");
    print colored("\t###                                                          ###\n","white bold", "on_black");
    print colored("\t###   A. Anthony, R. Schmid and colleagues for BANG 2005     ###\n","white bold", "on_black");
    print colored("\t###                                                          ###\n","white bold", "on_black");
    print colored("\t###   News, upgrades and help:   nematode.bioinf\@ed.ac.uk    ###\n","white bold", "on_black");
    print colored("\t###                                                          ###\n","white bold", "on_black");
    print colored("\t################################################################\n","white bold", "on_black");
}
###########################################################################################################################

############################
##### SECTION 1 CONFIG #####
############################

sub check_config() {  
  print colored("\n\t##### CONFIGURATION FILE #####\n","white bold", "on_black");
  print "\nThis option will create or update the trace2dbest configuration file\n";
  print "'.trace2dbest.conf' in your home directory. Please note, you can also\n";
  print "edit your configuration file later on using a text editor.\n"; 

  my $filename = "~/.trace2dbest.conf";
   
  $filename =~ s{ ^ ~ ( [^/]* ) }
              { $1
                    ? (getpwnam($1))[7]
                    : ( $ENV{HOME} || $ENV{LOGDIR}
                         || (getpwuid($>))[7]
                       )
  }ex;
  
  my $config_flag;
  if (-e "$filename") {
    &get_config();
    print "\n\nA configuration file '.trace2dbest.conf' does already exist in your home \n";
    print "directory. If you want to recreate the configuration file from scratch please \n";
    print "type 'y' now. Typing 'n' will keep it as it is.";  
    $config_flag=&yes_no; 
  }
  else {
    $config_flag=1;
    $vector_file="/usr/software/trace2dbest/trace2dbest/vector.seq"; 
    $ecoli_file="/usr/software/trace2dbest/trace2dbest/ecoli.seq";  
    $Lib_file="/usr/software/trace2dbest/trace2dbest/Libfile.db"; 
    $Pub_file="/usr/software/trace2dbest/trace2dbest/Pubfile.db"; 
    $Cont_file="/usr/software/trace2dbest/trace2dbest/Contfile.db";  
    $EST_file="/usr/software/trace2dbest/trace2dbest/ESTfile.db"; 
    $phredoptions = "-cd scf -pd phd_dir -qd qual -sd fasta -exit_nomatch -trim_alt \\\"\\\" 2>>phredlogfile 1> /dev/null";

  }

#### now doing the real work - if wanted
  if ($config_flag==1) {
    print "\ntrace2dbest can screen for vector contamination using a FASTA file\n";
    print "containing the relevant vector sequences. The location of this file\n";
    print "'vector.seq' is set to \'$vector_file\'.\nDo you want to change this entry?";
    my $answer =  &yes_no;
    if ($answer == 1) {
      my $input = &get_input("\nPlease enter new location for 'vector.seq' file (full path)\n");
      chomp $input;  
      if (-e "$input") {$vector_file=$input}
      else {print "$input does not exist, keeping $vector_file\n"}
    }

    print "\ntrace2dbest can screen for E. coli contamination using a FASTA file\n";
    print "containing the relevant sequences. The location of this file\n";
    print "'ecoli.seq' is set to \'$ecoli_file\'.\nDo you want to change this entry?";
    $answer =  &yes_no;
    if ($answer == 1) {
      my $input= &get_input ("\nPlease enter new location for 'ecoli.seq' file (full path)\n");
      chomp $input;  
      if (-e "$input") {$ecoli_file=$input}
      else {print "$input does not exist, keeping $ecoli_file\n"}
    }

    print "\ntrace2dbest store entries for library files in the file 'Libfile.db'\n";
    print "The location of this file is set to $Lib_file . \nDo you want to change this?\n";
    $answer =  &yes_no;
    if ($answer == 1) {
      my $input= &get_input ("\nPlease enter a new location (full path).\n");
      chomp $input;  
      $Lib_file = $input;
    }

    print "\ntrace2dbest store entries for publication files in the file 'Pubfile.db'\n";
    print "The location of this file is set to $Pub_file . \nDo you want to change this?\n";
    $answer =  &yes_no;
    if ($answer == 1) {
      my $input= &get_input ("\nPlease enter a new location (full path).\n");
      chomp $input;  
      $Pub_file = $input;
    }

    print "\ntrace2dbest store entries for contact files in the file 'Contfile.db'\n";
    print "The location of this file is set to $Cont_file . \nDo you want to change this?\n";
    $answer =  &yes_no;
    if ($answer == 1) {
      my $input= &get_input ("\nPlease enter a new location (full path).\n");
      chomp $input;  
      $Cont_file = $input;
    }

    print "\ntrace2dbest store entries for EST files in the file 'ESTfile.DB'\n";
    print "The location of this file is set to $EST_file . \nDo you want to change this?\n";
    $answer =  &yes_no;
    if ($answer == 1) {
      my $input= &get_input ("\nPlease enter a new location (full path).\n");
      chomp $input;  
      $EST_file = $input;
    }


    &create_conf($vector_file,$ecoli_file,$Lib_file,$Pub_file,$Cont_file,$EST_file,$phredoptions);
  }
  print "\n\nBack to main menu\n";
  &options;
}    
###################################################################################################################



############################
#### SECTION 2 - tests #####
############################
sub tests () {
  print colored("\n\t##### TESTS & CHECKS #####\n","white bold", "on_black");
  print "\nThis option will test for the presence of the required executables,\n";
  print "and also check for the presence of some environmental variables and datafiles.\n";
  print "We recommend to sort out any issues highlighted in this option before \n";
  print "processing real data.\n\n"; 
  sleep(1);

  &get_config;
#### first executables
  print "The following programs are essential for trace2dbest:\n";
  foreach my $prog (@progs) {
    if (my $tmp=&find_program("$prog","1"))	    {print "searching for:      $prog\t................OK\n"; sleep(1);}
  }
  print "\n\nThe following programs are optional for trace2dbest:\n";
  foreach my $prog (@optprogs) {
    if (my $tmp=&find_program("$prog","1"))	    {print "searching for:      $prog\t................OK\n"; sleep(1);}
  }

#### environmental variables 
  print "\n\nThe phred parameter file is essential for trace2dbest:\n";
  if ($phredenv) 			    { 
    print "checking ENV:    PHRED_PARAMETER_FILE\t........OK\n"; sleep(1);
    if (-e "$phredenv") {
      print "checking file:   PHRED_PARAMETER_FILE\t........OK\n"; sleep(1);
    }
    else {    
      print colored("ERROR: PHRED_PARAMETER_FILE does not exist.\n","red bold");
    }
  }  
  else {    
     print colored("ERROR: PHRED_PARAMETER_FILE environment variable is not set.\n","red bold");
  }
  
####  files
  print "\n\nThe following files need to be writeable for trace2dbest\n";
  if (-e "$Lib_file" && -w "$Lib_file")  	     {
    print "checking:          Libfile.db\t................OK\n";sleep (1);
  }
  elsif (-e "$Lib_file") {
    print colored("ERROR: $Lib_file not writeable - trying to fix it\n","red bold");
    my $lucky = system ("chmod $Lib_file 755");
    unless ($lucky) {
      print "checking:          Libfile.db\t................OK\n";sleep (1);
    }  
    else{print colored("ERROR: fixing failed\n","red bold");}
  }
  else { 
    print colored("ERROR: $Lib_file does not exist - trying to create it\n","red bold");
    my $lucky = system ("touch $Lib_file");
    unless ($lucky) {
      print "checking:          Libfile.db\t................OK\n";sleep (1);
    }
    else{print colored("ERROR: creating $Lib_file failed\n","red bold");} 
  }

  if (-e "$Pub_file" && -w "$Pub_file")  	     {
    print "checking:          Pubfile.db\t................OK\n";sleep (1);
  }
  elsif (-e "$Pub_file") {
    print colored("ERROR: $Pub_file not writeable - trying to fix it\n","red bold");
    my $lucky = system ("chmod $Pub_file 755");
    unless ($lucky) {
      print "checking:          Pubfile.db\t................OK\n";sleep (1);
    }  
    else{print colored("ERROR: fixing failed\n","red bold");}
  }
  else { 
    print colored("ERROR: $Pub_file does not exist - trying to create it\n","red bold");
    my $lucky = system ("touch $Pub_file");
    unless ($lucky) {
      print "checking:          Pubfile.db\t................OK\n";sleep (1);
    }
    else{print colored("ERROR: creating $Pub_file failed\n","red bold");} 
  }

  if (-e "$Cont_file" && -w "$Cont_file")  	     {
    print "checking:          Contfile.db\t................OK\n";sleep (1);
  }
  elsif (-e "$Cont_file") {
    print colored("ERROR: $Cont_file not writeable - trying to fix it\n","red bold");
    my $lucky = system ("chmod $Cont_file 755"); 
    unless ($lucky) {
      print "checking:          Contfile.db\t................OK\n";sleep (1);
    }  
    else{print colored("ERROR: fixing failed\n","red bold");}
  }
  else { 
    print colored("ERROR: $Cont_file does not exist - trying to create it\n","red bold");
    my $lucky = system ("touch $Cont_file");
    unless ($lucky) {
      print "checking:          Contfile.db\t................OK\n";sleep (1);
    }
    else{print colored("ERROR: creating $Cont_file failed\n","red bold");} 
  }

  if (-e "$EST_file" && -w "$EST_file")  	     {
    print "checking:          EST_file.db\t................OK\n";sleep (1);
  }
  elsif (-e "$EST_file") {
    print colored("ERROR: $EST_file not writeable - trying to fix it\n","red bold");
    my $lucky = system ("chmod $EST_file 755");
    unless ($lucky) {
      print "checking:          EST_file.db\t................OK\n";sleep (1);
    }  
    else{print colored("ERROR: fixing failed\n","red bold");}
  }
  else { 
    print colored("ERROR: $EST_file does not exist - trying to create it\n","red bold");
    my $lucky = system ("touch $EST_file");
    unless ($lucky) {
      print "checking:          EST_file.db\t................OK\n";sleep (1);
    }
    else{print colored("ERROR: creating $EST_file failed\n","red bold");} 
  }


####  more files
  print "\n\nThe following files are optional for trace2dbest\n";
  if (-e "$vector_file")  	     {print "checking:          vector.seq\t................OK\n";sleep (1);}
  else {     print colored("Couldn't find $vector_file , we recommend to install them now.\n","red bold");
  }
  sleep(1);

  if (-e "$ecoli_file")  	     {print "checking:          ecoli.seq\t................OK\n";sleep (1);}
  else {     print colored("Couldn't find $ecoli_file , we recommend to install them now.\n","red bold");
  }
  sleep(1);
  print "\n\n\nTests and checks completed - Back to main menu.\n\n";
  
}





##############################
#### SECTION 3 processing ####
##############################
sub process () {
  print colored("\n\t##### PROCESSING TRACES #####\n","white bold", "on_black");
  print "\nUsing this option will process your trace files. First you will be asked to\n";
  print "enter some information required for successfull processing.\n";
  sleep(1);
  &get_config;
  my $process=$_[0];
  if ($process == 1) {
    print "You have opted for the 'standard' option - This option uses standard locations\n";
    print "for files and standard parameters for processing. It is used best for quick tests.\n";
    print "To make most of your analysis we recommend to use the 'advanced' option\n";
  }
  else {
    print "You have opted for the 'advanced' option - This option allows you to set several\n";
    print "processing parameters to make most of your analysis. \n";
  
  }
  sleep(1);


#### naming scheme
  if ($process == 1) {$scheme = 1;}
  else {
    print colored ("\nTrace file naming scheme\n","bold");
    print "In order for your traces to be processed, the file names must follow one of\nthese schemes:\n";
    print "\n1 - NERC Environmental Genomics scheme\n2 - STRESSGENES scheme\n";
    $input = &get_input("\nPlease enter the appropriate number: ");
    chomp $input;
    $flag =1;
    while ($flag) {
	if ($input !~ /^(1|2)$/) {
		$input = &get_input("Please enter 1 or 2: \n"); 
		chomp $input;
	} else { 
		$flag =0;
	}
    }
    $scheme = $input;
  }

#### trace directory
  print colored ("\n\nTrace file directory","bold");
  print "\n";	#needed to stop the following question coming up in bold
  while ($trace_dir_flag) {
	$input = &get_input("Please enter the full path of the directory containing the trace files\nto be processed\n");
	while ($input =~ /\./) {	#check for . usage (invalid)
		print "The current directory is not a valid option. See trace2dbest documentation\n";
		print "for more details.\n";
		$input = &get_input("Please enter a different directory or q to quit trace2dbest.\n");
		chomp $input;
	}

	while (!-R $input) {
		$input = &get_input("ERROR: $input is not readable. Please re-enter the trace file\ndirectory path or hit 'q' to quit\n"); 
		chomp $input;
		if ($input =~ /^q{1}$/i) {
			print "Ok, trace2dbest will now exit.\n";
			exit();
		}
		while ($input =~ /\./) {	#check for . usage (invalid)
			print "The current directory is not a valid option. See trace2dbest documentation\n";
			print "for more details.\n";
			$input = &get_input("Please enter a different directory or q to quit trace2dbest.\n");
			chomp $input;
		}

	}
	$tracedir = $input;


#get total files in dir and total with library identifer
	opendir(DIR, "$tracedir") or die "$tracedir could not be opened!\n";
	my @file_num = grep  !/^\.\.?\z/ ,   readdir DIR;	#don't get . and ..
	my $file_nums=@file_num;
	my @trace_num;	#declare trace_num
	close DIR;	#don't know why DIR has to be closed and then re=opened
	opendir(DIR, "$tracedir")or die "$tracedir could not be opened!\n";
	if ($scheme == 1) {	#EG format 
		@trace_num = grep  { m/^[A-Za-z]{2}_\w{2,5}_\d{2,3}[A-Za-z]\d{2}$/ }  readdir DIR;
	} elsif ($scheme ==2) {	#SG format
		@trace_num = grep { m/^[A-Z][a-z][A-Z]{2}[0-9]{2}[a-z][0-9]{2}[a-z][0-9]{2}[a-z][0-9]_[A-Z][a-z]{2}[A-Z][a-z]$/ } readdir DIR;
	} else {
		die "Naming scheme outside allowable range!\n";
	}
	my $trace_nums=@trace_num;
	if ($trace_nums ==0) {	#special case for when NO trace files are found
		$input = &get_input("\nNo files with the specified naming scheme were found in $tracedir.\nHit return to re-enter the trace file directory details, or q to quit\ntrace2dbest (so you can edit your trace directory): "); 
		chomp $input;
		if ($input =~  /^q{1}$/i) {
			print "Ok, trace2dbest will now exit...\n";
			exit();
		} else {
			next;
		}
	}
	my @nolib;
	if ($file_nums > $trace_nums) {
		print "\nWarning: There are $file_nums files in $tracedir, however, only $trace_nums files match\nthe specified naming scheme.\n";
		print "The following files in do not match the specified naming scheme: \n\n";
#for the grep operator (cf glob) we can use a regex to match the specified naming scheme.
		if ($scheme == 1) {	#EGformat
			@nolib = grep { !m/^([A-Za-z]{2}_\w{2,5}_\d{2,3}[A-Za-z]\d{2})$/} @file_num;	#find files that don't match (!m) EG format regex (could have used two foreach loops)
		} elsif ($scheme ==2) {	#SG format
			@nolib = grep { !m/^([A-Z][a-z][A-Z]{2}[0-9]{2}[a-z][0-9]{2}[a-z][0-9]{2}[a-z][0-9]_[A-Z][a-z]{2}[A-Z][a-z])$/} @file_num;
		} else {
			die "Naming scheme outside allowable range!\n";
		}
		my $rejects = join "\n", @nolib;
		print $rejects;
		print "\n\nThe above files will be DELETED and not be further processed. ";
	} elsif ($file_nums == $trace_nums) {
		print "\nThere are $trace_nums files that match this naming scheme in $tracedir\n"; 
	} else {
		die "There are $trace_nums trace files but only $file_nums files!\n";
	}
	$input = "y";
	unless ($process == 1) {$input = &get_input("Is this correct? (y/n): "); chomp $input;}; 	
	if ($input =~ /n/i) {
		$input = &get_input("Hit return to re-enter the trace file directory details, or q to quit\ntrace2dbest (so you can edit your trace directory): "); 
		chomp $input;
		if ($input =~ /^q{1}$/i) {
			print "Ok, trace2dbest will now exit...\n";
			exit();
		} 
	} else {
		foreach my $file (@nolib) {
			my $rm_file = $tracedir . "/" . $file;
			print "removing $rm_file\n";
			unlink ($rm_file) or die "could not unlink $rm_file! Please manually edit your trace directory\n";	#remove any files in trace that don't match naming scheme -brutal.
			
		}	
		$trace_dir_flag =0;
	}
	$final_file_num = $trace_nums;	#save trace nums as global variable for use later. ok to use trace nums because file_num and trace_num should be the same now.
  }

#### get rid of old directories
  foreach $dir (@dirlist) {
    if(stat("$dir")) {
	$dirflag=1;
	last;
    }
  }

  if($dirflag==1) {
    print colored ("\nCheck working directory\n","bold");
    print colored ("\n#### WARNING ! ####","green bold");
    print "\nSome directories/files from previous runs exist. These\n"; $|=1;
    print "directories must be removed before trace2dbest can continue.\n";
    print "Remove now [y/n]? : ";
    &query_for_exit();

    foreach $dir (@dirlist) {

		if(stat("$dir")) {
			print "Remove directory \'$dir\' and its contents ? [y/n] : ";
	    	&query_for_exit();
	    	&rmtree($dir);
		}
    }
  }


  if (-e "phredlogfile") {
	print "Removing old phred logfile\n";
	unlink "phredlogfile";
  }
  if (-e "cmlogfile") {
	print "Removing old cross_match logfile\n";
	unlink "cmlogfile";
  }

  ##### set up dirs
  mkdir "subfiles"; 
  mkdir "fasta"; 
  mkdir "qual"; 
  mkdir "phd_dir";
  mkdir "scf";
  mkdir "fastafiles"; 
  mkdir "partigene"; 
  mkdir "blast_reports";
  mkdir "process";
  mkdir "process/traceinfo";
  mkdir "raw_traces"; 



	my @traces = glob ("$tracedir/*");
	foreach my $file (@traces) {
		copy ($file, "raw_traces")or die "Can't copy $tracedir to raw_traces\n", print LOG " $tracedir/* to raw_traces:$!\n";
	} 





##### get info for specialists
  if ($process == 2) {
    print "\nThe following information is required to allow trace2dbest to process your\ntraces efficiently.\n";
    print colored ("\nAdapter","bold");
    print "\n"; 	#on separate line so next question doesn't get bolded
    $flag =1;
    while ($flag) {
	#print "If you would like trace2dbest to trim off an adapter sequence (and everything\nupstream of it), please enter the adapter sequence here or hit\nreturn to continue: ";
	$adapter = &get_input("If you would like trace2dbest to trim off an adapter sequence (and everything\nupstream of it), please enter the adapter sequence here or hit\nreturn to continue: "); 
	chomp $adapter;
	#this check has been hashed out so that a regex can be used in the adapter sequence
	#if ($adapter =~ /[^ACGT]/i) {
	#	print "Please enter a valid DNA sequence.\n";
	#} else {
		$flag = 0;
	#}
    }
    
    print colored ("\nVector file\n","bold");
    unless (-s $vector_file) {
	print "ERROR: $vector_file does not appear to exist or is empty.\n";
        print "Please check your setup - Back to main menu.\n";
	&options;
    
    }
    $no_vect=0;	#assume vector is in file (but see below)
    $input= &get_input("Would you like to see a list of all the vector sequences in\n $vector_file? (y/n): "); 
    chomp $input;
    if ($input =~ /y/i) {
      $no_vect = &Get_vectors($vector_file);	#will return 1 if vector not in file
      unless ($no_vect=1) {
        print "Please update $vector_file . - Exiting now.\n";
        sleep(1);
        exit; 	  
      }
    }

#extra bit to check that ecoli.seq is in place
    print colored ("\nE.coli sequence information","bold");
    print "\n";	#so that next bit doesn't get bolded
    $input = &get_input("Do you want to screen for E.coli sequence in your ESTs? (y/n): "); 
    chomp $input;
    if ($input =~ /y/i) {
	print "Ok, checking for ecoli.seq file...";
	if (-s "$ecoli_file") {	#file exists with non-zero size
		print "ecoli.seq file found at $ecoli_file.\n";
		
	} else {
		print "ERROR: The file ecoli.seq was not found in $ecoli_file. Please check your configuration file.\n";
		print "trace2dbest will NOT screen for E.coli sequence.\n";
		
	}
	$ecoli_cross = 0;
    } 
    else {
	print "Ok, trace2dbest will NOT screen for E.coli sequence.\n";
	$ecoli_cross = 0;
    }

    print "\nFor each of the parameters, enter the value you wish to use or hit return to\nuse the default shown in brackets().\n";
    print colored ("\nphred length cut-off setting","bold");
    print "\n";
    $flag = "1";
    while ($flag) {
	    my $input = &get_input("Number of high quality bases required in sequence (default setting is 150 bases)"); 
	    chomp $input;
	    if ($input) {
		    if ($input =~ /^\d*$/ ) {
			    $high_qual_cutoff = $input;
			    print "High quality cut-off set to $high_qual_cutoff\n";
			    $flag = 0;
		    } else {
			    print "You must enter a number!\n";
			    next;
		    }
	    }else {
		    $high_qual_cutoff = 150;
		    print "High quality cut-off of $high_qual_cutoff selected\n";
		    $flag =0;
	    }
    }

    print colored ("\ncross_match parameter setting\n","bold");
    print "cross_match for vector sequence - \n";
    $flag = "1";    #flag reused
    while ($flag) {
	    my $input = &get_input("minmatch (10): "); 
	    chomp $input;
	    if ($input) {
		    if ($input =~ /^\d*$/ ) {
			    $vminmatch = $input;
			    print "minmatch set to $vminmatch\n";
			    $flag = 0;
		    } else {
			    print "You must enter a number!\n";
			    next;
		    }
	    }else {
		    $vminmatch = 10;
		    print "minmatch value of $vminmatch selected\n";
		    $flag =0;
	    }
    }
    $flag = 1;
    while ($flag) {
	    my $input = &get_input("minscore (20): "); 
	    chomp $input;
	    if ($input) {
		    if ($input =~ /^\d*$/ ) {
			    $vminscore = $input;
			    print "minscore set to $vminscore\n";
			    $flag = 0;
		    } else {
			    print "You must enter a number!\n";
			    next;
		    }
	    }else {
		    $vminscore = 20;
		    print "minscore value of $vminscore selected\n";
		    $flag =0;
	    }
    }
    if ($ecoli_cross) {
	    print "cross_match for E.coli sequence - \n";
	    $flag = "1";
	    while ($flag) {
		    my $input = &get_input("minmatch (20): "); 
		    chomp $input;
		    if ($input) {
			    if ($input =~ /^\d*$/ ) {
				    $eminmatch = $input;
				    print "minmatch set to $eminmatch\n";
				    $flag = 0;
			    } else {
				    print "You must enter a number!\n";
				    next;
			    }
		    }else {
			    $eminmatch = 20;
			    print "minmatch value of $eminmatch selected\n";
			    $flag =0;
		    }
	    }
	    $flag = 1;
	    while ($flag) {
		    my $input = &get_input("minscore (30): "); 
		    chomp $input;
		    if ($input) {
			    if ($input =~ /^\d*$/ ) {
				    $eminscore = $input;
				    print "minscore set to $eminscore\n";
				    $flag = 0;
			    } else {
				    print "You must enter a number!\n";
				    next;
			    }
		    }else {
			    $eminscore = 30;
			    print "minscore value of $eminscore selected\n";
			    $flag =0;
		    }
	    }
    }
    print colored ("\nPoly(A) tail length cut-off setting","bold");
    print "\n";
    $flag = 1;
    while ($flag) {
	    my $input = &get_input("Enter number bases in poly(A) tail (default setting is 12 consecutive A bases): "); 
	    chomp $input;
	    if ($input) {   #if $input eq "0" then this will not pass and poly a will get set to 12. setting poly(A) to zero is a bad idea
		    if ($input =~ /^\d{1,2}$/ ) {
			    $poly_num = $input;
			    print "poly(A) tail set to $poly_num\n";
			    $flag=0;
		    } else {
			    print "You must enter a whole number between 1 and 99!\n";
			    next;
		    }
	    } else {
		    $poly_num = 12;
		    print "poly(A) tail set to $poly_num\n";
		    $flag=0;
	    }
    }
    print colored ("\nSpliced leader 1 identification\n","bold");
    print "If you wish to trim the nematode spliced leader sequence, type 'yes', or\n";
    print "enter an alternative nucleotide sequence.";
    $input = &get_input("If you do not wish to trim any spliced leader sequence just hit return: "); 
    chomp $input;
    if ($input) {
	    if ($input =~ /yes/i) { 
		    $sl_flag = 1;
		    $sl_seq = "ggtttaattacccaagtttgag";
		    print "\nThe nematode SL sequence will be trimmed\n";
	    } elsif ($input =~ /[acgt]*/) {
		    $sl_flag =1;
		    $sl_seq = $input;
		    print "The sequence $sl_seq will be trimmed\n";
	    }
    } else { 
	    print "\nOk, no SL will be trimmed\n";
	    $sl_flag = 0;
    }
  }


#### using all standard
  else {
    unless (-e "$vector_file")  	   {
      print "WARNING: couldn't find vector.seq\n";  
    }
    $ecoli_cross =1;
    unless (-e "$vector_file") {
      print "Ok, trace2dbest will NOT screen for E.coli sequence.\n";
      $ecoli_cross = 0;
    }
    $vminmatch = 10; $vminscore =20; $eminmatch=20; $eminscore =30; $high_qual_cutoff =150; $poly_num = 12;
  }


##### phred it
  print "trace2dbest has gathered all necessary information for trace processing\n";
  print "Starting phred (basecalling from raw traces) ...\n";
  sleep(2); 
  print "Running phred  - please wait...";
#see below (sequence processing) for explaination of phred arguements
  system("phred -id $tracedir $phredoptions ");	#exit_nomatch means phred will exit if the trace chemistry is not found in phredpar.dat

  if (my @phred_out = glob ("fasta/*.seq")) {
	my $phred_out_size = @phred_out;
	if ($phred_out_size == $final_file_num) {
		print LOG "PHRED: .seq files found in fasta - phred ok.\n";
		print "Done\n";
	} else {
		print LOG "\nPHRED WARNING: $final_file_num files with a valid name where passed to phred, but only $phred_out_size files were processed!\n";
		my $unphreded = $final_file_num - $phred_out_size;
		print "\n\t$unphreded of $final_file_num files were not processed by phred. See phredlogfile and logfile for details.\n";
		my $input = &get_input("\tEnter 'q' to quit or hit return to continue: "); 
		chomp $input;
		if ($input =~ /^q{1}$/i) {
			print "Ok, trace2dbest will now exit...\n";
			exit();
		} 
	}
	$stats[1] = $phred_out_size;	
  } else {
	print LOG "PHRED: No .seq files found in fasta directory\n";
	print colored ("FAILED!\n", "red bold");
	print "Check logfile and phredlogfile. trace2dbest will now exit...\n";
	exit();
  }

#cross_match for vector
#concatenate the .seq files created by phred into all.seq - makes cross_match quicker
  system("cat ./fasta/*.seq > all.seq" || die "cat fasta/*.seq failed!\n");
  print "Running cross_match (screening for vector sequence) - please wait...";
  system("cross_match all.seq $vector_file -minmatch $vminmatch -minscore $vminscore -screen 2>>cmlogfile 1> /dev/null"); 
  if (-s "all.seq.screen") {
	print LOG "Vector crossmatch: all.seq.screen - vector crossmatch ok.\n";
	print "Done\n";
  } else { 
	print LOG "Vector crossmatch: No all.seq.screen\n";
	print colored ("FAILED!\n", "red bold");
	print "Check logfile and cmlogfile. trace2dbest will now exit...\n";
	exit();
  }
#change the cross_match Xs to Zs so that they can be differentiated from the cross_match Xs from ecoli screen.
  open (SCREENED, "all.seq.screen")or die "Could not open all.seq.screen|\n";
  my @screened = <SCREENED>;
  close SCREENED;

  foreach my $line (@screened) {
        unless ($line =~ /^>/) {  
	  $line =~ s/X/Z/g;
	}  
  }

  open (SCREEN_F, ">all.seq.screen");	#overwrite all.seq.screen
  print SCREEN_F @screened;
  close SCREEN_F;

#cross_match for ecoli

  if ($ecoli_cross) {
	rename "all.seq.screen", "all.n";	#vector masked sequence produced by cross_match  to all.n
	print "Running cross_match (screening for E. coli sequence) - please wait...";
	system("cross_match all.n $ecoli_file -minmatch $eminmatch -minscore $eminscore -screen 2>>cmlogfile 1> /dev/null");	
	if (-s "all.n.screen") {
		print LOG "Ecoli crossmatch: all.n.screen - ecoli crossmatch ok.\n";
	} else { 
		print LOG "Ecoli crossmatch: No all.n.screen\n";
		print colored ("FAILED!\n", "red bold");
		print "Check logfile and cmlogfile. trace2dbest will now exit...\n";
		exit();
	}
	split_screened("all.n.screen","fasta"); #split output (all.n.screen) into individual files
	if (glob ("fasta/*.seqsc")) {
		print LOG "E.coli crossmatch: .seqsc files found in fasta - ecoli cm  ok.\n";
		print "Done\n";
	} else {
		print LOG "E.coli crossmatch: No .seqsc files found in fasta\n";
		print colored ("FAILED!\n", "red bold");
		print "Check logfile and cmlogfile. trace2dbest will now exit...\n";
		exit();
	}
	unlink glob "all.n*";
	unlink glob "all.seq*";
  } else {
	split_screened ("all.seq.screen", "fasta");
	unlink ("all.seq","all.seq.log", "all.seq.screen");
  }

  print colored("\nSequence processing\n", "bold");
#after basecalling by phred, the sequence is trimmed using the lowest(upstream) and highest(downstream) trim values from the phd
#file TRIM: field and the trim values suggested by the -trim arguement (shown in header of fasta dir files). Generally the lowest
#and highest values come from the phd file but sometimes -trim gives the lowest trim position. The remaining trim values are used
#to define the "high quality" section of the sequence. These values are changed accordingly following the subsequent sequnce
#processing to remove vector, ecoli, NNN+ etc.

  opendir (PHD_DIR, "./phd_dir");	#open the directory to allow looping thru the list of files
  while (my $file= readdir PHD_DIR) {
	my ($sequence, $t_hq_start, $t_hq_total, $t_hq_end, $p_hq_start, $p_hq_end,
	 $trim_start, $trim_end, $hi_qual_start, $hi_qual_end, $seq_length, $trace_name, $svector_seq);#clear varibables
	 my $polyA_des = "poly(A) not found";	
	next if $file =~ /^\./;	#avoid . and ..
	$file=~ s/\.phd\.1/\.seqsc/;	#convert file name 
	open (FASTA_FILE, "<./fasta/$file") or die "The file $file in the ./fasta directory could not be opened!\n";
	while (my $line = <FASTA_FILE> || !eof(FASTA_FILE)) {
		if ($line =~ />.+\s+\d+\s+(\d+)\s+(\d+)/) {	#get the trim info from the fasta header (number of bases to be trimmed at begining and length of seq after trimming
			$t_hq_start = $1;
			$t_hq_total =$2;
			if ($t_hq_total ==0) {
		    	$stats[2]++;  #add to 'number of low quality traces' stats
			} 
		} else {
			chomp $line;
			$sequence .= $line;	#add the sequence to $sequence
		}
	}
	close (FASTA_FILE);
	$file=~ s/\.seqsc/\.phd\.1/;	#and back again  
	open (PHD_FILE, "<./phd_dir/$file") or die "The file $file in the ./phd_dir directory could not be opened!\n";	#open the equivalent file in the phd_dir 
	while (my $line = <PHD_FILE>){
	    if($line=~/TRIM:\s(-?\d+)\s(-?\d+)/){ #get 1st and last bases of high quality read segment use -? to allow for presence of -1 
			$p_hq_start=$1; $p_hq_end=$2; last;       #from trim line of phd file
	    }
	}
	close(PHD_FILE);
	
#convert trim positions to same format
	$t_hq_end = $t_hq_start + $t_hq_total;
	$t_hq_start++;	#in fasta file this value refers to number of bases to be trimmed at start, we want position of high quality, so add 1
#get lowest and highest for trimming, use remaining two values for defining high qual section
	if ($p_hq_start < 0) {
		$p_hq_start = 0;
	}
	if ($t_hq_start >= $p_hq_start) {
		$trim_start = $p_hq_start-1;	#minus 1 to give position of last base to be trimmed at start of raw sequence (= number of bases trimmed at start)
		$hi_qual_start = $t_hq_start;
	} else {
		$trim_start = $t_hq_start -1;	#minus 1 to give position of last base to be trimmed at start of raw sequence
		$hi_qual_start = $p_hq_start;
	}
	if ($t_hq_end <= $p_hq_end) {
		$trim_end = $p_hq_end +1;	#add one to give postion of first base to be trimmed at end of raw sequence
		$hi_qual_end = $t_hq_end;
	} else {
		$trim_end = $t_hq_end +1;	#add one to give postion of first base to be trimmed at end of raw sequence
		$hi_qual_end = $p_hq_end;
	}

#trim sequence
	$seq_length = ($trim_end -$trim_start) -1;	#minus 1 to give length of trimmed seq
	#trim_start shouldn't be negative because that messes up the substr. so if <0 add 1. make new variable to keep trim_start consistent with rest of script
	my $trim_start_new;
	if ($trim_start < 0) {
		$trim_start_new = 0;
	} else {
		$trim_start_new = $trim_start;
	}
	$sequence = substr ($sequence, $trim_start_new, $seq_length);
#convert hi qual regions to positions in trimmed sequence (instead of raw sequence)
	$hi_qual_start = $hi_qual_start -$trim_start;
	$hi_qual_end = $hi_qual_end - $trim_start;	#adjust high qual region positions

#print sequence details to process dir
	($trace_name = $file) =~ s/\.phd.1//;
	open (TRACEINFO, ">./process/traceinfo/$trace_name") or die "The file $trace_name in the ./process/traceinfo directory could not be created/opened!\n";
	print TRACEINFO "$trace_name Sequence processing information\n";
	print TRACEINFO "Quality trimming information\n";
	print TRACEINFO "Position of last base trimmed at start of raw sequence: $trim_start\n";
	print TRACEINFO "Position of first base trimmed at end of raw sequence: $trim_end\n";
	print TRACEINFO "Length of sequence after trimming: $seq_length\n";
	print TRACEINFO "Start position of high quality region in trimmed sequence: $hi_qual_start\n";
	print TRACEINFO "End position of high quality region in trimmed sequence: $hi_qual_end\n";
	
#chuck out sequences that have high qual section < $high_qual_cutoff
 	if (($hi_qual_end - $hi_qual_start) <= $high_qual_cutoff) {
		my $high_qual_length = $hi_qual_end - $hi_qual_start;
		if ($high_qual_length < 0) { $high_qual_length =0 }
		print TRACEINFO "Sequence rejected - high quality length () is less than $high_qual_cutoff\n";
		print "$trace_name has beeen rejected, high quality sequence length = $high_qual_length\n";
		print LOG "$trace_name has beeen rejected, high quality sequence length = $high_qual_length\n";
		next;
	}

	
#trim adapter (if one has been entered) 	
#do this before vector trimming, otherwise vector trimming may take off a little bit of adapter meaning that this section wouldn't recognise it.
	if($adapter){
      	if($sequence =~ s/^(.{0,200}$adapter)//i) {  # remove adapter if it appears in first 200 bases
		
			my $adapter_trim = $1;
			my @adapter_trim = split ('', $adapter_trim);
			my $at_length= @adapter_trim;
			open (ADAPTERTRIM, ">>./process/adapter_trim") or die "Could not open process/adapter_trim!\n";
			print ADAPTERTRIM "$trace_name: $at_length bases were trimmed\nTrimmed sequence: $adapter_trim\n\n";
			close ADAPTERTRIM;
			print TRACEINFO "Adapter trimming\nAdapter sequence: $adapter\nLength of sequence trimmed (from start):$at_length\nSequence trimmed= $adapter_trim\n";
			$hi_qual_start = $hi_qual_start - $at_length;	#may make hi qual start -ve but that shouldn't matter
			$hi_qual_end = $hi_qual_end -$at_length;

	   	}
	}


#trimming for vector - Xs now converted to Zs
	if($sequence =~ s/^(.{0,150}Z+)//i) {	#substitue sequence beginning with any number of chars between
											#0 and 150, followed by any number of X's, with nothing.
	    my $svector_seq = $1;
	    my @svector = split('',$svector_seq);
	    my $sv_length = @svector;
		open (VECTORTRIM, ">>./process/vector_trim") or die "Could not open process/vector_trim!\n";
		print VECTORTRIM "$trace_name: $sv_length bases were trimmed at the start of the sequence.\nTrimmed sequence: $svector_seq\n\n";
		close VECTORTRIM;
	    print TRACEINFO "Leading vector trimming\nLength trimmed at start= $sv_length\nSequence trimmed= $svector_seq\n";
	    $hi_qual_start = $hi_qual_start - $sv_length;	#may make hi qual start -ve but that shouldn't matter
		$hi_qual_end = $hi_qual_end -$sv_length;
	}


	if($sequence =~ s/(Z+.{0,150})$//i){   # remove trailing vector seq
   
	   	my $evector_seq = $1;
	   	my @evector = split('',$evector_seq);
	   	my $ev_length = @evector;	#sv_length reused
		open (VECTORTRIM, ">>./process/vector_trim") or die "Could not open process/vector_trim!\n";
		print VECTORTRIM "$trace_name: $ev_length bases were trimmed at the end of the sequence.\nTrimmed sequence: $evector_seq\n\n";
		close VECTORTRIM;
	   	print TRACEINFO "Trailing vector trimming\nLength trimmed at end= $ev_length\nSequence trimmed= $evector_seq\n";
	   	#high qual start shouldn't be affected but high qual end might be. hi qual end can be made = end of seq later.
	}

#trimming for ecoli - Xs still Xs
	if($sequence =~ s/^(.{0,150}X+)//i) {	#substitue sequence beginning with any number of chars between
											#0 and 150, followed by any number of X's, with nothing.
	    my $secoli_seq = $1;
	    my @secoli = split('',$secoli_seq);
	    my $sv_length = @secoli;
		open (ECOLITRIM, ">>./process/ecoli_trim") or die "Could not open process/ecoli_trim!\n";
		print ECOLITRIM "$trace_name: $sv_length bases were trimmed at the start of the sequence.\nTrimmed sequence: $secoli_seq\n\n";
		close ECOLITRIM;
	    print TRACEINFO "Leading E.coli sequence trimming\nLength trimmed at start= $sv_length\nSequence trimmed= $secoli_seq\n";
	    $hi_qual_start = $hi_qual_start - $sv_length;	#may make hi qual start -ve but that shouldn't matter
		$hi_qual_end = $hi_qual_end -$sv_length;
	}


	if($sequence =~ s/(X+.{0,150})$//i){   # remove trailing ecoli seq
   
	   	my $eecoli_seq = $1;
	   	my @eecoli = split('',$eecoli_seq);
	   	my $ev_length = @eecoli;	#sv_length reused
		open (ECOLITRIM, ">>./process/ecoli_trim") or die "Could not open process/ecoli_trim!\n";
		print ECOLITRIM "$trace_name: $ev_length bases were trimmed at the end of the sequence.\nTrimmed sequence: $eecoli_seq\n\n";
		close ECOLITRIM;
	   	print TRACEINFO "Trailing E.coli sequence trimming\nLength trimmed at end= $ev_length\nSequence trimmed= $eecoli_seq\n";
	   	#high qual start shouldn't be affected but high qual end might be. hi qual end can be made = end of seq later.
	}


#trimming for N's
	if($sequence =~ s/^(.{0,150}NNN+)//i) {  # remove low quality bases if at least 3 in a row

	   	my $ntrim_seq = $1;
	   	my @ntrim = split('',$ntrim_seq);
	   	my $nt_length = @ntrim;
		open (QUALTRIM, ">>./process/quality_trim") or die "Could not open process/quality_trim!\n";
		print QUALTRIM "$trace_name: $nt_length bases trimmed from start of sequence\nTrimmed sequence:$ntrim_seq\n\n";
		close QUALTRIM;
	   	print TRACEINFO "Start N trimming\nLength of sequence trimmed (from start): $nt_length\nSequence trimmed= $ntrim_seq\n";
	   	$hi_qual_start = $hi_qual_start - $nt_length;	#may make hi qual start -ve but that shouldn't matter
		$hi_qual_end = $hi_qual_end -$nt_length;
	}

	
#trim SL-1 or equivalent
#Most C. elegans genes are trans-spliced at their 5' end to a small leader exon,
#called SL-1. Estimates of the actual proportion range from 80% to 60%. The SL-1 sequence can be used as a 
#5' end marker - any sequence upstream of it can be removed.
#this bit of code finds the sl and any sequence before it.
#complement sl sequence not trimmed because this is likley to be in the low quality region of the sequence read.
#added 6/5/03
#this an optional step and the user can enter their own sequence for removal
		
	if ($sl_flag) {	#sl sequence to be found and all upstream sequence removed
		if ($sequence =~ s/^(.*$sl_seq)//i) {	
			
			my $sl_trim = $1;
			my @sl_trim = split ('',$sl_trim);
			my $st_length = @sl_trim;
			open (ADAPTERTRIM, ">>./process/adapter_trim")  or die "Could not open process/adapter_trim!\n";
			print ADAPTERTRIM "$trace_name: $sl_seq found, $st_length bases were trimmed\nTrimmed sequence: $sl_trim\n\n";
			close ADAPTERTRIM;
			print TRACEINFO "Trimming for $sl_seq\nLength of sequence trimmed (from start): $st_length\nSequence trimmed= $sl_trim\n";
			$hi_qual_start = $hi_qual_start - $st_length;	#may make hi qual start -ve but that shouldn't matter
			$hi_qual_end = $hi_qual_end -$st_length;

		}
	}

#poly A trimming there is still the potential that a poly A/T will not be properly trimmed if it overlaps the 150 bp boundary, but this shouldn't cause too much of a problem.
        my $polya_cut = $high_qual_cutoff - $poly_num;
	if ($sequence =~ /(.{$polya_cut,}?)(a{$poly_num}.*)/i) {	
#last occurence of polya after acceptable length 
#poly_num is the user specified length of poly a tail (default is 12)
		my $pa_trim = $2;
		$sequence = $1;	#not using substition of $2 so that we get the right string in the right position extracted.
		my @pa_trim = split ('',$pa_trim);
		my $pat_length = @pa_trim;
		open (POLYATRIM, ">>./process/polya_trim")  or die "Could not open process/polya_trim!\n";
		print POLYATRIM "$trace_name: $pat_length bases were trimmed for poly(A)\nTrimmed sequence: $pa_trim\n\n";
		close POLYATRIM;
		print TRACEINFO "Poly(A) trimming\nLength of sequence trimmed (from end): $pat_length\nSequence trimmed= $pa_trim\n";
		$polyA_des = "poly(A) trimmed";
        }
	
#polyT trimming - revised algorithm. 
	my $rev_sequence = reverse $sequence;	#reverse sequence so we can look for polyT in same way as polyA
	if ($rev_sequence =~ /(.{$polya_cut,}?)(t{$poly_num}.*)/i ) {  
#poly_num is the user specified length of poly a tail (default is 12)
	   
	   my $pt_trim = $2;
	   $rev_sequence = $1;
	   my @pt_trim = split ('',$pt_trim);
	   my $ptt_length = @pt_trim;
	   $pt_trim = reverse $pt_trim;	#make pt_trim represent trimmed sequence as it would be in the actual (non reversed) sequence
	   open (POLYATRIM, ">>./process/polya_trim")  or die "Could not open process/polya_trim!\n";
	   print POLYATRIM "$trace_name: $ptt_length bases were trimmed for poly(t)\nTrimmed sequence: $pt_trim\n\n";
	   close POLYATRIM;
	   print TRACEINFO "Poly(t) trimming\nLength of sequence trimmed (from start): $ptt_length\nSequence trimmed= $pt_trim\n";
	   $hi_qual_start = $hi_qual_start - $ptt_length;  #may make hi qual start -ve but that shouldn't matter
	   $hi_qual_end = $hi_qual_end -$ptt_length;
	   $polyA_des = "poly(T) trimmed";
	}
	$sequence = reverse $rev_sequence;	#return $sequence to original orientation

#check for Zs (vector)  in middle of sequence (leading and trailing Zs already removed). Zs in middle may have occured because the trace represents a chimeric EST or cross_match has mistakenly identified ecoli seq in middle of sequence. they may also be other reasons.
	$chim_flag = 0;
	my $mid_z_num = 0;
	while ($sequence =~ s/[z]/n/i) {
		$mid_z_num++;
		$chim_flag = 1;
	}
	if ($chim_flag) {
		open (VECTORTRIM, ">>./process/vector_trim") or die "Could not open process/vector_trim!\n";
		print VECTORTRIM "$trace_name is a potential chimera - cross_match has masked $mid_z_num vector bases\n";
		print VECTORTRIM "in the middle of this sequence. The cross_match X's have been replaced with n's.\n";
		close VECTORTRIM;
		print TRACEINFO "**potential chimera - cross_match has masked $mid_z_num vector bases in the middle of the sequence!\n";
		print "$trace_name is a potential chimera - cross_match has masked $mid_z_num vector bases\n";
		print "in the middle of this sequence. The cross_match X's have been replaced with n's.\n";
		print LOG "$trace_name is a potential chimera - cross_match has masked $mid_z_num vector bases in the\nmiddle of this sequence. The cross_match X's have been replaced with n's.\n";
	}

#repeat above for Xs (ecoli)
#---check for Xs in middle of sequence (leading and trailing Xs already removed). Xs in middle may have occured because the trace represents a chimeric EST or cross_match has mistakenly identified ecoli seq in middle of sequence. they may also be other reasons.
	$chim_flag_e = 0;
	my $mid_x_num = 0;
	while ($sequence =~ s/[x]/n/i) {
		$mid_x_num++;
		$chim_flag_e = 1;
	}
	if ($chim_flag_e) {
		open (VECTORTRIM, ">>./process/vector_trim") or die "Could not open process/vector_trim!\n";
		print VECTORTRIM "$trace_name is a potential chimera - cross_match has masked $mid_x_num E.coli bases\n";
		print VECTORTRIM "in the middle of this sequence. The cross_match X's have been replaced with n's.\n";
		close VECTORTRIM;
		print TRACEINFO "**potential chimera - cross_match has masked $mid_x_num E.coli bases in the middle of the sequence!\n";
		print "$trace_name is a potential chimera - cross_match has masked $mid_x_num E.coli bases\n";
		print "in the middle of this sequence. The cross_match X's have been replaced with n's.\n";
		print LOG "$trace_name is a potential chimera - cross_match has masked $mid_x_num E.coli bases in the\nmiddle of this sequence. The cross_match X's have been replaced with n's.\n";
	}
		
#finalise and check high qual lengths	
	my @sequence = split('',$sequence);
	my $sequence_length = @sequence;
	if ($hi_qual_end > $sequence_length) {	#adjust high qual end in case it is longer than the sequence length (e.g. if a big chunk of trailing vector was removed)
		$hi_qual_end = $sequence_length;
	}
	if ($hi_qual_start < 1) {	#adjust high qual start in case it is less than 1 (e.g. if a big chunk of leading vector was removed)
		$hi_qual_start = 1;
	}
#chuck out sequences that have high qual section < $high_qual_cutoff
 	if (($hi_qual_end - $hi_qual_start) <= $high_qual_cutoff) {
		my $high_qual_length = $hi_qual_end - $hi_qual_start;
		if ($high_qual_length < 0) { $high_qual_length =0 }	#make high qual a sensible number if less than zero
		print TRACEINFO "Sequence rejected - high quality length ($high_qual_length bp) is less than $high_qual_cutoff\n";
		print "$trace_name has beeen rejected, high quality sequence length after trimming = $high_qual_length (cut off $high_qual_cutoff)\n";
		print LOG "$trace_name has beeen rejected, high quality sequence length after trimming = $high_qual_length (cut off $high_qual_cutoff)\n";
		next;
	}
	open(OUTFILE,">>subfiles/$trace_name");
	print OUTFILE ">$trace_name hq start $hi_qual_start hq end $hi_qual_end $polyA_des";
	if ($chim_flag) {
		print OUTFILE " chimeric(vector)?";
	}
	if ($chim_flag_e) {
		print OUTFILE " chimeric(E.coli)?";
	}
	print OUTFILE "\n";
	
    my $n=0;
  	while ($sequence =~ /(.)/g) {
		print OUTFILE "$1";
		$n++;
		if(($n % 60) == 0) { 
    		print OUTFILE "\n"; 
		}
   	}
   	if(($n % 60) != 0) {
		print OUTFILE "\n";
   	}
   	close OUTFILE;
	close TRACEINFO; close VECTORTRIM; close QUALTRIM; close ADAPTERTRIM; close POLYATRIM;	#if not already closed
  } #while from PHD_DIR
   
  
  
  # statistics
  print colored("\nStatistics\n", "bold");

  opendir (DIR, "./subfiles");
  while (defined (my $file = readdir(DIR))) {
	if (($scheme ==1 && $file =~ /^[A-Za-z]{2}_\w{2,5}_(\d{2,3})([A-Za-z])(\d{2})$/) ||
	 ($scheme ==2 && $file =~ /^[A-Z][a-z][A-Z]{2}[0-9]{2}[a-z]([0-9]{2})([a-z])([0-9]{2})[a-z][0-9]_[A-Z][a-z]{2}[A-Z][a-z]$/)) {
	 	my ($hi_qual_s, $hi_qual_end, $seq);
		open(INFILE,"<subfiles/$file");	#open this file to read in seq info
		while (my $line = <INFILE>) {
	    	  if ($line=~ /^>.+\shq\sstart\s(\d+)\shq\send\s(\d+)(.*)/) {	#regex matching fasta type header of the file
				$hi_qual_s=$1; $hi_qual_end=$2; 
			}
	          else{   # Read in the sequence
				chop $line;
				$seq .= $line;
	          }
		}
	        $stats[3]++;	#add to "number of submission files"
	        $stats[4] += length($seq);	#gives length of sequence
	        $stats[5]+= ($hi_qual_end - $hi_qual_s); #gives length of high qual part of seq

       }
   }

  
  print ("\n\t$stats[1]\t\tTraces processed\n");
  if ($stats[1] == 0) {
	print "No traces processed. Check logfile, cmlogfile and phred logfile for errors\n";
	print "trace2dbest will now exit\n";
	exit();
  }
  my $good = $stats[1] - $stats[2];
  my $percent = int ($good * 100 / $stats[1]);
  print ("\t$good ($percent%)\t'Good quality' traces\n");
  my $percent2 = int ($stats[3] * 100/$stats[1]);
  print ("\t$stats[3] ($percent2%)\tSubmissable sequences after trimming\n");
  if ($stats[3] == 0) {
	print "No submissable traces. Check logfile, cmlogfile and phred logfile for errors\n";
	print "trace2dbest will now exit\n";
	exit();
  }
  my $av_l = int($stats[4] / $stats[3]);
  print ("\t$av_l\t\tAverage length of submissable sequences\n");
  my $av_h = int($stats[5] / $stats[3]);
  print ("\t$av_h\t\tAverage number of high quality bases for submissable sequences\n");
  print ("\n\n");
#write these stats to a file for inclusion in output dir
  open (STATS, ">>./process/statistics");
  print STATS "\n\t### Trace process statistics  ###\n";
  print STATS "$stats[1]\t\tTraces processed\n";
  print STATS "$good ($percent%)\t\t'Good quality' traces\n";
  print STATS "$stats[3] ($percent2%)\t\tSubmissable sequences after trimming\n";
  print STATS "$av_l\t\tAverage length of submissable sequences\n";
  print STATS "$av_h\t\tAverage number of high quality bases for submissable sequences\n";

  
  sleep(1);
  print "\n\n\nDone - Back to processing.\n\n";

}




##############################
#### SECTION 4 annotation ####
##############################
sub blast () {
  print colored("\n\t##### ANNOTATION OF SEQUENCES #####\n","white bold", "on_black");
  print "\ntrace2dbest gives the option to preliminary annotate your ESTs based\n";
  print "on BLAST. This annotation will be added to the header each sequence before\n";
  print "submission to dbEST\n";
  sleep(1);
  &get_config;
  if (!$pblastall && !$pblastcl3) {
	print "trace2dbest facilitates preliminary annotation in the form of a BLAST hit,\n
	however neither remote nor local BLAST utilities were found in your PATH.\n";
  } else {
	print "Would you like to add BLAST-based preliminary annotation to the sequences?\n";
	my $input = &get_input(" (y/n): "); 
	chomp $input;
	if ($input !~ /y/i) {
		print "Ok, no BLAST annotation will be added.\n"; &options;
	} else {
		if ($pblastall && $pblastcl3) {
			print "You have two options:\n\n";
			print "\t1 - Remote BLAST using NCBI (max ~400 sequences/day)\n";
			print "\t2 - Local BLAST\n\n";
			while () {
				my $input = &get_input("Enter number: "); 
				chomp $input;
				if ($input =~ /^1$/) {
					$blast = 1;
					print "Ok, remote BLAST will be carried out on the processed sequences.\n";
					last;
				} elsif ($input =~ /^2$/) {
					$blast = 2;
					last;
				} else {
					print "Please enter 1 or 2\n";
					next;
				}
			}
		}elsif ($pblastall==1) {
			$blast =2
		} elsif ($pblastcl3==1) {
			$blast = 1;
			print "Ok, remote BLAST will be carried out on the processed sequences.\n";
		} else {
			die "blastall = $pblastall, blastcl3 = $pblastcl3";
		}
#selection of local blast databse
		if ($blast ==2) {
			my $flag =1;
			while ($flag) {
				my $input = &get_input("Please enter the name of the directory that holds your BLAST databases\n(e.g. /home/db/blastdb): "); 
				chomp $input;
				if ($input =~ /^q{1}$/i) {
					print "Ok, trace2dbest will now exit...\n";
					exit ();
				}
				if (-d $input && -r $input) {
					$blastdata =$input;
					@blastdb = glob ("$blastdata/*.pin");
					if  (!@blastdb) {	
						print "$blastdata does not appear to have any protein BLAST databases, please try again.\n";
						next;	#start while loop again now  so that flag doesn't get set to 0
					}
					$flag = 0;
				} else {
					print "$input is not a directory or could not be read. Please try\nagain or type q to exit.\n";
				}
			}
			if (@blastdb) {		
				print "The following protein BLAST databases were found-\n";
				my $number =1;
				foreach my $db (@blastdb) {
					$db =~ s/$blastdata\///;
					$db =~ s/\.pin//;
					print "$number - $db\n";
					$number++;
				}
				$number = $number -1;	#get real number of databases
				my $flag =1;
				while ($flag) {
					my $input = &get_input("Select a database by entering its number: "); 
					chomp $input;
					if ($input !~ /^\s*[0-9]+\s*$/i) {	#only numbers and spaces
						print "Please enter a number between 1 and $number.\n";
					} elsif ($input > $number && $input >= 1) {
						print "Please enter a number between 1 and $number.\n";
					} else {
						$input--;
						$blast_database = $blastdb[$input];
						$blast_database = $blastdata."/".$blast_database;	#add path to blast database
						print "Ok, the database $blast_database will be used\n";
						$flag =0;
					}
				}
			}
		}
		$flag = 1;
		while ($flag) {
			$input = &get_input("What BLAST bit score cut-off value would you like to use (default 55)? "); 
			chomp $input;
			if ($input) {
				if ($input !~ /^\s*[0-9]+\s*$/) {
					print "please enter a number.\n";
				} else {
					$bits_cutoff = $input;
					$flag = 0;
				}
			} else {
				$bits_cutoff = 55;
				$flag = 0;
			}
		}
	        $blast_save =1;
		
	}
	
  }
#### run blasts,
  my $seq;
  unless (-e "subfiles") {
    print "\nCouldn't find subfiles directory. Please change your working\n";
    print "directory back to the directory where you have processed your\n";
    print "traces. - Exiting now.\n\n";
    sleep(1);
    exit;
  }   
  my @list = glob ("subfiles/*");
  unless (@list) {print "subfiles directory appears to be empty - Please check.";exit;}
  if (-e "blast_reports/blasts_full") {system ("mv blast_reports/blasts_full blast_reports/blasts_full.old")}
  if (-e "blast_reports/blasts_tophit") {system ("mv blast_reports/blasts_tophit blast_reports/blasts_tophit.old")}
  foreach my $list (@list) {
    if ($list =~ /\.sub/) { print "subfiles directory has been used already - Please check."; exit}
    my $inseq  = Bio::SeqIO->new('-file' => "$list",
                         '-format' => 'Fasta');
    while (my $handle = $inseq->next_seq)  {
      $seq=$handle->seq();
    }
    $list =~ s/subfiles\///; 
    put_id($blast,$blast_save,$seq,$list,$blast_database, $bits_cutoff);
  }
  sleep(1);
  print "\n\n\nBLAST completed - Back to main menu.\n\n";
}

###########################################
#### SECTION 5  SUBMISSION INFORMATION ####
###########################################
sub paperwork () {
  &get_config;
  sleep(1);
  my $file_type=$_[0];
  my $tryagain = 0;
  if ($file_type eq "Library")  {
    &file_update($file_type,$Lib_file,$tryagain)
  }
  elsif ($file_type eq "Contact")  {
    &file_update($file_type,$Cont_file,$tryagain)
  }
  elsif ($file_type eq "Publication")  {
    &file_update($file_type,$Pub_file,$tryagain)
  }
  elsif ($file_type eq "EST")  {
    &file_update($file_type,$EST_file,$tryagain)
  }
  else {print "Bugs have eaten some bytes - Exiting now\n"; exit;}
  sleep(1);
  print "\n\n\nSubmission information update done - Back to main menu.\n\n";


}


###########################################
#### SECTION 6  SUBMISSION  ###############
###########################################

sub submission () {
  print colored("\n\t##### SEQUENCE SUBMISSION #####\n","white bold", "on_black");
  print "\nThis option gets the files ready for dbEST submission and, if you want to,\n";
  print "submits them to dbEST immediately\n";
  sleep(1);
  &get_config;
# Ask for info to complete Lib, Cont and Pub files - then EST file can be made (using some info from the other files) 

  print colored("\nLib, Cont, Pub and EST file information\n", "bold");
  my $tryagain = 0;	#flag so that if users need to 'try again' to get the file then they don't get the header info which they've seen already
  while($Libfileflag==0) {	#get Library file info
    $file_type = "Library";	
    $type_id = "TYPE: Lib";
    my $answer=&file_choices_page($file_type, $Lib_file, $tryagain);
    $tryagain =1;
    if($answer=~ /h/i)  { &print_help(); next; } #use less to show trace2dbest.doc
    if($answer=~ /q/i)  { print "Ok, trace2dbest exiting...\nBye \n"; exit();  } #exit program
    if($answer == 1)  { @Libfile_data= &dummyLib_file_data(); $no_lib = 1;}		#no Lib file needed so make dummy file with lib name for library field of est file. set flag so we know what to add to sub file
    else { @Libfile_data=&get_file_data($answer, $Lib_file, $type_id); }	#record from Libfile.db to be used
    if (@Libfile_data) {
		$Libfileflag =1;
		foreach my $line (@Libfile_data) {	#extract various bit of info for other bits of the process
			if ($line =~ /^NAME:\s(.*)/) {
				$lib_name = $1;
			}
			if ($line =~ /ORGANISM:\s(.*)/) {
				$species = $1;
			}
		}
    }
  }

  $tryagain = 0;
  while($Contfileflag==0) {	#get Contact file info
    $file_type = "Contact";
    $type_id = "TYPE: Cont";
    my $answer=&file_choices_page($file_type, $Cont_file, $tryagain);
    $tryagain =1;
    if($answer=~ /h/i)  { &print_help(); next; } #use less to show trace2dbest.doc
    if($answer=~ /q/i)  { print "Ok, trace2dbest exiting...\nBye \n"; exit();  } #exit program
    if($answer == 1)  { @Contfile_data= &dummyCont_file_data(); $no_cont = 1;}		#no Cont file needed so make dummy file with name for contact field of est file.
    else { @Contfile_data=&get_file_data($answer, $Cont_file, $type_id); }	#record from Contfile.db to be used
    if (@Contfile_data) {
		$Contfileflag =1;
		foreach my $line (@Contfile_data) {
			if ($line =~ /^NAME:\s(.*)/) {
				$cont_name = $1;
			}
		}
	}
  }

  $tryagain = 0;
  while($Pubfileflag==0) {	#get Publication file info

	$file_type = "Publication";
	$type_id = "TYPE: Pub";
    my $answer=&file_choices_page($file_type, $Pub_file, $tryagain);
	$tryagain =1;
    if($answer=~ /h/i)  { &print_help(); next; } #use less to show trace2dbest.doc
    if($answer=~ /q/i)  { print "Ok, trace2dbest exiting...\nBye \n"; exit();  } #exit program	
	if($answer == 1)  { @Pubfile_data= &dummyPub_file_data(); $no_pub = 1;}	#if pub file is not to be created. make dummy file with just the title to put in EST file
    else { #record from Pubfile.db to be used 
		@Pubfile_data=&get_file_data($answer, $Pub_file, $type_id);
		#get pub_cit for est file. needs to be done separately from cases where pub file was made
		if (@Pubfile_data) {	#...but we only want to extract this info if a pub file was selected
			my $flag =0;
			foreach my $line (@Pubfile_data) {
				if ($line =~ /AUTHORS:/ || $line =~ /\|\|/) {	#need || in case dummpy pub file being used
					$flag = 0;	#the multi lined title has come to an end so stop reading lines to pub-cit
				}
				if ($flag == 1) {
					$pub_cit = $line;
					chomp $pub_cit;
				}
				if ($line =~ /^TITLE:/ ) {	
					$flag = 1;
				}	
			}
		}
	}
	if (@Pubfile_data) {
		$Pubfileflag =1;
	}
	
  }
  chomp $pub_cit;

#use species name from lib file to make directory structure do now in case it fails
  $species =~ s/\s/_/g;	#swap spaces for underscores
  if (-w "/home/db/est_solutions") {
	unless  (-w "/home/db/est_solutions/$species") {
		mkdir "/home/db/est_solutions/$species", 0777 or die "Can't make directory /home/db/est_solutions/$species\n $!\n";
	}
	unless (-w "/home/db/est_solutions/$species/trace2dbest") {
		mkdir "/home/db/est_solutions/$species/trace2dbest", 0777 or die "Can't make directory /home/db/est_solutions/$species/trace2dbest\n$!\n";
	}
	$save_location = 1;	#so we know where to save the files at the end
	
  } else { 	#set up directory structure in user's home directory
	unless (-w "$homedir/est_solutions") {
		mkdir "$homedir/est_solutions", 0755 or die "Can't make directory : $!\n";
	}
	unless (-w "$homedir/est_solutions/$species") {
		mkdir "$homedir/est_solutions/$species" , 0755 or die "Can't make directory : $!\n";
	}
	unless (-w "$homedir/est_solutions/$species/trace2dbest") {
	        mkdir "$homedir/est_solutions/$species/trace2dbest" , 0755 or die "Can't make directory : $!\n";
	}
	$save_location = 0;
  } 	

  my @EST_file = &EST_file_data();
#set variables for final EST submission files
  foreach my $field (@EST_file) {
	if ($field =~ /SEQ_PRIMER:\s(.*)/ ) {
		$seq_primer = $1; #primer name extracted later
	} elsif ($field =~ /PCR_F:\s(.*)/ ) {
		$pcrf_primer = $1;
	}elsif ($field =~ /PCR_B:\s(.*)/ ) {
		$pcrr_primer = $1;
	}elsif ($field =~ /P_END:\s(.*)/ ) {
		$p_end = $1;
	}elsif ($field =~ /PUBLIC:\s(.*)/ ) {
		$public_date = $1;
	}elsif ($field =~ /COMMENT:\s(.*)/ ) {
		$comment = $1;
	}
  }



  my $blastexist;
  if (-e "blast_reports/blasts_tophit") {$blastexist=1;}
  else {
    print "No BLAST results found - blast_reports/blasts_tophit\n";
    print "doesn't seem to exist\n";
    my $carryon = &get_input("Would you like to carry on without using annotation? [y/n]"); 
    chomp $carryon;
    unless ($carryon =~ /y/i) {
      print "Exiting now.\n";
      exit;
    }
  }

# creation of submission files
  print colored("\nCreating submission files...\n", "bold");

#
  my %blast;
  if ($blastexist) {open(BLASTFILE,"<blast_reports/blasts_tophit"); 
    while (my $line = <BLASTFILE>) {  
      chomp $line; 
      if ($line=~ /^([A-Za-z]{2}_\w{2,5}_\d{2,3}[A-Za-z]\d{2})\s(.*)/) {
        $blast{$1} = $2;
      }
    }  
  }
  
  opendir (DIR, "./subfiles");
  unless ($scheme) {  
    print colored ("\nTrace file naming scheme\n","bold");
    print "In order to extract some necessary information, the file names must follow one of\nthese schemes:\n";
    print "\n1 - NERC Environmental Genomics scheme\n2 - STRESSGENES scheme\n";
    $input = &get_input("\nPlease enter the appropriate number: ");
    chomp $input;
    $flag =1;
    while ($flag) {
	if ($input !~ /^(1|2)$/) {
		$input = &get_input("Please enter 1 or 2: \n"); 
		chomp $input;
	} else { 
		$flag =0;
	}
    }
    $scheme = $input;
  
  
  
  }
  while (defined (my $file = readdir(DIR))) {
	if (($scheme ==1 && $file =~ /^[A-Za-z]{2}_\w{2,5}_(\d{2,3})([A-Za-z])(\d{2})$/) ||
	 ($scheme ==2 && $file =~ /^[A-Z][a-z][A-Z]{2}[0-9]{2}[a-z]([0-9]{2})([a-z])([0-9]{2})[a-z][0-9]_[A-Z][a-z]{2}[A-Z][a-z]$/)) {
	 	my ($hi_qual_s, $hi_qual_end, $seq, $put_id, $plate, $row, $column, $poly_A, $chimeric, $chimeric_e);
		$plate = $1; $row = $2; $column = $3;	#get plate coordinates from name to use in submission file
		open(INFILE,"<subfiles/$file");	#open this file to read in seq info
		open(SUBFILE,">subfiles/$file.sub");	#create this file for EST sub file
		open(FASTAFILE,">fastafiles/$file.fsa");	#create this file to hold the processed seq.
		while (my $line = <INFILE>) {
	    	if ($line=~ /^>.+\shq\sstart\s(\d+)\shq\send\s(\d+)(.*)/) {	#regex matching fasta type header of the file
				$hi_qual_s=$1; $hi_qual_end=$2; my $dollar3 = $3;
				if ($dollar3) { 
					if ($dollar3 =~ /trimmed/){
						$poly_A=1; 
					}
					if ($dollar3 =~ /chimeric\(vector\)/) {
						$chimeric =1;
					}
					if ($dollar3 =~ /chimeric\(E.coli\)/) {
						$chimeric_e = 1;
					}
				}
			}
	    	else{   # Read in the sequence
				chop $line;
				$seq .= $line;
	    	}
		}

	    if ($blastexist) {$put_id = $blast{$file}}


#extract from $primer the primer NAME. $primer should be in format (e.g.) "T7 (aggagagagcgtg)" or "T7"
	    my $primer_name = $seq_primer;
	    if ($primer_name =~ s/\(.*\)//) {}	#get name part
	    if ($primer_name =~ s/\s//g) {}	#remove whitespace
	    
	    print SUBFILE "TYPE: EST\nSTATUS: New\n";
	    print SUBFILE "CONT_NAME:  $cont_name\nCITATION:\n";
	    print SUBFILE "$pub_cit\n";
	    print SUBFILE "LIBRARY: $lib_name\n";
	    print SUBFILE "EST#: $file\_$primer_name\n";
	    print SUBFILE "CLONE: $file\n";
	    print SUBFILE "SOURCE:\n";
	    print SUBFILE "PCR_F: $pcrf_primer\n";
	    print SUBFILE "PCR_B: $pcrr_primer\n";
	    print SUBFILE "PLATE: $plate\n";
	    print SUBFILE "ROW: $row\n";
	    print SUBFILE "COLUMN: $column\n";
	    print SUBFILE "SEQ_PRIMER: $seq_primer\n";
		if ($p_end) {
		    print SUBFILE "P_END: $p_end\n";
		}
	    print SUBFILE "HIQUAL_START: $hi_qual_s\n";
	    print SUBFILE "HIQUAL_STOP: $hi_qual_end\n";
	    print SUBFILE "DNA_TYPE: cDNA\n";
	    print SUBFILE "PUBLIC: $public_date\n";
	    if($put_id && $put_id !~ /No\shits\sfound/) { 
			print SUBFILE "PUT_ID: Similar to $put_id\n";
		} else { 
			print SUBFILE "PUT_ID:\n";
		}
		if ($poly_A) {
			print SUBFILE "POLYA: Y\n";
		}
	    print SUBFILE "COMMENT:\n";
	    if ($comment) {
			print SUBFILE "$comment\n";
		}
		if ($chimeric) {	#1 if x's (from vector)(possible chimera) found in sequence
			print SUBFILE "A section within this EST was identified as potentially derived from\nvector sequence. It has been replaced by 'N' residues.\n";
	    }
		if ($chimeric_e) {	#1 if x's (from E.coli)(possible chimera) found in sequence
			print SUBFILE "A section within this EST was identified as potentially derived from\nE.coli sequence. It has been replaced by 'N' residues.\n";
	    }
	    print SUBFILE "SEQUENCE:\n";
	    print FASTAFILE ">$file ";
	    if($put_id && $put_id !~ /No\shits\sfound/) { 
			print FASTAFILE "Similar to $put_id\n";
	    } 
	    else { 
			print FASTAFILE "no similar sequence found\n";
	    }
	    
	    
	    my $n=0;
	    while ($seq=~/(.)/g) {
			print SUBFILE "$1";
			print FASTAFILE "$1";
			$n++;
			if(($n % 60) == 0) { print FASTAFILE "\n"; print SUBFILE "\n"; }
	    }
	    if(($n % 60) != 0) { print FASTAFILE "\n"; print SUBFILE "\n"; }
	    print SUBFILE "||\n";
	    close SUBFILE;
	    close FASTAFILE;
	}
    
  }
  print colored ("Done\n", "bold");


#Section 7 - submission
  print colored ("\nSubmission and saving of files\n\n", "bold");

  opendir (SUBFILES, "./subfiles") or die "Could not open ./subfiles\n";
  my @sub_files = grep /.sub/ ,   readdir SUBFILES;	#don't get . and ..
  foreach my $sub_file (@sub_files) {
	open (DBEST, "./subfiles/$sub_file") or die "could not open ./subfile/$sub_file\n";
	while (my $line = <DBEST>) {
		$dbESTsubmission .= $line;	
	}
  }
#add Pub, Lib and Cont files

  if (!$no_lib) {	#only put these files in sub file if they are to be submitted.
	$dbESTsubmission = (join "", @Libfile_data).$dbESTsubmission;
  }
  if (!$no_pub) {
	$dbESTsubmission = (join "", @Pubfile_data).$dbESTsubmission;
  }
  if (!$no_cont) {
	$dbESTsubmission = (join "", @Contfile_data).$dbESTsubmission;
  }
#write the sub file to a file
  open (FINALSUB, ">./dbEST_submission.txt");
  print FINALSUB $dbESTsubmission;
  close FINALSUB;

  print "The EST submission records have been merged into one file.\n";
  my $input_view = &get_input("Would you like to view this completed submission file? [y/n]"); 
  chomp $input_view;
  if ($input_view =~ /y/i) {
	if ($less_flag == 1) {
		system("less dbEST_submission.txt");
	} else {
		system("more dbEST_submission.txt");
	}
  }

  print "\nThe EST submission file may now be sent by e-mail to NCBI dbEST.\n";
  print "Enter yes to send file to dbEST now, or any other key to continue without\n";

  my $input_submit = &get_input("submitting (your file will be saved): "); 
  chomp $input_submit;

  my $sent_flag = 0;

  if ($input_submit =~ /^yes$/i) {
	
	my @info = &sendEST($dbESTsubmission);	#send sub file and get reply to for email to egdc
	my $mail_flag = shift @info;
	if ($mail_flag) {
		print "Mail sent - A copy of the mail was sent to the reply email entered above.\n";
		$sent_flag = 1;
	}
	
	my $reply_to = shift @info; 
	my $smtp_server = shift @info;
	
		}
	
  else {
	print "Ok, file not submitted to dbEST.\n"

  }

#merge logfiles and tidy up.
  if (-e "phredlogfile") {
	system ("cat phredlogfile >> logfile");
	unlink "phredlogfile";
  }
  if (-e "cmlogfile") {
	system ("cat cmlogfile >> logfile");
	unlink "cmlogfile";
  }
  if (-e "temp.seq") {
	unlink "temp.seq";
  }

  &save_files($sent_flag, $save_location);

  print "\n\n\nEnd of submission option   - Back to main menu.\n\n";
}
 








												#######n##################
######################################################/#\#####}#####################################################
#################### - - SUB ROUTINES - - ###########/###\####}####################+++++++#########################
####################################################/#####\###}~~~################################################
												##########################				



#####################################################################################

sub file_choices_page() {
#Asks where to get Publication/Lib/Cont file info from (and if it is required)

	my ($file_type) =shift @_;
	my ($db_file)  = shift @_;
	my ($skipintro) = shift @_;
	my $answer;
	
	unless ($skipintro) {	
		print colored ("\n$file_type file\n", "bold"); 
    	print "\n\tEach batch of EST submissions to dbEST must have an associated\n";
		print "\t$file_type file.\n";
		print "\tPlease choose one of the following options:\n";
	}
    print "\n\t1 - $file_type file already submitted\n";
	if (!-r $db_file) {	#this loop allows us to reset the location of the files, e.g. reset the $lib_file variable
		while (!-r $db_file) {	#file is (!not)readable, e.g. .db files haven't stayed in installation dir of basedir/file.db
			$db_file = &get_input("Warning: $db_file\nwas not found.\nPlease enter the location of the $file_type file: "); 
			chomp $db_file;
		}
		#correct .db file location if it has changed from default defined at top of script
		#and make $db_file point to the actual file
		if ($file_type eq "Library") {
			$Lib_file = $db_file."/Libfile.db";
			$db_file = $Lib_file;
		}
		if ($file_type eq "Publication") {
			$Pub_file = $db_file."/Pubfile.db";
			$db_file = $Pub_file;
		}
		if ($file_type eq "Contact") {
			$Cont_file = $db_file."/Contfile.db";
			$db_file = $Cont_file;
		}
	}
	open (DBFILE, "$db_file") or  die "Error: $db_file could not be opened!\n";
	my $num = 1;
	my $flag = 1;
	my $marker = 0;
	while (my $line =<DBFILE>) {
		if ($marker) {	#in the 1st line of TITLE free text in pub file
			$num++;
			if ($num == 2) {	#we only want to print this line once
				print "\n\tOr use a saved file...\n\n";
			}
			print "\t$num - ";
			chomp $line;	#keeps pub file entries in same format as others
			print $line; 
			print "\n";
			$marker = 0;
		} elsif ($line =~ /^TITLE:/) {	#pub file
			$marker = 1;	#next line is the title free text	
		} elsif ($line =~ /^NAME:\s(.*)/) {	#lib and cont files
			$num++;
			if ($num == 2) {	#we only want to print this line once
				print "\n\tOr use a saved file...\n\n";
			}
			print "\t$num - ";
			print $1; 
			print "\n";
		}
			
	}
	close DBFILE;
    
    my $flag1=0;
    while($flag1==0) {
		$answer= &get_input("\n\tDon't fancy one of those? Enter h for help or q to quit.\n");
		chomp $answer;
		if($answer =~/h/i || $answer =~/q/i ) { $flag1=1; next; }
		elsif ($answer =~ /^\d+$/) {	#if it's only digits
			if ($answer > $num || $answer == 0) {	#if answer is a number outside the right range...
				print "Please enter a number between 1 and $num, or h for help and q to quit:\n";
			} 
			elsif ($answer eq 1) { #else it must be in range so send back if 1
				$flag1=1;
				next;
			}
			else {					#one of the records from ESTfile.db has been selected
				$flag1 = 1;
				next;			
			}
		} 
		else {	#if answer is not digits...
			print "Please enter a number between 1 and $num, or h for help and q to quit:\n";
		}
    }
    return $answer;
}

############################################################################################


sub file_update() {
#Asks where to get Publication/Lib/Cont file info from (and if it is required)

	my ($file_type) =shift @_;
	my ($db_file)  = shift @_;
	my ($skipintro) = shift @_;
	my $answer; my $i; my $n; my $m;
	
	unless ($skipintro) {	
		print colored ("\n$file_type file\n", "bold"); 
    	print "\n\tEach batch of EST submissions to dbEST must have an associated\n";
		print "\t$file_type file.\n";
	}
	
	unless (-e "$db_file") {system ("touch $db_file")}
	open (DBFILE, "$db_file") or  die "Error: $db_file could not be opened!\n";
	my @list; my $marker=0;
	while (my $line =<DBFILE>) {
          if ($marker==1) {chomp $line; push (@list,$line);}
	  if ($line =~ /^TITLE:/) {	#pub file
			$marker = 1;	#next line is the title free text	
		} 
	  else {$marker=0} 	
	  if ($line =~ /^NAME:\s(.*)/)      { push (@list,$1);}	#lib and cont files
	  if ($line =~ /^RECORD_ID:\s(.*)/) { push (@list,$1);}	#EST files
	}
	close DBFILE;
    
        my $number = scalar @list;
	if ($number == 0) {
	  print "There are no entries for $file_type available yet.\n";
	  if ($file_type =~ "Library") {newLib_file_data()}
	  if ($file_type =~ "Publication") {newPub_file_data()}
	  if ($file_type =~ "Contact") {newCont_file_data()}
	  if ($file_type =~ "EST") {new_EST_file()}
	  print "\nBack to main menu\n\n";
	  &options;  
	}
        else { 
	  print "The following entries for $file_type are available for viewing - :\n\n";
          for ($i=0;$i<$number;$i++) {
	     $n = $i+1;
             print "$n - $list[$i]\n"; 
	  }   
	  my $m = $n + 1;
	  print "$m - or alternatively choose this option to add a new entry.\n";    
 	  print "\n\nSelect the respective number to view an entry or to create a new entry:\n";
          my $flag1=0;
          while($flag1==0) {
		$answer= &get_input("\n\n");
		chomp $answer;
		if ($answer =~ /^\d+$/) {	#if it's only digits
			if ($answer > $m || $answer == 0) {	#if answer is a number outside the right range...
				print "Please enter a number between 1 and $m .\n";
			} 
			else {					#one of the records from ESTfile.db has been selected
				$flag1 = 1;
				next;			
			}
		} 
		else {	#if answer is not digits...
			print "Please enter a number between 1 and $m, or h for help and q to quit:\n";
		}
	 }	
      
         if ($answer==$m) {
  	   if ($file_type =~ "Library") {newLib_file_data()}
	   if ($file_type =~ "Publication") {newPub_file_data()}
	   if ($file_type =~ "Contact") {newCont_file_data()}
	   if ($file_type =~ "EST") {new_EST_file()}
	 }
         else {
           open (DBFILE, "$db_file") or  die "Error: $db_file could not be opened!\n";
           my $dummy=1; print "\n\n";
           while (my $line =<DBFILE>) {
             if ($dummy == $answer) {print $line} 
	     if ($line =~ /^\|\|/) { $dummy++}
          }
          close DBFILE;
        }
      }	    
  print "\nBack to main menu\n\n\n"; &options;  

}

############################################################################################


sub get_file_data() {
#gets the selected record from ESTfile.db, Libfile.db, Pubfile.db or Contfile.db
	
	my ($record_num) = shift @_;
	my ($db_file) = shift @_;
	my ($type_id) = shift @_;
	my $count =0;
	my $flag;
	my @rec;
	
	if ($type_id ne "RECORD_ID: ") {
		$record_num = $record_num -1;	#set record num for all file types except ESTfile (different because no "file not needed" option)
	} else {	
		$record_num--;	#make record equal to the desired record number for all other types of file
	}	
	
	open (DBFILE, "$db_file");
	while (my $line =<DBFILE>) {
		if ($line =~ /^$type_id/) {
			$count++;
		}
		if ($count == $record_num) {
			$flag = 1;
		}
		if ($flag) {
			push (@rec, $line);
		}
		if ($line =~ /^\|\|/) {
			$flag = 0;
		}
	}
	print "\nPlease take a few seconds to check this information\n\n";
	print @rec; 
	#print "\nIs this the correct data? [y/n]\n";
	my $input = &get_input("\nIs this the correct data? [y/n]\n");
	chomp $input;
	if ($input =~ /y/i) {
		return @rec;
	} else {
		print "Ok, please choose again.\n";
		undef @rec;	#make the array null to indicate that no file has been selected
		return @rec;
	}
} 
		
################################################################################################

sub newPub_file_data() {
#gets info from user to make Pub file.
#General plan - from the user input side, it is better to get all the obligatory info, then ask for the optional
#info. Use the status to ask only relevent questions. storing the answers to the obligatory info as varibles
# (instead of just putting them straight into the array) means that they can be added later, with the
# non-obligatory information. This allows all the data to be put in the array in the right order. 

	my @nPub_file_data;
	my $ok_flag = 1;
		
	while ($ok_flag) {
		$nPub_file_data[0] = "TYPE: Pub\n";
		my $ob_info1 =  "STATUS: ";
		my $ob_info2 = "TITLE: \n";	#set up variables for the obligatory info (\n in TITLE so the free text starts on the next line)
		my $ob_info3 = "AUTHORS: \n";
		my $ob_info4 = "YEAR: ";
		my ($status, $nob_info1, $nob_info2, $nob_info3, $nob_info4, $nob_info5 );

		
		
		print "\nInformation for new Publication file\n";
		print "Please answer the following questions about the publication for the ESTs you wish\n";
		print "to submit.\n";
		my $flag = 1;
		while ($flag == 1) {
		
			print "\nWhat is the publication status? 1=unpublished, 2=submitted, 3=in press,\n4=published.\n";
			$status =  &get_input("Enter the appropriate number: ");
			chomp $status;
			if ($status !~ /^[1-4]$/) {
				print "Please enter a number between 1 and 4!";
			} else {
				$flag =0;
			}
		}
		$ob_info1 .= $status;	#save status for later (only ask relevent questions for non-obligatory part of pub file)
		$ob_info1 .= "\n";
		$flag = 1;
		while ($flag) {
			print "\nWhat is the title of the article? ";
			print "(The title you enter here will also be used in the CITATION field of the EST file.)\n";
			#print "Title: ";
			$pub_cit = &get_input("Title: ");	#get this for est file
			$pub_cit.= "\n";	#replace newline removed in get_input
			if  ($pub_cit =~ /\w+/) {	#check pub cit has someting in it.
				$flag =0;
			} else {
				print "You must enter something!\n";
			}
		}
		$ob_info2 .= $pub_cit;
		$flag = 1;
		while ($flag) {
			my $input .= &get_input("Who are the authors?\n");
			$input .= "\n";
			if ($input =~ /\w+/) {
				$flag = 0;
				$ob_info3 .= $input;
			} else {
				print "You must enter something!\n";
			}
		}
		$flag = 1;
		while ($flag) {
			my $input .=  &get_input("Year of publication?\n");
			$input .= "\n";
			if ($input =~ /^[0-9]{4}$/) {	#only 4 digits
				$flag = 0;
				$ob_info4 .= $input;
			} else {
				print "You must enter a year in the valid format, e.g. 2004!\n";
			}
		}
		if ($status == 2 ||$status== 3 ||$status== 4) {	#only ask questions that are relevent given the pub status
			$nob_info1 = "JOURNAL: " . &get_input("What is the publication journal?\n") . "\n";
		}
		if ($status== 4) {
			$nob_info2 = "VOLUME: " . &get_input("What is the volume number? ") . "\n";
			$nob_info3 = "ISSUE: " . &get_input("What is the issue number? ") . "\n";
			$nob_info4 = "PAGES: " . &get_input("What's the page numbers? ") . "\n";
			$nob_info5 = "MEDUID: " .&get_input("Medline unique identifer (if known): ") . "\n";
		}
		
#assemble pub file and display. if ok then proceed, else redo!
		$nPub_file_data [1] = $ob_info2;	#add title
		$nPub_file_data [2] = $ob_info3;	#add authors
		if ($status == 1) {
			$nPub_file_data [3] = $ob_info4;	#add year
			$nPub_file_data [4] = $ob_info1;	#add status
		} 
		elsif ($status == 2 || $status ==3) {
			$nPub_file_data [3] = $nob_info1;	#add journal
			$nPub_file_data [4] = $ob_info4;	#add year
			$nPub_file_data [5] = $ob_info1;	#add status
		} 
		elsif ($status == 4) {
			$nPub_file_data [3] = $nob_info1;	#add journal
			$nPub_file_data [4] = $nob_info2;	#add volume
			$nPub_file_data [5] = $nob_info3;	#add issue
			$nPub_file_data [6] = $nob_info4;	#add pages
			$nPub_file_data [7] = $ob_info4;	#add year
			$nPub_file_data [8] = $ob_info1;	#add status
		}
#separate section to add meduid, if present
		#if ($nob_info5 =~ /MEDUID:\s\w+/) {	#true if an identifier entered
		if ($nob_info5) {
			unshift @nPub_file_data, $nob_info5;	#add nob_info5 to start of the array
		}
		print "\n\tPlease check your new Pub file:\n @nPub_file_data\n";
		#print "\nAre you happy to continue? (Y/N): ";
		my $input = &get_input("\nAre you happy to continue? (Y/N): ");
		chomp $input;
		if ($input =~ /n/i) {
			print "Ok, please re-enter the details...\n";
		}else {
			push @nPub_file_data, "||\n";	#add double pipe file separator to end of array
			&save_new_file(@nPub_file_data);
			$ok_flag = 0;
		}
	}
	return @nPub_file_data;
}

################################################################################################

sub dummyPub_file_data() {	
#Pub file not needed so get 'title' info for est file

	my @dPub_file_data;
	
	#print "What is the title of the article in the Pub file you have submitted?\n";
	my $input .= &get_input("What is the title of the article in the Pub file you have submitted?\n");
	#chomp $input;
	$pub_cit = $input;	#get this for est file
	$dPub_file_data [0]	= "TYPE: Pub\n";
	$dPub_file_data [1]	= "TITLE:\n" .$input;
	push @dPub_file_data, "||\n";	#add double pipe file separator to end of array	
	return @dPub_file_data;
}
################################################################################################

sub newLib_file_data() {
#now updated to include all library file fields

	my @nLib_file_data;
	my $ok_flag = 1;	
	
	while ($ok_flag) {
		@nLib_file_data = "";
		$nLib_file_data[0] = "TYPE: Lib\n";
		print "\nInformation for new Library file\n";
		print "Please answer the following questions about the Library used to generate the ESTs\n";
		print "you wish to submit.\n";
		my $input;	#gets used for all inputs. sometimes use strict seems really stupid. or is it just me?
		my $flag =1;
		while ($flag) {
			#print "What is the name of the library?\n";
			$input = &get_input("What is the name of the library?\n"); 
			chomp $input;
			if ($input) {	#check that something has been entered
				$flag = 0;
			} else {
				print "You must enter the library name\n";
			}
		}
		$nLib_file_data[1] = "NAME: " . $input . "\n";
		
		$flag =1;	#reset flag for next question
		while ($flag) {
			#print "What is the scientific name of the organism from which the library was prepared?\n";
			$input = &get_input("What is the scientific name of the organism from which the library was prepared?\n"); 
			chomp $input;
			if ($input =~ /[A-Z]+\s[A-Z]+/i) {	#rough match of a binomial name
				$flag=0;
			} else {
				print "You must enter the bionomial scientific name of the organism\n";
			}
		}
		$nLib_file_data[2] = "ORGANISM: " . $input ."\n";
		print "The following information is not compulsory but please enter as much as you can.\nIf you can't enter the requested information, press enter.\n";
		#print "Organism strain: ";
		my $number = 3;	#for place in array. could use push or something but this works as well.
		$input = &get_input("Organism strain: "); 
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "STRAIN: " .$input ."\n" ;
			$number++;
		}
		#print "Cultivar: ";
		$input = &get_input("Cultivar: "); 
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "CULTIVAR: " .$input ."\n" ;
			$number++;
		}
		#print "Sex: ";
		$input =  &get_input("Sex: "); 
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "SEX: " .$input ."\n" ;
			$number++;
		}
		#print "Organ: ";
		$input =  &get_input("Organ: ");
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "ORGAN: " .$input ."\n" ;
			$number++;
		}
		#print "Tissue: ";
		$input =  &get_input("Tissue: ");
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "TISSUE: " .$input ."\n" ;
			$number++;
		}
		#print "Cell type: ";
		$input = &get_input("Cell Type: "); 
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "CELL_TYPE: " .$input ."\n" ;
			$number++;
		}
		#print "Cell line: ";
		$input = &get_input("Cell Line: "); 
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "CELL_LINE: " .$input ."\n" ;
			$number++;
		}
		#print "Stage: ";
		$input =  &get_input("Stage: ");
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "STAGE: " .$input ."\n" ;
			$number++;
		}
		#print "Host: ";
		$input =  &get_input("Host: ");
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "HOST: " .$input ."\n" ;
			$number++;
		}
		#print "Vector: ";
		$input =  &get_input("Vector: ");
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "VECTOR: " .$input ."\n" ;
			$number++;
		}
		#print "Vector type: ";
		$input =  &get_input("Vector type: ");
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "V_TYPE: " .$input ."\n" ;
			$number++;
		}
		#print "Restriction enzyme 1: ";
		$input =  &get_input("Restriction enzyme 1: ");
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "RE_1: " .$input ."\n" ;
			$number++;
		}
		#print "REstriction enzyme 2: ";
		$input =  &get_input("Restriction enzyme 2: ");
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "RE_2: " .$input ."\n" ;
			$number++;
		}
		#print "Library description: ";
		$input =  &get_input("Library description: ");
		chomp $input;
		if ($input) {
			$nLib_file_data[$number]= "DESCR: \n" .$input ."\n" ;
			$number++;
		}
		print "\n\tPlease check your new Lib file:\n @nLib_file_data\n";
		#print "\nAre you happy to continue? (Y/N): ";
		$input = &get_input("\nAre you happy to continue? (Y/N): ");
		chomp $input;
		if ($input =~ /n/i) {
			print "Ok, please re-enter the details...\n";
		}else {
			push @nLib_file_data, "||\n";	#add double pipe file separator to end of array
			&save_new_file(@nLib_file_data);
			$ok_flag = 0;
		}
	}
	return @nLib_file_data;
}

################################################################################################

sub dummyLib_file_data() {
#Lib file not needed so get 'name' info for est file

	my @dLib_file_data;
	my $flag=1;
	my $input;
	
	while ($flag) {
		#print "What is the name of the library?\n";
		$input .= &get_input("What is the name of the library?\n");
		chomp $input;
		if ($input) { 
			$flag=0;
		} else { 
			print "You must enter the library name\n";
		}
	}
	#print "From what organism are the ESTs derived (e.g. Lumbricus rubellus)?\n";
	$species = &get_input("From what organism are the ESTs derived (e.g. Lumbricus rubellus)?\n"); 
	chomp $species;
	$flag = 1;
	while ($flag) {
		if ($species !~ /[A-Z]*\s[A-Z]/i) {
			#print "Please enter the binomial name for the species\n";
			$species = &get_input("Please enter the binomial name for the species\n"); 
			chomp $species;
		} else {
			$flag =0;
		}
	}

	$dLib_file_data [0]	= "TYPE: Lib\n";
	$dLib_file_data [1]	= "NAME: " .$input;
	push @dLib_file_data, "||\n";	#add double pipe file separator to end of array
	return @dLib_file_data;
}

################################################################################################

sub newCont_file_data() {


	my @nCont_file_data;
	my $ok_flag = 1;
		
	while ($ok_flag) {
		@nCont_file_data = "";
		$nCont_file_data[0] = "TYPE: Cont\n";
		print "\nInformation for new Contact file\n";
		print "Please answer the following questions about the contact person for the ESTs\n";
		print "you wish to submit.\n";
		my $input;	#gets used for all inputs. 
		my $flag =1;
		while ($flag) {
			#print "What is the name of the person submitting the ESTs?\n";
			$input = &get_input("What is the name of the person submitting the ESTs?\n"); 
			chomp $input;
			if ($input) {	#check that something has been entered
				$flag = 0;
			} else {
				print "You must enter a contact name\n";
			}
		}
		$nCont_file_data[1] = "NAME: " . $input . "\n";
		
		print "The following information is not compulsory but please enter as much as you can.\nIf you can't enter the requested information, press enter.\n";
		#print "FAX number: ";
		my $number = 2;
		$input = &get_input("FAX number: "); 
		chomp $input;
		if ($input) {
			$nCont_file_data[$number]= "FAX: " .$input ."\n" ;
			$number++;
		}
		#print "Telephone number: ";
		$input =  &get_input("Telephone number: ");
		chomp $input;
		if ($input) {
			$nCont_file_data[$number]= "TEL: " .$input ."\n" ;
			$number++;
		}
		#print "Email: ";
		$input =  &get_input("Email: ");
		chomp $input;
		if ($input) {
			$nCont_file_data[$number]= "EMAIL: " .$input ."\n" ;
			$number++;
		}
		#print "Laboratory providing EST: ";
		$input =  &get_input("Laboratory providing EST: ");
		chomp $input;
		if ($input) {
			$nCont_file_data[$number]= "LAB: " .$input ."\n" ;
			$number++;
		}
		#print "Institution name: ";
		$input =  &get_input("Institution name: ");
		chomp $input;
		if ($input) {
			$nCont_file_data[$number]= "INST: " .$input ."\n" ;
			$number++;
		}
		#print "Address (comma delineate): ";
		$input =  &get_input("Address (comma delineate): ");
		chomp $input;
		if ($input) {
			$nCont_file_data[$number]= "ADDR: " .$input ."\n" ;
			$number++;
		}

		print "\n\tPlease check your new Cont file:\n @nCont_file_data\n";
		#print "\nAre you happy to continue? (Y/N): ";
		$input = &get_input("\nAre you happy to continue? (Y/N): ");
		chomp $input;
		if ($input =~ /n/i) {
			print "Ok, please re-enter the details...\n";
		}else {
			push @nCont_file_data, "||\n";	#add double pipe file separator to end of array
			&save_new_file(@nCont_file_data);
			$ok_flag = 0;
		}
	}
	return @nCont_file_data;
	

}

################################################################################################

sub dummyCont_file_data() {#Cont file not needed so get 'cont name' info for est file

	my @dCont_file_data;
	my $flag=1;
	my $input;
	
	while ($flag) {
		#print "What is the name of the person submitting the ESTs?\n";
		$input .= &get_input("What is the name of the person submitting the ESTs?\n");
		chomp $input;
		if ($input) {
			$flag =0;
		} else {
			print "You must enter a name\n";
		}
	}
	$dCont_file_data [0]	= "TYPE: Cont\n";
	$dCont_file_data [1]	= "NAME: " .$input;
	push @dCont_file_data, "||\n";	#add double pipe file separator to end of array	
	return @dCont_file_data;
}

################################################################################################

sub save_new_file() {
#allows the new EST, Pub, Lib, or Cont file to be saved to the relevant .db file
	
	my @new_file = @_;
	my $file;	#gets set to scalar rep one of the .db files
	
	#print "Would you like to save this file for future use?(y/n):";
	my $input = &get_input("Would you like to save this file for future use?(y/n):");
	chomp $input;
	if ($input =~ /y/i) {
		foreach my $line (@new_file) {
			if ($line =~ /TYPE:\s.*Pub/) {
				$file = $Pub_file;
			} elsif ($line =~ /TYPE:\s.*Lib/) {
				$file = $Lib_file;	
			} elsif ($line =~ /TYPE:\s.*Cont/) {
				$file = $Cont_file;
			} elsif ($line =~ /TYPE:\s.*EST/) {
				$file = $EST_file;
				#print "Please enter a name for this file: ";	#get name for this record so it can be identifed when recalling saved files
				my $id = &get_input("Please enter a name for this file: "); 
				chomp $id;
				unshift @new_file, "RECORD_ID: $id\n";
				last; 
			}
		}
		open (DBFILE, ">>$file") or die "Could not open $file\n";	#use >> for appending to file
		print DBFILE @new_file;
		print "File saved to $file\n";
		close DBFILE;
	} else {
		print "Ok, file will not be saved.\n";
	}
}

################################################################################################

sub EST_file_data () {
#allows user to select saved file or enter new EST file info (via sub new_EST_file), both return an array with the file data.
	my $sub_flag = 1;
	
	print colored ("\nEST File\n", "bold"); 
	print "\n\tEach sequence submitted to dbEST must have an associated EST file.\n\tPlease choose one of the following options for the creation of this file:\n\n";
	while ($sub_flag) {
		$sub_flag = 0;	#hope everything goes ok
	print "\t1 - Enter information for a new EST file now\n";
	if (!-r $EST_file) {	#this loop allows us to reset the location of the file
		while (!-r $EST_file) {	#file is (!not)readable, e.g. .db files haven't stayed in installation dir of basedir/file.db
			#print "Warning: $EST_file was not found.\nPlease enter the location of the EST file: ";
			$EST_file = &get_input("Warning: $EST_file was not found.\nPlease enter the location of the EST file: ");
			chomp $EST_file;
		}
	}
	open (ESTFILE, "$EST_file") or  die "Error: $EST_file could not be opened!\n";
	my $num = 1;
	while (my $line =<ESTFILE>) {		
		if ($line =~ /^RECORD_ID:\s(.*)/) {
			$num++;
			if ($num == 2) {	#we only want to print this line once
				print "\n\tOr use a saved file...\n\n";
			}
			print "\t$num - ";
			print $1; 
			print "\n";
			
		}			
	}
	close ESTFILE;
   # print "\n\tDon't fancy one of those? Enter h for help or q to quit.\n";
	
	my $flag1=0;
    while($flag1==0) {
		my $answer= &get_input("\n\tDon't fancy one of those? Enter h for help or q to quit.\n");
		chomp $answer;
		if($answer=~ /h/i)  { &print_help(); next; }	#use less to show trace2dbest.doc
		if($answer=~ /q/i)  { print "Ok, trace2dbest exiting...\nBye \n"; exit();  } #exit program
		if($answer =~/h/i || $answer =~/q/i ) { $flag1=1; next; }
		elsif ($answer =~ /^\d+$/) {	#if it's only digits
			if ($answer > $num || $answer == 0) {	#if answer is a number outside the right range...
				print "Please enter a number between 1 and $num, or h for help and q to quit:\n";
			} 
			elsif ($answer eq 1) { 
				@ESTfile_data = &new_EST_file();
				$flag1=1;
			}
			else {	#one of the records from ESTfile.db has been selected
				my $type_id = "RECORD_ID: ";	#set type id for get_file_data	
				@ESTfile_data = &get_file_data($answer, $EST_file, $type_id);
				$flag1 = 1;
			}
		} 
		else {	#if answer is not digits...
			print "Please enter a number between 1 and $num, or h for help and q to quit:\n";
		}
    }
	if (@ESTfile_data) {
    	return @ESTfile_data;
	} else {
		$sub_flag = 1;	#everything not ok, return to near start
	}
	}
}
		
################################################################################################

sub new_EST_file() {
#gets info from user to make EST.db file

	my @nEST_file_data;
	my $input; 
	my $ok_flag = 1;
	
	while ($ok_flag) {
	
		my $flag = 1;
	    while ($flag) {
	    	$flag=0;
	    	#print "\nWhat was the sequencing primer used? [enter the primer name, optionally followed by the\nsequence in brackets()].\nE.g. SAC(GGGAACAAAAGCTGGAG): ";
	    	$input = &get_input("\nWhat was the sequencing primer used? [enter the primer name, optionally followed by the\nsequence in brackets()].\nE.g. SAC(GGGAACAAAAGCTGGAG): ");
			chomp $input;
	    	unless ($input) {   #simple check for some kind of input
	    		print "You must enter at least the name of the sequencing primer.\n";
	    		$flag=1;
	    	}
	    	$nEST_file_data[0] = "SEQ_PRIMER: $input\n";
	  	}

	  	#print "What was the forward PCR primer? ";
	  	$input = get_input("What was the forward PCR primer? ");
		chomp $input;
		$nEST_file_data[1] = "PCR_F: $input\n";

	  	#print "What was the reverse PCR primer? ";
	  	$input =  get_input("What was the reverse PCR primer? ");
		chomp $input;
	  	$nEST_file_data[2] = "PCR_B: $input\n";

	  	$flag =1;
	 	while ($flag) {
	      	#print "Which end was sequenced (5'/3')? If neither, just hit return: ";
	      	$input =  get_input("Which end was sequenced (5'/3')? If neither, just hit return: ");
			chomp $input;
	      	if ($input =~ /(5'|3')/ || !$input) {
	    		$flag = 0;
	      	} else { 
	    		print "Please enter 5' or 3'\n";
	      	}
	  	}
		$nEST_file_data[3] = "P_END: $input\n";

#get release date
		print "\nPlease enter the date you would like your data to be made public. This date\n";
		print "will be inserted into the PUBLIC field of the submission file.\n";
		#print "Enter the date in the format MM/DD/YYYY. For immediate release, just press enter.\n";
		my $input = get_input("Enter the date in the format MM/DD/YYYY. For immediate release, just press enter.\n");
		$input .= "\n";
		if ($input) {
			while ($input !~ /(\d\d)\/(\d\d)\/(\d\d\d\d)\n/) {
				if ($input=~/^\s$/)	{
					$input = "";
					last;
				}
				else	{	
					print "Please re-enter the date in the format MM/DD/YYYY\n";
					$input = get_input("Please re-enter the date in the format MM/DD/YYYY\n");
					$input .= "\n";
				} 
			}
		}
		chomp $input;
		$nEST_file_data[4] = "PUBLIC: $input\n";

#get comment
		#print "Please enter a comment about the ESTs for inclusion in the \"COMMENT\" field of\nthe EST file.\n";
		$input =  get_input("Please enter a comment about the ESTs for inclusion in the \"COMMENT\" field of\nthe EST file.\n");
		chomp $input;
		$nEST_file_data[5] = "COMMENT: $input\n";
	
#now check it's ok and save then return.
		print "\n\tPlease check the data you have entered:\n @nEST_file_data\n";
		print "\nAre you happy to continue? (Y/N): ";
		$input = get_input("\nAre you happy to continue? (Y/N): ");
		chomp $input;
		if ($input =~ /n/i) {
			print "Ok, please re-enter the details...\n";
		}else {
			unshift @nEST_file_data, "TYPE: EST\n";	#add file type to start of array
			push @nEST_file_data, "||\n";	#add double pipe file separator to end of array
			&save_new_file(@nEST_file_data);
			$ok_flag = 0;
		}
	}
	return @nEST_file_data;
}

################################################################################################

sub Get_vectors () {
#uses vector file path to read in (fasta format) file and print out vector names (fasta headers)

	my ($file) = @_;
	my $line;
	my @vectors;
	my $count;
	my $input;
	my $return;	#the return value, 1 for vector not there (or no FASTA headers found), 0 if it is.
		
	open (VECTORFILE, "<$file") || die "$file could not be opened";
	while ($line = <VECTORFILE>) {
		if ($line =~ /^>/) {
			push @vectors, $line;
			$count++;			
		}
	}
	if ($count) {
		print "\nThe following $count vectors were found in $file\n\n";
		print @vectors;
		#print "\nAre you satisfied that the relevant vector is here? (y/n): ";
		$input =  &get_input("\nAre you satisfied that the relevant vector is here? (y/n): ");
		chomp $input;
		if ($input =~ /n/i) {
			print "Ok, please re-select a vector file...\n\n";
			$return =1;
		}
	}else {
		print "ERROR: No FASTA headers were found in $file.\nPlease re-select a FASTA format file...\n\n";
		$return =1;
	}
	return $return;
}
#################################################################################

sub split_screened {
# converts a fasta file to individual
# txt files

	my $file=shift(@_);
	my $dir=shift(@_);
	my $flag=0;
	my $in_seq= "";
	my @heading;
	my $header;
	my $filename;
	
	open(FSAFILE, "$file") || die "Can't open $file\n";;	#catenated cross_match output file
	while(my $line=<FSAFILE>) {
		if($flag==1 && $line!~/^>/) {
			chop $line;
  			$in_seq.=$line;
  		}
 		if($flag==1 && $line=~/^>/) { # print to file
			$flag=0;
  			open(OUTFILE, ">$dir/$filename.seqsc") || die "Can't open $dir/$filename.seqsc\n";
  			print OUTFILE "$header\n";
  			my $n=0;
  			while ($in_seq=~/(.)/g) {
   				print OUTFILE "$1";
   				$n++;
   				if(($n % 60) == 0) { print OUTFILE "\n"; }
   			}
  			if(($n % 60) != 0) { print OUTFILE "\n"; }
  			$in_seq="";
  			close OUTFILE;
  		}
  		if($flag==0 && substr($line,0,1) eq ">" ) {	#an unusual regex.
  			chop $line; 
    		$flag=1;
    		$header=$line;
    		@heading = split(' ',$line);
    		$heading[0] =~ s/>//;
    		$filename = $heading[0];
    		next;
		}	
	}
#print last record to file
 	open(OUTFILE, ">>$dir/$filename.seqsc") || die "Can't open $filename.txt\n";
  	print OUTFILE "$header\n";
  	my $n=0;
  	while ($in_seq=~/(.)/g) {
   		print OUTFILE "$1";
   		$n++;
   		if(($n % 60) == 0) { print OUTFILE "\n"; }
   	}
   	if(($n % 60) != 0) { print OUTFILE "\n"; }
  	close OUTFILE;
}

################################################################################################

sub put_id {
#runs BLAST (either local or remote) and gets id line of top hit

        my $blast_type = shift (@_);
	my $blast_save = shift (@_);
	my $sequence = shift (@_);
	my $file = shift (@_);
	my $blast_db = shift (@_);
	my $bits_cutoff = shift (@_);	#sounds painful.
        my $prot_id = "";
	my $flag = 0;
	my $evalue = "0";
	my $scores;
	my $bits;	#ooh er
	
    open(TEMPFILE,">temp.seq");
    print TEMPFILE ">Query\n$sequence\n";
    close TEMPFILE;
	while ($flag < 5) {    ## Attempt blast 5 times
    	if ($blast_type == 1) {
			print "Remote BLAST of $file ...\n";
			system ("blastcl3 -p blastx -d nr -i temp.seq -o blast.out -T F");
			my $aflag = 0;
			
			while ($aflag == 0) {
	    		wait();
	    		open(FH,"blast.out");
	    		if( -s FH > 550) { $aflag=1; close(FH); next; }	#-s =non-zero file test operator
	    		close(FH);
			}
    	} else {   
			print "Local BLAST of $file ...\n";
			system("blastall -p blastx -d $blast_db -i temp.seq -o blast.out -T F");
		}

    	open(FH,"blast.out");

    	if(-s FH > 550) { $flag=10; close(FH); next; }
   		close(FH);
    	$flag++;
	}
	
    if($flag==10){
		open(BLASTFILE,"<blast.out");
		if ($blast_save ==1) {
			open (BLASTSAVE, ">>blast_reports/blasts_full");
			print BLASTSAVE "\n==========================================================================\n";
			print BLASTSAVE "\n$file BLAST report\n";
		}
		$flag=0;
		while (my $line=<BLASTFILE>) {
			if ($blast_save ==1) {
				print BLASTSAVE "$line";
			}
	    	if($line=~/^>(.*)/ && $flag==0) { $prot_id=$1; $flag=1; next; }
	    	if($line =~ /Expect\s=\s(\w.*)/ && $flag==1) { 
				chomp $line;
				$scores = $line;
				$scores =~ /Score\s+=\s+(\d+)/;
				$bits = $1;
				if ($bits >= $bits_cutoff) {
					$evalue=1;	#use as flag to say that hit was found
				}
				last;
			}
		}
		
		if ($evalue ==0) {	#catch no hit situation
			$prot_id = "";
		
		} else {
	   		$prot_id =~ s/>.*//;	#remove > from hit description
	    	$prot_id =~ s/\[(.*)\]/ \- $1/;	#remove square brackets from descr
			$prot_id .= ". $scores";	#add score and e-value
	    	open (BLASTHIT, ">>blast_reports/blasts_tophit");
			print BLASTHIT ("$file $prot_id\n");
		}
    } else {
		print LOG "$file blast failed\n";
    }
    unlink("blast.out");	#removes blast.out
	close BLASTFILE; close BLASTHIT;
	return $prot_id;
}

###############################################################################
sub get_id () {
}

#################################################################################

sub sendEST (){
#send the submission file by email to dbEST 
use Mail::Mailer;

	my ($dbESTsend) = @_;
	my $flag = 1;	#flag set to 0 if mail returns error
	my $recipient = "batch-sub\@ncbi.nlm.nih.gov";
	#my $recipient = "al.anthony\@ed.ac.uk";
	
	print "The EST submission file will now be sent by e-mail to\n";
	print "$recipient\n";
	#print "Please enter your e-mail address for replies:\n";
	my $replyto = &get_input("Please enter your e-mail address for replies:\n");
	chomp $replyto;
	
	
	my $subject = "EST submission";
	
	my $body = "\n$dbESTsend";
	
	my $SMTP_SERVER = &get_input("Please enter your SMTP server for outgoing mail (your\nlocal computer support will be able to tell you what this is):\n");
	chomp $SMTP_SERVER;
	
	print "Sending to $recipient...\n";
	
	eval {
		my $mailer = Mail::Mailer->new("smtp", Server => $SMTP_SERVER);

		$mailer -> open ({	From => "",
						To => $recipient,
						Cc => $replyto,
						'Reply-to' => $replyto,
						Subject => $subject });
					
		print $mailer $body;
		$mailer ->close;
	};
	if ($@) {	#this variable will contain any error messages returned
		print colored ("\nWARNING! - There was a problem sending this email.\n", "green bold");
		print "see logfile for details. trace2dbest continuing...\n\n";
		open (LOG, ">>logfile");
		print LOG "MAIL WARNING!\n$@\nsmtp server was $SMTP_SERVER\nrecipeint was $recipient\nreply to was $replyto\n";
		close LOG;
		$flag = 0;
	}
	#put smtp server and reply to in array and return it for use in EGTDC mail
	my @info = ($flag, $replyto, $SMTP_SERVER);
	
	return @info;
}
	
################################################################################

sub save_files (){
#saves all the output files in the users home dir or the standard common location.

	my ($sent) = shift @_;
	my ($place) = shift @_;
	my $location;
	my @traces;
	
	my $date= `date "+%m_%d_%y_%H-%M-%S"`;	#get date and time to use as a unique identifier for file
	chomp $date; 
	my $x = $date;	
	
	if ($sent) {	#add tag to indicate if file was submitted to dbEST or not
		$x .= "submitted";
	} else {
		$x .= "unsubmitted";
	}
	
	if ($place) {
		$location = "/home/db/est_solutions/$species/trace2dbest/$x";
	} else {
		$location = "$homedir/est_solutions/$species/trace2dbest/$x";	
	}
	
	mkdir "$location", 0777 or die "Can't make directory $location/raw_traces\n$!\n";
	
	my @files = ("dbEST_submission.txt", "blast_reports/","fasta/","fastafiles/","partigene/","phd_dir/","process/","qual/","scf/","subfiles/");
	foreach my $file (@files) {
		rename "$file", "$location/$file" or die "There was an error moving $file. Check logfile for details\n",
		 print LOG "rename $file to $location$file failed with $!\n";
	}
	
	#put relevant files in partigene dir 
	my @quals = glob ("$location/qual/*.qual");
	foreach my $qual (@quals) {
		copy ("$qual" , "$location/partigene") or die "Can't copy $qual to $location/partigene\n", print LOG "$qual to $location/partigene:$!\n";
	}
	my @seqs = glob ("$location/fasta/*.seq");
	foreach my $seq (@seqs) {
		copy ("$seq" , "$location/partigene") or die "Can't copy $seq/* to $location/partigene\n", print LOG "$seq/* to $location/partigene:$!\n";
	}
	
	system ("mv raw_traces $location/raw_traces");
	
	close LOG;
	rename "logfile", "$location/logfile" or die "Could not move logfile!\n$!";
	print "\n\nThe EST submission file and the other output directories have been\nsaved in the following location:\n";
	print "$location\n\n";
	sleep(2);
	
}

########################################################################################################################

sub print_help() {
#prints help document using less
 
#system("less $basedir/trace2dbest_UG.txt");
#temporarily disabled - incompatibility with some unix tastes
# Ok this is a bit of a joke ...
print "\nSearching for online help ..."; sleep (2);
print "\nCouldn't find online help - please use userguide provided with the trace2dbest package.\n\n";

}

##########################################################################################################################


#########################################################################################################################
sub find_program() { #### find executables
  my $prog=$_[0];
  my $check=$_[1];
  my $pathflag=0;
  my $path;
  my $finalpath;
  foreach $path (@PATH) {
    if (-f "$path/$prog") {
      $pathflag=1; $finalpath=$path; last;
    }
  }
  if($pathflag==0)   { 
    print colored("Can't find the $prog utility on your system\n","red bold");
    print colored("Please ensure it is installed and in your path\n","red bold");
    sleep(1);
    unless ($check) {exit()};
    return 0;
  }
  else  {  return "$finalpath/$prog";  }
}
#########################################################################################################################

sub query_for_exit() {
#exits program if 'n' entered
 
    my $input='';
    while($input!~/y|n/i) {
		#print "\b";
		$input= &get_input("\b");
		chomp $input;
    }

    if($input=~/^n/i) { print "Bye \n"; exit(); }
}

##########################################################################################################################

####################################################################################
sub yes_no() {   #### returns '1' for yes 
  my $yflag=0;
  print colored("\n[y/n] : ","green bold");
  my $input='';
  while($input!~/y|n/i)   {
    print "\b";
    $input=<STDIN>;
    chomp $input;
  }

  if($input=~/^y/i) { $yflag=1; }
  return $yflag;
}
####################################################################################

sub get_input() {
#uses either readline module or the traditional way to get user response to question printed to screen

	my $input;
	my $question = shift (@_);

	if ($read_gnu) {	#true if gnu readline module installed
		$input = $term->readline("$question");	#print question and get user input
	} else {	#do it the old way, without readline
		print "$question";
		$input =<>;
	}
	
	chomp $input;	#remove trailing newline
	$input =~ s/\s*$//;	#remove trailing space
	return $input;
}

######################################################################################################################

#########################################################################################################################
sub get_config() {
my $filename = "~/.trace2dbest.conf";
$filename =~ s{ ^ ~ ( [^/]* ) }
              { $1
                    ? (getpwnam($1))[7]
                    : ( $ENV{HOME} || $ENV{LOGDIR}
                         || (getpwuid($>))[7]
                       )
}ex;
open (CONFILE,"$filename") ||  die "Error opening .trace2dbest.conf\n";
    while (my $line=<CONFILE>) {     
      if ($line=~/^VECTOR\=(.+)/i) { $vector_file=$1; }
      if ($line=~/^ECOLI\=(.+)/i) { $ecoli_file=$1; } 
      if ($line=~/^LIB_FILE\=(.+)/i) { $Lib_file=$1; }
      if ($line=~/^PUB_FILE\=(.+)/i) { $Pub_file=$1; }     
      if ($line=~/^CONT_FILE\=(.+)/i) { $Cont_file=$1; } 
      if ($line=~/^EST_FILE\=(.+)/i) { $EST_file=$1; }
      if ($line=~/^PHRED_OPTIONS\=(.+)/i) { $phredoptions=$1; }
    }
close (CONFILE);
}

##############################################################################################################

############################################################################################################################
sub create_conf() {   #### creates configfile if not found in home directory
    my $config_file = $homedir; 
    $config_file = "$config_file" . "/.trace2dbest.conf";
    open (CONF, ">$config_file") || die "Sorry, could not open/.trace2dbest.conf $!";
    print CONF "### location of vector.seq ###\n";
    print CONF "VECTOR=$_[0]\n";
    print CONF "### location of ecoli.seq file  ###\n";
    print CONF "ECOLI=$_[1]\n";
    print CONF "### location of library file ###\n";
    print CONF "LIB_FILE=$_[2]\n";
    print CONF "### location of publication files ###\n";
    print CONF "PUB_FILE=$_[3]\n";
    print CONF "### location of cont file ###\n";
    print CONF "CONT_FILE=$_[4]\n";
    print CONF "### location of EST file ###\n";
    print CONF "EST_FILE=$_[5]\n"; 
    print CONF "### phred parameters (Change only if you are really sure what you are doing)###\n";
    print CONF "PHRED_OPTIONS=$_[6]\n";     
    close (CONF) || die "Error - cannot close: $!\n";
}    
#######################################################################################################