
# TFBS module for TFBS::PatternGen::YMF
#
# Copyright Wynand Alkema
#
# You may distribute this module under the same terms as perl itself
#

# POD

=head1 NAME

TFBS::PatternGen::YMF - a pattern factory that uses the YMF program

=head1 SYNOPSIS

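    my $patterngen =
            TFBS::PatternGen::YMF->new(-seq_file => 'sequences.fa',
                                       -abs_stats_path => '/path/to/YMF',
                                       -config_file => '/path/to/YMF/stats.config',
                                       -pathoforganismtables => '/path/to/YMF/organism',
                                       -length_region => 600,
                                       -length_oligo => 6);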


    my $pfm = $patterngen->pattern(); # $pfm is now a TFBS::Matrix::PFM object

=head1 DESCRIPTION

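TFBS::PatternGen::YMF runs the external program YMF (its 'stats'
executable), written by Sinha and Tompa, on a set of sequences.
Every word reported by YMF is wrapped in a
TFBS::PatternGen::YMF::Motif object that carries the word's z-score,
number of occurrences, expectation value and variance.
For information on YMF see

Sinha S, Tompa M (2003) YMF: a program for discovery of novel
transcription factor binding sites by statistical overrepresentation.
Nucleic Acids Res. 31:3586-3588.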


=head1 FEEDBACK

Please send bug reports and other comments to the author.

=head1 AUTHOR - Wynand Alkema


Wynand Alkema E<lt>Wynand.Alkema@cgb.ki.seE<gt>

=cut

package TFBS::PatternGen::YMF;
use vars qw(@ISA);
use strict;


# Object preamble - inherits from TFBS::PatternGen;

use TFBS::PatternGen;
use TFBS::PatternGen::YMF::Motif;
use File::Temp qw(:POSIX);
use Bio::Seq;
use Bio::SeqIO;
use File::Temp qw/ tempfile tempdir /;
@ISA = qw(TFBS::PatternGen);

=head2 new

 Title   : new
 Usage   : my $patterngen = TFBS::PatternGen::YMF->new(%args);
 Function: the constructor for the TFBS::PatternGen::YMF object
 Returns : a TFBS::PatternGen::YMF object
 Args    : This method takes named arguments;
            you must specify one of the following three
            -seq_list     # a reference to an array of strings
                          #   and/or Bio::Seq objects
              # or
            -seq_stream   # A Bio::SeqIO object
              # or
            -seq_file     # the name of the fasta file containing
                          #   all the sequences
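           Other arguments are:
            -abs_stats_path       # the directory that contains the YMF
                                  #   'stats' executable
            -length_oligo         # the oligo length passed to 'stats'
            -length_region        # the region length passed to 'stats'
            -pathoforganismtables # the path to the organism tables
                                  #   passed to 'stats'
            -config_file          # the YMF configuration file
                                  #  OPTIONAL: default 'stats.config'
                                  #   in the directory given by -stats_path
            -stats_path           # the directory holding the default
                                  #   'stats.config'; only consulted when
                                  #   -config_file is not given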

=cut

# Constructor: stores the YMF settings, builds the sequence set from the
# -seq_list / -seq_stream / -seq_file argument and runs YMF immediately, so
# the returned object already holds the parsed motifs.
# Throws if an argument needed to build the 'stats' command line is missing.
sub new {
    my ($caller, %args) = @_;
    my $self = bless {}, ref($caller) || $caller;

    # Each of these is interpolated verbatim into the 'stats' command line by
    # _run_stats. A missing value used to produce a malformed command and a
    # silently empty result, so it is rejected up front instead.
    foreach my $required (qw(-length_oligo -pathoforganismtables
                             -length_region -abs_stats_path)) {
        unless (defined $args{$required} and length $args{$required}) {
            $self->throw("Missing required argument $required");
        }
    }

    $self->{'width'}      = $args{'-length_oligo'};
    $self->{'path_org'}   = $args{'-pathoforganismtables'};
    $self->{'len_region'} = $args{'-length_region'};

    # An explicit -config_file wins; otherwise fall back to the example
    # config file that comes with the installation of YMF, looked up in
    # -stats_path.
    my $config_file = $args{'-config_file'};
    unless ($config_file) {
        unless (defined $args{'-stats_path'}) {
            $self->throw("Either -config_file or -stats_path must be given");
        }
        $config_file = $args{'-stats_path'}."/stats.config";
    }
    $self->{'config_file'} = $config_file;

    # This is the directory where the 'stats' executable is located.
    $self->{'abs_stats_path'} = $args{'-abs_stats_path'};

    $self->_create_seq_set(%args) or die ('Error creating sequence set');

    $self->_run_stats() or $self->throw("Error running stats.");
    return $self;
}

=head2 pattern

=head2 all_patterns

=head2 patternSet

The three methods listed above are used for the retrieval of patterns,
and are common to all TFBS::PatternGen::* classes. Please
see L<TFBS::PatternGen> for details.

=cut

# Writes the sequence set to a temporary FASTA file, runs the YMF 'stats'
# executable on it from inside a fresh temporary directory and passes the
# outcome to _parse_stats_output, which fills $self->{'motifs'}.
# Returns 1; failures detected while parsing surface as exceptions thrown by
# _parse_stats_output.
sub _run_stats {
    my ($self) = @_;

    # tempfile() creates the files atomically, avoiding the race that is
    # inherent in tmpnam(). UNLINK => 1 is only a safety net for the case
    # where an exception prevents the explicit unlink calls from running.
    my ($fasta_fh, $tmp_file) = tempfile(UNLINK => 1);
    close $fasta_fh;
    my ($dump_fh, $dumpfile) = tempfile(UNLINK => 1);
    close $dump_fh;

    my $outstream = Bio::SeqIO->new(-file=>">$tmp_file", -format=>"fasta");
    foreach my $seqobj (@{ $self->{'seq_set'} }) {
        $outstream->write_seq($seqobj);
    }
    $outstream->close();

    # _parse_stats_output reads a file named 'results' from this directory,
    # so the program is run with it as the working directory.
    # CLEANUP => 1 removes the directory at program exit if the parser does
    # not get to remove it itself.
    my $dir = tempdir(CLEANUP => 1);

    # Standard output of 'stats' goes to $dumpfile, which the parser scans
    # for error messages.
    my $command_line = join(" ",
        $self->{'abs_stats_path'}."/stats",
        $self->{'config_file'},
        $self->{'len_region'},
        $self->{'width'},
        $self->{'path_org'},
        "-sort",                # sorts on z-score
        $tmp_file,
        ">$dumpfile",
    );

    # '&&' rather than ';' so that the program is never run in the wrong
    # directory should the cd fail. Because stdout is redirected, the
    # captured string is expected to be empty.
    my $resultstring = `cd $dir && $command_line`;

    # The input file is no longer needed once the program has finished;
    # removing it before parsing means it does not leak when the parser throws.
    unlink $tmp_file;

    $self->_parse_stats_output($resultstring,$command_line,$dumpfile,$dir);
    return 1;
}



#
# Inspects the output of a YMF 'stats' run and turns every reported word into
# a TFBS::PatternGen::YMF::Motif object appended to $self->{'motifs'}.
#
# Arguments:
#   $resultstring - captured output of the command (not used here)
#   $command_line - the command that was run; only used in error messages
#   $dumpfile     - file holding the program's standard output; scanned for
#                   error messages and then deleted
#   $temp_dir     - directory in which the program wrote its 'results' file;
#                   removed before this method returns or throws
#
# Throws if the dump file reports an error or the results file cannot be read.
sub _parse_stats_output {
    my ($self,$resultstring,$command_line,$dumpfile,$temp_dir)=@_;

    # File::Path is loaded here because this method is its only user.
    require File::Path;
    my $remove_temp_dir = sub {
        File::Path::rmtree($temp_dir)
            if defined $temp_dir and -d $temp_dir;
    };

    # Look for the first line of the dump file that signals a failure.
    # An unreadable dump file is not fatal by itself: a failed run is still
    # caught below when the results file turns out to be missing.
    my $error_message;
    if (open my $dump_fh, '<', $dumpfile) {
        while (my $line = <$dump_fh>) {
            if ($line =~ /(^Error.*)/ or $line =~ /(.*Aborting.*)/) {
                $error_message = $1;
                last;
            }
        }
        close $dump_fh;
    }
    unlink $dumpfile;

    if (defined $error_message) {
        $remove_temp_dir->();
        print "YMF Error message: \"$error_message\"\n";
        $self->throw ("Error running YMF using command:\n $command_line");
    }

    my $results_file = "$temp_dir/results";
    my $res_fh;
    unless (open $res_fh, '<', $results_file) {
        my $reason = $!;    # saved before the cleanup can overwrite $!
        $remove_temp_dir->();
        $self->throw ("Cannot open YMF results file $results_file: $reason\n".
                      "Command was:\n $command_line");
    }

    # The first line is skipped (presumably a column header - confirm
    # against the YMF output format).
    my $skip = <$res_fh>;
    while (my $line = <$res_fh>) {
        next unless $line =~ /\S/;    # a blank line carries no word
        my ($word,$occ,$z_score,$expect,$var) = split ' ', $line;
        my $motif = TFBS::PatternGen::YMF::Motif->new
                    (-word => $word,
                     -tags => {z_score => $z_score,
                               'occurences' => $occ,
                               'expectation value' => $expect,
                               'variance' => $var}
                    );
        push @{ $self->{'motifs'} }, $motif;
    }
    close $res_fh;

    $remove_temp_dir->();
    return;
}
#

1;