#!/usr/bin/perl

#    This library is free software; you can redistribute it and/or
#    modify it under the terms of the GNU Lesser General Public
#    License as published by the Free Software Foundation; either
#    version 2.1 of the License, or (at your option) any later version.
#
#    This library is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public
#    License along with this library ('COPYING'); if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

use strict;
use warnings;
use XML::Twig;

=head1 NAME

genotype2mipe.pl - Inserts SNP data into MIPE file
  based on MIPE version v0.9
  Make sure SNPs are already defined in the USE part of the PCR record.
  arguments: * mipe_file
             * STDIN: tab-delimited list of data, in the following order:
	           PCR ID, sample ID, SNP ID, ambiguity code

=head1 SYNOPSIS

snp2mipe.pl my_file.mipe < my_data.txt

=head1 ADDITIONAL INFO

See http://mipe.sourceforge.net

=head1 AUTHOR

Jan Aerts (jan.aerts@bbsrc.ac.uk)

=cut

my $file = shift;
if ( not defined $file ) { die "Please provide filename\n" };
my $data = shift;

my $twig = XML::Twig->new( pretty_print => 'indented'
#                         , keep_atts_order => 1
			 , TwigHandlers => { pcr => \&pcr }
			 );
my @genotype_data = ( <STDIN> );
my %genotype_passed;
foreach ( @genotype_data ) {
  chomp;
  my ( $pcr_id_in, $sample_id_in, $snp_id_in, $genotype_in ) = split /\t/, $_;
  $genotype_passed{$sample_id_in}{$snp_id_in} = 0;
}

$twig->parsefile($file);
$twig->print;

foreach my $sample ( sort keys %genotype_passed ) {
  foreach my $snp ( sort keys %{$genotype_passed{$sample}} ) {
    if ( $genotype_passed{$sample}{$snp} == 0 ) {
      print STDERR "Data for sample $sample on SNP $snp not imported\n";
    }
  }
}

exit;

sub pcr {
  my ( $twig, $pcr ) = @_;
  
  my $pcr_id = $pcr->first_child('id')->text;

  LINE: foreach my $input_line ( @genotype_data ) {
    chomp $input_line;
    my ( $pcr_id_in, $sample_id_in, $snp_id_in, $genotype_in ) = split /\t/, $input_line;
  
    if ( $pcr_id =~ /$pcr_id_in/ ) {
      my $use = $pcr->first_child('use');
      if ( not defined $use ) {
        print STDERR "No USE part defined for PCR $pcr_id\n";
	next LINE;
      }
      
      my @snps = $use->children('snp');
      my @samples = $use->children('sample');

      my $new_sample = 1;
      foreach my $sample ( @samples ) {
        if ( $sample->first_child('id')->text eq $sample_id_in ) {
	  $new_sample = 0;
	  my @genotypes = $sample->children('genotype');
	  my $new_genotype = 1;
	  foreach my $genotype ( @genotypes ) {
	    if ( $genotype->first_child('snp_id')->text eq $snp_id_in ) {
	      $new_genotype = 0;
	      my $remark_elt = XML::Twig::Elt->new('remark', 'prev_amb ' . $genotype->first_child('amb')->text);
	      $genotype->first_child('amb')->set_text($genotype_in);
	      $remark_elt->paste('last_child', $genotype);
	      $genotype_passed{$sample_id_in}{$snp_id_in} = 1;
	    }
	  }
	  if ( $new_genotype ) {
	    my $snp_id_found = 0;
	    foreach my $snp ( @snps ) {
	      if ( $snp->first_child('id')->text eq $snp_id_in ) {
	        $snp_id_found = 1;
	      }
	    }
	    if ( not $snp_id_found ) {
	      print STDERR "SNP ID $snp_id_in not defined in MIPE file\n";
	      next LINE;
	    }
	    my $snp_id_elt = XML::Twig::Elt->new('snp_id', $snp_id_in);
	    my $amb_elt = XML::Twig::Elt->new('amb', $genotype_in);
	    my $genotype_elt = XML::Twig::Elt->new('genotype', '');
	    
	    $snp_id_elt->paste('first_child', $genotype_elt);
	    $amb_elt->paste('last_child', $genotype_elt);
	    $genotype_elt->paste('last_child', $sample);
	    $genotype_passed{$sample_id_in}{$snp_id_in} = 1;
	  }
	}
      }
      
      if ( $new_sample ) {
        my $snp_id_found = 0;
	foreach my $snp ( @snps ) {
	  if ( $snp->first_child('id')->text eq $snp_id_in ) {
	    $snp_id_found = 1;
	  }
	}
	if ( not $snp_id_found ) {
	  print STDERR "SNP ID $snp_id_in not defined in MIPE file\n";
	  next LINE;
	}
	my $sample_id_elt = XML::Twig::Elt->new('id', $sample_id_in);
	my $snp_id_elt = XML::Twig::Elt->new('snp_id', $snp_id_in);
	my $amb_elt = XML::Twig::Elt->new('amb', $genotype_in);
	my $genotype_elt = XML::Twig::Elt->new('genotype','');
	my $sample_elt = XML::Twig::Elt->new('sample','');
	
	$snp_id_elt->paste('first_child', $genotype_elt);
	$amb_elt->paste('last_child', $genotype_elt);
	
	$sample_id_elt->paste('first_child', $sample_elt);
	$genotype_elt->paste('last_child', $sample_elt);
	
        if ( scalar $use->children('remark') > 0 ) {
	  my @use_remarks = $use->children('remark');
	  my $first_remark = $use_remarks[0];
	  $sample_elt->paste('before', $first_remark);
	} else {
	  $sample_elt->paste('last_child', $use);
	}
	$genotype_passed{$sample_id_in}{$snp_id_in} = 1;
      }
      
    }
  }
}
