#!/usr/bin/perl

#    This library is free software; you can redistribute it and/or
#    modify it under the terms of the GNU Lesser General Public
#    License as published by the Free Software Foundation; either
#    version 2.1 of the License, or (at your option) any later version.
#
#    This library is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    Lesser General Public License for more details.
#
#    You should have received a copy of the GNU Lesser General Public
#    License along with this library ('COPYING'); if not, write to the Free Software
#    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

use strict;
use warnings;
use CGI ':standard';
use XML::Twig;

=head1 NAME

mipe2html.pl - Generates HTML page based on MIPE file
  based on MIPE version v1.1
  arguments: * mipe_file
             * (optional) list of PCR IDs

=head1 SYNOPSIS

mipe2html.pl your_file.mipe <pcr_id1> <pcr_id2>

=head1 ADDITIONAL INFO

See http://mipe.sourceforge.net

=head1 AUTHOR

Jan Aerts (jan.aerts@bbsrc.ac.uk)

=cut

my ( $file, @pcr_ids ) = @ARGV;
if ( not defined $file ) { die "Please provide filename\n" };

print header,
  start_html('Overview MIPE file'),
  h1('Overview of MIPE file: ', $file);
print "Page created by mipe2html.pl (see <a href=http://mipe.sourceforge.net>http://mipe.sourceforge.net</a>)", br, "\n";
print "<p>";
my $requested;
if ( scalar @pcr_ids == 0 ) {
  $requested = 'all';
} else {
  $requested = join(';', @pcr_ids);
}
print "Requested PCR products: ", $requested, br, "\n";

print hr;

my $twig = XML::Twig->new( TwigHandlers => { pcr => \&pcr }
                         , pretty_print => 'indented' );
$twig->parsefile($file);
print end_html;
print "\n";
exit;

sub pcr {
  my ( $twig, $pcr ) = @_;

  my $to_include = 0;
  my $pcr_id = $pcr->{att}->{id};
  if ( scalar @pcr_ids > 0 ) {
    $to_include = 0;
    foreach ( @pcr_ids ) {
      if ( $pcr_id =~ /$_/i ) {
        $to_include = 1;
      }
    }
  } else {
    $to_include = 1;
  }

  if ( $to_include ) {
    print h2('PCR ID: ', $pcr_id), "\n";

    my @projects = $pcr->children('project');
    my @projects_text;
    foreach ( @projects ) {
      push @projects_text, $_->text;
    }
    if ( scalar @projects_text > 0 ) {
      print "Projects: ", join(', ', @projects_text), br, "\n";
    }

    my @researchers = $pcr->children('researcher');
    my @researchers_text;
    foreach ( @researchers ) {
      push @researchers_text, $_->text;
    }
    if ( scalar @researchers_text > 0 ) {
      print "Researchers: ", join(', ', @researchers_text), br;
    }
    my @remarks = $pcr->children('remark');
    if ( scalar @remarks > 0 ) {
      print h5('Remarks');
      print "<table border=1>", "\n";
      foreach ( @remarks ) {
        print "<tr><td>", $_->text, "</td></tr>\n";
      }
      print "</table>", br, "\n";
    }

    print h3('Design'), "\n";
    my $design_seq = $pcr->first_child('design')->first_child('seq')->text;
    $design_seq =~ s/(.{10})/$1 /g;
    $design_seq =~ s/(.{55})/$1\<br \/\>\n/g;
    print "seq:\n";
    print "<pre>", $design_seq, "</pre>\n";
    print "pos on source: ", $pcr->first_child('design')->first_child('range')->text, br, "\n";

    print h4('Source'), "\n";
    if ( defined $pcr->first_child('design')->first_child('source')->first_child('seq') ) {
      my $source_seq = $pcr->first_child('design')->first_child('source')->first_child('seq')->text;
      $source_seq =~ s/(.{10})/$1 /g;
      $source_seq =~ s/(.{55})/$1\<br \/\>\n/g;
      print "Seq:\n";
      print "<pre>", $source_seq, "</pre>\n";
    } elsif ( defined $pcr->first_child('design')->first_child('source')->first_child('file') ) {
      print "File: ", $pcr->first_child('design')->first_child('source')->first_child('file')->text, br, "\n";
    } elsif ( defined $pcr->first_child('design')->first_child('source')->first_child('accession') ) {
      print "Accession: ", $pcr->first_child('design')->first_child('source')->first_child('accession')->text, br, "\n";
    } else {
      die "No valid MIPE file (source)\n"
    };
    my $source_name = ( defined $pcr->first_child('design')->first_child('source')->first_child('name') ) ? $pcr->first_child('design')->first_child('source')->first_child('name')->text : 'UNKNOWN';
    my $source_species = ( defined $pcr->first_child('design')->first_child('source')->first_child('species') ) ? $pcr->first_child('design')->first_child('source')->first_child('species')->text : 'UNKNOWN';
    print "name: ", $source_name, br, "\n";
    print "species: ", $source_species, br, "\n";

    print h4('Primer1'), "\n";
    print "oligo: ", $pcr->first_child('design')->first_child('primer1')->first_child('oligo')->text, br, "\n";
    print "seq: ", $pcr->first_child('design')->first_child('primer1')->first_child('seq')->text, br, "\n";
    print "Tm: ", $pcr->first_child('design')->first_child('primer1')->first_child('tm')->text, br, "\n";

    print h4('Primer2'), "\n";
    print "oligo: ", $pcr->first_child('design')->first_child('primer2')->first_child('oligo')->text, br, "\n";
    print "seq: ", $pcr->first_child('design')->first_child('primer2')->first_child('seq')->text, br, "\n";
    print "Tm: ", $pcr->first_child('design')->first_child('primer2')->first_child('tm')->text, br, "\n";

    print h3('Use'), "\n";
    if ( not defined $pcr->first_child('use') ) {
      print "No data for use", br, "\n";
    } else {
      if ( defined $pcr->first_child('use')->first_child('seq') ) {
        my $use_seq = $pcr->first_child('use')->first_child('seq')->text;
        $use_seq =~ s/(.{10})/$1 /g;
	$use_seq =~ s/(.{55})/$1\<br \/\>\n/g;
        print "seq:\n";
        print "<pre>", $use_seq, "</pre>\n";
      } else {
        print "seq: unknown", br;
      }
      if ( defined $pcr->first_child('use')->first_child('revcomp') ) {
        print "revcomp: ", $pcr->first_child('use')->first_child('revcomp')->text, br, "\n";
      } else {
        print "revcomp: 0", br, "\n";
      }
  
      my @snps = $pcr->first_child('use')->children('snp');
      my %snps;
      foreach my $snp ( @snps ) {
        my $snp_id = $snp->{att}->{id};
        $snps{$snp_id}{amb} = ( defined $snp->first_child('amb') ) ? $snp->first_child('amb')->text : '';
        $snps{$snp_id}{pos} = $snp->first_child('pos')->text || die "File not in MIPE format (SNP pos)\n";
        $snps{$snp_id}{pos_design} = ( defined $snp->first_child('pos_design') ) ? $snp->first_child('pos_design')->text : '';
        $snps{$snp_id}{pos_source} = ( defined $snp->first_child('pos_source') ) ? $snp->first_child('pos_source')->text : '';
        $snps{$snp_id}{rank} = ( defined $snp->first_child('rank') ) ? $snp->first_child('rank')->text : '';
        my @remarks = $snp->children('remark');
        if ( scalar @remarks > 0 ) {
          $snps{$snp_id}{remarks} = 1;
  	my @remarks_text;
  	foreach ( @remarks ) {
  	  push @remarks_text, $_->text;
  	}
          $snps{$snp_id}{remarks_text} = join ('; ', @remarks_text);
        } else {
          $snps{$snp_id}{remarks} = 0;
        }
        
      }
  
      my @samples = $pcr->first_child('use')->children('sample');
      my %samples;
      my %genotypes;
      foreach my $sample ( @samples ) {
        my $sample_id = $sample->{att}->{id};
        $samples{$sample_id}{file} = ( defined $sample->first_child('file') ) ? $sample->first_child('file')->text : '';
        my @genotypes = $sample->children('genotype');
        foreach my $genotype ( @genotypes ) {
          my $snp_id = $genotype->first_child('snp_id')->text || die "File not in MIPE format (genotype)\n";
          $genotypes{$sample_id}{$snp_id} = $genotype->first_child('amb')->text;
        }
      }
  
      print h4('SNPs'), "\n";
      if ( scalar keys %snps == 0 ) {
        print "No SNPs", br, "\n";
      } else {
        print "<table border=1>\n";
        print "<tr><td></td><td>pos</td><td>pos_design</td><td>pos_source</td><td>amb</td><td>rank</td><td>remarks?</td></tr>\n";
        foreach my $snp_id ( sort { $snps{$a}{pos} <=> $snps{$b}{pos} } keys %snps ) {
          print "<tr>";
  	print "<td>$snp_id</td>";
          print "<td><center>$snps{$snp_id}{pos}</center></td>";
          print "<td><center>$snps{$snp_id}{pos_design}</center></td>";
          print "<td><center>$snps{$snp_id}{pos_source}</center></td>";
          print "<td><center>$snps{$snp_id}{amb}</center></td>";
          print "<td><center>$snps{$snp_id}{rank}</center></td>";
          print "<td><center>$snps{$snp_id}{remarks}</center></td>";
  	print "</tr>\n";
        }
        print "</table>", br, "\n";
      }
          
      print h5('SNP remarks'), "\n";
      print "<table border=1>\n";
      print "<tr><td></td><td>remark</td></tr>\n";
      my $found_snp_remark = 0;
      foreach my $snp_id ( sort { $snps{$a}{pos} <=> $snps{$b}{pos} } keys %snps ) {
        if ( defined $snps{$snp_id}{remarks_text} ) {
          print "<tr><td>$snp_id</td><td>$snps{$snp_id}{remarks_text}</td></tr>\n";
  	$found_snp_remark = 1;
        }
      }
      print "</table>", br, "\n";
      if ( $found_snp_remark == 0 ) {
        print "No SNPs with remark\n";
      }
  
      print h4('Samples'), "\n";
      if ( scalar keys %samples == 0 ) {
        print "No samples", br, "\n";
      } else {
        print "<table border=1>\n";
        print "<caption>Sample meta data</caption>\n";
        print "<tr><td></td><td>file</td></tr>\n";
        foreach my $sample_id ( sort keys %samples ) {
          print "<tr>";
          print "<td>$sample_id</td>";
  	print "<td>$samples{$sample_id}{file}</td>";
  	print "</tr>\n";
        }
        print "</table>", br, "\n";
  
        if ( scalar keys %genotypes == 0 ) {
          print "No genotyping data", br, "\n";
        } else {
          print "<table border=1>\n";
  	print "<caption>Sample genotyping data</caption>\n";
          print "<tr><td></td>";
  	foreach my $snp_id ( sort { $snps{$a}{pos} <=> $snps{$b}{pos} } keys %snps ) {
  	  print "<td>", $snps{$snp_id}{pos}, "</td>";
  	}
  	print "</tr>\n";
  	foreach my $sample_id ( sort keys %samples ) {
            print "<tr><td>", $sample_id, "</td>";
            foreach my $snp_id ( sort { $snps{$a}{pos} <=> $snps{$b}{pos} } keys %snps ) {
              my $genotype = ( defined $genotypes{$sample_id}{$snp_id} ) ? $genotypes{$sample_id}{$snp_id} : '';
              my $genotype_text;
              if ( $genotype eq 'A' ) {
                print '<td bgcolor="lightgreen"><center>A</center></td>';
              } elsif ( $genotype eq 'T' ) {
                print '<td bgcolor="red"><center>T</center></td>';
              } elsif ( $genotype eq 'G' ) {
                print '<td bgcolor="lightblue"><center>G</center></td>';
              } elsif ( $genotype eq 'C' ) {
                print '<td bgcolor="yellow"><center>C</center></td>';
              } else {
                print "<td><center>", $genotype, "<center></td>";
              }
  	  }
  	  print "</tr>\n";
  	}
  	print "</table>", br, "\n";
        }
      }
    }

    print hr;
  }
}


