#!/usr/bin/env perl

use strict;
use warnings;
use Carp;
use Getopt::Long qw(:config posix_default no_ignore_case bundling pass_through);
use lib "$ENV{TRINITY_HOME}/PerlLib";
use Fasta_reader;
use List::Util qw (shuffle);
#use Math::Random;


my $usage = <<__EOUSAGE__;

##################################################################
#
# --fasta <string>              fasta filename
# 
# --subst_rate <float>          rate of single base substitutions
#
# --insert_rate <float>         rate of insertion
#
# --insert_size <int>           default: 1
#
# --delete_rate <float>         rate for deletions
#
# --delete_size <int>           default: 1
#
#  * note, all rates must be 0 <= x <= 0.25
#
###################################################################


__EOUSAGE__

    ;

my $fasta_file;
my $subst_rate = 0;
my $insert_rate = 0;
my $insert_size = 1;
my $delete_rate = 0;
my $delete_size = 1;


&GetOptions (               
              'fasta=s' => \$fasta_file,
              'subst_rate=f' => \$subst_rate,
              
              'insert_rate=f' => \$insert_rate,
              'insert_size=i' => \$insert_size,

              'delete_rate=f' => \$delete_rate,
              'delete_size=i' => \$delete_size,
              
    );


unless ($fasta_file) {
    die $usage;
}
unless ($subst_rate || $insert_rate || $delete_rate) {
    die $usage;
}


foreach my $info_aref ( ['subst_rate', $subst_rate ],
                        ['insert_rate', $insert_rate ],
                        ['delete_rate', $delete_rate ] ) {
    
    my ($rate_type, $val) = @$info_aref;
    if ($val > 0.25) {
        die "Error, --$rate_type $val exceeds max val of 0.25 ";
    }
    

}


main: {

    my $fasta_reader = new Fasta_reader($fasta_file);

    my %fasta_seqs = $fasta_reader->retrieve_all_seqs_hash();

    foreach my $acc (keys %fasta_seqs) {

        
        my $seq = $fasta_seqs{$acc};
        
        my @seqarray = &convert_to_seqarray($seq);
                
        my %seen;


        if ($subst_rate) {
            @seqarray = &mutate_seq(\@seqarray, \%seen, $subst_rate, 'substitution');
        }
        if ($insert_rate) {
            @seqarray = &mutate_seq(\@seqarray, \%seen, $insert_rate, 'insertion');
        }
        if ($delete_rate) {
            @seqarray = &mutate_seq(\@seqarray, \%seen, $delete_rate, 'deletion');
        }
        
        my $mutated_seq = join("", @seqarray);
        print ">$acc mutated\n$mutated_seq\n";
        
    }
    

    
    exit(0);
    
}

####
sub convert_to_seqarray {
    my ($seq) = @_;

    my @chars = split(//, uc $seq);
    
    return(@chars);
}

####
sub mutate_seq {
    my ($seqarray_aref, $seen_href, $mut_rate, $mut_type) = @_;

    my $seqlen = $#$seqarray_aref + 1;

    my $num_mutations = int($mut_rate * $seqlen + 0.5);
    
    for (1..$num_mutations) {
        
        my $pos = -1;
        do {
            $pos = int(rand($seqlen));

        } while ($seen_href->{$pos});
        
        $seen_href->{$pos} = 1;
        
        ## Substitutions
        if ($mut_type eq 'substitution') {
            
            my $char = $seqarray_aref->[$pos];
                        
            my @mut_chars = grep { $_ ne $char } qw(G A T C);
        
            my $mut_base = $mut_chars[ int(rand(3)) ];
            
            $seqarray_aref->[$pos] = lc $mut_base;
        }

        ## Deletions
        elsif ($mut_type eq 'deletion') {
            for (my $i = $pos; $i < $pos + $delete_size && $i < $seqlen; $i++) {
                
                $seqarray_aref->[$i] = "";
                $seen_href->{$i} = 1;
            }
        }
        
        ## Insertions
        elsif ($mut_type eq 'insertion') {
            
            ## create insertion
            my $insertion_seq = "";
            my @bases = qw(G A T C);
            for (my $i = 0; $i < $insert_size; $i++) {
                my $base = $bases[int(rand(4))];
                $insertion_seq .= $base;
            }
            $seqarray_aref->[$pos] .= lc $insertion_seq;
        }
        else {
           confess "Error, do not understand mutation type: $mut_type ";
        }
        
    }
    

    return (@$seqarray_aref);
    
}


