#!/usr/bin/env perl

use strict;
use warnings;

use lib ("/usr/lib/trinityrnaseq/PerlLib/");
use Ascii_genome_illustrator;
use Cwd;

my $usage = "usage: $0 refseqs.fa target.fa min_per_id [min_pct_aligned]\n\n";

my $REFSEQS_FA = $ARGV[0] or die $usage;
my $TARGET_FA = $ARGV[1] or die $usage;
my $MIN_PER_ID = $ARGV[2] or die $usage;
my $MIN_PCT_ALIGNED = $ARGV[3] || 0;

main: {

    
    my $cmd = "blat $TARGET_FA $REFSEQS_FA -t=dna -q=dna -out=blast9 $TARGET_FA.blat.out";

    &process_cmd($cmd);

    # tried blastn, but it doesnt like the Trinity.fasta header format...
    #my $cmd = "makeblastdb -in trinity_out_dir/Trinity.fasta -dbtype nucl";    
    #$cmd = " blastn -db trinity_out_dir/Trinity.fasta -query refseqs.fa -outfmt 6 > blast.blastn.out";
    #&process_cmd($cmd);
    
    &generate_ascii_illustration("$TARGET_FA.blat.out");
    
    exit(0);
}

####
sub generate_ascii_illustration {
    my ($align_out_file) = @_;

    my %ref_seq_lengths = &get_seq_lengths($REFSEQS_FA);
    my %target_seq_lengths = &get_seq_lengths($TARGET_FA);
    
    my @hits = &parse_align_out($align_out_file);
    
    ## Generate a reference view.
    foreach my $ref_acc (keys %ref_seq_lengths) {
        my $length = $ref_seq_lengths{$ref_acc};
        my $ascii_illustration = new Ascii_genome_illustrator($ref_acc, 60);
        
        #$ascii_illustration->add_feature($ref_acc, 1, $length, "=");
        
        my @matches = grep { $_->{ref_acc} eq $ref_acc } @hits;
        #@matches = reverse sort {$a->{bitscore}<=>$b->{bitscore}} @matches;
        @matches = sort {$a->{ref_start}<=>$b->{ref_start}} @matches;


        foreach my $match (@matches) {
            
            my $target_acc = $match->{target_acc};
            my $target_start = $match->{target_start};
            my $target_end = $match->{target_end};
            my $per_id = $match->{per_id};
            
            if ($per_id < $MIN_PER_ID) { next; }

            my $ref_start = $match->{ref_start};
            my $ref_end = $match->{ref_end};

            my $target_length = $target_seq_lengths{$target_acc};
            my $pct_of_target_aligned = sprintf("%.2f", (abs($target_end-$target_start) + 1) / $target_length * 100);
            
            if ($pct_of_target_aligned < $MIN_PCT_ALIGNED) { next; }

            my $target_feature_name = "$target_acc $target_start-$target_end:$target_length ($pct_of_target_aligned\% aln, $per_id\% ID)";

            if ($target_start > $target_end) {
                ($ref_start, $ref_end) = ($ref_end, $ref_start);
            }
            
            $ascii_illustration->add_feature($target_feature_name, $ref_start, $ref_end, "-");
            
        }
        
        print $ascii_illustration->illustrate(1, $length);
        print "\n"; # spacer
        
    }
    
    return;
}

####
sub parse_align_out {
    my ($align_out_file) = @_;
    
    my @alignments;

    open (my $fh, $align_out_file) or die $!;
    while (<$fh>) {
        if (/^\#/) { next; }
        chomp;

=blast_outfmt6

# Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score


0       NM_012009_Sh2d1b1
1       comp0_c0_seq1
2       100.00
3       1433
4       0
5       0
6       1
7       1433
8       1
9       1433
10      0.0
11      2647

=cut

        my @x = split(/\t/);
        my ($ref_acc, $target_acc, $per_id, $align_len, $mismatches, $gaps, $ref_start, $ref_end, $target_start, $target_end, $evalue, $bitscore) = @x;
        
        my $struct = { target_acc => $target_acc,
                       ref_acc => $ref_acc,
                       
                       per_id => $per_id,
                       mismatches => $mismatches,
                       gaps => $gaps,
                       
                       target_start => $target_start,
                       target_end => $target_end,

                       ref_start => $ref_start,
                       ref_end => $ref_end,

                       evalue => $evalue,
                       bitscore => $bitscore,
                   };

        push (@alignments, $struct);
    }

    return(@alignments);

}




####
sub get_seq_lengths {
    my ($refseqs_fa) = @_;

    my %lengths;
    my $acc;
    
    open (my $fh, $refseqs_fa) or die $!;
    while (<$fh>) {
        if (/>(\S+)/) {
            $acc = $1;
        }
        else {
            my $seq = $_;
            chomp $seq;
            my $len = length($seq);
            $lengths{$acc} += $len;
        }
    }
    close $fh;
    
    return(%lengths);
}
    
####
sub process_cmd {
    my ($cmd) = @_;

    print STDERR "CMD: $cmd\n";
    my $ret = system($cmd);

    if ($ret) {
        die "Error, cmd: $cmd died with ret $ret";
    }
    
    return;
}
