File: mapping_to_phandango.pl

package info (click to toggle)
seer 1.1.4-8
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 3,692 kB
  • sloc: cpp: 2,945; perl: 596; python: 122; makefile: 92; sh: 43
file content (121 lines) | stat: -rwxr-xr-x 3,205 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/perl -w

use strict;
use warnings;

use Getopt::Long;

my $usage_message = <<USAGE;
Usage: ./mapping_to_phandango.pl -s <sam_file> -k <seer_kmers> > phandango.plot

Creates a plot file for visualisation in phandango

   Options

   Required
   -s, --sam        Sam file of kmers mapped to reference (unsorted)
   -k, --kmers      Kmers that were mapped

   Optional
   -m, --map        Minimum mapping quality
   --print_kmer     Also print k-mer sequence in output

   -h, --help       Displays this message

USAGE

#****************************************************************************************#
#* Main                                                                                 *#
#****************************************************************************************#

#* gets input parameters
my ($kmer_file, $print_kmer, $sam_file, $min_qual, $help);
GetOptions ("kmers|k=s"  => \$kmer_file,
            "print_kmer" => \$print_kmer,
            "sam|s=s" => \$sam_file,
            "map|m=s" => \$min_qual,
            "help|h"     => \$help
		   ) or die($usage_message);

if (defined($help) || !defined($kmer_file) || !defined($sam_file))
{
   print $usage_message;
}
else
{
   if (!defined($min_qual))
   {
      $min_qual = 0;
   }

   open(SAM, $sam_file) || die("Could not open sam file $sam_file: $!\n");
   open(KMERS, $kmer_file) || die("Could not open kmer file $kmer_file: $!\n");

   my $header = <KMERS>;
   my @header_fields = split("\t", $header);
   my $lrt = 0;
   if ($header_fields[4] eq "lrt_p_val")
   {
      $lrt = 1;
   }

   my %points;
   while (my $sam_line = <SAM>)
   {
      chomp $sam_line;

      if ($sam_line !~ /^@/)
      {
         my ($kmer_id, $flag, $chrom, $pos, $map_qual, $cigar, $mate_chrom, $mate_pos, $insert_size, $sequence, $quality, @optional) = split("\t", $sam_line);

         # Get position
         my $start = $pos;
         my $end = $pos + length($sequence) - 1;

         my $kmer = <KMERS>;
         chomp $kmer;

         # Don't report unmapped or low quality mapped kmers
         if ($map_qual > $min_qual && !($flag & 4))
         {
            my ($kmer, $maf, $unadj, $adj, @other) = split("\t", $kmer);
            if ($lrt)
            {
               $adj = $other[0];
            }

            if ($kmer ne $sequence)
            {
               print STDERR "WARNING: line $kmer_id sequence mismatch. Are input files in same order?\n";
            }

            my $log_p = 386; # Exponent limit of a double
            if ($adj > 0)
            {
               $log_p = -log($adj)/log(10);
            }

            $points{$kmer}{pval} = $log_p;
            $points{$kmer}{start} = $start;
            $points{$kmer}{pos} = "$start..$end";
         }
      }
   }

   # Sort and print the output
   my @keys = sort { $points{$a}{start} <=> $points{$b}{start} } keys(%points);
   foreach my $kmer (@keys)
   {
      if (defined($print_kmer))
      {
         print join("\t", "26", $kmer, $points{$kmer}{pos}, $points{$kmer}{pval}, "0") . "\n";
      }
      else
      {
         print join("\t", "26", ".", $points{$kmer}{pos}, $points{$kmer}{pval}, "0") . "\n";
      }
   }
}

exit(0);