File: aggregateBinDepths.pl

package info (click to toggle)
metabat 2.18-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 972 kB
  • sloc: cpp: 10,869; sh: 422; python: 297; perl: 163; makefile: 19; ansic: 11
file content (65 lines) | stat: -rwxr-xr-x 1,363 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/perl

use warnings;
use strict;

our $USAGE = "$0 depth.txt bin1.fa [...]\n\n";
die $USAGE unless scalar(@ARGV) >= 2;

our %hdepths;

open(my $d, "<", $ARGV[0]) || die;
my $header = <$d>;
my $reg = qr/^([^\t]+)\t([^\t]+)\t([^\t]+)\t/;
my $reg2 = qr/^(\S+)/;
while (<$d>) {
  my($contig, $len, $totalDepth) = $_ =~ $reg;
  my($contigName) = $contig =~ $reg2;
  $hdepths{$contigName} = [$len, $totalDepth];
}

close($d) || die;
shift;

printf("bin\ttotalLength\tAvgDepth\tStdDev\n");

my %hbins;
my @lbins;
foreach my $file (@ARGV) {
  open(my $f, "<", $file) || die;
  $reg = qr/^>(\S+)/;
  my @l_contigs;
  $hbins{$file} = \@l_contigs;
  push @lbins, $file;
  while (<$f>) {
    if ($_ =~ $reg) {
      my $contig = $1;
      push @l_contigs, $contig;
    }
  }
  close($f) || die;

  my $totalLen = 0;
  my $totalDepth = 0;

  foreach my $contig (@l_contigs) {
    my $rl = $hdepths{$contig};
    $totalLen += $rl->[0];
    $totalDepth += $rl->[0] * $rl->[1];
  }

  
  my($avgDepth, $stdDev) = (0,0);
  if ($totalLen > 0) {
    $avgDepth = $totalDepth / $totalLen;
    foreach my $contig (@l_contigs) {
      my $rl = $hdepths{$contig};
      my $diff = $avgDepth - $rl->[1];
      $stdDev += $diff * $diff * $rl->[0] / $totalLen;
    }
    $stdDev = sqrt( $stdDev );
  }

  printf("%s\t%d\t%0.2f\t%0.2f\n", $file, $totalLen, $avgDepth, $stdDev);
}