File: seq_n50.pl

package info (click to toggle)
wtdbg2 2.5-11
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid
  • size: 119,728 kB
  • sloc: ansic: 27,655; perl: 1,212; makefile: 125; sh: 83
file content (63 lines) | stat: -rwxr-xr-x 932 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/perl -w
#
# Author: Ruan Jue
#
use strict;
use Getopt::Std;

# Option flags populated by Getopt::Std::getopts below.
our ($opt_h, $opt_s);

# Recognised switches:
#   -h  print a short usage message and exit
#   -s  additionally print one "<name>\t<length>" line per sequence
getopts("hs");

if($opt_h){
	print "Usage: $0 [-h] [-s] [fasta_file ...]\n";
	print "  -h  show this help and exit\n";
	print "  -s  also print each sequence name and its length (tab-separated)\n";
	print "Reads FASTA from the given files, or from standard input when none are given.\n";
	exit 0;
}

# Sum of all recorded sequence lengths, and the individual lengths.
# Only sequences with a non-zero length are recorded in the statistics.
my $total = 0;
my @nums = ();

# Length of the sequence currently being read.
my $len = 0;
# True once a header line has been seen, so that -s can terminate the line of
# a zero-length record instead of gluing the next name onto it.
my $in_record = 0;

while(my $line = <>){
	if($line =~ /^>(\S+)/){
		# Header line: the name is the first whitespace-free token after '>'.
		my $name = $1;
		if($opt_s){
			# Finish the previous record's line before starting a new one.
			print "\t$len\n" if($in_record or $len);
			print $name;
		}
		if($len){
			push(@nums, $len);
			$total += $len;
		}
		$len = 0;
		$in_record = 1;
	} else {
		# Sequence line: count every character except the line terminator.
		# Stripping an optional "\r" and "\n" keeps the count right for
		# CRLF files and for a final line that lacks a trailing newline.
		$line =~ s/\r?\n\z//;
		$len += length($line);
	}
}

# Flush the last record.
print "\t$len\n" if($opt_s and ($in_record or $len));
if($len){
	push(@nums, $len);
	$total += $len;
}

my $n_seq = @nums;

# Without at least one non-empty sequence the average and median are
# undefined; fail with a clear message instead of dividing by zero.
die "$0: no sequences found in input\n" unless($n_seq);

my $avg = sprintf("%0.2f", $total / $n_seq);

print "Total: $total\n";
print "Count: $n_seq\n";
print "Average: $avg\n";

# Cumulative-length thresholds at 0%, 10%, ..., 100% of the total, followed by
# a sentinel larger than the total so the reporting loop below always stops.
my @nxxs = ();
for(my $k = 0; $k <= 10; $k++){
	push(@nxxs, int($total * $k / 10));
}
push(@nxxs, $total + 1);

my $i = 0;
my $j = 0;

# Longest first, as required for the Nxx walk below.
@nums = sort {$b <=> $a} @nums;

# Element at index int(count / 2) of the descending list; no averaging is
# done for an even number of sequences.
my $median = $nums[int($n_seq / 2)];

print "Median: $median\n";

# Reused here as the running cumulative length.
$len = 0;

# Walk the sequences from longest to shortest. Each time the cumulative length
# reaches the next threshold, print "N<j>0: <length>\t<sequences used so far>".
for(; $i < @nums; $i++){
	$len += $nums[$i];
	while($nxxs[$j] <= $len){
		print "N".$j."0: $nums[$i]\t". ($i + 1) . "\n";
		$j ++;
	}
}

1;