File: weedMaf.pl

package info (click to toggle)
augustus 3.3.2%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 486,188 kB
  • sloc: cpp: 51,969; perl: 20,926; ansic: 1,251; makefile: 935; python: 120; sh: 118
file content (85 lines) | stat: -rwxr-xr-x 1,602 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/perl
#
# weedMaf.pl: remove only-gap columns from a multiple alignment in MAF format
#
# input file format:
# 
# a score=5037
# s chlamy4.chromosome10    5424 36280 + 6579462 ATCA-CCACAG--ACC...
# s    volvox.scaffold_9 2188403 51128 + 9999999 ACCA-CCACGGGCACC...
# 
#
# 10.03.2013, Mario Stanke, mario.stanke@uni-greifswald.de

use strict;
use Getopt::Long;

# Buffers for the alignment block currently being read.  The two arrays are
# parallel (element i of each comes from the same "s" line); they are filled
# by the loop below and consumed by weed().
my @alirows;         # alignment rows (last field of each "s" line, including gaps)
my @alidata;         # everything up to the alignment row of each "s" line

my $help = 0;
my $len;             # alignment length of the current block, assigned in weed()

GetOptions('help!'=>\$help);

# --help shows the embedded POD.  The list form of exec bypasses the shell,
# so a script path containing blanks or shell metacharacters is passed on
# intact; if perldoc cannot be started we stop instead of reading input.
if ($help) {
    exec('perldoc', $0) or die "Cannot run perldoc on $0: $!\n";
}

while (my $line = <>) {
    if ($line !~ /^s\s/) {
        # Any line that is not an "s" line ends the current block: print the
        # weeded block first, then pass the line through unchanged.
        if (@alirows) {
            weed();
            @alidata = @alirows = ();
        }
        print $line;
    }
    elsif ($line =~ /^(s\s.*\s)(\S+)\s*$/) {
        # $1 is the prefix up to and including the blank before the
        # alignment, $2 the alignment itself.  Trailing whitespace (e.g. the
        # CR of a CRLF line ending) is tolerated and not kept.
        push @alidata, $1;
        push @alirows, $2;
    }
    else {
        # An "s" line without a separate alignment field; report it on
        # STDERR rather than dropping it without notice.
        (my $shown = $line) =~ s/\s+\z//;
        warn "weedMaf.pl: skipping malformed 's' line $.: $shown\n";
    }
}

# Flush the last block when the input ends directly after an "s" line.
if (@alirows) {
    weed();
}

# weed - print the buffered alignment block without its gap-only columns.
#
# Works on the file-level arrays @alirows (alignment strings) and @alidata
# (the text preceding each alignment string) and stores the alignment
# length of the block in the file-level $len.  A column is dropped when
# every row has '-' at that position.  Each row is printed to the default
# output as its @alidata prefix, the remaining characters and a newline.
#
# Takes no arguments; the callers in this file ignore the return value.
# Dies if the rows of the block do not all have the same length.
sub weed {
    $len = length($alirows[0]);
    foreach my $row (@alirows) {
        die ("Inconsistent alignment lengths") if (length($row) != $len);
    }

    # Positions at which at least one row holds something other than a gap.
    my @kept = grep {
        my $pos = $_;
        scalar grep { substr($_, $pos, 1) ne '-' } @alirows;
    } 0 .. $len - 1;

    # Rebuild every row from the surviving positions and print it after
    # its prefix.
    foreach my $r (0 .. $#alirows) {
        my $weeded = join('', map { substr($alirows[$r], $_, 1) } @kept);
        print $alidata[$r] . $weeded . "\n";
    }
}


__END__

=head1 NAME

weedMaf.pl - remove only-gap columns from a multiple alignment in MAF format

=head1 SYNOPSIS

weedMaf.pl < in.maf > out.maf

=cut