File: rearrange.pl

package info (click to toggle)
murasaki 1.68.6-6
  • links: PTS, VCS
  • area: main
  • in suites: stretch
  • size: 1,928 kB
  • ctags: 3,100
  • sloc: cpp: 16,010; perl: 8,365; makefile: 186
file content (110 lines) | stat: -rwxr-xr-x 3,051 bytes parent folder | download | duplicates (5)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/perl -w

#Copyright (C) 2006-2008 Keio University
#(Kris Popendorf) <comp@bio.keio.ac.jp> (2006)
#
#This file is part of Murasaki.
#
#Murasaki is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.
#
#Murasaki is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.
#
#You should have received a copy of the GNU General Public License
#along with Murasaki.  If not, see <http://www.gnu.org/licenses/>.

use File::Basename;
use Getopt::Long;
use Pod::Usage;
#use Data::Dump qw{dump};
use POSIX qw{floor};

use strict;

BEGIN {
  unshift(@INC,(fileparse($0))[1].'perlmodules');
}
use Murasaki;

my ($help,$man,$opt_output,$noRifts);

GetOptions('help|?' => \$help, man => \$man, 'output|o=s'=>\$opt_output,'norifts'=>\$noRifts);
pod2usage(1) if $help or $#ARGV<1;
pod2usage(-exitstatus => 0, -verbose => 2) if $man;

my ($infname,@permute)=@ARGV;
my $base=fileparse($infname,qr/\.anchors/);

die "$infname not found." unless -f $infname;
die "Only works on .anchor files." unless $infname=~m/^(.*)\.anchors$/;
my $prefix=$1;
my $seqs="$prefix.seqs";
die "No .seqs file found.\n" unless -f $seqs;
my $oprefix=$opt_output ? $opt_output:"$prefix.".join("-",@permute);

my @seqs=split("\n",slurp($seqs));

#check existing @permute to make sure values are valid
foreach my $i (0..$#permute){
  die "Bad permutation value $permute[$i] at permutation[$i]" unless $permute[$i]<=(scalar @seqs) and $permute[$i]>0;
}

@permute=map {$_-1} @permute; #switch from 1-index to 0-indexed

my ($oseqs,$outf)=("$oprefix.seqs","$oprefix.anchors");
foreach my $f ($oseqs,$outf){print "Warning: overwriting existing $f\n" if -f $f;}
open(my $infh,$infname) or die "Couldn't open $infname for reading.";
open(my $outfh,">$outf") or die "Couldn't open $outf for writing.";
open(my $oseqsh,">$oseqs") or die "Couldn't open $oseqs for writing.";

foreach my $p (0..$#permute){
  print $oseqsh $seqs[$permute[$p]]."\n";
}
close $oseqsh;

print "Wrote $oseqs\n";

while(my $line=<$infh>){
  my @anchors;
  while($line=~m/(\S+\t\S+\t\S+)/g){
    my $anchor=$1;
    if($noRifts){
      my ($start,$stop,$sign)=(split(/\t/,$anchor));
      goto NEXTLINE if $start==0 or $stop==0;
    }
    push(@anchors,$anchor);
  }
  print $outfh join("\t",map {$anchors[$_]} @permute)."\n";
 NEXTLINE:
}
close $outfh;
print "Wrote $outf\n";

sub slurp {
 local $/;
  open(my $fh,"@_") or return;
  return <$fh>;
}



__END__

=head1 NAME

rearrange.pl -- rearrange an anchors file according to some rules

=head1 SYNOPSIS

rearrange.pl <input anchors> <permutation>
eg.
rearrange.pl myalign.anchors 2 3 1 4

=head1 OPTIONS

--output - force output to go to some particular file (otherwise it's automatically derived from the input filenames)