1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
|
#!/usr/bin/perl
use strict;
my $infile;
my $outfile;
my $thisline;
my $id;
my $new_id;
my $sequence;
my $mingap;
$infile = shift || die "Must specify input filename!\n";
$outfile = shift;
if (!$outfile) {
$outfile = $infile;
$outfile =~ s/\.fsa$//;
$outfile .= ".tbl";
}
$mingap = shift;
if ($mingap < 1) {
$mingap = 1;
}
open (IN, $infile) || die "Unable to open $infile!\n";
open (OUT, ">$outfile") || die "Unable to open $outfile!\n";
while ($thisline = <IN>) {
$thisline =~ s/\r//g;
$thisline =~ s/\n//g;
if ($thisline =~ /\>\s*(\S+)/) {
$new_id = $1;
if ($id) {
&process_seq($id, $sequence, $mingap);
}
$id = $new_id;
$sequence = "";
} else {
$sequence .= $thisline;
}
}
if ($id) {
&process_seq($id, $sequence);
} else {
print "No sequences found!\n";
}
close(IN);
close(OUT);
sub process_seq
{
my $id = shift(@_);
my $sequence = shift(@_);
my $mingap = shift(@_);
my $pos;
my $offset = 0;
my $len;
print OUT ">Feature $id\n";
$sequence =~ s/\s//g;
$sequence = uc($sequence);
$pos = index($sequence, "N");
while ($pos != -1) {
$offset += $pos;
$sequence = substr($sequence, $pos);
$len = 0;
while(substr($sequence, $len, 1) eq "N") {;
$len++;
}
if ($len >= $mingap) {
print OUT $offset + 1;
print OUT "\t";
print OUT $offset + $len;
print OUT "\tassembly_gap\n";
print OUT "\t\t\tgap_type\twithin scaffold\n";
print OUT "\t\t\tlinkage_evidence\tpaired-ends\n";
print OUT "\t\t\testimated_length\t";
print OUT $len;
print OUT "\n";
}
$sequence = substr($sequence, $len);
$offset += $len;
$pos = index($sequence, "N");
}
}
|