File: MakeGapTable.pl

package info (click to toggle)
ncbi-tools6 6.1.20170106-2~bpo8%2B1
  • links: PTS, VCS
  • area: main
  • in suites: jessie-backports
  • size: 465,032 kB
  • sloc: ansic: 1,474,200; pascal: 6,740; cpp: 6,248; xml: 3,390; sh: 1,979; perl: 1,084; csh: 508; makefile: 431; ruby: 93; lisp: 81
file content (86 lines) | stat: -rwxr-xr-x 1,711 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/perl

use strict;

my $infile;
my $outfile;
my $thisline;
my $id;
my $new_id;
my $sequence;
my $mingap;

$infile = shift || die "Must specify input filename!\n";
$outfile = shift;
if (!$outfile) {
  $outfile = $infile;
  $outfile =~ s/\.fsa$//;
  $outfile .= ".tbl";
}

$mingap = shift;
if ($mingap < 1) {
  $mingap = 1;
}
 
open (IN, $infile) || die "Unable to open $infile!\n";
open (OUT, ">$outfile") || die "Unable to open $outfile!\n";
while ($thisline = <IN>) {
  $thisline =~ s/\r//g;
  $thisline =~ s/\n//g;
  if ($thisline =~ /\>\s*(\S+)/) {
    $new_id = $1;
    if ($id) {
      &process_seq($id, $sequence, $mingap);
    }
    $id = $new_id;
    $sequence = "";
  } else {
    $sequence .= $thisline;
  }
}
if ($id) {
  &process_seq($id, $sequence);
} else {
  print "No sequences found!\n";
}
close(IN);
close(OUT);

sub process_seq
{
  my $id = shift(@_);
  my $sequence = shift(@_);
  my $mingap = shift(@_);
  my $pos;
  my $offset = 0;
  my $len;

  print OUT ">Feature $id\n";
  $sequence =~ s/\s//g;
  $sequence = uc($sequence);
  $pos = index($sequence, "N");
  while ($pos != -1) {
    $offset += $pos;
    $sequence = substr($sequence, $pos);
    $len = 0;
    while(substr($sequence, $len, 1) eq "N") {;
      $len++;
    }
    if ($len >= $mingap) {
      print OUT $offset + 1;
      print OUT "\t";
      print OUT $offset + $len;
      print OUT "\tassembly_gap\n";
      print OUT "\t\t\tgap_type\twithin scaffold\n";
      print OUT "\t\t\tlinkage_evidence\tpaired-ends\n";
      print OUT "\t\t\testimated_length\t";
      print OUT $len;
      print OUT "\n";
    }
    $sequence = substr($sequence, $len);
    $offset += $len;
    $pos = index($sequence, "N");
  }
}