File: select_best_rbcontig_plus_read1.pl

package info (click to toggle)
bio-rainbow 2.0.4+dfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bullseye, buster, sid
  • size: 560 kB
  • sloc: ansic: 7,475; perl: 172; makefile: 129; sh: 49
file content (88 lines) | stat: -rwxr-xr-x 1,597 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/perl -w
#

use strict;
use warnings;

my $file = shift or die "Usage: $0 <rbasmed file> <rbdiv output file>\n";
my $div = shift or die "Usage: $0 <rbasmed file> <rbdiv output file>\n";

my $len = 0;
my $name = "";
my $seq = "";
my $reads = "";

open DIV, $div or die $!;
open IN, $file or die $!;
my %cls = ();
$_ = <DIV>;
my $dseq = $_;
my @de = split /\s+/, $dseq;
while (<IN>) {
	if (/^E/) {
		my @e = split /\s+/, $_;
		if ($len != 0) {
			if (&isoverlap($reads)) {
				print ">$name"."_L"."$len\n";
				print $seq, "\n"
			} else {
				print ">$name"."_L"."$len\n";
				print &gen_mock_ctg($seq, $de[2]), "\n";
			}
			while (<DIV>) {
				my @e2 = split /\s+/, $_;
				if ($e2[1] eq $e[1]) { # thanks to Jonathan Puritz for pointing this
					$dseq = $_;
					@de = @e2;
					last;
				}
			}
		}
		$name = $e[0].$e[1];
		$len = 0;
		$seq = "";
		$reads = "";
	} elsif (/^S/) {
		my @e = split /\s+/, $_;
		if ($len < length($e[1])) {
			$seq = $e[1];
			$len = length $e[1];
			<IN>;
			$_ = <IN>;
			$reads = $_;
		}
	} 
}
close IN;
close DIV;
if (&isoverlap($reads)) {
	print ">$name"."_L"."$len\n";
	print $seq, "\n"
} else {
	print ">$name"."_L"."$len\n";
	print &gen_mock_ctg($seq, $de[2]), "\n";
}

1;

sub isoverlap {
	my $reads = shift;
	my @ids = split /\s+/, $reads;
	my $ret = 0;
	foreach my $id (@ids) {
		next if $id =~ /R/;
		my @rec = split /:/, $id;
		if ($rec[2] == 0) {
			$ret = 1;
			return $ret;
		}
	}
	return $ret;
}

sub gen_mock_ctg {
	my @seqs = @_;
	$seqs[1] =~ tr/ACGTacgt/TGCAtgca/;
	$seqs[1] = reverse $seqs[1];
	return $seqs[0].("N"x10).$seqs[1];
}