File: Clean360.pl

package info (click to toggle)
ampliconnoise 1.29-10
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 6,040 kB
  • sloc: ansic: 18,080; sh: 2,899; perl: 2,089; makefile: 235
file content (118 lines) | stat: -rwxr-xr-x 1,954 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/perl

use POSIX;

my $nTotal = 0;
my $nClean = 0;
my $flowSeq = "TACG";

my $primer    = $ARGV[0];
my $out       = $ARGV[1];

my $ffile = "${out}.fa";
my $dfile = "${out}.dat";
open(FFILE, ">${ffile}") or die "Can't open ${out}.fa\n";
open(DFILE, ">${dfile}") or die "Can't open ${out}.dat\n";

while(my $line = <STDIN>){
    chomp($line);
 
    if($line =~ />(.*)/){
	my $id = $1;
	#print "$id\n";
	$line = <STDIN>;
	chomp($line);
	#print "$id $line\n";
	my @flows = split(/ /,$line);
	
	#print "@flows\n";
	my $length = shift(@flows);
	my $read;
	my $newLength = 0;

	#print "$length\n";
	#print "$length @flows\n";
	while($newLength*4 < $length){
	    my $signal = 0;
	    my $noise  = 0;

	    for($j = 0; $j < 4; $j++){
		my $f = $flows[$j + 4*$newLength];
		if($f > 0.5){
		    $signal++;
		    if($f < 0.7 || $f > 9.49){
			$noise++;
		    }
		}
		
	    }
    
	    if($noise > 0 || $signal == 0){
		last;
	    }

	    $newLength++;
	}

	$length = $newLength*4;

	my $read = flowToSeq($length,@flows);
	   
	if($length >= 360 && $read=~/^TCAG.*(${primer}.*)/){
	    $length = 360;
		    
	    printf DFILE "$id $length ";
	    for($j = 0; $j < 360; $j++){
		printf DFILE "$flows[$j] ";
	    }
		
	    printf DFILE "\n";
		    

		
	    printf FFILE ">$id\n";
	    printf FFILE "$1\n";
		
	    $nClean++;
	    goto found;
	}

	found : $nTotal++;
    }
}

close(DFILE);
close(FFILE);

open my $in,  '<',  $dfile      or die "Can't read old file: $!";
open my $out, '>', "$dfile.new" or die "Can't write new file: $!";

print $out "$nClean 360\n";

while( <$in> )
{     
    print $out $_;
}

close $out;

rename("$dfile.new", $dfile);

exit(0);

sub flowToSeq()
{
    my ($length, @flowgram) = @_;
    my $retSeq = "";

    for(my $i = 0; $i < $length; $i++){
	my $signal = floor($flowgram[$i] + 0.5);
	my $base   = substr($flowSeq, $i % 4, 1);

	for(my $j = 0; $j < $signal; $j++){
	    $retSeq .= $base;
	}
    }

    return $retSeq;
}