File: event_prediction.pl

package info (click to toggle)
kissplice 2.6.7-2
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 16,752 kB
  • sloc: cpp: 8,783; python: 1,618; perl: 389; sh: 72; makefile: 18
file content (87 lines) | stat: -rw-r--r-- 2,197 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/perl -w
#
# event_prediction.pl - append an automatic prediction code to every row of a
# tab-separated events table, based on the lengths of the row's two paths.
#
# Usage: event_prediction.pl <input_table> <output_table> <k>
#
#   input_table   Tab-separated file. Only column 4 (Length_Upper) and
#                 column 5 (Length_Lower) are interpreted; all columns are
#                 copied through to the output.
#   output_table  Receives a fixed header line, each input row with the
#                 prediction code appended as one extra column, and a legend
#                 ("MEMO") describing the codes.
#   k             Number used in the 2k / 2k+1 length thresholds
#                 (presumably the k-mer size -- confirm against the pipeline).
#
# Expected input columns:
#   Cycle  Upperpath  LowerPath  Length_Upper  Length_Lower
#   Upper_Coverage_1  Upper_Coverage_2  Low_Coverage_1  Low_Coverage_2
# Example row (sequences shortened):
#   0  TTGATCC...AATAAT  TTGATCC...AATAAT  101  101  69  63  3  2
#
# The number of rows classified as code 5 is reported on STDOUT.
# Exits with status 1 (after printing a usage message) when the argument
# count is wrong; dies when k is not numeric or a file cannot be opened or
# written.

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

if (@ARGV != 3) {
    print "format:event_prediction.pl events_tab.txt events_tab_k.txt k\n";
    print "Please specify the names of the input files and the output file\n";
    exit 1;
}

my $infile  = shift @ARGV;
my $outfile = shift @ARGV;
my $k       = shift @ARGV;

# A non-numeric k would silently evaluate as 0 in every threshold below and
# mislabel the whole table, so refuse it up front.
die "k must be a number, got '$k'\n" unless looks_like_number($k);

# Three-argument open: the file names are taken literally, so a name that
# starts with '>' or ends with '|' cannot change the open mode.
open(my $in, '<', $infile)
    or die "Cannot open input file '$infile': $!\n";
open(my $out, '>', $outfile)
    or die "Cannot open output file '$outfile': $!\n";

# classify_event($upper_len, $lower_len, $k)
#
# Returns the prediction code (0-7) for one event given the lengths of its
# upper and lower paths (UL, LL) and k. The meaning of each code is the one
# written to the legend at the end of the output file.
#
# NOTE(review): the second branch tests the signed difference UL - LL < 5,
# so an upper path that is shorter than the lower path by any amount also
# lands there. The legend says "LL=UL +-5", which suggests an absolute
# difference may have been intended -- behaviour is kept as it was; confirm.
sub classify_event {
    my ($upper_len, $lower_len, $k) = @_;

    my $snp_len    = 2 * $k + 1;
    my $delta      = $upper_len - $lower_len;
    my $prediction = 0;

    if ($upper_len == $lower_len) {
        if ($upper_len == $snp_len) {
            $prediction = 1;
        }
        elsif ($upper_len > $snp_len) {
            $prediction = 2;
        }
    }
    elsif ($delta < 5) {
        if ($upper_len >= $snp_len) {
            $prediction = 3;
        }
        elsif ($upper_len < $snp_len) {
            $prediction = 4;
        }
    }
    elsif ($delta >= 5) {
        if ($lower_len <= 2 * $k && $lower_len >= 2 * $k - 3) {
            $prediction = 5;
        }
        elsif ($lower_len > $snp_len) {
            $prediction = 6;
        }
    }

    # A very short lower path overrides whatever was decided above.
    if ($lower_len < 2 * $k - 10) {
        $prediction = 7;
    }

    return $prediction;
}

my $count5 = 0;

print {$out} "Cycle\tUpperpath\tLowerPath\tLength_Upper\tLength_Lower\tUpper_Coverage_1\tUpper_Coverage_2\tLow_Coverage_1\tLow_Coverage_2\tPrediction\n";

while (my $line = <$in>) {
    chomp $line;

    # The input's own header row is dropped; a fixed header was written above.
    next if $line =~ /^Cycle\t/;

    my @columns = split /\t/, $line;
    my ($upper_len, $lower_len) = @columns[3, 4];

    my $prediction = 0;
    if (looks_like_number($upper_len) && looks_like_number($lower_len)) {
        $prediction = classify_event($upper_len, $lower_len, $k);
    }
    else {
        # Blank or truncated rows used to be compared as if both lengths
        # were 0, which produced arbitrary codes. Keep the row in the output
        # but mark it as "no automatic prediction".
        warn "$infile line $.: missing or non-numeric path lengths; "
           . "prediction set to 0\n";
    }

    $count5++ if $prediction == 5;

    print {$out} join("\t", @columns, $prediction), "\n";
}

print "Count of 5: $count5\n";

# Legend for the Prediction column.
# NOTE(review): the code uses UL-LL >= 5 for codes 5 and 6 and UL-LL < 5 for
# codes 3 and 4, while the legend text says ">5" and "+-5". The strings are
# left unchanged in case downstream tools read them -- confirm which boundary
# is intended.
print {$out} "MEMO\n";
print {$out} "Prediction\n";
print {$out} "0.\tNo automatic prediction\n";
print {$out} "1.\tSNP (LL=UL=2k+1)\n";
print {$out} "2.\tMultiple SNPs (LL=UL >2k+1)\n";
print {$out} "3.\tIndel SNPs (LL=UL +-5, UL=2k+1 OR UL>2k+1)\n";
print {$out} "4.\tIndel SNPs with overlapping switching nodes (LL=UL +-5, UL<2k+1)\n";
print {$out} "5.\tSimple Splicing Event (UL-LL>5, 2k+1-4<=LL<=2k)\n";
print {$out} "6.\tComplex Splicing Event(UL-LL>5, LL>2k+1)\n";
print {$out} "7.\tSuspicious cases (LL<2k-10) \n";

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close must be checked for the output file.
close($out) or die "Error while writing '$outfile': $!\n";
close($in);