1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
|
#!/usr/bin/perl
# event_prediction.pl -- append an automatic prediction code to every row of
# a tab-separated event table, based on the lengths of the row's two paths.
#
# Usage: event_prediction.pl events_tab.txt events_tab_k.txt k
#   events_tab.txt    input table (tab-separated), with the columns
#                       Cycle Upperpath LowerPath Length_Upper Length_Lower
#                       Upper_Coverage_1 Upper_Coverage_2
#                       Low_Coverage_1 Low_Coverage_2
#                     Only Length_Upper (UL) and Length_Lower (LL), the 4th
#                     and 5th columns, are read; the rest is copied through.
#   events_tab_k.txt  output: a fixed header line, every input row with the
#                     prediction appended as a last column, then a legend.
#   k                 number from which every length threshold (2k, 2k+1, ...)
#                     is derived.
#
# The number of rows that received prediction 5 is printed on STDOUT.
use strict;
use warnings;
use Scalar::Util qw(looks_like_number);

# Run only when executed as a script, so the subs below can also be loaded
# (e.g. with require) without side effects.
main(@ARGV) unless caller;

# classify_event($upper_len, $lower_len, $k)
# Returns the prediction code (0..7, see the legend written by main) for one
# event.  All three arguments must be numeric.
sub classify_event {
    my ($upper_len, $lower_len, $k) = @_;

    my $two_k   = 2 * $k;
    my $snp_len = $two_k + 1;
    my $diff    = $upper_len - $lower_len;

    # A very short lower path overrides every other class.
    return 7 if $lower_len < $two_k - 10;

    if ($diff == 0) {
        return 1 if $upper_len == $snp_len;
        return 2 if $upper_len > $snp_len;
        return 0;
    }

    # The legend defines classes 3/4 as "LL=UL +-5", so the tolerance is
    # applied to the absolute difference.  Without abs() any row whose lower
    # path is longer than its upper path, by any amount, landed here.
    if (abs($diff) < 5) {
        return $upper_len >= $snp_len ? 3 : 4;
    }

    # NOTE(review): the legend says "UL-LL>5" but a difference of exactly 5
    # has always been handled by this branch; confirm which one is intended.
    if ($diff >= 5) {
        return 5 if $lower_len >= $two_k - 3 && $lower_len <= $two_k;
        return 6 if $lower_len > $snp_len;
    }

    # Everything else, including a lower path at least 5 longer than the
    # upper path, gets no automatic prediction.
    return 0;
}

# main(@args)
# Command-line entry point: validates the arguments, classifies every data
# row of the input table and writes the annotated table plus legend.
# Exits with status 1 when the argument count is wrong; dies when k is not
# numeric or a file cannot be opened/closed.
sub main {
    my @args = @_;

    if (@args != 3) {
        print "format:event_prediction.pl events_tab.txt events_tab_k.txt k\n";
        print "Please specify the names of the input files and the output file\n";
        exit 1;
    }
    my ($infile, $outfile, $k) = @args;

    die "k must be a number, got '$k'\n" unless looks_like_number($k);

    # Three-argument open: the file names are never interpreted as modes.
    open my $in, '<', $infile
        or die "Cannot open '$infile' for reading: $!\n";
    open my $out, '>', $outfile
        or die "Cannot open '$outfile' for writing: $!\n";

    print {$out} "Cycle\tUpperpath\tLowerPath\tLength_Upper\tLength_Lower\tUpper_Coverage_1\tUpper_Coverage_2\tLow_Coverage_1\tLow_Coverage_2\tPrediction\n";

    my $count5 = 0;

    LINE:
    while (my $line = <$in>) {
        chomp $line;
        next LINE if $line =~ /^Cycle\t/;    # input header; ours is already written
        next LINE if $line !~ /\S/;          # blank line: nothing to annotate

        # Limit -1 keeps trailing empty fields, so the prediction is always
        # appended after the row's last column even if that column is empty.
        my @columns = split /\t/, $line, -1;
        my ($upper_len, $lower_len) = @columns[3, 4];

        my $prediction = 0;
        if (looks_like_number($upper_len) && looks_like_number($lower_len)) {
            $prediction = classify_event($upper_len, $lower_len, $k);
        }
        else {
            # Keep the row in the output, but do not invent a class for it.
            warn "$infile line $.: missing or non-numeric path lengths, "
               . "row left unclassified\n";
        }

        $count5++ if $prediction == 5;
        print {$out} join("\t", @columns, $prediction), "\n";
    }

    print "Count of 5: $count5\n";

    # Legend describing the prediction codes.
    print {$out}
        "MEMO\n",
        "Prediction\n",
        "0.\tNo automatic prediction\n",
        "1.\tSNP (LL=UL=2k+1)\n",
        "2.\tMultiple SNPs (LL=UL >2k+1)\n",
        "3.\tIndel SNPs (LL=UL +-5, UL=2k+1 OR UL>2k+1)\n",
        "4.\tIndel SNPs with overlapping switching nodes (LL=UL +-5, UL<2k+1)\n",
        "5.\tSimple Splicing Event (UL-LL>5, 2k+1-4<=LL<=2k)\n",
        "6.\tComplex Splicing Event(UL-LL>5, LL>2k+1)\n",
        "7.\tSuspicious cases (LL<2k-10) \n";

    # Buffered write errors only surface at close time.
    close $out or die "Cannot close '$outfile': $!\n";
    close $in;
    return;
}
|