File: ProteinModelSelection.pl

package info (click to toggle)
raxml 8.2.12%2Bdfsg-8
  • links: PTS, VCS
  • area: main
  • in suites: bookworm
  • size: 4,516 kB
  • sloc: ansic: 57,710; perl: 125; sh: 63; makefile: 52
file content (114 lines) | stat: -rw-r--r-- 3,264 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env perl

#print $ARGV[0]." ".$ARGV[1]." ".$#ARGV."\n";

#adapt as required, modify to read ./raxmHPC if raxml is not in your path

$raxmlExecutable = "raxmlHPC-AVX";
# $raxmlExecutable = "raxmlHPC-SSE3";
# $raxmlExecutable = "raxmlHPC-PTHREADS -T 4";
# $raxmlExecutable = "raxmlHPC-PTHREADS-AVX -T 4";
# $raxmlExecutable = "raxmlHPC-PTHREADS-SSE3 -T 4";

if(($#ARGV < 0) || ($#ARGV > 1))
  {
    print "ERROR: usage: \"perl ProteinModelSelection.pl alignmentFileName [modelFileName] \"\n";
    exit;
  }

$alignmentName = $ARGV[0];

$UNLIKELY = -1.0E300;

sub getLH
  {
    my $fileID = $_[0];  
    open(CPF, $fileID);
    my @lines = <CPF>;	
    close(CPF);	
    my $numIT = @lines;   	
    my $lastLH = pop(@lines);  
    my $k = index($lastLH, '-');   
    my $LH = substr($lastLH, $k);     
    return $LH;
  }

sub getTIME
  {
    my $fileID = $_[0];  
    open(CPF, $fileID);
    my @lines = <CPF>;	
    close(CPF);	
    my $numIT = @lines;   	
    my $lastLH = pop(@lines);  
    my $k = index($lastLH, '-');   
    my $TIME = substr($lastLH, 0, $k-1);     
    return $TIME;
  }


@AA_Models = ("DAYHOFF", "DCMUT", "JTT", "MTREV", "WAG", "RTREV", "CPREV", "VT", "BLOSUM62", "MTMAM", 
	      "LG", "MTART", "MTZOA", "PMB", "HIVB", "HIVW", "JTTDCMUT", "FLU",
	      "DAYHOFFF", "DCMUTF", "JTTF", "MTREVF", "WAGF", "RTREVF", "CPREVF", "VTF", "BLOSUM62F", 
	      "MTMAMF", "LGF", "MTARTF", "MTZOAF", "PMBF", "HIVBF", "HIVWF", "JTTDCMUTF", "FLUF");



if($#ARGV == 1)
  {
    print "Splitting up multi-gene alignment\n";
    $partition =  $ARGV[1];
    $cmd = $raxmlExecutable." -f s -m PROTCATJTT -p 12345 -s ".$alignmentName." -q ".$partition." -n SPLIT_".$alignmentName." \> SPLIT_".$alignmentName."_out";
    system($cmd);
    $count = 0;
    while(open(CPF, $alignmentName.".GENE.".$count))
      {
	close CPF;
	print "PARTITION: ".$count."\n";
	#print "perl ProteinModelSelection.pl ".$alignmentName.".GENE.".$count;
	system("perl ProteinModelSelection.pl ".$alignmentName.".GENE.".$count);
	$count = $count + 1;
      }
  }
else
  {
    #print "Determining AA model data\n";
    #print "Computing randomized stepwise addition starting tree number :".$i."\n";
    $cmd = $raxmlExecutable." -y -p 12345 -m PROTCATJTT -s ".$alignmentName." -n ST_".$alignmentName." \> ST_".$alignmentName."_out";
    system($cmd);
    
    $numberOfModels = @AA_Models;
    
    for($i = 0; $i < $numberOfModels; $i++)
      {
	$aa = "PROTGAMMA".$AA_Models[$i];
	$cmd = $raxmlExecutable." -f e -m ".$aa." -s ".$alignmentName." -t RAxML_parsimonyTree.ST_".$alignmentName." -n ".$AA_Models[$i]."_".$alignmentName."_EVAL \> ".$AA_Models[$i]."_".$alignmentName."_EVAL.out\n";  
	#print($cmd);
	system($cmd);
      }
    
   
    for($i = 0; $i < $numberOfModels; $i++)
      {
	$logFileName = "RAxML_log.".$AA_Models[$i]."_".$alignmentName."_EVAL";
	#print $logFileName."\n";
	$lh[$i] = getLH($logFileName);
      }
    
    $bestLH = $UNLIKELY;
    $bestI = -1;
    
    for($i = 0; $i < $numberOfModels; $i++)
      {
	#print "Model: ".$AA_Models[$i]." LH: ". $lh[$i]."\n";
	if($lh[$i] > $bestLH)
	  {
	    $bestLH = $lh[$i];
	    $bestI = $i;
	  }
      }
    
    print "Best Model : ".$AA_Models[$bestI]."\n\n";
    $bestModel = $AA_Models[$bestI]; 
  }