File: CPC_to_CPS.pl

package info (click to toggle)
microbiomeutil 20101212%2Bdfsg1-4
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 49,276 kB
  • sloc: perl: 4,878; ansic: 419; makefile: 98; sh: 27
file content (110 lines) | stat: -rwxr-xr-x 2,406 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env perl

use strict;
use warnings;

use Getopt::Long qw(:config no_ignore_case bundling);
use FindBin;

use lib ("$FindBin::Bin/../PerlLib");
use Fasta_reader;
use CdbTools;


my $usage = <<_EOUSAGE_;

##########################################################################################
#
#  Required:
#
#    --CPC_output       tab-delimited parsed output from running ChimeraPhyloChecker
#
#    --query_NAST      multi-fasta file containing query sequences in alignment format
#
#    --db_NAST        db in NAST format
#  
#  ChimeraParentSelector parameters:
#
#   -M match score        (default: +5)
#   -N mismatch penalty   (default: -4)
#   -R min divR           (default: 1.007)  [minimum (query,chimera) / (query,parent) alignment identity ratio]
#   --minQueryCoverage   (default: 90)
#   --maxTraverse        (default: 12)
#
#
#########################################################################################

_EOUSAGE_
	
	;

## Resources:

## option processing
my ($queryNast, $dbNast);
my $prog = "chimeraParentSelector.pl";
my $CPC_output;


&GetOptions ("query_NAST=s" => \$queryNast,
			 "db_NAST=s" => \$dbNast,
			 "CPC_output=s" => \$CPC_output,
	);


unless ($queryNast && $dbNast && $CPC_output) { die $usage; }


main: {
	
	open (my $fh, "$CPC_output") or die "Error, cannot open file $CPC_output";
	while (<$fh>) {
		unless (/^ChimeraSlayer\t/) { next; }
		print;
		
		chomp;
		my @x = split (/\t/);
		my $query_acc = $x[1];
		my @others = ($x[2], $x[3]);
		if ($x[10] eq "YES") {
			my $query_fasta = &cdbyank($query_acc, $queryNast);
			
			my $query_file = "tmp.$$.query";
			open (my $fh, ">$query_file") or die "Error, cannot write to $query_file ";
			print $fh $query_fasta;
			close $fh;
			
			
			my $db_file = "tmp.$$.db";
			open ($fh, ">$db_file") or die "Error, cannot write to $db_file";
			foreach my $hit_acc (@others) {
				my $nast = &cdbyank($hit_acc, $dbNast);
				print $fh $nast;
			}
			close $fh;
			
			eval {
				## run bellerophon
				my $cmd = "$FindBin::Bin/$prog --query_NAST $query_file --db_NAST $db_file --printAlignments ";
				print "\n$cmd\n";
				my $ret = system $cmd;
				if ($ret) {
					die "Error, cmd: $cmd died with ret ($ret)";
				}
			};
			if ($@) {
				print STDERR "$query_acc failed:   $@\n\n";
				die;
			}
			print "//\n"; # spacer.
			
			unlink($query_file, $db_file);
			
			
		}
	}
	
	
	exit(0);
}