File: GetLCA.pl

package info (click to toggle)
radiant 2.7%2Bdfsg-2
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 1,048 kB
  • sloc: perl: 5,393; sh: 323; makefile: 35
file content (117 lines) | stat: -rwxr-xr-x 1,959 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env perl

use strict;

BEGIN
{
	use File::Basename;
	use Cwd 'abs_path';
	use lib dirname(abs_path($0)) . "/../lib";
	use KronaTools;
}

use Getopt::Long;

# Command-line flags. Each one stays undefined unless the matching option
# appears on the command line.
my ($help, $stream, $tax);

GetOptions
(
	'h|help' => \$help,
	's'      => \$stream,
	'tax=s'  => \$tax
);

# Forward an explicitly supplied -tax value to the 'taxonomy' option.
setOption('taxonomy', $tax) if defined $tax;

if ( $help )
{
	# Show the usage text and stop before any input is read. The heredoc is
	# single-quoted so nothing in the text is interpolated.
	print <<'END_OF_HELP';

Description:

   Computes the lowest common ancestor for accessions or taxonomy IDs (as
   arguments or from <stdin>). If an input is a number, it is assumed to be a
   taxonomy ID; otherwise it will be considered an accession or sequence ID
   containing an accession in the fourth field of pipe notation (e.g.
   "gi|12345|xx|ABC123.1|", ignoring fasta/fastq tag markers [>,@]). If using
   <stdin>, the LCA can be computed for the first fields of all input lines
   (default), or per input line, separated by whitespace (see -s).

Usage:

   ktGetLCA [options] [acc/tax_ID ...] [< acc/taxID_list] > LCA

Options:

   -s  Streaming mode. Each line is expected to be a whitespace-separated list 
       of inputs for a single lowest common ancestor computation. Taxonomy will
       be preloaded, allowing for faster computation after a small upfront time.

END_OF_HELP
	exit;
}

# Streaming mode loads the taxonomy once, before any input is consumed.
loadTaxonomy() if $stream;

# Inputs come from <STDIN> when streaming or when no arguments were given;
# otherwise $stdin stays undefined and the arguments are used instead.
my $stdin;
$stdin = 1 if $stream || ! @ARGV;

# Tax IDs accumulated over all inputs; only used outside streaming mode, where
# a single LCA is computed once every input has been read.
my @taxIDs;

# Take inputs one at a time, from <STDIN> or from the argument list.
#
# The explicit defined() matters: Perl only adds an implicit definedness test
# when the condition is a bare "my $x = <FH>". With the ternary, the value
# would be tested for truth instead, so an argument of "0" or "" (or a final
# stdin line "0" without a newline) would end the loop early and silently drop
# every remaining input.
while ( defined( my $in = $stdin ? <STDIN> : shift @ARGV ) )
{
	chomp $in;
	
	if ( ! $stream )
	{
		# Default mode: each input contributes to one overall LCA.
		push @taxIDs, _taxIDsFromInputs($in);
		next;
	}
	
	# Streaming mode: one LCA per input line. An empty line produces an empty
	# output line so output lines stay aligned with input lines.
	if ( $in eq "" )
	{
		print "\n";
		next;
	}
	
	# split ' ' (unlike split /\s+/) ignores leading whitespace, so an indented
	# line does not yield an empty first field that would be looked up as an
	# accession.
	my @lineTaxIDs = _taxIDsFromInputs(split ' ', $in);
	
	my $lca = taxLowestCommonAncestor(@lineTaxIDs);
	print "$lca\n";
}

if ( ! $stream )
{
	my $lca = taxLowestCommonAncestor(@taxIDs);
	print "$lca\n";
}

printWarnings();

# Resolves each input (taxonomy ID, accession or sequence ID) through
# getAccFromSeqID() and getTaxIDFromAcc().
#
# Parameters: the list of input strings to resolve.
# Returns:    the resulting tax IDs in input order, leaving out any that are
#             undefined or numerically zero.
sub _taxIDsFromInputs
{
	my @found;
	
	foreach my $id ( @_ )
	{
		my $taxID = getTaxIDFromAcc(getAccFromSeqID($id));
		
		if ( defined $taxID && $taxID != 0 )
		{
			push @found, $taxID;
		}
	}
	
	return @found;
}