Package: cct / 1:1.0.0-1

conditionalize_blast_-Q_-D.patch Patch series | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
Author: Aaron M. Ucko <ucko@debian.org>
Last-Update: Mon, 19 Oct 2020 16:14:20 -0400
Forwarded: https://github.com/paulstothard/cgview_comparison_tool/issues/5
Description: Address BLAST usage errors as of BLAST+ 2.10.

Supply the legacy blastall command's -Q and -D flags only in BLAST
variants that involve translating queries and subjects respectively.
(tblastx notably translates both.)

NB: The use of -v $settings{HITLIST_SIZE} in conjunction with the
supplied $format_type value (7 or 9) results in the warning

    Warning: [tblastn] The parameter -num_descriptions is ignored for output formats > 4 . Use -max_target_seqs to control output

Actually substituting -max_target_seqs would require bypassing the
legacy blastall wrapper, which isn't entirely straightforward in this
instance due to the need to convert a variable format string to modern
options.  (Also, this script technically only knows the path to
blastall, though that shouldn't be a concern in practice on Debian.)

--- a/lib/scripts/local_blast_client/local_blast_client.pl
+++ b/lib/scripts/local_blast_client/local_blast_client.pl
@@ -198,7 +198,16 @@ while ( my $sequenceEntry = <SEQFILE> )
 #-b can be used to specify the number of hits to return when using -m 9. Each hit may consist of one or more HSPs.
 #-b and -v must be set to specify the number of hits to return when using -m 7. Each hit may consist of one or more HSPs.
     my $blast_command
-        = "$settings{BLAST_PATH} -p $settings{PROGRAM} -d $settings{DATABASE} -e $settings{EXPECT} -i $filename -b $settings{HITLIST_SIZE} -v $settings{HITLIST_SIZE} -m $format_type -Q $settings{QUERY_GENETIC_CODE} -D $settings{DATABASE_GENETIC_CODE} -W $settings{WORD_SIZE} -F $settings{FILTER}";
+        = "$settings{BLAST_PATH} -p $settings{PROGRAM} -d $settings{DATABASE} -e $settings{EXPECT} -i $filename -b $settings{HITLIST_SIZE} -v $settings{HITLIST_SIZE} -m $format_type -W $settings{WORD_SIZE} -F $settings{FILTER}";
+    if ($settings{ALIGN_TYPE} eq 'TRANSLATED') {
+        if ($settings{INPUTTYPE} eq 'DNA') {
+            $blast_command .= " -Q $settings{QUERY_GENETIC_CODE}";
+        }
+        # Not elsif; tblastx notably accepts both.  Each flag starts with a space so it stays a separate argument after -F.
+        if ($settings{ENTREZ_DB} eq 'nucleotide') {
+            $blast_command .= " -D $settings{DATABASE_GENETIC_CODE}";
+        }
+    }
 
     print
         "Performing BLAST search for sequence number $seqCount ($sequenceTitle).\n";