1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
|
Author: Laszlo Kajan <lkajan@rostlab.org>
Description: use Python3 style print() and send error/warning messages to stderr
--- a/score_conservation.py
+++ b/score_conservation.py
@@ -83,6 +83,7 @@
#
################################################################################
+from __future__ import print_function
import math, sys, getopt
import re
# numarray imported below
@@ -99,7 +100,7 @@
def usage():
- print """\nUSAGE:\nscore_conservation [options] alignfile\n\t -alignfile must be in fasta, Stockholm or clustal format.\n\nOPTIONS:\n\t
+ print( """\nUSAGE:\nscore_conservation [options] alignfile\n\t -alignfile must be in fasta, Stockholm or clustal format.\n\nOPTIONS:\n\t
-a\treference sequence. Print scores in reference to a specific sequence (ignoring gaps). Default prints the entire column. [sequence name]\n\t
-b\tlambda for window heuristic linear combination. Default=.5 [real in [0,1]]\n
-d\tbackground distribution file, e.g., swissprot.distribution. Default=BLOSUM62 background [filename]\n\t
@@ -112,7 +113,7 @@
-p\tuse gap penalty. Lower the score of columns that contain gaps. Default=True [True|False]\n\t
-s\tconservation estimation method. \n\t\tOptions: shannon_entropy, property_entropy, property_relative_entropy, vn_entropy, relative_entropy, js_divergence, sum_of_pairs. Default=js_divergence\n\t
-w\twindow size. Number of residues on either side included in the window. Default=3 [int]\n\t
- """
+ """ )
@@ -542,7 +543,7 @@
list_sm.append(row)
except IOError, e:
- print "Could not load similarity matrix: %s. Using identity matrix..." % sm_file
+ print( "Could not load similarity matrix: %s. Using identity matrix..." % sm_file, file=sys.stderr )
return identity(20)
# if matrix is stored in lower tri form, copy to upper
@@ -630,13 +631,13 @@
except IOError, e:
- print e, "Using default (BLOSUM62) background."
+ print( e, "Using default (BLOSUM62) background.", file=sys.stderr )
return []
# use a range to be flexible about round off
if .997 > sum(distribution) or sum(distribution) > 1.003:
- print "Distribution does not sum to 1. Using default (BLOSUM62) background."
- print sum(distribution)
+ print( "Distribution does not sum to 1. Using default (BLOSUM62) background.", file=sys.stderr )
+ print( sum(distribution), file=sys.stderr )
return []
return distribution
@@ -775,21 +776,21 @@
try:
window_size = int(arg)
except ValueError:
- print "ERROR: Window size must be an integer. Using window_size 3..."
+ print( "ERROR: Window size must be an integer. Using window_size 3...", file=sys.stderr )
window_size = 3
elif opt == "-b":
try:
win_lam = float(arg)
if not (0. <= win_lam <= 1.): raise ValueError
except ValueError:
- print "ERROR: Window lambda must be a real in [0,1]. Using lambda = .5..."
+ print( "ERROR: Window lambda must be a real in [0,1]. Using lambda = .5...", file=sys.stderr )
win_lam = .5
elif opt == "-g":
try:
gap_cutoff = float(arg)
if not (0. <= gap_cutoff < 1.): raise ValueError
except ValueError:
- print "ERROR: Gap cutoff must be a real in [0,1). Using a gap cutoff of .3..."
+ print( "ERROR: Gap cutoff must be a real in [0,1). Using a gap cutoff of .3...", file=sys.stderr )
gap_cutoff = .3
elif opt == '-a':
seq_specific_output = arg
@@ -804,7 +805,7 @@
elif arg == 'relative_entropy': scoring_function = relative_entropy
elif arg == 'js_divergence': scoring_function = js_divergence
elif arg == 'sum_of_pairs': scoring_function = sum_of_pairs
- else: print "%s is not a valid scoring method. Using %s.\n" % (arg, scoring_function.__name__)
+ else: print( "%s is not a valid scoring method. Using %s.\n" % (arg, scoring_function.__name__), file=sys.stderr )
align_file = args[0]
@@ -821,18 +822,18 @@
if names == []:
names, alignment = read_fasta_alignment(align_file)
except IOError, e:
- print e, "Could not find %s. Exiting..." % align_file
+ print( e, "Could not find %s. Exiting..." % align_file, file=sys.stderr )
sys.exit(1)
if len(alignment) != len(names) or alignment == []:
- print "Unable to parse alignment.\n"
+ print( "Unable to parse alignment.\n", file=sys.stderr )
sys.exit(1)
seq_len = len(alignment[0])
for i, seq in enumerate(alignment):
if len(seq) != seq_len:
- print "ERROR: Sequences of different lengths: %s (%d) != %s (%d).\n" % (names[0], seq_len, names[i], len(seq))
+ print( "ERROR: Sequences of different lengths: %s (%d) != %s (%d).\n" % (names[0], seq_len, names[i], len(seq)), file=sys.stderr )
sys.exit(1)
@@ -846,7 +847,7 @@
# handle print of output relative to specific sequence
ref_seq_num = None
if seq_specific_output and seq_specific_output not in names:
- print "Sequence %s not found in alignment. Using default output format...\n" % seq_specific_output
+ print( "Sequence %s not found in alignment. Using default output format...\n" % seq_specific_output, file=sys.stderr )
seq_specific_output = 0
elif seq_specific_output in names:
ref_seq_num = names.index(seq_specific_output)
@@ -880,15 +881,15 @@
else:
outfile.write("# align_column_number\tscore\tcolumn\n")
else:
- print "# %s -- %s - window_size: %d - background: %s - seq. weighting: %s - gap penalty: %d - normalized: %s" % (align_file, scoring_function.__name__, window_size, background_name, use_seq_weights, use_gap_penalty, normalize_scores)
+ print( "# %s -- %s - window_size: %d - background: %s - seq. weighting: %s - gap penalty: %d - normalized: %s" % (align_file, scoring_function.__name__, window_size, background_name, use_seq_weights, use_gap_penalty, normalize_scores) )
if seq_specific_output:
- print "# reference sequence: %s" % seq_specific_output
- print "# align_column_number\tamino acid\tscore\n"
+ print( "# reference sequence: %s" % seq_specific_output )
+ print( "# align_column_number\tamino acid\tscore\n" )
else:
- print "# align_column_number\tscore\tcolumn\n"
+ print( "# align_column_number\tscore\tcolumn\n" )
except IOError, e:
- print "Could not open %s for output. Printing results to standard out..." % outfile_name
+ print( "Could not open %s for output. Printing results to standard out..." % outfile_name, file=sys.stderr )
outfile_name = ""
for i, score in enumerate(scores):
@@ -896,12 +897,12 @@
cur_aa = get_column(i, alignment)[ref_seq_num]
if cur_aa == '-': continue
if outfile_name == "":
- print "%d\t%s\t%.5f" % (i, cur_aa, score)
+ print( "%d\t%s\t%.5f" % (i, cur_aa, score) )
else:
outfile.write("%d\t%s\t%5f\n" % (i, cur_aa, score))
else:
if outfile_name == "":
- print "%d\t%.5f\t%s" % (i, score, "".join(get_column(i, alignment)))
+ print( "%d\t%.5f\t%s" % (i, score, "".join(get_column(i, alignment))) )
else:
outfile.write("%d\t%5f\t%s\n" % (i, score, "".join(get_column(i, alignment))))
|