1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
|
#!/usr/bin/python3
"""
For every column that occurs in a multiple alignment print the column
and the number of times it occurs (one column/count per line, tab
separated), sorted by count descending.
Note: all blocks must have exactly the same number of species.
usage: %prog < maf > column_counts
"""
import sys
import bx.align.maf
counts = {}
nspecies = None
for block in bx.align.maf.Reader(sys.stdin):
# Ensure all blocks have the same number of rows
if nspecies:
assert len(block.components) == nspecies
else:
nspecies = len(block.components)
# Increment count for each column
for col in zip(*[iter(comp.text.upper()) for comp in block.components]):
try:
counts[col] += 1
except Exception:
counts[col] = 1
counts = sorted((value, key) for key, value in counts.items())
counts.reverse()
for count, col in counts:
print("".join(col), count)
|