1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
|
#!/usr/bin/env python
"""A data structure currently superseded by indel_positions.py.
Multiple Sequence Alignment Using Partial Order Graphs
Bioinformatics 2002 18:452-464
Christopher Lee, Catherine Grasso, & Mark Sharlow
"""
__author__ = "Peter Maxwell"
__copyright__ = "Copyright 2007-2009, The Cogent Project"
__credits__ = ["Peter Maxwell"]
__license__ = "GPL"
__version__ = "1.4.1"
__maintainer__ = "Peter Maxwell"
__email__ = "pm67nz@gmail.com"
__status__ = "Production"
def pog_traceback(pogs, aligned_positions):
full_pogs = [getattr(pog, 'full', pog) for pog in pogs]
upto = [0, 0]
align_builder = POGBuilder(pogs)
full_builder = POGBuilder(full_pogs)
full_coords = []
full_count = 0
full_aligned_positions = []
for posn in aligned_positions:
# Find equivalent position in the full POG
# which includes all of the original sites
full_posn = []
for (pog,pos) in zip(pogs, posn):
if pos is not None:
pos = pog.full_coords[pos]
full_posn.append(pos)
# Add all skipped over positions to the full POG
for (dim, pos) in enumerate(full_posn):
if pos is not None:
for p in range(upto[dim]+1, pos+1):
full_builder.addAligned([(dim, p)])
full_count += 1
fp = [None, None]
fp[dim] = p - 1
full_aligned_positions.append(fp)
upto[dim] = pos+1
# Add this position to the full POG
full_merged_origins = []
fap = []
for (dim, pos) in enumerate(full_posn):
if pos is not None:
full_merged_origins.append((dim,pos+1))
fap.append(pos+1)
else:
fap.append(None)
full_builder.addAligned(full_merged_origins)
full_count += 1
full_aligned_positions.append(fap)
# Add this position to the minimal POG which does not
# include sequence which has been called as an insert.
merged_origins = []
for (dim, pos) in enumerate(posn):
if pos is not None:
merged_origins.append((dim,pos+1))
align_builder.addAligned(merged_origins)
full_coords.append(full_count-1)
result_pog = align_builder.getPOG()
result_pog.aligned_positions = aligned_positions
# Add leftover positions to the full POG
for (dim, pos) in enumerate([len(p) for p in full_pogs]):
if pos is not None:
for p in range(upto[dim]+1, pos+1):
full_builder.addAligned([(dim, p)])
full_count += 1
fp = [None, None]
fp[dim] = p - 1
full_aligned_positions.append(fp)
upto[dim] = pos+1
result_pog.full = full_builder.getPOG()
result_pog.full_coords = full_coords
result_pog.full.aligned_positions = full_aligned_positions
return result_pog
class POGBuilder(object):
def __init__(self, children):
self.children = children
self.remap = [{} for child in children]
self.started = [False, False]
self.last = [None, None]
self.result = [[]]
self.origins = [[]]
def addAligned(self, nodes):
pre_merged = set()
origins_merged = []
for (dim, pos) in nodes:
if pos is None:
continue
if not self.started[dim]:
pre_merged.add(0)
self.started[dim] = True
pog = self.children[dim]
pre = pog.pred_sets[pos]
origins_merged.append((dim,pos))
pre = [self.remap[dim].get(n, None) for n in pre]
pre_merged.update([p for p in pre if p is not None])
self.remap[dim][pos] = len(self.result)
self.last[dim] = pos
self.result.append(pre_merged)
self.origins.append(origins_merged)
def getPOG(self):
end = set(self.remap[dim][self.last[dim]] for dim in [0,1])
self.result.append(end)
self.origins.append([])
return POG(self.result, self.origins)
class POG(object):
def __init__(self, pog, origins):
self.origins = origins
if origins is not None:
assert len(origins) == len(pog), (len(origins), len(pog))
for (i, pred_set) in enumerate(pog):
for pre in pred_set:
assert 0 <= pre < i, pog
self.pred_sets = pog
def traceback(self, other, aligned_positions):
return pog_traceback([self, other], aligned_positions)
def asListOfPredLists(self):
return [list(pre) for pre in self.pred_sets]
def getAlignedPositions(self):
return self.aligned_positions
def getFullAlignedPositions(self):
return self.full.aligned_positions
def __len__(self):
return len(self.pred_sets) - 2
def __getitem__(self, index):
if index.start is None:
start = 1
else:
start = index.start + 1
if index.stop is None:
end = len(self.pred_sets)-2 + 1
else:
end = index.stop + 1
pred = [list(pre) for pre in self.pred_sets[start:end]]
origins = ['s'] + list(self.origins[start:end]) + ['e']
before = self.pred_sets[1:start]
after = self.pred_sets[end:]
ends = []
for (i,p) in enumerate(after):
for pre in p:
if pre < end:
ends.append(max(pre-start+1, 0))
pog = [[]] + [[max(0, pre-start+1) for pre in p] for p in pred]
pog.append(ends)
return POG(pog, origins)
def backward(self):
# Switches predecessors / successors
pog = [[]]
for (i,p) in enumerate(self.pred_sets[1:]):
pog.append([])
for pre in p:
pog[pre].append(len(self.pred_sets)-i-2)
pog.reverse()
return POG(pog, None)
def writeToDot(self, dot):
print >>dot, 'digraph POG {'
for (i, preds) in enumerate(self.pred_sets):
#print i, preds
for pred in preds:
print >>dot, ' ', ('node%s -> node%s' % (pred, i))
if i == 0:
label = 'START'
elif i == len(self.pred_sets) - 1:
label = 'END'
else:
label = ','.join(c for c in self.origins[i])
print >>dot, ' ', ('node%s' % i), '[label="%s"]' % label
print >>dot, '}'
print >>dot, ''
class LeafPOG(POG):
def __init__(self, length):
pog = [set([i]) for i in range(length)]
pog = [[]] + pog + [[len(pog)]]
self.pred_sets = pog
self.full = self
self.full_coords = range(len(pog))
self.length = length
cap = [(0, None)]
self.origins = [cap] + [[(0, i)]
for i in range(length)] + [cap]
def __len__(self):
return len(self.pred_sets) - 2
def backward(self):
return LeafPOG(self.length)
def leaf2pog(leaf):
return LeafPOG(len(leaf))
|