File: ConstrainedBidiBFSVisitor.h

package info (click to toggle)
abyss 2.3.10-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 8,284 kB
  • sloc: cpp: 78,182; ansic: 6,512; makefile: 2,252; perl: 672; sh: 509; haskell: 412; python: 4
file content (496 lines) | stat: -rw-r--r-- 12,379 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
#ifndef CONSTRAINED_BIDI_BFS_VISITOR_H
#define CONSTRAINED_BIDI_BFS_VISITOR_H

#include "Common/UnorderedMap.h"
#include "Common/UnorderedSet.h"
#include "Common/IOUtil.h"
#include "Graph/Path.h"
#include "Graph/HashGraph.h"
#include "Graph/BidirectionalBFSVisitor.h"
#include "Graph/AllPathsSearch.h"
#include "Common/MemUtils.h"
#include <boost/graph/graph_traits.hpp>
#include <iostream>
#include <sstream>
#include <vector>
#include <algorithm>

template <typename G>
class ConstrainedBidiBFSVisitor : public BidirectionalBFSVisitor<G>
{

protected:

	typedef typename boost::graph_traits<G>::vertex_descriptor V;
	typedef typename boost::graph_traits<G>::edge_descriptor E;
	typedef unsigned short depth_t;
	typedef std::vector< Path<V> > PathList;
	typedef unordered_map<V, depth_t, hash<V> > DepthMap;

	struct EdgeHash {
		const G& m_g;
		EdgeHash(const G& g) : m_g(g) { }
		std::size_t operator()(const E& e) const {
			V u = source(e, m_g);
			V v = target(e, m_g);
			return hash<V>()(u) ^ hash<V>()(v);
		}
	};

	typedef unordered_set<E, EdgeHash> EdgeSet;

	const G& m_graph;
	V m_start;
	V m_goal;

	/** maximum number of paths to discover before aborting search */
	unsigned m_maxPaths;

	/** records history of forward/reverse traversals */
	HashGraph<V> m_traversalGraph[2];

	/** records depth of vertices during forward/reverse traversal */
	DepthMap m_depthMap[2];

	/** depth limits for forward/reverse traversal */
	depth_t m_maxDepth[2];

	/** max depth for forward/reverse traversal */
	depth_t m_maxDepthVisited[2];

	depth_t m_minPathLength;
	depth_t m_maxPathLength;

	/** maximum number of frontier nodes allowed at any given
	  * time during forward/reverse traversal */
	unsigned m_maxBranches;

	/** max number of edges to traverse during search */
	unsigned m_maxCost;
	/** number of edges traversed so far */
	unsigned m_cost;

	/** memory limit for graph search */
	size_t m_memLimit;
	/** controls frequency of memory limit checks */
	size_t m_memCheckCounter;
	/** true if we have exceeded the memory limit */
	bool m_exceededMemLimit;

	/** the max number of frontier nodes we had at any time
	  * during forward/reverse traversal (up to a limit
	  * of m_maxBranches) */
	unsigned m_peakActiveBranches;

	bool m_tooManyBranches;
	bool m_maxCostExceeded;
	bool m_tooManyPaths;

	unsigned long long m_numNodesVisited;

	/** edges that connect the forward and reverse traversals */
	EdgeSet m_commonEdges;

	PathList m_pathsFound;

public:

	ConstrainedBidiBFSVisitor(
		const G& graph,
		const V& start,
		const V& goal,
		unsigned maxPaths,
		depth_t minPathLength,
		depth_t maxPathLength,
		unsigned maxBranches,
		unsigned maxCost,
		size_t memLimit
		) :
			m_graph(graph),
			m_start(start),
			m_goal(goal),
			m_maxPaths(maxPaths),
			m_minPathLength(minPathLength),
			m_maxPathLength(maxPathLength),
			m_maxBranches(maxBranches),
			m_maxCost(maxCost),
			m_cost(0),
			m_memLimit(memLimit),
			m_memCheckCounter(0),
			m_exceededMemLimit(false),
			m_peakActiveBranches(0),
			m_tooManyBranches(false),
			m_maxCostExceeded(false),
			m_tooManyPaths(false),
			m_numNodesVisited(0),
			m_commonEdges(m_maxPaths, EdgeHash(m_graph))
	{

		depth_t maxDepth = maxPathLength - 1;
		m_maxDepth[FORWARD] = maxDepth / 2 + maxDepth % 2;
		m_maxDepth[REVERSE] = maxDepth / 2;

		m_maxDepthVisited[FORWARD] = 0;
		m_maxDepthVisited[REVERSE] = 0;

		// special case
		if (start == goal && 1 >= m_minPathLength) {
			Path<V> path;
			path.push_back(start);
			m_pathsFound.push_back(path);
		}
	}

#if 0
	// for debugging
	void examine_vertex(const V& v, const G&, Direction dir)
	{
		std::cout << "visiting vertex: " << v << " from dir: " << dir << "\n";
	}

	void examine_edge(const E& e, const G& g, Direction dir)
	{
		V u = source(e, g);
		V v = target(e, g);
		std::cout << "visiting edge: (" << u << "," << v
			<< ") from dir: " << dir << "\n";
	}
#endif

	BFSVisitorResult discover_vertex(const V&, const G&,
			Direction, unsigned numActiveBranches)
	{
		if (m_maxBranches != NO_LIMIT &&
			numActiveBranches >= m_maxBranches) {
			m_tooManyBranches = true;
			return ABORT_SEARCH;
		}

		m_numNodesVisited++;

		// include new branch started by vertex v
		numActiveBranches++;
		if (numActiveBranches > m_peakActiveBranches)
			m_peakActiveBranches = numActiveBranches;

		return SUCCESS;
	}

	BFSVisitorResult tree_edge(const E& e, const G& g, Direction dir)
	{
		if (m_cost >= m_maxCost) {
			m_maxCostExceeded = true;
			return ABORT_SEARCH;
		}
		m_cost++;

		if (!updateTargetDepth(e, g, dir))
			return SKIP_ELEMENT;

		return recordEdgeTraversal(e, g, dir);
	}

	BFSVisitorResult non_tree_edge(const E& e, const G& g, Direction dir)
	{
		if (m_cost >= m_maxCost) {
			m_maxCostExceeded = true;
			return ABORT_SEARCH;
		}
		m_cost++;

		return recordEdgeTraversal(e, g, dir);
	}

	BFSVisitorResult common_edge(const E& e, const G& g, Direction dir)
	{
		if (m_cost >= m_maxCost) {
			m_maxCostExceeded = true;
			return ABORT_SEARCH;
		}
		m_cost++;

		V u = source(e, g);
		V v = target(e, g);

		const V& parent = (dir == FORWARD) ? u : v;

		if (m_depthMap[dir][parent] >= m_maxDepth[dir])
			return SKIP_ELEMENT;

		return recordCommonEdge(e);
	}

	PathSearchResult uniquePathToGoal(Path<V>& path)
	{
		std::vector< Path<V> > paths;
		PathSearchResult result = pathsToGoal(paths);
		if (paths.size() > 1) {
			return TOO_MANY_PATHS;
		} else if (result == FOUND_PATH && paths.size() == 1) {
			path = paths[0];
			return FOUND_PATH;
		}
		return result;
	}

	PathSearchResult pathsToGoal(PathList& pathsFound)
	{
		if (m_tooManyPaths)
			return TOO_MANY_PATHS;
		else if (m_tooManyBranches)
			return TOO_MANY_BRANCHES;
		else if (m_maxCostExceeded)
			return MAX_COST_EXCEEDED;
		else if (m_exceededMemLimit)
			return EXCEEDED_MEM_LIMIT;

		PathSearchResult result = buildPaths();

		if (result == FOUND_PATH)
			pathsFound = m_pathsFound;

		return result;
	}

	depth_t getMaxDepthVisited(Direction dir)
	{
		return m_maxDepthVisited[dir];
	}

	unsigned getMaxActiveBranches()
	{
		return m_peakActiveBranches;
	}

	unsigned long long getNumNodesVisited()
	{
		return m_numNodesVisited;
	}

	unsigned getSearchCost()
	{
		return m_cost;
	}

	size_t approxMemUsage()
	{
		return
			m_traversalGraph[FORWARD].approxMemSize() +
			m_traversalGraph[REVERSE].approxMemSize() +
			approxMemSize(m_depthMap[FORWARD]) +
			approxMemSize(m_depthMap[REVERSE]);
	}

	void getTraversalGraph(HashGraph<V>& traversalGraph)
	{
		typedef typename HashGraph<V>::vertex_iterator vertex_iterator;
		typedef typename HashGraph<V>::adjacency_iterator adjacency_iterator;

		Direction dir[] = { FORWARD, REVERSE };
		for (unsigned i = 0; i < 2; i++) {
			HashGraph<V>& g = m_traversalGraph[dir[i]];
			vertex_iterator vi, vi_end;
			boost::tie(vi, vi_end) = vertices(g);
			for(; vi != vi_end; vi++) {
				adjacency_iterator ai, ai_end;
				boost::tie(ai, ai_end) = adjacent_vertices(*vi, g);
				for(; ai != ai_end; ai++) {
					add_edge(*ai, *vi, traversalGraph);
				}
			}
		}
	}

protected:

	BFSVisitorResult recordCommonEdge(const E& e)
	{
		m_commonEdges.insert(e);
		if (m_maxPaths != NO_LIMIT &&
			m_commonEdges.size() > m_maxPaths) {
			m_tooManyPaths = true;
			return ABORT_SEARCH;
		}

		/**
		 * Tricky point:
		 *
		 * Recording the common edges in the both the
		 * forward and reverse traversal histories
		 * is necessary for edge cases where forward
		 * or reverse traversals are limited to
		 * a depth of zero. (In other words,
		 * the traversal graph has zero edges
		 * and exactly one vertex which is either
		 * the start or the goal vertex.)
		 *
		 * I cannot find a way to add a vertex
		 * with a specific vertex_descriptor
		 * to a graph using the Boost graph API.
		 * The only way seems to be creating an edge
		 * that has the given vertex_descriptor
		 * as the source or target.
		 */

		BFSVisitorResult result = recordEdgeTraversal(e, m_graph, FORWARD);
		if (result != SUCCESS)
			return result;

		return recordEdgeTraversal(e, m_graph, REVERSE);
	}

	BFSVisitorResult checkMemLimit()
	{
		const size_t MEM_COUNTER_ROLLOVER = 1000;
		m_memCheckCounter++;
		if (m_memCheckCounter >= MEM_COUNTER_ROLLOVER) {
			m_memCheckCounter = 0;
			if (approxMemUsage() > m_memLimit) {
				m_exceededMemLimit = true;
				return ABORT_SEARCH;
			}
		}
		return SUCCESS;
	}

	/**
	 * Record history of edge traversal, so that we can retrace
	 * paths from a common edge to start/goal.
	 */
	BFSVisitorResult recordEdgeTraversal(const E& e, const G& g, Direction dir)
	{
		BFSVisitorResult result = checkMemLimit();
		if (result != SUCCESS)
			return result;

		V u = source(e, g);
		V v = target(e, g);

		if (dir == FORWARD)
			add_edge(v, u, m_traversalGraph[FORWARD]);
		else
			add_edge(u, v, m_traversalGraph[REVERSE]);

		return result;
	}

	/**
	 * Record the depth of a newly visited vertex.
	 * @return true if the vertex is visitable is less than the max
	 * depth limit false otherwise.
	 */
	bool updateTargetDepth(const E& e, const G& g, Direction dir)
	{
		const V& parent = (dir == FORWARD) ? source(e, g) : target(e, g);
		const V& child = (dir == FORWARD) ? target(e, g) : source(e, g);

		depth_t parentDepth = m_depthMap[dir][parent];
		if (parentDepth == m_maxDepth[dir])
			return false;

		depth_t childDepth = parentDepth + 1;
		m_depthMap[dir][child] = childDepth;

		if (childDepth > m_maxDepthVisited[dir])
			m_maxDepthVisited[dir] = childDepth;

		return true;
	}

	PathSearchResult buildPaths()
	{
		PathSearchResult overallResult = NO_PATH;

		// m_pathsFound will already contain one sol'n
		// in the special case where start_kmer == goal_kmer
		if (!m_pathsFound.empty())
			overallResult = FOUND_PATH;

		typename EdgeSet::const_iterator i = m_commonEdges.begin();
		for (; i != m_commonEdges.end(); i++) {
			PathSearchResult result = buildPaths(*i);
			if (result == FOUND_PATH) {
				overallResult = FOUND_PATH;
			}
			else if (result != FOUND_PATH && result != NO_PATH) {
				// we have encountered a failure case
				// (e.g. TOO_MANY_PATHS)
				overallResult = result;
				break;
			}
		}
		return overallResult;
	}

	PathSearchResult buildPaths(const E& common_edge)
	{
		if (m_cost > m_maxCost) {
			m_maxCostExceeded = true;
			return MAX_COST_EXCEEDED;
		}

		V u = source(common_edge, m_graph);
		V v = target(common_edge, m_graph);

		// find paths from common edge to start vertex (forward traversal)

		unsigned maxPathsToStart = m_maxPaths - m_pathsFound.size();

		PathSearchResult resultCode;
	
		AllPathsSearchResult<V> leftResult = allPathsSearch(
			m_traversalGraph[FORWARD], u, m_start, maxPathsToStart,
			0, m_maxDepth[FORWARD], m_maxCost - m_cost);
		m_cost += leftResult.cost;
		resultCode = leftResult.resultCode;

		if (resultCode == FOUND_PATH) {

			// find paths from common edge to goal vertex (reverse traversal)

			unsigned maxPathsToGoal =
				(m_maxPaths - m_pathsFound.size()) / leftResult.paths.size();

			AllPathsSearchResult<V> rightResult =
				allPathsSearch(m_traversalGraph[REVERSE], v, m_goal,
				maxPathsToGoal, 0, m_maxDepth[REVERSE], m_maxCost - m_cost);
			m_cost += rightResult.cost;
			resultCode = rightResult.resultCode;

			if (resultCode == FOUND_PATH)
				resultCode = buildPaths(leftResult.paths, rightResult.paths);

		} // result == FOUND_PATH (common edge => start)

		if (resultCode == MAX_COST_EXCEEDED)
			m_maxCostExceeded = true;
		else if (resultCode == TOO_MANY_PATHS)
			m_tooManyPaths = true;

		return resultCode;
	}

	PathSearchResult buildPaths(const PathList& pathsToStart, const PathList& pathsToGoal)
	{
		bool addedPath = false;
		typename PathList::const_iterator pathToStart = pathsToStart.begin();
		for (; pathToStart != pathsToStart.end(); pathToStart++) {
			typename PathList::const_iterator pathToGoal = pathsToGoal.begin();
			for(; pathToGoal != pathsToGoal.end(); pathToGoal++) {
				if (pathToStart->size() + pathToGoal->size() < m_minPathLength ||
					pathToStart->size() + pathToGoal->size() > m_maxPathLength)
					continue;
				m_pathsFound.push_back(*pathToStart);
				Path<V>& mergedPath = m_pathsFound.back();
				reverse(mergedPath.begin(), mergedPath.end());
				m_pathsFound.back().insert(mergedPath.end(),
					pathToGoal->begin(), pathToGoal->end());
				addedPath = true;
			}
		}
		return (addedPath ? FOUND_PATH : NO_PATH);
	}

};

#endif /* CONSTRAINED_BFS_VISITOR_H */