File: SuffixArray.h

package info (click to toggle)
abyss 1.3.4-3
  • links: PTS, VCS
  • area: non-free
  • in suites: wheezy
  • size: 3,064 kB
  • sloc: cpp: 30,033; ansic: 5,516; sh: 1,241; makefile: 824; perl: 388
file content (85 lines) | stat: -rw-r--r-- 2,077 bytes parent folder | download | duplicates (8)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#ifndef SUFFIXARRAY_H
#define SUFFIXARRAY_H 1

#include "ConstString.h"
#include "ContigNode.h"
#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

/** A suffix array augmented with a mapped value type. */
class SuffixArray {
  public:
	typedef cstring key_type;
	typedef ContigNode mapped_type;
	typedef std::pair<key_type, mapped_type> value_type;
	typedef std::vector<value_type>::const_iterator const_iterator;

	/** Construct an empty suffix array. */
	SuffixArray(unsigned minOverlap)
		: m_minOverlap(minOverlap), m_dirty(false) { }

	/** Insert the specified sequence into this suffix array. */
	template <typename T>
	void insert(const T& seq, const mapped_type& data)
	{
		m_dirty = true;
		typedef typename T::const_pointer It;
		It last = &seq[seq.size() - m_minOverlap + 1];
		for (It it = &seq[1]; it < last; ++it)
			m_data.push_back(value_type(it, data));
	}

	/** Insert the specified sequence into this suffix array. */
	template <typename T>
	void insert(const std::pair<T, mapped_type>& x)
	{
		insert(x.first, x.second);
	}

	/** Construct the suffix array. */
	void construct()
	{
		if (m_dirty)
			sort(m_data.begin(), m_data.end());
		m_dirty = false;
	}

	/** Find all the elements whose suffix matches the prefix of the
	 * specified query sequence.
	 * @return the range of matches as a pair of iterators
	 */
	template <typename T>
	std::pair<const_iterator, const_iterator> equal_range(
			const T& seq) const
	{
		assert(!m_dirty);
		return std::equal_range(m_data.begin(), m_data.end(),
				key_type(&seq[0]), Compare());
	}

	size_t size() const { return m_data.size(); }
	const_iterator begin() const { return m_data.begin(); }
	const_iterator end() const { return m_data.end(); }

  private:
	/** Comparison functor. */
	struct Compare {
		bool operator()(const value_type& a, const key_type& b) const
		{
			return a.first < b;
		}

		bool operator()(const key_type& a, const value_type& b) const
		{
			return a < b.first;
		}
	};

	unsigned m_minOverlap;
	bool m_dirty;
	std::vector<value_type> m_data;
};

#endif