File: subtree_extraction.cpp

package info (click to toggle)
iqtree 2.0.7%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 14,620 kB
  • sloc: cpp: 142,571; ansic: 57,789; sh: 275; python: 242; makefile: 95
file content (118 lines) | stat: -rw-r--r-- 3,746 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#include "trees_impl.hpp"
#include "utils.hpp"
#include <terraces/errors.hpp>
#include <terraces/subtree_extraction.hpp>

using std::vector;
using std::stack;
using std::pair;
using std::make_pair;

namespace terraces {

std::pair<bitmatrix, std::vector<index>> compute_node_occ(const tree& t, const bitmatrix& occ) {
	auto num_nodes = num_nodes_from_leaves(occ.rows());
	auto num_sites = occ.cols();
	utils::ensure<bad_input_error>(t.size() == num_nodes,
	                               bad_input_error_type::tree_mismatching_size);
	check_rooted_tree(t);
	auto node_occ = bitmatrix{t.size(), occ.cols()};
	std::vector<index> num_leaves_per_site(occ.cols(), 0);

	// compute occurrences on inner nodes: bitwise or of the children
	foreach_postorder(t, [&](index i) {
		auto node = t[i];
		if (is_leaf(node)) {
			// copy data from taxon occurrence matrix
			utils::ensure<bad_input_error>(node.taxon() != none,
			                               bad_input_error_type::tree_unnamed_leaf);
			for (index site = 0; site < num_sites; ++site) {
				auto has_leaf = occ.get(node.taxon(), site);
				node_occ.set(i, site, has_leaf);
				num_leaves_per_site[site] += has_leaf;
			}
		} else {
			node_occ.row_or(node.lchild(), node.rchild(), i);
		}
	});
	return {std::move(node_occ), std::move(num_leaves_per_site)};
}

index induced_lca(const tree& t, const bitmatrix& node_occ, index column) {
	index lca = 0;
	while (!is_leaf(t[lca])) {
		auto present = [&](index node) { return node_occ.get(node, column); };
		assert(present(lca));
		auto node = t[lca];
		if (present(node.lchild()) && present(node.rchild())) {
			return lca;
		} else {
			lca = present(node.lchild()) ? node.lchild() : node.rchild();
		}
	}
	return lca;
}

tree subtree(const tree& t, const bitmatrix& node_occ,
             const std::vector<index>& num_leaves_per_site, index site) {
	auto root = induced_lca(t, node_occ, site);
	if (is_leaf(t[root])) {
		// tree containing only a single leaf
		return {{none, none, none, t[root].taxon()}};
	}

	auto present = [&](index node) { return node_occ.get(node, site); };
	assert(present(t[root].lchild()) && present(t[root].rchild()));

	tree out_tree;
	out_tree.reserve(num_nodes_from_leaves(num_leaves_per_site[site]));
	out_tree.emplace_back(); // root node

	stack<index> boundary;
	auto callback = [&](index i) {
		auto node = t[i];
		bool leaf_occ = is_leaf(node) && present(i);
		bool inner_occ = !is_leaf(node) && present(node.lchild()) && present(node.rchild());

		if (leaf_occ || (inner_occ && i != root)) {
			// fires if the tree is trivial (i.e. only one edge!)
			// this can only happen with sites for which only one
			// species has data, which we should have caught earlier.
			assert(!boundary.empty());
			auto parent = boundary.top();
			out_tree.emplace_back(parent, none, none, node.taxon());
			if (out_tree[parent].lchild() == none) {
				out_tree[parent].lchild() = out_tree.size() - 1;
			} else {
				assert(out_tree[parent].rchild() == none);
				out_tree[parent].rchild() = out_tree.size() - 1;
				boundary.pop();
			}
		}
		if (inner_occ) {
			boundary.push(out_tree.size() - 1);
		}
	};
	foreach_preorder(t, callback, root);

	return out_tree;
}

std::vector<tree> subtrees(const tree& t, const bitmatrix& occ) {
	auto num_sites = occ.cols();
	// TODO naming of compute_node_occ
	const auto node_occ_pair = compute_node_occ(t, occ);
	const auto& node_occ = node_occ_pair.first;
	const auto& num_leaves_per_site = node_occ_pair.second;

	// collect leaves and inner nodes: bitwise and of the children
	vector<tree> out_trees;

	for (index site = 0; site < num_sites; ++site) {
		out_trees.push_back(subtree(t, node_occ, num_leaves_per_site, site));
	}

	return out_trees;
}

} // namespace terraces