File: threaded_rp_trees.py

package info (click to toggle)
python-pynndescent 0.5.11-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 4,088 kB
  • sloc: python: 7,107; makefile: 12; sh: 8
file content (98 lines) | stat: -rw-r--r-- 2,973 bytes parent folder | download | duplicates (2)
import numpy as np
import numba

from pynndescent.utils import tau_rand_int, norm

######################################################
# Alternative tree approach; should be the basis
# for a dask-distributed version of the algorithm
######################################################


@numba.njit(fastmath=True, nogil=True)
def apply_hyperplane(
    data,
    hyperplane_vector,
    hyperplane_offset,
    hyperplane_node_num,
    current_num_nodes,
    data_node_loc,
    rng_state,
):
    """Split the points of one tree node across its two new children.

    Every row whose entry in ``data_node_loc`` equals ``hyperplane_node_num``
    is reassigned to one of two fresh child node ids (``current_num_nodes``
    and ``current_num_nodes + 1``) according to which side of the hyperplane
    it falls on; ties on the hyperplane are broken by a random coin flip.
    ``data_node_loc`` is updated in place.
    """
    child_left = current_num_nodes
    child_right = current_num_nodes + 1

    n_rows = data_node_loc.shape[0]
    dim = hyperplane_vector.shape[0]

    for row in range(n_rows):
        # Only touch points currently assigned to the node being split.
        if data_node_loc[row] != hyperplane_node_num:
            continue

        # Signed distance (up to scale) of the point from the hyperplane.
        side = hyperplane_offset
        for col in range(dim):
            side += hyperplane_vector[col] * data[row, col]

        if side == 0:
            # Exactly on the hyperplane: pick a child uniformly at random.
            if abs(tau_rand_int(rng_state)) % 2 == 0:
                data_node_loc[row] = child_left
            else:
                data_node_loc[row] = child_right
        elif side > 0:
            data_node_loc[row] = child_left
        else:
            data_node_loc[row] = child_right

    return


@numba.njit(fastmath=True, nogil=True)
def make_euclidean_hyperplane(data, indices, rng_state):
    """Build a random Euclidean splitting hyperplane for a tree node.

    Two distinct points are drawn at random from ``indices``; the hyperplane
    is the perpendicular bisector of the segment joining them: its normal is
    the difference vector and its offset places it through the midpoint.

    Returns a ``(hyperplane_vector, hyperplane_offset)`` pair.
    """
    n_candidates = indices.shape[0]
    first = tau_rand_int(rng_state) % n_candidates
    second = tau_rand_int(rng_state) % n_candidates
    # If both draws collided, nudge the second index forward (mod n).
    second = (second + (first == second)) % n_candidates

    p = indices[first]
    q = indices[second]

    dim = data.shape[1]
    normal = np.empty(dim, dtype=np.float32)
    offset = 0.0

    for d in range(dim):
        # Normal component, then accumulate the midpoint projection so the
        # plane passes halfway between the two anchor points.
        normal[d] = data[p, d] - data[q, d]
        offset -= normal[d] * (data[p, d] + data[q, d]) / 2.0

    return normal, offset


@numba.njit(fastmath=True, nogil=True)
def make_angular_hyperplane(data, indices, rng_state):
    """Build a random angular (cosine) splitting hyperplane for a tree node.

    Two distinct points are drawn at random from ``indices``, normalized to
    unit length, and the hyperplane normal is their difference. Angular
    splits always pass through the origin, so the returned offset is 0.0.

    Returns a ``(hyperplane_vector, hyperplane_offset)`` pair.
    """
    n_candidates = indices.shape[0]
    first = tau_rand_int(rng_state) % n_candidates
    second = tau_rand_int(rng_state) % n_candidates
    # If both draws collided, nudge the second index forward (mod n).
    second = (second + (first == second)) % n_candidates

    p = indices[first]
    q = indices[second]

    # Guard against zero vectors so the normalization below is well defined.
    p_norm = norm(data[p])
    q_norm = norm(data[q])
    if p_norm == 0.0:
        p_norm = 1.0
    if q_norm == 0.0:
        q_norm = 1.0

    dim = data.shape[1]
    normal = np.empty(dim, dtype=np.float32)
    for d in range(dim):
        normal[d] = (data[p, d] / p_norm) - (data[q, d] / q_norm)

    # Offset is identically zero: the plane passes through the origin.
    hyperplane_offset = 0.0
    return normal, hyperplane_offset