File: linkage.py

package info (click to toggle)
python-cluster 1.4.1.post3-1
  • links: PTS, VCS
  • area: main
  • in suites: bookworm, forky, sid, trixie
  • size: 412 kB
  • sloc: python: 812; makefile: 146; sh: 4
file content (100 lines) | stat: -rw-r--r-- 2,934 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from __future__ import division
from functools import wraps


def cached(fun):
    """
    memoizing decorator for linkage functions.

    Parameters have been hardcoded (no ``*args``, ``**kwargs`` magic), because,
    the way this is coded (interchangingly using sets and frozensets) is true
    for this specific case. For other cases that is not necessarily guaranteed.
    """

    _cache = {}

    @wraps(fun)
    def newfun(a, b, distance_function):
        frozen_a = frozenset(a)
        frozen_b = frozenset(b)
        if (frozen_a, frozen_b) not in _cache:
            result = fun(a, b, distance_function)
            _cache[(frozen_a, frozen_b)] = result
        return _cache[(frozen_a, frozen_b)]
    return newfun


@cached
def single(a, b, distance_function):
    """
    Given two collections ``a`` and ``b``, this will return the distance of the
    points which are closest together.  ``distance_function`` is used to
    determine the distance between two elements.

    Example::

        >>> single([1, 2], [3, 4], lambda x, y: abs(x-y))
        1  # (distance between 2 and 3)
    """
    left_a, right_a = min(a), max(a)
    left_b, right_b = min(b), max(b)
    result = min(distance_function(left_a, right_b),
                 distance_function(left_b, right_a))
    return result


@cached
def complete(a, b, distance_function):
    """
    Given two collections ``a`` and ``b``, this will return the distance of the
    points which are farthest apart.  ``distance_function`` is used to determine
    the distance between two elements.

    Example::

        >>> single([1, 2], [3, 4], lambda x, y: abs(x-y))
        3  # (distance between 1 and 4)
    """
    left_a, right_a = min(a), max(a)
    left_b, right_b = min(b), max(b)
    result = max(distance_function(left_a, right_b),
                 distance_function(left_b, right_a))
    return result


@cached
def average(a, b, distance_function):
    """
    Given two collections ``a`` and ``b``, this will return the mean of all
    distances. ``distance_function`` is used to determine the distance between
    two elements.

    Example::

        >>> single([1, 2], [3, 100], lambda x, y: abs(x-y))
        26
    """
    distances = [distance_function(x, y)
                 for x in a for y in b]
    return sum(distances) / len(distances)


@cached
def uclus(a, b, distance_function):
    """
    Given two collections ``a`` and ``b``, this will return the *median* of all
    distances. ``distance_function`` is used to determine the distance between
    two elements.

    Example::

        >>> single([1, 2], [3, 100], lambda x, y: abs(x-y))
        2.5
    """
    distances = sorted([distance_function(x, y)
                        for x in a for y in b])
    midpoint, rest = len(distances) // 2, len(distances) % 2
    if not rest:
        return sum(distances[midpoint-1:midpoint+1]) / 2
    else:
        return distances[midpoint]