File: rank.py

package info (click to toggle)
python-agate 1.9.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,996 kB
  • sloc: python: 8,512; makefile: 126
file content (64 lines) | stat: -rw-r--r-- 1,640 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from decimal import Decimal
from functools import cmp_to_key

from agate.computations.base import Computation
from agate.data_types import Number


class Rank(Computation):
    """
    Calculate rank order of the values in a column.

    Uses the "competition" ranking method: if there are four values and the
    middle two are tied, then the output will be `[1, 2, 2, 4]`.

    By default null values are ranked last. Note that ``reverse`` flips the
    entire sorted sequence, so whatever sorted last is then ranked first, and
    a custom ``comparer`` fully determines its own ordering.

    :param column_name:
        The name of the column to rank.
    :param comparer:
        An optional comparison function. If not specified ranking will be
        ascending, with nulls ranked last.
    :param reverse:
        Reverse sort order before ranking.
    """
    def __init__(self, column_name, comparer=None, reverse=None):
        self._column_name = column_name
        self._comparer = comparer
        self._reverse = reverse

    def get_computed_data_type(self, table):
        """
        :returns:
            A :class:`Number` data type instance for the rank column.
        """
        return Number()

    def run(self, table):
        """
        Compute the rank of each row's value in the configured column.

        :param table:
            The table whose column is being ranked.
        :returns:
            A :class:`list` holding one :class:`decimal.Decimal` rank per
            row, in row order.
        """
        column_name = self._column_name
        column = table.columns[column_name]

        if self._comparer:
            ordered = sorted(column.values(), key=cmp_to_key(self._comparer))
        else:
            ordered = column.values_sorted()

        if self._reverse:
            # Reverse into a fresh list instead of calling ``.reverse()``:
            # the list handed back by ``values_sorted()`` comes from the
            # column and may be reused by other callers, so it must not be
            # mutated in place.
            ordered = list(reversed(ordered))

        ranks = {}

        for position, value in enumerate(ordered, start=1):
            # Only the first position at which a value appears is recorded,
            # so tied values share a rank and the ranks they would otherwise
            # have occupied are skipped (competition ranking).
            if value not in ranks:
                ranks[value] = Decimal(position)

        return [ranks[row[column_name]] for row in table.rows]