File: group_by.py

package info (click to toggle)
python-agate 1.9.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,996 kB
  • sloc: python: 8,512; makefile: 126
file content (63 lines) | stat: -rw-r--r-- 2,020 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from collections import OrderedDict

from agate.data_types import Text
from agate.tableset import TableSet


def group_by(self, key, key_name=None, key_type=None):
    """
    Create a :class:`.TableSet` with a table for each unique key.

    Every group value is passed through :code:`key_type.cast` before the rows
    are bucketed, so rows whose values cast to the same result end up in the
    same table. Tables are handed to the :class:`.TableSet` in the order in
    which their key is first encountered.

    :param key:
        Either the name of a column from this table to group by, or a
        :class:`function` that takes a row and returns a value to group by.
    :param key_name:
        A name that describes the grouped properties. Defaults to the
        column name that was grouped on or "group" if grouping with a key
        function. See :class:`.TableSet` for more.
    :param key_type:
        An instance of any subclass of :class:`.DataType`. If not provided
        it defaults to the data type of the grouped column, or to
        :class:`.Text` if grouping with a key function.
    :returns:
        A :class:`.TableSet` mapping where the keys are unique values from
        the :code:`key` and the values are new :class:`.Table` instances
        containing the grouped rows. If this table has no rows, the set is
        built from a single empty table and an empty list of keys.
    """
    key_is_row_function = callable(key)

    if key_is_row_function:
        key_name = key_name or 'group'
        key_type = key_type or Text()
    else:
        column = self._columns[key]

        key_name = key_name or column.name
        key_type = key_type or column.data_type

    # An ordered mapping keeps groups in first-seen order.
    groups = OrderedDict()

    for row in self._rows:
        raw_value = key(row) if key_is_row_function else row[column.name]

        # Cast first so that the bucket key is the normalized value.
        groups.setdefault(key_type.cast(raw_value), []).append(row)

    if not groups:
        # No rows at all: still hand TableSet one (empty) forked table.
        # NOTE(review): presumably so the set retains this table's column
        # structure -- confirm against TableSet.
        return TableSet([self._fork([])], [], key_name=key_name, key_type=key_type)

    output = OrderedDict(
        (group, self._fork(rows)) for group, rows in groups.items()
    )

    return TableSet(output.values(), output.keys(), key_name=key_name, key_type=key_type)