File: pivot.py

package info (click to toggle)
python-agate 1.9.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,996 kB
  • sloc: python: 8,512; makefile: 126
file content (132 lines) | stat: -rw-r--r-- 4,654 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
from agate import utils
from agate.aggregations import Count


def pivot(self, key=None, pivot=None, aggregation=None, computation=None, default_value=utils.default, key_name=None):
    """
    Create a new table by grouping the data, aggregating those groups,
    applying a computation, and then organizing the groups into new rows and
    columns.

    This is sometimes called a "crosstab".

    +---------+---------+--------+
    |  name   |  race   | gender |
    +=========+=========+========+
    |  Joe    |  white  | male   |
    +---------+---------+--------+
    |  Jane   |  black  | female |
    +---------+---------+--------+
    |  Josh   |  black  | male   |
    +---------+---------+--------+
    |  Jim    |  asian  | female |
    +---------+---------+--------+

    This table can be pivoted with :code:`key` equal to "race" and
    :code:`columns` equal to "gender". The default aggregation is
    :class:`.Count`. This would result in the following table.

    +---------+---------+--------+
    |  race   |  male   | female |
    +=========+=========+========+
    |  white  |  1      | 0      |
    +---------+---------+--------+
    |  black  |  1      | 1      |
    +---------+---------+--------+
    |  asian  |  0      | 1      |
    +---------+---------+--------+

    If one or more keys are specified then the resulting table will
    automatically have :code:`row_names` set to those keys.

    See also the related method :meth:`.Table.denormalize`.

    :param key:
        Either the name of a column from the this table to group by, a
        sequence of such column names, a :class:`function` that takes a
        row and returns a value to group by, or :code:`None`, in which case
        there will be only a single row in the output table.
    :param pivot:
        A column name whose unique values will become columns in the new
        table, or :code:`None` in which case there will be a single value
        column in the output table.
    :param aggregation:
        An instance of an :class:`.Aggregation` to perform on each group of
        data in the pivot table. (Each cell is the result of an aggregation
        of the grouped data.)

        If not specified this defaults to :class:`.Count` with no arguments.
    :param computation:
        An optional :class:`.Computation` instance to be applied to the
        aggregated sequence of values before they are transposed into the
        pivot table.

        Use the class name of the aggregation as your column name argument
        when constructing your computation. (This is "Count" if using the
        default value for :code:`aggregation`.)
    :param default_value:
        Value to be used for missing values in the pivot table. Defaults to
        :code:`Decimal(0)`. If performing non-mathematical aggregations you
        may wish to set this to :code:`None`.
    :param key_name:
        A name for the key column in the output table. This is most
        useful when the provided key is a function. This argument is not
        valid when :code:`key` is a sequence.
    :returns:
        A new :class:`.Table`.
    """
    if key is None:
        key = []
    elif not utils.issequence(key):
        key = [key]
    elif key_name:
        raise ValueError('key_name is not a valid argument when key is a sequence.')

    if aggregation is None:
        aggregation = Count()

    groups = self

    for k in key:
        groups = groups.group_by(k, key_name=key_name)

    aggregation_name = str(aggregation)
    computation_name = str(computation) if computation else None

    def apply_computation(table):
        computed = table.compute([
            (computation_name, computation)
        ])

        excluded = computed.exclude([aggregation_name])

        return excluded

    if pivot is not None:
        groups = groups.group_by(pivot)

        column_type = aggregation.get_aggregate_data_type(self)

        table = groups.aggregate([
            (aggregation_name, aggregation)
        ])

        pivot_count = len(set(table.columns[pivot].values()))

        if computation is not None:
            column_types = computation.get_computed_data_type(table)
            table = apply_computation(table)

        column_types = [column_type] * pivot_count

        table = table.denormalize(key, pivot, computation_name or aggregation_name, default_value=default_value,
                                  column_types=column_types)
    else:
        table = groups.aggregate([
            (aggregation_name, aggregation)
        ])

        if computation:
            table = apply_computation(table)

    return table