File: __init__.py

package info (click to toggle)
python-agate 1.9.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,996 kB
  • sloc: python: 8,512; makefile: 126
file content (200 lines) | stat: -rw-r--r-- 7,075 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
"""
The :class:`.TableSet` class collects a set of related tables in a single data
structure. The most common way of creating a :class:`.TableSet` is using the
:meth:`.Table.group_by` method, which is similar to SQL's ``GROUP BY`` keyword.
The resulting set of tables will all have an identical column structure.

:class:`.TableSet` functions as a dictionary. Individual tables in the set can
be accessed by using their name as a key. If the table set was created using
:meth:`.Table.group_by` then the names of the tables will be the grouping
factors found in the original data.

:class:`.TableSet` replicates the majority of the features of :class:`.Table`.
When methods such as :meth:`.TableSet.select`, :meth:`.TableSet.where` or
:meth:`.TableSet.order_by` are used, the operation is applied to *each* table
in the set and the result is a new :class:`TableSet` instance made up of
entirely new :class:`.Table` instances.

:class:`.TableSet` instances can also contain other TableSets. This means you
can chain calls to :meth:`.Table.group_by` and :meth:`.TableSet.group_by`
and end up with data grouped across multiple dimensions.
:meth:`.TableSet.aggregate` on nested TableSets will then group across multiple
dimensions.
"""

from io import StringIO
from itertools import zip_longest

from agate.data_types import Text
from agate.mapped_sequence import MappedSequence


class TableSet(MappedSequence):
    """
    A group of named tables with identical column definitions. Supports
    (almost) all the same operations as :class:`.Table`. When executed on a
    :class:`TableSet`, any operation that would have returned a new
    :class:`.Table` instead returns a new :class:`TableSet`. Any operation
    that would have returned a single value instead returns a dictionary of
    values.

    TableSet is implemented as a subclass of :class:`.MappedSequence`.

    :param tables:
        A non-empty sequence of :class:`Table` instances. Elements may also
        be :class:`TableSet` instances, in which case the column definitions
        are taken from the first table found by descending into them.
    :param keys:
        A sequence of keys corresponding to the tables. These may be any type
        except :class:`int`.
    :param key_name:
        A name that describes the grouping properties. Used as the column
        header when the groups are aggregated. Defaults to ``'group'``.
    :param key_type:
        An instance of some subclass of :class:`.DataType`. If not provided it
        will default to a :class:`.Text`.
    :param _is_fork:
        Used internally to skip certain validation steps when data
        is propagated from an existing tableset.
    :raises IndexError:
        If ``tables`` is empty.
    :raises ValueError:
        If ``_is_fork`` is false and the tables do not all share the same
        column types and column names.
    """
    def __init__(self, tables, keys, key_name='group', key_type=None, _is_fork=False):
        tables = tuple(tables)
        keys = tuple(keys)

        if not tables:
            # Indexing an empty tuple raises IndexError as well, so the
            # exception type is unchanged; only the message is more helpful
            # than "tuple index out of range".
            raise IndexError('A TableSet must contain at least one table.')

        self._key_name = key_name
        self._key_type = key_type or Text()
        self._sample_table = tables[0]

        # When the first element is itself a TableSet, keep descending into
        # its first element until something that is not a TableSet is found.
        while isinstance(self._sample_table, TableSet):
            self._sample_table = self._sample_table[0]

        self._column_types = self._sample_table.column_types
        self._column_names = self._sample_table.column_names

        if not _is_fork:
            for table in tables:
                # zip_longest pads the shorter side with None, so a differing
                # number of columns also fails the isinstance check.
                types_differ = any(
                    not isinstance(a, type(b))
                    for a, b in zip_longest(table.column_types, self._column_types)
                )

                if types_differ:
                    raise ValueError('Not all tables have the same column types!')

                if table.column_names != self._column_names:
                    raise ValueError('Not all tables have the same column names!')

        MappedSequence.__init__(self, tables, keys)

    def __str__(self):
        """
        Print the tableset's structure via :meth:`TableSet.print_structure`.

        :returns:
            Everything :meth:`TableSet.print_structure` wrote, as a string.
        """
        structure = StringIO()

        self.print_structure(output=structure)

        return structure.getvalue()

    @property
    def key_name(self):
        """
        Get the name of the key this TableSet is grouped by. (If created using
        :meth:`.Table.group_by` then this is the original column name.)
        """
        return self._key_name

    @property
    def key_type(self):
        """
        Get the :class:`.DataType` this TableSet is grouped by. (If created
        using :meth:`.Table.group_by` then this is the original column type.)
        """
        return self._key_type

    @property
    def column_types(self):
        """
        Get an ordered list of this :class:`.TableSet`'s column types.

        :returns:
            A :class:`tuple` of :class:`.DataType` instances.
        """
        return self._column_types

    @property
    def column_names(self):
        """
        Get an ordered list of this :class:`TableSet`'s column names.

        :returns:
            A :class:`tuple` of strings.
        """
        return self._column_names

    def _fork(self, tables, keys, key_name=None, key_type=None):
        """
        Create a new :class:`.TableSet` using the metadata from this one.

        This method is used internally by functions like
        :meth:`.TableSet.having`.

        :param tables:
            The tables for the new tableset.
        :param keys:
            The keys corresponding to ``tables``.
        :param key_name:
            Key name for the new tableset. If :code:`None`, this tableset's
            key name is reused.
        :param key_type:
            Key type for the new tableset. If :code:`None`, this tableset's
            key type is reused.
        :returns:
            A new :class:`.TableSet` built with ``_is_fork=True``, so the
            column type and column name validation is skipped.
        """
        if key_name is None:
            key_name = self._key_name

        if key_type is None:
            key_type = self._key_type

        return TableSet(tables, keys, key_name, key_type, _is_fork=True)

    def _proxy(self, method_name, *args, **kwargs):
        """
        Calls a method on each table in this :class:`.TableSet`.

        :param method_name:
            The name of the method to look up on every table.
        :param args:
            Positional arguments passed through to each call.
        :param kwargs:
            Keyword arguments passed through to each call.
        :returns:
            A new :class:`.TableSet` holding the per-table results under this
            tableset's keys.
        """
        # Only the tables are needed here; the keys are reused unchanged below.
        tables = [
            getattr(table, method_name)(*args, **kwargs)
            for _key, table in self.items()
        ]

        return self._fork(tables, self.keys())


from agate.tableset.aggregate import aggregate
from agate.tableset.bar_chart import bar_chart
from agate.tableset.column_chart import column_chart
from agate.tableset.from_csv import from_csv
from agate.tableset.from_json import from_json
from agate.tableset.having import having
from agate.tableset.line_chart import line_chart
from agate.tableset.merge import merge
from agate.tableset.print_structure import print_structure
from agate.tableset.proxy_methods import (bins, compute, denormalize, distinct, exclude, find, group_by, homogenize,
                                          join, limit, normalize, order_by, pivot, select, where)
from agate.tableset.scatterplot import scatterplot
from agate.tableset.to_csv import to_csv
from agate.tableset.to_json import to_json

# Attach the functions imported above to TableSet as attributes. Every
# assignment targets a different attribute name, so the grouping below is
# purely organizational.

# Functions named from_* / to_* for CSV and JSON.
TableSet.from_csv = from_csv
TableSet.from_json = from_json
TableSet.to_csv = to_csv
TableSet.to_json = to_json

# Functions imported from the chart / plot modules.
TableSet.bar_chart = bar_chart
TableSet.column_chart = column_chart
TableSet.line_chart = line_chart
TableSet.scatterplot = scatterplot

# Remaining functions that each come from their own agate.tableset module.
TableSet.aggregate = aggregate
TableSet.having = having
TableSet.merge = merge
TableSet.print_structure = print_structure

# Functions imported from agate.tableset.proxy_methods.
TableSet.bins = bins
TableSet.compute = compute
TableSet.denormalize = denormalize
TableSet.distinct = distinct
TableSet.exclude = exclude
TableSet.find = find
TableSet.group_by = group_by
TableSet.homogenize = homogenize
TableSet.join = join
TableSet.limit = limit
TableSet.normalize = normalize
TableSet.order_by = order_by
TableSet.pivot = pivot
TableSet.select = select
TableSet.where = where