1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
|
"""
The :class:`.TableSet` class collects a set of related tables in a single data
structure. The most common way of creating a :class:`.TableSet` is using the
:meth:`.Table.group_by` method, which is similar to SQL's ``GROUP BY`` keyword.
The resulting set of tables will all have an identical column structure.
:class:`.TableSet` functions as a dictionary. Individual tables in the set can
be accessed by using their name as a key. If the table set was created using
:meth:`.Table.group_by` then the names of the tables will be the grouping
factors found in the original data.
:class:`.TableSet` replicates the majority of the features of :class:`.Table`.
When methods such as :meth:`.TableSet.select`, :meth:`.TableSet.where` or
:meth:`.TableSet.order_by` are used, the operation is applied to *each* table
in the set and the result is a new :class:`TableSet` instance made up of
entirely new :class:`.Table` instances.
:class:`.TableSet` instances can also contain other TableSets. This means you
can chain calls to :meth:`.Table.group_by` and :meth:`.TableSet.group_by`
and end up with data grouped across multiple dimensions.
:meth:`.TableSet.aggregate` on nested TableSets will then group across multiple
dimensions.
"""
from io import StringIO
from itertools import zip_longest
from agate.data_types import Text
from agate.mapped_sequence import MappedSequence
class TableSet(MappedSequence):
    """
    A group of named tables that share identical column definitions.

    Supports (almost) all of the operations offered by :class:`.Table`. When
    run against a :class:`TableSet`, an operation that would have produced a
    new :class:`.Table` produces a new :class:`TableSet` instead, and an
    operation that would have produced a single value produces a dictionary
    of values instead.

    TableSet is implemented as a subclass of :class:`.MappedSequence`.

    :param tables:
        A sequence of :class:`.Table` instances (or nested :class:`TableSet`
        instances). It must contain at least one element, because the first
        one supplies the column names and types.
    :param keys:
        A sequence of keys corresponding to the tables. These may be any type
        except :class:`int`.
    :param key_name:
        A name that describes the grouping property. Used as the column
        header when the groups are aggregated. Defaults to ``'group'``.
    :param key_type:
        An instance of some subclass of :class:`.DataType`. If not provided,
        it defaults to a :class:`.Text` instance.
    :param _is_fork:
        Used internally to skip the column validation step when data is
        propagated from an existing tableset.
    :raises ValueError:
        If validation is enabled and any table's column types or column names
        differ from those of the first table.
    """
    def __init__(self, tables, keys, key_name='group', key_type=None, _is_fork=False):
        tables = tuple(tables)
        keys = tuple(keys)

        self._key_name = key_name
        self._key_type = key_type or Text()

        # Descend through nested TableSets until an object that is not a
        # TableSet is reached; it provides the reference column layout.
        sample = tables[0]

        while isinstance(sample, TableSet):
            sample = sample[0]

        self._sample_table = sample
        self._column_types = sample.column_types
        self._column_names = sample.column_names

        if not _is_fork:
            expected_types = self._column_types
            expected_names = self._column_names

            for member in tables:
                # zip_longest pads the shorter side with None, so a differing
                # number of columns also fails the isinstance comparison.
                pairs = zip_longest(member.column_types, expected_types)

                if not all(isinstance(actual, type(expected)) for actual, expected in pairs):
                    raise ValueError('Not all tables have the same column types!')

                if member.column_names != expected_names:
                    raise ValueError('Not all tables have the same column names!')

        MappedSequence.__init__(self, tables, keys)

    def __str__(self):
        """
        Return the text written by :meth:`TableSet.print_structure`.
        """
        buffer = StringIO()
        self.print_structure(output=buffer)

        return buffer.getvalue()

    @property
    def key_name(self):
        """
        The name of the key this TableSet is grouped by. (If created using
        :meth:`.Table.group_by` then this is the original column name.)
        """
        return self._key_name

    @property
    def key_type(self):
        """
        The :class:`.DataType` of the key this TableSet is grouped by. (If
        created using :meth:`.Table.group_by` then this is the original
        column type.)
        """
        return self._key_type

    @property
    def column_types(self):
        """
        The column types shared by the tables in this :class:`TableSet`, in
        column order.

        :returns:
            A :class:`tuple` of :class:`.DataType` instances.
        """
        return self._column_types

    @property
    def column_names(self):
        """
        The column names shared by the tables in this :class:`TableSet`, in
        column order.

        :returns:
            A :class:`tuple` of strings.
        """
        return self._column_names

    def _fork(self, tables, keys, key_name=None, key_type=None):
        """
        Build a new :class:`.TableSet` that inherits this one's metadata.

        ``key_name`` and ``key_type`` fall back to this instance's values
        when passed as ``None``. The new instance is created with
        ``_is_fork=True``, so column validation is skipped.

        This method is used internally by functions like
        :meth:`.TableSet.having`.
        """
        inherited_name = self._key_name if key_name is None else key_name
        inherited_type = self._key_type if key_type is None else key_type

        return TableSet(tables, keys, inherited_name, inherited_type, _is_fork=True)

    def _proxy(self, method_name, *args, **kwargs):
        """
        Call the method named ``method_name`` on every table in this
        :class:`.TableSet`, passing ``args`` and ``kwargs`` through, and
        return the results as a new :class:`.TableSet` under the same keys.
        """
        results = [
            getattr(member, method_name)(*args, **kwargs)
            for _, member in self.items()
        ]

        return self._fork(results, self.keys())
# The public TableSet methods are implemented as plain functions in separate
# submodules of ``agate.tableset``; they are imported here and attached to the
# class below.
# NOTE(review): these imports sit after the class definition rather than at
# the top of the file, presumably because the submodules need ``TableSet`` to
# exist first (circular imports) -- confirm before moving them.
from agate.tableset.aggregate import aggregate
from agate.tableset.bar_chart import bar_chart
from agate.tableset.column_chart import column_chart
from agate.tableset.from_csv import from_csv
from agate.tableset.from_json import from_json
from agate.tableset.having import having
from agate.tableset.line_chart import line_chart
from agate.tableset.merge import merge
from agate.tableset.print_structure import print_structure
from agate.tableset.proxy_methods import (bins, compute, denormalize, distinct, exclude, find, group_by, homogenize,
join, limit, normalize, order_by, pivot, select, where)
from agate.tableset.scatterplot import scatterplot
from agate.tableset.to_csv import to_csv
from agate.tableset.to_json import to_json
# Bind each imported callable as a TableSet attribute of the same name, in
# alphabetical order. ``print_structure`` is the method ``TableSet.__str__``
# calls.
TableSet.aggregate = aggregate
TableSet.bar_chart = bar_chart
TableSet.bins = bins
TableSet.column_chart = column_chart
TableSet.compute = compute
TableSet.denormalize = denormalize
TableSet.distinct = distinct
TableSet.exclude = exclude
TableSet.find = find
TableSet.from_csv = from_csv
TableSet.from_json = from_json
TableSet.group_by = group_by
TableSet.having = having
TableSet.homogenize = homogenize
TableSet.join = join
TableSet.limit = limit
TableSet.line_chart = line_chart
TableSet.merge = merge
TableSet.normalize = normalize
TableSet.order_by = order_by
TableSet.pivot = pivot
TableSet.print_structure = print_structure
TableSet.scatterplot = scatterplot
TableSet.select = select
TableSet.to_csv = to_csv
TableSet.to_json = to_json
TableSet.where = where
|