File: compute.py

package info (click to toggle)
python-agate 1.9.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,996 kB
  • sloc: python: 8,512; makefile: 126
file content (64 lines) | stat: -rw-r--r-- 2,055 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from collections import OrderedDict
from copy import copy

from agate.rows import Row


def compute(self, computations, replace=False):
    """
    Create a new table by applying one or more :class:`.Computation` instances
    to each row.

    :param computations:
        A sequence of pairs of new column names and :class:`.Computation`
        instances.
    :param replace:
        If :code:`True` then new column names can match existing names, and
        those columns will be replaced with the computed data.
    :returns:
        A new :class:`.Table`.
    :raises ValueError:
        If a new column name matches an existing column name and
        :code:`replace` is not :code:`True`.
    """
    # The pairs are walked twice (once to build the schema, once to run the
    # computations), so materialize them in case a one-shot iterable was given.
    computations = tuple(computations)

    column_names = list(copy(self._column_names))
    column_types = list(copy(self._column_types))

    for new_column_name, computation in computations:
        new_column_type = computation.get_computed_data_type(self)

        if new_column_name in column_names:
            if not replace:
                raise ValueError(
                    'New column name "%s" already exists. Specify replace=True to replace with computed data.'
                    % new_column_name
                )

            # Replaced columns keep their position but take the computed type.
            i = column_names.index(new_column_name)
            column_types[i] = new_column_type
        else:
            column_names.append(new_column_name)
            column_types.append(new_column_type)

        computation.validate(self)

    # Computed data per new column name, in the order the computations
    # were supplied.
    new_columns = OrderedDict()

    for new_column_name, computation in computations:
        new_columns[new_column_name] = computation.run(self)

    new_rows = []

    if replace:
        # Slow version if using replace: each output position is filled either
        # from the computed data or from the original row. Which one applies
        # does not vary between rows, so resolve it once up front.
        sources = [(j, new_columns.get(column_name)) for j, column_name in enumerate(column_names)]

        for i, row in enumerate(self._rows):
            values = [row[j] if computed is None else computed[i] for j, computed in sources]
            new_rows.append(Row(values, column_names))
    else:
        # Faster version if not using replace: every computed column is
        # appended after the existing values.
        computed_columns = tuple(new_columns.values())

        for i, row in enumerate(self._rows):
            values = row.values() + tuple(c[i] for c in computed_columns)
            new_rows.append(Row(values, column_names))

    return self._fork(new_rows, column_names, column_types)