File: slug.py

package info (click to toggle)
python-agate 1.9.1-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,996 kB
  • sloc: python: 8,512; makefile: 126
file content (61 lines) | stat: -rw-r--r-- 2,059 bytes parent folder | download | duplicates (2)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from agate.aggregations.has_nulls import HasNulls
from agate.computations.base import Computation
from agate.data_types import Text
from agate.exceptions import DataTypeError
from agate.utils import issequence, slugify


class Slug(Computation):
    """
    Build slugs from the text values of one or more columns. When several
    column names are supplied, each row's values are concatenated in the
    order given (every value preceded by a single space) and the combined
    string is what gets slugified.

    :param column_name:
        The name of a column or a sequence of column names containing
        :class:`.Text` values.
    :param ensure_unique:
        If True, any duplicate values will be appended with unique
        identifiers. Defaults to False.
    :param kwargs:
        Extra keyword arguments, stored as-is and forwarded to ``slugify``
        when the computation runs.
    """
    def __init__(self, column_name, ensure_unique=False, **kwargs):
        self._column_name = column_name
        self._ensure_unique = ensure_unique
        # Kept untouched; unpacked into the ``slugify`` call in ``run``.
        self._slug_args = kwargs

    def get_computed_data_type(self, table):
        """
        :returns:
            A new :class:`.Text` instance; the table argument is not used.
        """
        return Text()

    def validate(self, table):
        """
        Check every source column before running.

        :raises DataTypeError:
            If a source column's data type is not :class:`.Text`.
        :raises ValueError:
            If :class:`.HasNulls` reports a null in a source column.
        """
        # Normalize the single-name form so both cases share one loop.
        names = (
            self._column_name
            if issequence(self._column_name)
            else [self._column_name]
        )

        for name in names:
            source = table.columns[name]

            # The type check comes first, then the null check, per column.
            if not isinstance(source.data_type, Text):
                raise DataTypeError('Slug column must contain Text data.')

            if HasNulls(name).run(table):
                raise ValueError('Slug column cannot contain `None`.')

    def run(self, table):
        """
        Collect one raw string per row and pass the list to ``slugify``.

        :returns:
            Whatever ``slugify`` returns for the collected values
            (presumably one slug per row; confirm against
            ``agate.utils.slugify``).
        """
        if issequence(self._column_name):
            names = self._column_name
            # Each value is prefixed with a space, so the combined string
            # starts with a leading space, exactly as accumulated from ''.
            raw_values = [
                ''.join(' ' + row[name] for name in names)
                for row in table.rows
            ]
        else:
            key = self._column_name
            raw_values = [row[key] for row in table.rows]

        return slugify(
            raw_values,
            ensure_unique=self._ensure_unique,
            **self._slug_args
        )