1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
from __future__ import absolute_import, print_function, division
from petl.util.base import values, header, Table
def typeset(table, field):
    """
    Return a set containing the names of all Python types found for values
    in the given field. E.g.::

        >>> import petl as etl
        >>> table = [['foo', 'bar', 'baz'],
        ...          ['A', 1, '2'],
        ...          ['B', u'2', '3.4'],
        ...          [u'B', u'3', '7.8', True],
        ...          ['D', u'xyz', 9.0],
        ...          ['E', 42]]
        >>> sorted(etl.typeset(table, 'foo'))
        ['str']
        >>> sorted(etl.typeset(table, 'bar'))
        ['int', 'str']
        >>> sorted(etl.typeset(table, 'baz'))
        ['NoneType', 'float', 'str']

    The `field` argument can be a field name or index (starting from zero).

    Note that the result holds type *names* (strings), not type objects. In
    the example above the short row ``['E', 42]`` has no 'baz' value and is
    reported as ``'NoneType'``.

    """
    # Looking up a type name cannot fail for an individual value, so no
    # per-value error handling is needed here.
    return {type(v).__name__ for v in values(table, field)}


Table.typeset = typeset
def diffheaders(t1, t2):
    """
    Compare the headers of two tables and return a pair of sets: the fields
    present only in `t2` (added), followed by the fields present only in
    `t1` (subtracted). E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar', 'baz'],
        ...           ['a', 1, .3]]
        >>> table2 = [['baz', 'bar', 'quux'],
        ...           ['a', 1, .3]]
        >>> add, sub = etl.diffheaders(table1, table2)
        >>> add
        {'quux'}
        >>> sub
        {'foo'}

    """
    left_fields = set(header(t1))
    right_fields = set(header(t2))
    added = right_fields.difference(left_fields)
    subtracted = left_fields.difference(right_fields)
    return added, subtracted


Table.diffheaders = diffheaders
def diffvalues(t1, t2, f):
    """
    Compare the values found under field `f` in two tables and return a pair
    of sets: the values present only in `t2` (added), followed by the values
    present only in `t1` (subtracted). E.g.::

        >>> import petl as etl
        >>> table1 = [['foo', 'bar'],
        ...           ['a', 1],
        ...           ['b', 3]]
        >>> table2 = [['bar', 'foo'],
        ...           [1, 'a'],
        ...           [3, 'c']]
        >>> add, sub = etl.diffvalues(table1, table2, 'foo')
        >>> add
        {'c'}
        >>> sub
        {'b'}

    """
    left_values = set(values(t1, f))
    right_values = set(values(t2, f))
    added = right_values.difference(left_values)
    subtracted = left_values.difference(right_values)
    return added, subtracted


Table.diffvalues = diffvalues
def strjoin(s):
    """
    Build a function that joins a sequence into a single string, converting
    each item with ``str`` and using `s` as the separator. Intended for use
    with :func:`petl.transform.conversions.convert`.

    """

    def _join(items):
        return s.join(str(item) for item in items)

    return _join
def nthword(n, sep=None):
    """
    Build a function that splits a string on `sep` (any whitespace when
    `sep` is None) and returns the word at position `n`. E.g.::

        >>> import petl as etl
        >>> s = 'foo bar'
        >>> f = etl.nthword(0)
        >>> f(s)
        'foo'
        >>> g = etl.nthword(1)
        >>> g(s)
        'bar'

    Intended for use with :func:`petl.transform.conversions.convert`.

    """

    def _pick(text):
        words = text.split(sep)
        return words[n]

    return _pick
def coalesce(*fields, **kwargs):
    """
    Return a function which accepts a row and returns the first non-missing
    value from the specified fields. Intended for use with
    :func:`petl.transform.basics.addfield`.

    Each item in `fields` is used to index the row (``row[f]``), in the
    order given.

    The `missing` keyword argument (default None) is the value that marks a
    field as missing. The `default` keyword argument (default None) is
    returned when every one of the specified fields is missing.

    """
    missing = kwargs.get('missing', None)
    default = kwargs.get('default', None)
    # Identity alone is enough for the default None marker. A caller-supplied
    # marker such as 'NA' must also match equal-but-distinct objects (e.g.
    # strings read from a file), so fall back to equality in that case.
    compare_equal = missing is not None

    def _is_missing(v):
        return v is missing or (compare_equal and v == missing)

    def _coalesce(row):
        for f in fields:
            v = row[f]
            if not _is_missing(v):
                return v
        return default

    return _coalesce
|