# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <codecell>
# Sample CSV text: a three-column header, then product rows grouped under
# single-value lines ("Apples", "Oranges") that name the category of the
# rows following them.
data = (
    "type,price,quantity\n"
    "Apples\n"
    "Cortland,0.30,24\n"
    "Red Delicious,0.40,24\n"
    "Oranges\n"
    "Navel,0.50,12\n"
)
# <codecell>
import petl.interactive as etl
from petl.io import StringSource
# <codecell>
# Load the inline CSV text as a petl table. The code further down relies on
# the category lines arriving as single-value rows.
tbl1 = etl.fromcsv(StringSource(data))
# Bare expression: displays the table when run as a notebook cell.
tbl1
# <headingcell level=2>
# Option 1 - using existing petl functions
# <codecell>
def make_room_for_category(row):
    """Widen *row* by one leading 'category' slot.

    A single-value row is treated as a category marker: its value goes in the
    first slot and the remaining three slots get the placeholder 'X'.
    Any other row is a data row: it is returned as a tuple with ``None``
    prepended, leaving the category slot empty.
    """
    if len(row) != 1:
        # Data row: category not known yet, leave it as None.
        return (None,) + tuple(row)
    # Category marker row: pad the three data columns with the placeholder.
    return (row[0],) + ('X',) * 3
# Map every row through make_room_for_category and declare the widened
# four-column header for the result.
tbl2 = tbl1.rowmap(
    make_room_for_category,
    fields=['category', 'type', 'price', 'quantity'],
)
# Bare expression: displays the table when run as a notebook cell.
tbl2
# <codecell>
# Fill the empty category slots of the data rows. Per petl's documentation,
# filldown() replaces missing (None) values with the value from the row
# above, so each data row should pick up the category of the marker row
# preceding it.
tbl3 = tbl2.filldown()
# Bare expression: displays the table when run as a notebook cell.
tbl3
# <codecell>
# Drop the category marker rows, which make_room_for_category tagged with
# the 'X' placeholder in the 'type' column.
# NOTE(review): ne() is presumably petl's "not equal" row selection --
# confirm against the installed petl version.
tbl4 = tbl3.ne('type', 'X')
# Bare expression: displays the table when run as a notebook cell.
tbl4
# <headingcell level=2>
# Option 2 - custom transformer
# <codecell>
class CustomTransformer(object):
    """Row container that folds single-value "category" rows into a column.

    Iterating yields the source header with ``'category'`` prepended, then
    one row for every multi-value source row, prefixed with the most recently
    seen category (``None`` until a category row has been seen). Single-value
    rows are consumed as category markers and are not emitted themselves.

    :param source: iterable of rows whose first row is the header;
        ``iter(source)`` is called afresh each time this object is iterated.
    """

    def __init__(self, source):
        self.source = source

    def __iter__(self):
        it = iter(self.source)

        # construct new header
        # The builtin next() works on Python 2.6+ and 3; the it.next() method
        # exists only on Python 2.
        try:
            source_fields = next(it)
        except StopIteration:
            # Empty source: emit nothing. Returning explicitly keeps a
            # StopIteration from escaping the generator body, which PEP 479
            # turns into a RuntimeError.
            return
        yield ('category',) + tuple(source_fields)

        # transform data
        current_category = None
        for row in it:
            if len(row) == 1:
                # Category marker: remember it for the rows that follow.
                current_category = row[0]
            else:
                yield (current_category,) + tuple(row)
# <codecell>
# Wrap tbl1 in the custom transformer; no rows are read until tbl5 is
# iterated, because __iter__ is a generator.
tbl5 = CustomTransformer(tbl1)
# <codecell>
# just so it formats nicely as HTML in the notebook...
# (tbl5 is a plain Python object, so it is passed through etl.wrap for display)
etl.wrap(tbl5)