1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
|
#!/usr/bin/env python
import proof
import agate
def load_data(data):
data['exonerations'] = agate.Table.from_csv('examples/realdata/exonerations-20150828.csv')
print(data['exonerations'])
def confessions(data):
num_false_confessions = data['exonerations'].aggregate(agate.Count('false_confession', True))
print('False confessions: %i' % num_false_confessions)
@proof.never_cache
def median_age(data):
median_age = data['exonerations'].aggregate(agate.Median('age'))
print('Median age at time of arrest: %i' % median_age)
data['exonerations'].bins('age', 10, 0, 100).print_bars('age', width=80)
data['exonerations'].pivot('age').order_by('age').print_bars('age', width=80)
data['exonerations'].bins('age').print_bars('age', width=80)
def years_in_prison(data):
data['with_years_in_prison'] = data['exonerations'].compute([
('years_in_prison', agate.Change('convicted', 'exonerated'))
])
def youth(data):
sorted_by_age = data['exonerations'].order_by('age')
youngest_ten = sorted_by_age.limit(10)
youngest_ten.print_table(max_columns=7)
def states(data):
by_state = data['with_years_in_prison'].group_by('state')
state_totals = by_state.aggregate([
('count', agate.Count())
])
sorted_totals = state_totals.order_by('count', reverse=True)
sorted_totals.print_table(max_rows=5)
medians = by_state.aggregate([
('count', agate.Count()),
('median_years_in_prison', agate.Median('years_in_prison'))
])
sorted_medians = medians.order_by('median_years_in_prison', reverse=True)
sorted_medians.print_table(max_rows=5)
def race_and_age(data):
# Filters rows without age data
only_with_age = data['with_years_in_prison'].where(
lambda r: r['age'] is not None
)
# Group by race
race_groups = only_with_age.group_by('race')
# Sub-group by age cohorts (20s, 30s, etc.)
race_and_age_groups = race_groups.group_by(
lambda r: '%i0s' % (r['age'] // 10),
key_name='age_group'
)
# Aggregate medians for each group
medians = race_and_age_groups.aggregate([
('count', agate.Count()),
('median_years_in_prison', agate.Median('years_in_prison'))
])
# Sort the results
sorted_groups = medians.order_by('median_years_in_prison', reverse=True)
# Print out the results
sorted_groups.print_table(max_rows=10)
analysis = proof.Analysis(load_data)
analysis.then(confessions)
analysis.then(median_age)
analysis.then(youth)
years_analysis = analysis.then(years_in_prison)
years_analysis.then(states)
years_analysis.then(race_and_age)
analysis.run()
|