1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
|
from .aggregation import Reducer, SortDirection
class FieldOnlyReducer(Reducer):
def __init__(self, field):
super(FieldOnlyReducer, self).__init__(field)
self._field = field
class count(Reducer):
"""
Counts the number of results in the group
"""
NAME = 'COUNT'
def __init__(self):
super(count, self).__init__()
class sum(FieldOnlyReducer):
"""
Calculates the sum of all the values in the given fields within the group
"""
NAME = 'SUM'
def __init__(self, field):
super(sum, self).__init__(field)
class min(FieldOnlyReducer):
"""
Calculates the smallest value in the given field within the group
"""
NAME = 'MIN'
def __init__(self, field):
super(min, self).__init__(field)
class max(FieldOnlyReducer):
"""
Calculates the largest value in the given field within the group
"""
NAME = 'MAX'
def __init__(self, field):
super(max, self).__init__(field)
class avg(FieldOnlyReducer):
"""
Calculates the mean value in the given field within the group
"""
NAME = 'AVG'
def __init__(self, field):
super(avg, self).__init__(field)
class tolist(FieldOnlyReducer):
"""
Returns all the matched properties in a list
"""
NAME = 'TOLIST'
def __init__(self, field):
super(tolist, self).__init__(field)
class count_distinct(FieldOnlyReducer):
"""
Calculate the number of distinct values contained in all the results in
the group for the given field
"""
NAME = 'COUNT_DISTINCT'
def __init__(self, field):
super(count_distinct, self).__init__(field)
class count_distinctish(FieldOnlyReducer):
"""
Calculate the number of distinct values contained in all the results in the
group for the given field. This uses a faster algorithm than
`count_distinct` but is less accurate
"""
NAME = 'COUNT_DISTINCTISH'
class quantile(Reducer):
"""
Return the value for the nth percentile within the range of values for the
field within the group.
"""
NAME = 'QUANTILE'
def __init__(self, field, pct):
super(quantile, self).__init__(field, str(pct))
self._field = field
class stddev(FieldOnlyReducer):
"""
Return the standard deviation for the values within the group
"""
NAME = 'STDDEV'
def __init__(self, field):
super(stddev, self).__init__(field)
class first_value(Reducer):
"""
Selects the first value within the group according to sorting parameters
"""
NAME = 'FIRST_VALUE'
def __init__(self, field, *byfields):
"""
Selects the first value of the given field within the group.
### Parameter
- **field**: Source field used for the value
- **byfields**: How to sort the results. This can be either the
*class* of `aggregation.Asc` or `aggregation.Desc` in which
case the field `field` is also used as the sort input.
`byfields` can also be one or more *instances* of `Asc` or `Desc`
indicating the sort order for these fields
"""
fieldstrs = []
if len(byfields) == 1 and isinstance(byfields[0], type) and \
issubclass(byfields[0], SortDirection):
byfields = [byfields[0](field)]
for f in byfields:
fieldstrs += [f.field, f.DIRSTRING]
args = [field]
if fieldstrs:
args += ['BY'] + fieldstrs
super(first_value, self).__init__(*args)
self._field = field
class random_sample(Reducer):
"""
Returns a random sample of items from the dataset, from the given property
"""
NAME = 'RANDOM_SAMPLE'
def __init__(self, field, size):
"""
### Parameter
**field**: Field to sample from
**size**: Return this many items (can be less)
"""
args = [field, str(size)]
super(random_sample, self).__init__(*args)
self._field = field
|