File: aggregates.py

package info (click to toggle)
python-django 3%3A6.0~alpha1-1
  • links: PTS, VCS
  • area: main
  • in suites: experimental
  • size: 62,204 kB
  • sloc: python: 370,694; javascript: 19,376; xml: 211; makefile: 187; sh: 28
file content (399 lines) | stat: -rw-r--r-- 13,456 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
"""
Classes to represent the definitions of aggregate functions.
"""

from django.core.exceptions import FieldError, FullResultSet
from django.db import NotSupportedError
from django.db.models.expressions import (
    Case,
    ColPairs,
    Func,
    OrderByList,
    Star,
    Value,
    When,
)
from django.db.models.fields import IntegerField, TextField
from django.db.models.functions import Coalesce
from django.db.models.functions.mixins import (
    FixDurationInputMixin,
    NumericOutputFieldMixin,
)

__all__ = [
    "Aggregate",
    "AnyValue",
    "Avg",
    "Count",
    "Max",
    "Min",
    "StdDev",
    "StringAgg",
    "Sum",
    "Variance",
]


class AggregateFilter(Func):
    arity = 1
    template = " FILTER (WHERE %(expressions)s)"

    def as_sql(self, compiler, connection, **extra_context):
        if not connection.features.supports_aggregate_filter_clause:
            raise NotSupportedError(
                "Aggregate filter clauses are not supported on this database backend."
            )
        try:
            return super().as_sql(compiler, connection, **extra_context)
        except FullResultSet:
            return "", ()

    @property
    def condition(self):
        return self.source_expressions[0]

    def __str__(self):
        return self.arg_joiner.join(str(arg) for arg in self.source_expressions)


class AggregateOrderBy(OrderByList):
    template = " ORDER BY %(expressions)s"

    def as_sql(self, compiler, connection, **extra_context):
        if not connection.features.supports_aggregate_order_by_clause:
            raise NotSupportedError(
                "This database backend does not support specifying an order on "
                "aggregates."
            )

        return super().as_sql(compiler, connection, **extra_context)


class Aggregate(Func):
    template = "%(function)s(%(distinct)s%(expressions)s%(order_by)s)%(filter)s"
    contains_aggregate = True
    name = None
    window_compatible = True
    allow_distinct = False
    allow_order_by = False
    empty_result_set_value = None

    def __init__(
        self,
        *expressions,
        distinct=False,
        filter=None,
        default=None,
        order_by=None,
        **extra,
    ):
        if distinct and not self.allow_distinct:
            raise TypeError("%s does not allow distinct." % self.__class__.__name__)
        if order_by and not self.allow_order_by:
            raise TypeError("%s does not allow order_by." % self.__class__.__name__)
        if default is not None and self.empty_result_set_value is not None:
            raise TypeError(f"{self.__class__.__name__} does not allow default.")

        self.distinct = distinct
        self.filter = None if filter is None else AggregateFilter(filter)
        self.default = default
        self.order_by = AggregateOrderBy.from_param(
            f"{self.__class__.__name__}.order_by", order_by
        )
        super().__init__(*expressions, **extra)

    def get_source_fields(self):
        # Don't consider filter and order by expression as they have nothing
        # to do with the output field resolution.
        return [e._output_field_or_none for e in super().get_source_expressions()]

    def get_source_expressions(self):
        source_expressions = super().get_source_expressions()
        return [*source_expressions, self.filter, self.order_by]

    def set_source_expressions(self, exprs):
        *exprs, self.filter, self.order_by = exprs
        return super().set_source_expressions(exprs)

    def resolve_expression(
        self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False
    ):
        # Aggregates are not allowed in UPDATE queries, so ignore for_save
        c = super().resolve_expression(query, allow_joins, reuse, summarize)
        if summarize:
            # Summarized aggregates cannot refer to summarized aggregates.
            for ref in c.get_refs():
                if query.annotations[ref].is_summary:
                    raise FieldError(
                        f"Cannot compute {c.name}('{ref}'): '{ref}' is an aggregate"
                    )
        elif not self.is_summary:
            # Call Aggregate.get_source_expressions() to avoid
            # returning self.filter and including that in this loop.
            expressions = super(Aggregate, c).get_source_expressions()
            for index, expr in enumerate(expressions):
                if expr.contains_aggregate:
                    before_resolved = self.get_source_expressions()[index]
                    name = (
                        before_resolved.name
                        if hasattr(before_resolved, "name")
                        else repr(before_resolved)
                    )
                    raise FieldError(
                        "Cannot compute %s('%s'): '%s' is an aggregate"
                        % (c.name, name, name)
                    )
        if (default := c.default) is None:
            return c
        if hasattr(default, "resolve_expression"):
            default = default.resolve_expression(query, allow_joins, reuse, summarize)
            if default._output_field_or_none is None:
                default.output_field = c._output_field_or_none
        else:
            default = Value(default, c._output_field_or_none)
        c.default = None  # Reset the default argument before wrapping.
        coalesce = Coalesce(c, default, output_field=c._output_field_or_none)
        coalesce.is_summary = c.is_summary
        return coalesce

    @property
    def default_alias(self):
        expressions = [
            expr for expr in self.get_source_expressions() if expr is not None
        ]
        if len(expressions) == 1 and hasattr(expressions[0], "name"):
            return "%s__%s" % (expressions[0].name, self.name.lower())
        raise TypeError("Complex expressions require an alias")

    def get_group_by_cols(self):
        return []

    def as_sql(self, compiler, connection, **extra_context):
        if (
            self.distinct
            and not connection.features.supports_aggregate_distinct_multiple_argument
            and len(super().get_source_expressions()) > 1
        ):
            raise NotSupportedError(
                f"{self.name} does not support distinct with multiple expressions on "
                f"this database backend."
            )

        distinct_sql = "DISTINCT " if self.distinct else ""
        order_by_sql = ""
        order_by_params = []
        filter_sql = ""
        filter_params = []

        if (order_by := self.order_by) is not None:
            order_by_sql, order_by_params = compiler.compile(order_by)

        if self.filter is not None:
            try:
                filter_sql, filter_params = compiler.compile(self.filter)
            except NotSupportedError:
                # Fallback to a CASE statement on backends that don't support
                # the FILTER clause.
                copy = self.copy()
                copy.filter = None
                source_expressions = copy.get_source_expressions()
                condition = When(self.filter.condition, then=source_expressions[0])
                copy.set_source_expressions([Case(condition)] + source_expressions[1:])
                return copy.as_sql(compiler, connection, **extra_context)

        extra_context.update(
            distinct=distinct_sql,
            filter=filter_sql,
            order_by=order_by_sql,
        )
        sql, params = super().as_sql(compiler, connection, **extra_context)
        return sql, (*params, *order_by_params, *filter_params)

    def _get_repr_options(self):
        options = super()._get_repr_options()
        if self.distinct:
            options["distinct"] = self.distinct
        if self.filter:
            options["filter"] = self.filter
        if self.order_by:
            options["order_by"] = self.order_by
        return options


class AnyValue(Aggregate):
    function = "ANY_VALUE"
    name = "AnyValue"
    arity = 1
    window_compatible = False

    def as_sql(self, compiler, connection, **extra_context):
        if not connection.features.supports_any_value:
            raise NotSupportedError(
                "ANY_VALUE is not supported on this database backend."
            )
        return super().as_sql(compiler, connection, **extra_context)


class Avg(FixDurationInputMixin, NumericOutputFieldMixin, Aggregate):
    function = "AVG"
    name = "Avg"
    allow_distinct = True
    arity = 1


class Count(Aggregate):
    function = "COUNT"
    name = "Count"
    output_field = IntegerField()
    allow_distinct = True
    empty_result_set_value = 0
    arity = 1
    allows_composite_expressions = True

    def __init__(self, expression, filter=None, **extra):
        if expression == "*":
            expression = Star()
        if isinstance(expression, Star) and filter is not None:
            raise ValueError("Star cannot be used with filter. Please specify a field.")
        super().__init__(expression, filter=filter, **extra)

    def resolve_expression(self, *args, **kwargs):
        result = super().resolve_expression(*args, **kwargs)
        source_expressions = result.get_source_expressions()

        # In case of composite primary keys, count the first column.
        if isinstance(expr := source_expressions[0], ColPairs):
            if self.distinct:
                raise ValueError(
                    "COUNT(DISTINCT) doesn't support composite primary keys"
                )

            source_expressions[0] = expr.get_cols()[0]
            result.set_source_expressions(source_expressions)

        return result


class Max(Aggregate):
    function = "MAX"
    name = "Max"
    arity = 1


class Min(Aggregate):
    function = "MIN"
    name = "Min"
    arity = 1


class StdDev(NumericOutputFieldMixin, Aggregate):
    name = "StdDev"
    arity = 1

    def __init__(self, expression, sample=False, **extra):
        self.function = "STDDEV_SAMP" if sample else "STDDEV_POP"
        super().__init__(expression, **extra)

    def _get_repr_options(self):
        return {**super()._get_repr_options(), "sample": self.function == "STDDEV_SAMP"}


class StringAggDelimiter(Func):
    arity = 1
    template = "%(expressions)s"

    def __init__(self, value):
        self.value = value
        super().__init__(value)

    def as_mysql(self, compiler, connection, **extra_context):
        template = " SEPARATOR %(expressions)s"

        return self.as_sql(
            compiler,
            connection,
            template=template,
            **extra_context,
        )


class StringAgg(Aggregate):
    template = "%(function)s(%(distinct)s%(expressions)s%(order_by)s)%(filter)s"
    function = "STRING_AGG"
    name = "StringAgg"
    allow_distinct = True
    allow_order_by = True
    output_field = TextField()

    def __init__(self, expression, delimiter, **extra):
        self.delimiter = StringAggDelimiter(delimiter)
        super().__init__(expression, self.delimiter, **extra)

    def as_oracle(self, compiler, connection, **extra_context):
        if self.order_by:
            template = (
                "%(function)s(%(distinct)s%(expressions)s) WITHIN GROUP (%(order_by)s)"
                "%(filter)s"
            )
        else:
            template = "%(function)s(%(distinct)s%(expressions)s)%(filter)s"

        return self.as_sql(
            compiler,
            connection,
            function="LISTAGG",
            template=template,
            **extra_context,
        )

    def as_mysql(self, compiler, connection, **extra_context):
        extra_context["function"] = "GROUP_CONCAT"

        template = "%(function)s(%(distinct)s%(expressions)s%(order_by)s%(delimiter)s)"
        extra_context["template"] = template

        c = self.copy()
        # The creation of the delimiter SQL and the ordering of the parameters
        # must be handled explicitly, as MySQL puts the delimiter at the end of
        # the aggregate using the `SEPARATOR` declaration (rather than treating
        # as an expression like other database backends).
        delimiter_params = []
        if c.delimiter:
            delimiter_sql, delimiter_params = compiler.compile(c.delimiter)
            # Drop the delimiter from the source expressions.
            c.source_expressions = c.source_expressions[:-1]
            extra_context["delimiter"] = delimiter_sql

        sql, params = c.as_sql(compiler, connection, **extra_context)

        return sql, (*params, *delimiter_params)

    def as_sqlite(self, compiler, connection, **extra_context):
        if connection.get_database_version() < (3, 44):
            return self.as_sql(
                compiler,
                connection,
                function="GROUP_CONCAT",
                **extra_context,
            )

        return self.as_sql(compiler, connection, **extra_context)


class Sum(FixDurationInputMixin, Aggregate):
    function = "SUM"
    name = "Sum"
    allow_distinct = True
    arity = 1


class Variance(NumericOutputFieldMixin, Aggregate):
    name = "Variance"
    arity = 1

    def __init__(self, expression, sample=False, **extra):
        self.function = "VAR_SAMP" if sample else "VAR_POP"
        super().__init__(expression, **extra)

    def _get_repr_options(self):
        return {**super()._get_repr_options(), "sample": self.function == "VAR_SAMP"}