In the documentation https://docs.djangoproject.com/en/4.2/topics/db/aggregation/#order-of-annotate-and-values-clauses, the .values()
call dictates the group by if you have an .annotation()
with aggregation function (e.g. sum).
Somehow if you proceed to add more field annotations without any aggregation function, this isn’t included in the group by in the generated SQL query. For some reason, if you have a field annotation with coalesce, it gets added in the group by.
Snippet:
from decimal import Decimal
from django.db import models
from django.db.models import OuterRef, Subquery, Sum
from django.db.models.functions import Coalesce
class A(models.Model):
name = models.CharField(max_length=100)
amount = models.DecimalField(max_digits=12, decimal_places=2)
class B(models.Model):
name = models.CharField(max_length=100)
foo = models.CharField(max_length=200)
class BSnapshot(models.Model):
version_name = models.TextField()
name = models.CharField(max_length=100)
foo = models.CharField(max_length=200)
def run():
B.objects.bulk_create([
B(name='Alice', foo='live_alice_foo'),
B(name='Bob', foo='live_bob_foo'),
B(name='Eve', foo='live_eve_foo'),
])
BSnapshot.objects.bulk_create([
BSnapshot(version_name='v1', name='Alice', foo='snap_v1_alice'),
BSnapshot(version_name='v2', name='Alice', foo='snap_v2_alice'),
BSnapshot(version_name='v1', name='Charlie', foo='snap_v1_charlie'),
])
A.objects.bulk_create([
A(name='Alice', amount=Decimal('10.50')),
A(name='Alice', amount=Decimal('5.25')),
A(name='Bob', amount=Decimal('7.00')),
A(name='Charlie', amount=Decimal('3.00')),
A(name='Dennis', amount=Decimal('4.00')), # no B or snapshot -> will be filtered out
])
version_name = 'v1'
live_foo = B.objects.filter(name=OuterRef('name'))
snapshot_foo = BSnapshot.objects.filter(name=OuterRef('name'), version_name=version_name)
foo = Coalesce(
Subquery(snapshot_foo.values('foo')[:1]),
Subquery(live_foo.values('foo')[:1])
)
version_1 = (
A.objects
.values('name')
.order_by('name')
.annotate(amount_sum=Sum('amount'))
.annotate(foo=foo)
.filter(foo__isnull=False)
)
version_2 = (
A.objects
.values('name')
.order_by('name')
.annotate(amount_sum=Sum('amount'))
.annotate(foo=Subquery(snapshot_foo.values('foo')[:1]))
.filter(foo__isnull=False)
)
print(version_1.query.group_by)
print(version_2.query.group_by)
list(version_1)
list(version_2)
The group by for both querysets is only Col(app_a, app.A.name),
While the generated SQL is
SELECT "app_a"."name" AS "name",
(CAST(SUM("app_a"."amount") AS NUMERIC)) AS "amount_sum",
COALESCE(
(SELECT U0."foo" AS "foo"
FROM "app_bsnapshot" U0
WHERE (U0."name" = ("app_a"."name")
AND U0."version_name" = 'v1')
LIMIT 1),
(SELECT U0."foo" AS "foo"
FROM "app_b" U0
WHERE U0."name" = ("app_a"."name")
LIMIT 1)) AS "foo"
FROM "app_a"
WHERE COALESCE(
(SELECT U0."foo" AS "foo"
FROM "app_bsnapshot" U0
WHERE (U0."name" = ("app_a"."name")
AND U0."version_name" = 'v1')
LIMIT 1),
(SELECT U0."foo" AS "foo"
FROM "app_b" U0
WHERE U0."name" = ("app_a"."name")
LIMIT 1)) IS NOT NULL
GROUP BY 1,
3
ORDER BY 1 ASC
SELECT "app_a"."name" AS "name",
(CAST(SUM("app_a"."amount") AS NUMERIC)) AS "amount_sum",
(SELECT U0."foo" AS "foo"
FROM "app_bsnapshot" U0
WHERE (U0."name" = ("app_a"."name")
AND U0."version_name" = 'v1')
LIMIT 1) AS "foo"
FROM "app_a"
WHERE
(SELECT U0."foo" AS "foo"
FROM "app_bsnapshot" U0
WHERE (U0."name" = ("app_a"."name")
AND U0."version_name" = 'v1')
LIMIT 1) IS NOT NULL
GROUP BY 1
ORDER BY 1 ASC
Tested the code on both 4.2 and 5.2. I was redirected here for clarification if this is a bug after filing a bug ticket.