I have an the following queryset in my Django app:
queryset = (
(
models.Activity.objects.filter(organization=organization_pk)
.annotate(
period=Trunc("timestamp", period, output_field=DateTimeField()),
)
.values(
"data_source__uuid", "event_type__uuid", "identity__uuid", "period"
)
.distinct()
.annotate(count=Count("id"))
)
.order_by("-period")
)
Which produuces queries similar to this when being used through my DRF views:
SELECT "integrations_datasource"."uuid",
"core_eventtype"."uuid",
"core_identity"."uuid",
Date_trunc('month', "core_activity"."timestamp" at time zone 'UTC') AS "period",
Count("core_activity"."id") AS "count"
FROM "core_activity"
INNER JOIN "integrations_datasource"
ON (
"core_activity"."data_source_id" = "integrations_datasource"."id")
LEFT OUTER JOIN "core_eventtype"
ON (
"core_activity"."event_type_id" = "core_eventtype"."id")
LEFT OUTER JOIN "core_identity"
ON (
"core_activity"."identity_id" = "core_identity"."id")
WHERE "core_activity"."organization_id" = 1
GROUP BY "integrations_datasource"."uuid",
"core_eventtype"."uuid",
"core_identity"."uuid",
4
ORDER BY 4 DESC limit 500 offset 500
I have found the following refactor of the above query to be much more performant in my environment:
WITH filtered_activity AS (
SELECT DISTINCT
data_source_id,
event_type_id,
identity_id,
DATE_TRUNC('month', timestamp AT TIME ZONE 'UTC') AS period,
COUNT(id) AS count
FROM
core_activity
WHERE
organization_id = 1
GROUP BY
data_source_id,
event_type_id,
identity_id,
period
)
SELECT
ds.uuid AS datasource_uuid,
et.uuid AS eventtype_uuid,
idn.uuid AS identity_uuid,
fa.period,
fa.count
FROM
filtered_activity fa
INNER JOIN
integrations_datasource ds ON fa.data_source_id = ds.id
LEFT JOIN
core_eventtype et ON fa.event_type_id = et.id
LEFT JOIN
core_identity idn ON fa.identity_id = idn.id
ORDER BY
fa.period DESC
LIMIT 500 OFFSET 500;
But I am struggling to figure out how to create ORM queryset and subquery that does this. Any ideas? I am using Postgres as the RDMS.