How to perform a JOIN on a paginated subquery in Django?

Suppose there are models A and B. where A has one to many relation with B.
How to do following in the ORM:

select * from A
inner join (
    select * from B limit 10
) on B.a_id = A.id

The best solution I could find so far is to use raw query. So, is it possible to do this in ORM without raw query? If not, then why? this is a very basic sql query. almost all other ORMs are able to do this.

Please not give solutions that generate queries like following:

select * from A
inner join B
on B.a_id = A.id
where B.id in (
    select id from B limit 10
)

because although it generate same output. it is extremely less efficient than the first query I mention.

Please show the actual models involved, and use them in your example. It’s a lot easier to talk about real code than trying to describe something in the abstract.

That’s an assertion that you should demonstrate using an explain analyze for the table definitions and quantity of data involved.

1 Like

Sure. Following are the models I am using:

class Project(models.Model):
    name = models.CharField()

    user = models.ForeignKey(User, on_delete=models.CASCADE)
class Brand(models.Model):
    name = models.CharField()
    domain = models.CharField()
    is_own = models.BooleanField()

    project = models.ForeignKey(Project, on_delete=models.CASCADE)

    class Meta:
        constraints = [
            UniqueConstraint(
                fields=("name", "project"), name="UQ_brand_name_project_id"
            ),
        ]
class PromptSet(models.Model):
    is_active = models.BooleanField(default=True)
    name = models.CharField()

    project = models.ForeignKey(Project, on_delete=models.CASCADE)
class Conversation(models.Model):
    prompt_set = models.ForeignKey(PromptSet, on_delete=models.CASCADE)
class Prompt(models.Model):
    name = models.CharField(null=True)
    content = models.TextField()
    level = models.IntegerField()

    conversation = models.ForeignKey(
        Conversation, on_delete=models.CASCADE, related_name="prompts"
    )

    class Meta:
        constraints = [
            models.UniqueConstraint(
                fields=("level", "conversation"),
                name="UQ_level_conversation",
            )
        ]
class Evaluation(models.Model):
    ai_model = models.CharField(choices=AIModelsEnum.choices)
    datetime = models.DateTimeField()
    ai_response = models.TextField()

    prompt = models.ForeignKey(Prompt, on_delete=models.CASCADE)
    brand = models.ManyToManyField(
        Brand,
        through="EvaluationCount",
    )

    class Meta:
        constraints = [
            models.UniqueConstraint(
                fields=("ai_model", "datetime", "prompt"),
                name="UQ_ai_model_datetime_prompt",
            )
        ]
class EvaluationCount(models.Model):
    name_count = models.IntegerField()
    source_count = models.IntegerField()

    brand = models.ForeignKey(Brand, on_delete=models.CASCADE, null=True)
    evaluation = models.ForeignKey(Evaluation, on_delete=models.CASCADE)

    class Meta:
        constraints = [
            models.UniqueConstraint(
                fields=(
                    "brand",
                    "evaluation",
                ),
                name="UQ_brand_evaluation",
            )
        ]

currently django generates following query:

SELECT
	"prompt_sets_conversation"."prompt_set_id",
	"prompts_evaluation"."ai_model",
	"prompt_sets_promptset"."name",
	SUM("prompts_evaluationcount"."name_count") AS "total_brand_mentions",
	SUM(
		CASE
			WHEN "projects_brand"."is_own" THEN "prompts_evaluationcount"."name_count"
			ELSE 0
		END
	) AS "own_brand_mentions",
	SUM("prompts_evaluationcount"."source_count") AS "total_source_mentions",
	SUM(
		CASE
			WHEN "projects_brand"."is_own" THEN "prompts_evaluationcount"."source_count"
			ELSE 0
		END
	) AS "own_source_mentions"
FROM
	"prompts_evaluation"
	INNER JOIN "prompts_prompt" ON (
		"prompts_evaluation"."prompt_id" = "prompts_prompt"."id"
	)
	INNER JOIN "prompt_sets_conversation" ON (
		"prompts_prompt"."conversation_id" = "prompt_sets_conversation"."id"
	)
	INNER JOIN "prompt_sets_promptset" ON (
		"prompt_sets_conversation"."prompt_set_id" = "prompt_sets_promptset"."id"
	)
	INNER JOIN "projects_project" ON (
		"prompt_sets_promptset"."project_id" = "projects_project"."id"
	)
	LEFT OUTER JOIN "prompts_evaluationcount" ON (
		"prompts_evaluation"."id" = "prompts_evaluationcount"."evaluation_id"
	)
	LEFT OUTER JOIN "projects_brand" ON (
		"prompts_evaluationcount"."brand_id" = "projects_brand"."id"
	)
WHERE
	(
		"prompts_evaluation"."datetime" = (
			SELECT
				MAX(U0."datetime") AS "latest_datetime"
			FROM
				"prompts_evaluation" U0
				INNER JOIN "prompts_prompt" U1 ON (U0."prompt_id" = U1."id")
				INNER JOIN "prompt_sets_conversation" U2 ON (U1."conversation_id" = U2."id")
			WHERE
				(
					U0."ai_model" = ("prompts_evaluation"."ai_model")
					AND U2."prompt_set_id" = ("prompt_sets_conversation"."prompt_set_id")
				)
			GROUP BY
				U2."prompt_set_id",
				U0."ai_model"
		)
		AND "prompt_sets_conversation"."prompt_set_id" IN (
			SELECT
				U0."id"
			FROM
				"prompt_sets_promptset" U0
				INNER JOIN "prompt_sets_conversation" U1 ON (U0."id" = U1."prompt_set_id")
				INNER JOIN "prompt_sets_promptset" U2 ON (U1."prompt_set_id" = U2."id")
			WHERE
				U2."project_id" = 17
			LIMIT
				2
		)
		AND "projects_project"."user_id" = 15
		AND "prompt_sets_promptset"."project_id" = 17
	)
GROUP BY
	"prompt_sets_conversation"."prompt_set_id",
	"prompts_evaluation"."ai_model",
	"prompt_sets_promptset"."name"

This query is taking about a minute to execute. you can see the result of explain analyze of it here.

But the same output can be generated by following SQL query that I have manually written:

SELECT
	"prompt_sets_conversation"."prompt_set_id",
	"prompts_evaluation"."ai_model",
	"prompt_sets_promptset"."name",
	SUM("prompts_evaluationcount"."name_count") AS "total_brand_mentions",
	SUM(
		CASE
			WHEN "projects_brand"."is_own" THEN "prompts_evaluationcount"."name_count"
			ELSE 0
		END
	) AS "own_brand_mentions",
	SUM("prompts_evaluationcount"."source_count") AS "total_source_mentions",
	SUM(
		CASE
			WHEN "projects_brand"."is_own" THEN "prompts_evaluationcount"."source_count"
			ELSE 0
		END
	) AS "own_source_mentions"
FROM
	PROMPTS_EVALUATION
	INNER JOIN "prompts_prompt" ON (
		"prompts_evaluation"."prompt_id" = "prompts_prompt"."id"
	)
	INNER JOIN "prompt_sets_conversation" ON (
		"prompts_prompt"."conversation_id" = "prompt_sets_conversation"."id"
	)
	INNER JOIN "prompt_sets_promptset" ON (
		"prompt_sets_conversation"."prompt_set_id" = "prompt_sets_promptset"."id"
	)
	INNER JOIN (
		SELECT
			PROMPT_SETS_PROMPTSET.ID,
			AI_MODEL,
			MAX(DATETIME)
		FROM
			PROMPTS_EVALUATION
			INNER JOIN "prompts_prompt" ON (
				"prompts_evaluation"."prompt_id" = "prompts_prompt"."id"
			)
			INNER JOIN "prompt_sets_conversation" ON (
				"prompts_prompt"."conversation_id" = "prompt_sets_conversation"."id"
			)
			INNER JOIN (
				SELECT
					PROMPT_SETS_PROMPTSET.*
				FROM
					PROMPT_SETS_PROMPTSET
					INNER JOIN "projects_project" ON (
						"prompt_sets_promptset"."project_id" = "projects_project"."id"
					)
				WHERE
					PROJECTS_PROJECT.ID = 17
				ORDER BY
					PROMPT_SETS_PROMPTSET.ID
				LIMIT
					2
			) "prompt_sets_promptset" ON (
				"prompt_sets_conversation"."prompt_set_id" = "prompt_sets_promptset"."id"
			)
		GROUP BY
			PROMPT_SETS_PROMPTSET.ID,
			AI_MODEL
	) "temp" ON (
		TEMP.ID = PROMPT_SETS_PROMPTSET.ID
		AND TEMP.MAX = PROMPTS_EVALUATION.DATETIME
		AND TEMP.AI_MODEL = PROMPTS_EVALUATION.AI_MODEL
	)
	INNER JOIN "projects_project" ON (
		"prompt_sets_promptset"."project_id" = "projects_project"."id"
	)
	LEFT OUTER JOIN "prompts_evaluationcount" ON (
		"prompts_evaluation"."id" = "prompts_evaluationcount"."evaluation_id"
	)
	LEFT OUTER JOIN "projects_brand" ON (
		"prompts_evaluationcount"."brand_id" = "projects_brand"."id"
	)
WHERE
	PROJECTS_PROJECT.ID = 17
	AND "projects_project"."user_id" = 15
GROUP BY
	"prompt_sets_conversation"."prompt_set_id",
	"prompts_evaluation"."ai_model",
	"prompt_sets_promptset"."name"

This query gets executed in milliseconds. you can see the output of explain analyze of it here.

Thanks for the information, just another bit more piece of information would help.
The code that you’re using to generate the query with the ORM.

Django code:

# Subquery to get the latest datetime for each prompt set and AI model combination
latest_datetime_subquery = (
    Evaluation.objects.filter(
        prompt__conversation__prompt_set=OuterRef(
            "prompt__conversation__prompt_set"
        ),
        ai_model=OuterRef("ai_model"),
    )
    .values("prompt__conversation__prompt_set", "ai_model")
    .annotate(latest_datetime=Max("datetime"))
    .values("latest_datetime")
)

# Get the paginated prompt sets
paged_prompt_sets = PromptSet.objects.filter(
    conversation__prompt_set__project_id=self.project_id,
).values("id")[start:end]

# Query to get evaluations with aggregated metrics
Evaluation.objects.filter(
        prompt__conversation__prompt_set__project_id=self.project_id,
        prompt__conversation__prompt_set__project__user=self.user,
        datetime=Subquery(latest_datetime_subquery),
        prompt__conversation__prompt_set__id__in=paged_prompt_sets,
    )
    .values("prompt__conversation__prompt_set", "ai_model")
    .annotate(
        # Calculate total brand mentions across all evaluations
        total_brand_mentions=Sum("evaluationcount__name_count"),
        # Calculate own brand mentions (where brand.is_own is True)
        own_brand_mentions=Sum(
            Case(
                When(
                    evaluationcount__brand__is_own=True,
                    then=F("evaluationcount__name_count"),
                ),
                default=0,
            )
        ),
        # Calculate total source mentions across all evaluations
        total_source_mentions=Sum("evaluationcount__source_count"),
        # Calculate own source mentions (where brand.is_own is True)
        own_source_mentions=Sum(
            Case(
                When(
                    evaluationcount__brand__is_own=True,
                    then=F("evaluationcount__source_count"),
                ),
                default=0,
            )
        ),
    )
    .values(
        "prompt__conversation__prompt_set",
        "ai_model",
        "total_brand_mentions",
        "own_brand_mentions",
        "total_source_mentions",
        "own_source_mentions",
        "prompt__conversation__prompt_set__name",
    )

@KenWhitesell @leandrodesouzadev