Complexe counts with Subquery

miyou995 · May 10, 2025, 6:44pm

Hello, hope you are doing well.
on my CRM i want to measure the performance of my sales team so i created a Table using django-tables2 to display some KPIs on it. to make it simple i have this models.

class LeadQueryset(models.QuerySet):

    def current_state(self):
        latest_state = LeadState.objects.filter(lead=OuterRef('pk')).order_by('-created')
        return self.annotate(
            current_state_id=Subquery(latest_state.values('state')[:1]),
            current_state_created=Subquery(latest_state.values('created')[:1]),
            current_state_user_id=Subquery(latest_state.values('user__id')[:1])
        )


class Lead(TimeStampModel):
    name        = models.CharField(max_length=50,blank=True, null=True)
    phone       = models.CharField(verbose_name="Tél 1",max_length=20)
    email       = models.CharField(max_length=50, verbose_name='E-mail',blank=True, null=True)
    objects     = LeadQueryset.as_manager()



LEAD_STATE_CHOICES = ( 
        (1, _("not affected")),
        (2, _("not treated")),
        (3, _("not qualified")),
        (4, _("qualified")),
        (5, _("not interested")),
        (6, _("already called")),

    )

class LeadState(TimeStampModel):
    lead        = models.ForeignKey(Lead, related_name="states", on_delete=models.CASCADE)
    state       = models.PositiveSmallIntegerField(verbose_name=_("status"), choices=LEAD_STATE_CHOICES, default=1)
    description = models.TextField(blank=True, null=True)
    user        = models.ForeignKey(User, verbose_name="Commercial",related_name="lead_states", on_delete=models.SET_NULL,  blank=True, null=True)
    
    class Meta:
        ordering = ('created',)
        indexes = [
            # models.Index(fields=['lead', 'state', 'user', 'created']),
            # models.Index(fields=['user', 'state', 'lead']),
            models.Index(fields=['lead', '-created'], name='ls_lead_created_idx'),

            # 2) Today‐range scans per user/state
            models.Index(fields=['user','state','-created'], name='ls_user_state_created_idx'),

        ]

on the UserQueryset i want a method " include_counts() " where i want to annotate some KPIs ex: qualified_today , qualified_total, affected_today, affected_total

i tested multiple variations for this include_counts()
variation 1 :


class UserQueryset(models.QuerySet):

    def include_counts(self):

        Lead = apps.get_model('leads', 'Lead')
        LeadState = apps.get_model('leads', 'LeadState')
        now = timezone.now()
        today = now.date()
        start_time = (now - timedelta(days=1)).replace(hour=18, minute=0, second=0, microsecond=0)

        # Define subqueries
        affected_today_sq = LeadState.objects.filter(
            user=OuterRef('pk'), state=2,
            created__range=[start_time, now]
        ).values('user') \
        .annotate(c=Count('lead', distinct=True)) \
        .values('c')[:1]

        qualified_today_sq = LeadState.objects.filter(
            user=OuterRef('pk'), state=4,
            created__date=today
        ).values('user') \
        .annotate(c=Count('lead', distinct=True)) \
        .values('c')[:1]


        base_queryset = Lead.objects.current_state()
        # total & current-state subqueries
        affected_total_sq = base_queryset.filter(
            current_state_user_id=OuterRef('pk')
        ).values('current_state_user_id') \
        .annotate(c=Count('pk')) \
        .values('c')[:1]

        qualified_total_sq = base_queryset.filter(
            current_state_user_id=OuterRef('pk'), current_state_id=4
        ).values('current_state_user_id') \
        .annotate(c=Count('pk')) \
        .values('c')[:1]

        not_qualified_total_sq = base_queryset.filter(
            current_state_user_id=OuterRef('pk'), current_state_id=3
        ).values('current_state_user_id') \
        .annotate(c=Count('pk')) \
        .values('c')[:1]

        not_treated_total_sq = base_queryset.filter(
            current_state_user_id=OuterRef('pk'), current_state_id=2
        ).values('current_state_user_id') \
        .annotate(c=Count('pk')) \
        .values('c')[:1]

        # Annotate all six
        return self.annotate(
            affected_today=Subquery(affected_today_sq, output_field=IntegerField()),
            affected_total=Subquery(affected_total_sq, output_field=IntegerField()),
            qualified_today=Subquery(qualified_today_sq, output_field=IntegerField()),
            qualified_total=Subquery(qualified_total_sq, output_field=IntegerField()),
            not_qualified_total=Subquery(not_qualified_total_sq, output_field=IntegerField()),
            not_treated_total=Subquery(not_treated_total_sq, output_field=IntegerField()),
        )

for 100k lead it takes arround 20s and 17 Subplan on the QueryPlan because Lead.objects.current_state()
has 3 Subquery

Variation 2

class UserQueryset(models.QuerySet):
    def include_counts(self):

        LeadState = apps.get_model('leads', "LeadState")
        today = timezone.now().date()
        now = timezone.now()
        yesterday = now - timedelta(days=1)
        start_time = yesterday.replace(hour=18, minute=0, second=0, microsecond=0)

        current_state = Subquery(
            LeadState.objects
                .filter(lead=OuterRef('lead'))
                .values('lead')
                .annotate(max_created=Max('created'))
                .values('max_created')[:1]
        )

        affected_today = Count(
                'lead_states__lead',
                filter=Q(
                    lead_states__state=2,
                    lead_states__created__gte=start_time,
                    lead_states__created__lte=now
                ),
                distinct=True
            )

        affected_total=Count(
            'lead_states__lead',
            filter=Q(lead_states__state=2),
            distinct=True
        )


        qualified_today = (
            LeadState.objects
                .filter(
                    user=OuterRef('pk'),
                    state=4,
                    created__date=today,
                    created=current_state  
                )
                .values('user')  
                .annotate(cnt=Count('lead', distinct=True))
                .values('cnt')[:1]  # we only need the count
        )
        
        qualified_total = (
            LeadState.objects
                .filter(
                    user=OuterRef('pk'),
                    state=4,
                    created=current_state
                )
                .values('user')  
                .annotate(cnt=Count('lead', distinct=True))
                .values('cnt')[:1] 
        )

        not_treated_total = (
            LeadState.objects
                .filter(
                    user=OuterRef('pk'),
                    state=2,
                    created=current_state
                )
                .values('user')  
                .annotate(cnt=Count('lead', distinct=True))
                .values('cnt')[:1] 
        )

        not_qualified_subquery = (
            LeadState.objects
                .filter(
                    user=OuterRef('pk'),
                    state=3,
                    created=current_state
                )
                .values('user')  
                .annotate(cnt=Count('lead', distinct=True))
                .values('cnt')[:1] 
        )

        return self.annotate(
            affected_today=affected_today,
            affected_total=affected_total,
            qualified_today=Subquery(qualified_today),
            qualified_total=Subquery(qualified_total),
            not_qualified_total=Subquery(not_qualified_subquery),
            not_treated_total=Subquery(not_treated_total)
        )

on the variation 2 i’m getting the same results because current_state also use Subquery so to get for ex: qualified_today use 2 nested Subquery

what i’m a missing to speed up the calculation ?
i’m thinking on adding the current_state and the current_user on the Lead Model and update the Lead model on LeasState change but i’m pretty sure there is a cool way to speed up the include_counts query !

i would be very happy to discuss about this problem
thank you by advance .

Topic		Replies	Views
Subquery needs to return more than 1 row? Using the ORM	19	1377	October 2, 2024
Simple annotation of related count using OuterRef ORM	4	162	July 11, 2025
Query performance question Using Django	15	3298	September 15, 2021
combine queryset with array in views ? Using the ORM	13	3858	May 7, 2023
annotate with subquery in Django Using Django	13	9170	June 26, 2020

Complexe counts with Subquery

Related topics