Hello, hope you are doing well.
on my CRM i want to measure the performance of my sales team so i created a Table using django-tables2 to display some KPIs on it. to make it simple i have this models.
class LeadQueryset(models.QuerySet):
def current_state(self):
latest_state = LeadState.objects.filter(lead=OuterRef('pk')).order_by('-created')
return self.annotate(
current_state_id=Subquery(latest_state.values('state')[:1]),
current_state_created=Subquery(latest_state.values('created')[:1]),
current_state_user_id=Subquery(latest_state.values('user__id')[:1])
)
class Lead(TimeStampModel):
name = models.CharField(max_length=50,blank=True, null=True)
phone = models.CharField(verbose_name="Tél 1",max_length=20)
email = models.CharField(max_length=50, verbose_name='E-mail',blank=True, null=True)
objects = LeadQueryset.as_manager()
LEAD_STATE_CHOICES = (
(1, _("not affected")),
(2, _("not treated")),
(3, _("not qualified")),
(4, _("qualified")),
(5, _("not interested")),
(6, _("already called")),
)
class LeadState(TimeStampModel):
lead = models.ForeignKey(Lead, related_name="states", on_delete=models.CASCADE)
state = models.PositiveSmallIntegerField(verbose_name=_("status"), choices=LEAD_STATE_CHOICES, default=1)
description = models.TextField(blank=True, null=True)
user = models.ForeignKey(User, verbose_name="Commercial",related_name="lead_states", on_delete=models.SET_NULL, blank=True, null=True)
class Meta:
ordering = ('created',)
indexes = [
# models.Index(fields=['lead', 'state', 'user', 'created']),
# models.Index(fields=['user', 'state', 'lead']),
models.Index(fields=['lead', '-created'], name='ls_lead_created_idx'),
# 2) Today‐range scans per user/state
models.Index(fields=['user','state','-created'], name='ls_user_state_created_idx'),
]
on the UserQueryset i want a method " include_counts() " where i want to annotate some KPIs ex: qualified_today , qualified_total, affected_today, affected_total
i tested multiple variations for this include_counts()
variation 1 :
class UserQueryset(models.QuerySet):
def include_counts(self):
Lead = apps.get_model('leads', 'Lead')
LeadState = apps.get_model('leads', 'LeadState')
now = timezone.now()
today = now.date()
start_time = (now - timedelta(days=1)).replace(hour=18, minute=0, second=0, microsecond=0)
# Define subqueries
affected_today_sq = LeadState.objects.filter(
user=OuterRef('pk'), state=2,
created__range=[start_time, now]
).values('user') \
.annotate(c=Count('lead', distinct=True)) \
.values('c')[:1]
qualified_today_sq = LeadState.objects.filter(
user=OuterRef('pk'), state=4,
created__date=today
).values('user') \
.annotate(c=Count('lead', distinct=True)) \
.values('c')[:1]
base_queryset = Lead.objects.current_state()
# total & current-state subqueries
affected_total_sq = base_queryset.filter(
current_state_user_id=OuterRef('pk')
).values('current_state_user_id') \
.annotate(c=Count('pk')) \
.values('c')[:1]
qualified_total_sq = base_queryset.filter(
current_state_user_id=OuterRef('pk'), current_state_id=4
).values('current_state_user_id') \
.annotate(c=Count('pk')) \
.values('c')[:1]
not_qualified_total_sq = base_queryset.filter(
current_state_user_id=OuterRef('pk'), current_state_id=3
).values('current_state_user_id') \
.annotate(c=Count('pk')) \
.values('c')[:1]
not_treated_total_sq = base_queryset.filter(
current_state_user_id=OuterRef('pk'), current_state_id=2
).values('current_state_user_id') \
.annotate(c=Count('pk')) \
.values('c')[:1]
# Annotate all six
return self.annotate(
affected_today=Subquery(affected_today_sq, output_field=IntegerField()),
affected_total=Subquery(affected_total_sq, output_field=IntegerField()),
qualified_today=Subquery(qualified_today_sq, output_field=IntegerField()),
qualified_total=Subquery(qualified_total_sq, output_field=IntegerField()),
not_qualified_total=Subquery(not_qualified_total_sq, output_field=IntegerField()),
not_treated_total=Subquery(not_treated_total_sq, output_field=IntegerField()),
)
for 100k lead it takes arround 20s and 17 Subplan on the QueryPlan because Lead.objects.current_state()
has 3 Subquery
Variation 2
class UserQueryset(models.QuerySet):
def include_counts(self):
LeadState = apps.get_model('leads', "LeadState")
today = timezone.now().date()
now = timezone.now()
yesterday = now - timedelta(days=1)
start_time = yesterday.replace(hour=18, minute=0, second=0, microsecond=0)
current_state = Subquery(
LeadState.objects
.filter(lead=OuterRef('lead'))
.values('lead')
.annotate(max_created=Max('created'))
.values('max_created')[:1]
)
affected_today = Count(
'lead_states__lead',
filter=Q(
lead_states__state=2,
lead_states__created__gte=start_time,
lead_states__created__lte=now
),
distinct=True
)
affected_total=Count(
'lead_states__lead',
filter=Q(lead_states__state=2),
distinct=True
)
qualified_today = (
LeadState.objects
.filter(
user=OuterRef('pk'),
state=4,
created__date=today,
created=current_state
)
.values('user')
.annotate(cnt=Count('lead', distinct=True))
.values('cnt')[:1] # we only need the count
)
qualified_total = (
LeadState.objects
.filter(
user=OuterRef('pk'),
state=4,
created=current_state
)
.values('user')
.annotate(cnt=Count('lead', distinct=True))
.values('cnt')[:1]
)
not_treated_total = (
LeadState.objects
.filter(
user=OuterRef('pk'),
state=2,
created=current_state
)
.values('user')
.annotate(cnt=Count('lead', distinct=True))
.values('cnt')[:1]
)
not_qualified_subquery = (
LeadState.objects
.filter(
user=OuterRef('pk'),
state=3,
created=current_state
)
.values('user')
.annotate(cnt=Count('lead', distinct=True))
.values('cnt')[:1]
)
return self.annotate(
affected_today=affected_today,
affected_total=affected_total,
qualified_today=Subquery(qualified_today),
qualified_total=Subquery(qualified_total),
not_qualified_total=Subquery(not_qualified_subquery),
not_treated_total=Subquery(not_treated_total)
)
on the variation 2 i’m getting the same results because current_state also use Subquery so to get for ex: qualified_today use 2 nested Subquery
what i’m a missing to speed up the calculation ?
i’m thinking on adding the current_state and the current_user on the Lead Model and update the Lead model on LeasState change but i’m pretty sure there is a cool way to speed up the include_counts query !
i would be very happy to discuss about this problem
thank you by advance .