In a personal finance project I’m working on, I am performing a couple of at least slightly complex queries, then performing various aggregations on those queries and finally doing some simple calculations between the aggregate values. The way I’m doing it feels messy and is making the request fairly slow but I’m not sure how to improve it or what Django patterns I’m missing in my implementation.
I have models of BudgetItem, Purchase, and Income that are linked via a Category model. I query BudgetItem to get all BudgetItems for a certain month, then annotate with subqueries to get the sum of Purchase objects and Income objects that match the Category of the BudgetItem. There are 2 querysets for this - one for spending and one for saving. Then I perform some aggregates on the querysets to get values that represent the total value of purchase across all categories for the month or the total amount budgeted across all categories, etc. Then I take those values and perform additional calculations.
Models
class MonthlyBudget(models.Model):
date = models.DateField()
user = models.ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.CASCADE,
related_name="monthly_budgets",
null=False,
)
class BudgetItem(models.Model):
user = models.ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.CASCADE,
related_name="budget_items",
null=False,
)
category = models.ForeignKey(
Category,
on_delete=models.CASCADE,
related_name="budget_items",
blank=True,
null=False,
)
amount = models.DecimalField(
max_digits=12, decimal_places=2, blank=True, null=True, default=0
)
monthly_budget = models.ForeignKey(
MonthlyBudget,
null=False,
on_delete=models.CASCADE,
related_name="budget_items",
default=None,
)
savings = models.BooleanField(blank=True, null=False)
class Category(models.Model):
name = models.CharField(db_index=True, max_length=250, blank=False)
user = models.ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.CASCADE,
related_name="categories",
null=False,
default=None,
)
class Purchase(models.Model):
item = models.CharField(max_length=250, blank=True)
date = models.DateField(db_index=True, null=True, default=None)
user = models.ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.CASCADE,
related_name="purchases",
null=False,
)
amount = models.DecimalField(max_digits=12, decimal_places=2, blank=True, null=True)
source = models.CharField(max_length=250, blank=True)
location = models.CharField(max_length=250, blank=True)
category = models.ForeignKey(
Category,
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name="purchases",
)
savings = models.BooleanField(null=False, default=False)
class Income(models.Model):
user = ForeignKey(
settings.AUTH_USER_MODEL,
on_delete=models.CASCADE,
related_name="incomes",
null=False,
)
amount = models.DecimalField(max_digits=12, decimal_places=2, blank=True, null=True)
date = models.DateField(blank=True, null=True)
source = models.CharField(max_length=250, blank=True)
payer = models.CharField(max_length=250, blank=True)
category = models.ForeignKey(
Category,
on_delete=models.SET_NULL,
null=True,
blank=True,
related_name="incomes",
)
notes = models.TextField(blank=True)
View
I’m using a CBV with the logic happening in get_context_data
def get_context_data(self, **kwargs):
kwargs = super().get_context_data(**kwargs)
category_purchases = Purchase.objects.filter(
category=OuterRef("category"),
date__year=self.object.date.year,
date__month=self.object.date.month,
user=self.request.user,
).values("category")
category_incomes = Income.objects.filter(
category=OuterRef("category"),
date__year=self.object.date.year,
date__month=self.object.date.month,
user=self.request.user,
).values("category")
# Perform queries with annotations so the sum total of the amount spent/saved via purchases and incomes is attached to each budget item
budgetitems = (
(
BudgetItem.objects.filter(user=self.request.user)
.filter(monthly_budget=self.object, savings=False)
.annotate(
spent=ExpressionWrapper(
Coalesce(
Subquery(
category_purchases.annotate(total=Sum("amount")).values(
"total"
)
),
Value(0),
),
output_field=DecimalField(),
),
income=ExpressionWrapper(
Coalesce(
Subquery(
category_incomes.annotate(total=Sum("amount")).values(
"total"
)
),
Value(0),
),
output_field=DecimalField(),
),
diff=F("amount") - F("spent") + F("income"),
)
).select_related()
).order_by("category__name")
savings_items = (
(
BudgetItem.objects.filter(
user=self.request.user,
monthly_budget=self.object,
savings=True,
).annotate(
saved=ExpressionWrapper(
Coalesce(
Sum(
"category__purchases__amount",
filter=Q(
category__purchases__date__month=self.object.date.month,
category__purchases__date__year=self.object.date.year,
category__purchases__user=self.request.user,
),
),
Value(0),
),
output_field=DecimalField(),
),
amount_total=Sum("amount", distinct=True),
diff=F("amount_total") - F("saved"),
)
)
.order_by("category__name")
.prefetch_related("category")
)
uncategorized_purchases_amount = Purchase.objects.filter(
user=self.request.user,
category__name=None,
date__month=self.object.date.month,
date__year=self.object.date.year,
).aggregate(
amount=ExpressionWrapper(
Coalesce(
Sum("amount"),
Value(0),
),
output_field=DecimalField(),
)
)
uncategorized_purchases = {
"amount": uncategorized_purchases_amount["amount"],
"remaining": (0 - uncategorized_purchases_amount["amount"]),
"budgeted": 0,
}
# Perform multiple aggregates using the querysets and use those aggregate values in calculations
total_spending_budgeted = budgetitems.aggregate(
amount=ExpressionWrapper(
Coalesce(Sum("amount"), Value(0)), output_field=DecimalField()
)
)
total_spending_spent = budgetitems.aggregate(
amount=ExpressionWrapper(
Coalesce(Sum("spent"), Value(0)), output_field=DecimalField()
)
)
total_spending_spent["amount"] += uncategorized_purchases_amount["amount"]
total_spending_remaining = budgetitems.aggregate(
amount=ExpressionWrapper(
Coalesce(Sum("diff"), Value(0)), output_field=DecimalField()
)
)
total_spending_remaining["amount"] -= uncategorized_purchases_amount["amount"]
total_saved = savings_items.aggregate(
amount=ExpressionWrapper(
Coalesce(Sum("saved"), Value(0)), output_field=DecimalField()
)
)
total_savings_budgeted = savings_items.aggregate(
amount=ExpressionWrapper(
Coalesce(Sum("amount_total"), Value(0)), output_field=DecimalField()
)
)
total_savings_remaining = savings_items.aggregate(
amount=ExpressionWrapper(
Coalesce(Sum("diff"), Value(0)), output_field=DecimalField()
)
)
total_budgeted = (
total_spending_budgeted["amount"] + total_savings_budgeted["amount"]
)
total_spent = total_spending_spent["amount"] + total_saved["amount"]
total_remaining = (
total_spending_remaining["amount"] + total_savings_remaining["amount"]
)
purchases = (
Purchase.objects.filter(
user=self.request.user,
date__year=self.object.date.year,
date__month=self.object.date.month,
)
.order_by("date", "source")
.prefetch_related("category")
)
incomes = (
Income.objects.filter(
user=self.request.user,
date__month=self.object.date.month,
date__year=self.object.date.year,
)
.order_by("date", "source")
.prefetch_related("category")
)
total_income = incomes.filter(category=None).aggregate(
amount=ExpressionWrapper(
Coalesce(Sum("amount"), Value(0)), output_field=DecimalField()
)
)
free_income = total_income["amount"] - total_spent
kwargs.update(
{
"budget_items": budgetitems,
"savings_items": savings_items,
"purchases": purchases,
"incomes": incomes,
"total_budgeted": total_budgeted,
"total_spent": total_spent,
"total_spending_budgeted": total_spending_budgeted,
"total_spending_spent": total_spending_spent,
"total_spending_remaining": total_spending_remaining,
"total_remaining": total_remaining,
"total_saved": total_saved,
"total_savings_budgeted": total_savings_budgeted,
"total_savings_remaining": total_savings_remaining,
"total_income": total_income,
"free_income": free_income,
"uncategorized_purchases": uncategorized_purchases,
"months": [
(calendar.month_name[month], month) for month in range(1, 13)
],
}
)
return kwargs
Per debug toolbar, I’m down to 20 queries, but the overall request time is long. I’m guessing this has to do with all the calculations being performed. Should I just expect this type of work to be slow or am I missing something obvious in the implementation? And, secondarily, if I want to reuse these queries and calculations in a different view, what would be the best way to go about that? I know I need to move the logic out of the view but I’m not sure where to put it.