N+1 queries in SerializerMethodField

Original question: Edit - Stack Overflow

I have this view

    def get_queryset(self) -> QuerySet[Good]:
        ....
        qs = (
            Good.objects.values('brand_id', 'brand__name')
            .annotate(
                ....
            )
            .prefetch_related(Prefetch('history', StocksHistory.objects.filter(Q(**subquery_filter_args))))
            .order_by('-total_sales')
        )
        return qs

and serializer

class ExtendedBrandSerializer(serializers.ModelSerializer):
    ...
    history = serializers.SerializerMethodField()

    class Meta:
        model = Good
        fields = (
            ...
            'history',
        )

    def get_history(self, good: dict) -> dict:
      ....

      return StocksHistorySerializer(
        StocksHistory.objects.extra(select={'day': 'date( snap_at )'})
        .values('day')
        .filter(history_filter_query)
        .annotate(
            ....
        ),
        many=True,
      ).data

Relation: StocksHistory (*) -> (1) Good.

I have N+1 queries in SerializerMethodField. How can I fix it?

Perhaps there is a way to move annotate from serializer to view?

The bottom line is that I also need the history key in the response, which will contain a list of these child objects.

Hi,

It is feasible, but can you post your models and the complete queries plis?

@DanielGnzlzVll thnx

serializers

class StocksHistorySerializer(serializers.ModelSerializer):
    brand_id = serializers.IntegerField(source='good__brand_id')

    class Meta:
        model = StocksHistory
        fields = (
            'snap_at',
            'feedbacks',
            'rating',
            'price',
            'base_price',
            'price_with_discount',
            'sales',
            'revenue',
            'stock_balances',
        )


class ExtendedBrandSerializer(serializers.ModelSerializer):
    brand_name = serializers.CharField(source='brand__name')
    total_sales = serializers.IntegerField()
    avg_sales_per_day = serializers.FloatField()
    total_revenue = serializers.IntegerField()
    avg_revenue_per_day = serializers.FloatField()

    rating = serializers.FloatField()
    feedbacks = serializers.FloatField()

    base_price = serializers.FloatField(default=0)
    price = serializers.FloatField(default=0)

    discount = serializers.FloatField(default=0)
    price_with_discount = serializers.FloatField(default=0)

    max_price = serializers.FloatField()
    min_price = serializers.FloatField()
    avg_price = serializers.FloatField()

    history = serializers.SerializerMethodField()

    class Meta:
        model = Good
        fields = (
            'brand_id',
            'brand_name',
            'total_sales',
            'avg_sales_per_day',
            'total_revenue',
            'avg_revenue_per_day',
            'rating',
            'feedbacks',
            'base_price',
            'price',
            'discount',
            'price_with_discount',
            'max_price',
            'min_price',
            'avg_price',
            'history',
        )

    def get_history(self, good: dict) -> dict:
      history_filter_args = {'good__brand': data['brand_id']}
      if all([key in context for key in ['history_from_date', 'history_to_date']]):
        history_filter_args.update({'snap_at__range': (context['from_date'], context['to_date'])})
      history_filter_query = Q(**history_filter_args)

      return StocksHistorySerializer(
        StocksHistory.objects.extra(select={'day': 'date( snap_at )'})
        .values('day')
        .filter(history_filter_query)
        .annotate(
            sales=Sum('sales', filter=history_filter_query),
            feedbacks=Sum('feedbacks', filter=history_filter_query),
            rating=Avg('rating', filter=history_filter_query),
            base_price=Avg('base_price', filter=history_filter_query),
            price=Avg('price', filter=history_filter_query),
            price_with_discount=Avg('price_with_discount', filter=history_filter_query),
            revenue=Avg('revenue', filter=history_filter_query),
            stock_balances=Avg('stock_balances', filter=history_filter_query),
            snap_at=TruncDay('snap_at', filter=history_filter_query),
        ),
        many=True,
      ).data

view

class GoodsTopByBrandView(generics.ListAPIView):
    serializer_class = ExtendedBrandSerializer
    filter_backends = [DjangoFilterBackend, filters.SearchFilter]
    ordering = ['-total_sales']

    def get_serializer_context(self):
        context = super().get_serializer_context()
        if all([key in self.request.query_params for key in ['history_from_date', 'history_to_date']]):
            from_date, to_date = self._get_date_range()
            context['from_date'] = from_date
            context['to_date'] = to_date
        return context

    def get_queryset(self) -> QuerySet[Good]:
        subquery_filter_args = {}
        history_filter_args = {}
        if all([key in self.request.query_params for key in ['history_from_date', 'history_to_date']]):
            from_date, to_date = self._get_date_range()
            subquery_filter_args.update({'snap_at__range': (from_date, to_date)})
            history_filter_args.update({'history__snap_at__range': (from_date, to_date)})
        history_filter_query = Q(**history_filter_args)

        qs = (
            Good.objects.values('brand_id', 'brand__name')
            .annotate(
                total_sales=Sum('history__sales', filter=history_filter_query),
                avg_sales_per_day=Avg('history__sales', filter=history_filter_query),
                total_revenue=Sum('history__revenue', filter=history_filter_query),
                avg_revenue_per_day=Avg('history__revenue', filter=history_filter_query),
                rating=Avg('history__rating', filter=history_filter_query),
                feedbacks=Sum('history__feedbacks', filter=history_filter_query),
                base_price=Avg('history__base_price', filter=history_filter_query),
                price=Avg('history__price', filter=history_filter_query),
                price_with_discount=Avg('history__price_with_discount', filter=history_filter_query),
                discount=Avg('history__discount', filter=history_filter_query),
                max_price=Max('history__price', filter=history_filter_query),
                min_price=Min('history__price', filter=history_filter_query),
                avg_price=Avg('history__price', filter=history_filter_query),
            )
            .prefetch_related(Prefetch('history', StocksHistory.objects.filter(Q(**subquery_filter_args))))
            .order_by('-total_sales')
        )
        return qs

    def _get_date_range(self) -> tuple[datetime, datetime]:
        _date_format = '%Y-%m-%d'

        from_date = datetime.strptime(self.request.query_params['history_from_date'], _date_format).replace(tzinfo=utc)
        to_date = datetime.strptime(self.request.query_params['history_to_date'], _date_format).replace(tzinfo=utc)
        return from_date, to_date

models

class Brand(TimestampedModel):
    id = models.IntegerField(primary_key=True)
    name = models.CharField(max_length=1024, null=True)


class Supplier(TimestampedModel):
    id = models.IntegerField(primary_key=True)
    name = models.CharField(max_length=1024, null=True)


class Good(TimestampedModel):
    id = models.AutoField(primary_key=True)
    name = models.CharField(max_length=1024, null=True)

    subj_name = models.CharField(max_length=520, null=True)
    subj_root_name = models.CharField(max_length=520, null=True)

    supplier = models.ForeignKey(Supplier, on_delete=models.CASCADE, null=True)
    brand = models.ForeignKey(Brand, on_delete=models.CASCADE, null=True)

    first_seen_at = models.DateTimeField(null=True)
    last_checked_at = models.DateTimeField(null=True)

    def __str__(self):
        return f'[{self.id}] {self.name}'


class StocksHistory(TimestampedModel):
    id = models.AutoField(primary_key=True)

    good = models.ForeignKey(Good, related_name='history', on_delete=models.CASCADE)
    wh_data = models.JSONField(null=True, blank=True)

    stock_balances = models.IntegerField(default=0)
    sales = models.IntegerField(default=0)

    price = models.IntegerField(default=0)

    base_price = models.IntegerField(default=0)
    discount = models.IntegerField(default=0)
    price_with_discount = models.IntegerField(default=0)

    rating = models.IntegerField(null=True, blank=True)
    feedbacks = models.IntegerField(null=True, blank=True)

    revenue = models.IntegerField(default=0)

    snap_at = models.DateTimeField(null=True)

At the moment, I have done this with two queries and manually combine the querysets into the response I need, but I do not like this solution.

view

    def get_queryset(self) -> QuerySet[Good]:
        subquery_filter_args = {}
        history_filter_args = {}
        if all([key in self.request.query_params for key in ['history_from_date', 'history_to_date']]):
            from_date, to_date = get_history_date_range(request=self.request)
            subquery_filter_args.update({'snap_at__range': (from_date, to_date)})
            history_filter_args.update({'history__snap_at__range': (from_date, to_date)})
        history_filter_query = Q(**history_filter_args)

        qs = (
            Good.objects.values('brand_id', 'brand__name')
            .annotate(
                total_sales=Sum('history__sales', filter=history_filter_query),
                avg_sales_per_day=Avg('history__sales', filter=history_filter_query),
                total_revenue=Sum('history__revenue', filter=history_filter_query),
                avg_revenue_per_day=Avg('history__revenue', filter=history_filter_query),
                rating=Avg('history__rating', filter=history_filter_query),
                feedbacks=Sum('history__feedbacks', filter=history_filter_query),
                base_price=Avg('history__base_price', filter=history_filter_query),
                price=Avg('history__price', filter=history_filter_query),
                price_with_discount=Avg('history__price_with_discount', filter=history_filter_query),
                discount=Avg('history__discount', filter=history_filter_query),
                max_price=Max('history__price', filter=history_filter_query),
                min_price=Min('history__price', filter=history_filter_query),
                avg_price=Avg('history__price', filter=history_filter_query),
            )
            .prefetch_related(Prefetch('history', StocksHistory.objects.filter(Q(**subquery_filter_args))))
            .order_by('-total_sales')
        )
        return qs

    def list(self, request, *args, **kwargs):
        queryset = self.filter_queryset(self.get_queryset())
        page = self.paginate_queryset(queryset)
        if page is not None:
            serializer = self.get_serializer(page, many=True)
            data = serializer.data

            add_histories_to_top_response(request=self.request, data=data)

            response = self.get_paginated_response(data)
            return response

        serializer = self.get_serializer(queryset, many=True)
        data = serializer.data
        add_histories_to_top_response(request=self.request, data=data)
        return Response(data)

helper

def add_histories_to_top_response(request: Request, data: OrderedDict):
    history_filter_args = {'good__brand__in': tuple(item['brand_id'] for item in data)}
    if all([key in request.query_params for key in ['history_from_date', 'history_to_date']]):
        from_date, to_date = get_history_date_range()
        history_filter_args.update({'history__snap_at__range': (from_date, to_date)})
    history_filter_query = Q(**history_filter_args)

    histories = StocksHistorySerializer(
        StocksHistory.objects.extra(select={'day': 'date( snap_at )'})
        .values('day')
        .filter(history_filter_query)
        .annotate(
            sales=Sum('sales', filter=history_filter_query),
            feedbacks=Sum('feedbacks', filter=history_filter_query),
            rating=Avg('rating', filter=history_filter_query),
            base_price=Avg('base_price', filter=history_filter_query),
            price=Avg('price', filter=history_filter_query),
            price_with_discount=Avg('price_with_discount', filter=history_filter_query),
            revenue=Avg('revenue', filter=history_filter_query),
            stock_balances=Avg('stock_balances', filter=history_filter_query),
            snap_at=TruncDay('snap_at', filter=history_filter_query),
        )
        .values(
            'snap_at',
            'feedbacks',
            'rating',
            'price',
            'base_price',
            'price_with_discount',
            'sales',
            'revenue',
            'stock_balances',
            'good__brand_id',
        ),
        many=True,
    ).data

    set_histories(data, histories)


def set_histories(data: OrderedDict, histories: OrderedDict):
    for item in data:
        item['history'] = []
        for key, group in itertools.groupby(histories, lambda x: x['brand_id']):
            if item['brand_id'] == key:
                item['history'] = list(group)


def get_history_date_range(request: Request) -> tuple[datetime, datetime]:
    _date_format = '%Y-%m-%d'

    from_date = datetime.strptime(request.query_params['history_from_date'], _date_format).replace(tzinfo=utc)
    to_date = datetime.strptime(request.query_params['history_to_date'], _date_format).replace(tzinfo=utc)
    return from_date, to_date